{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.41394440726610415, "eval_steps": 500, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.89907345443507e-05, "grad_norm": 0.00011222957982681692, "learning_rate": 9.999379083389101e-06, "loss": 0.0051, "step": 10 }, { "epoch": 0.0001379814690887014, "grad_norm": 0.17417871952056885, "learning_rate": 9.998827157512746e-06, "loss": 0.3817, "step": 20 }, { "epoch": 0.00020697220363305208, "grad_norm": 8.138256072998047, "learning_rate": 9.998137250167303e-06, "loss": 0.0129, "step": 30 }, { "epoch": 0.0002759629381774028, "grad_norm": 0.0460987351834774, "learning_rate": 9.997516333556404e-06, "loss": 0.0364, "step": 40 }, { "epoch": 0.00034495367272175347, "grad_norm": 3.265582323074341, "learning_rate": 9.99682642621096e-06, "loss": 0.0011, "step": 50 }, { "epoch": 0.00041394440726610416, "grad_norm": 0.06020026281476021, "learning_rate": 9.996136518865516e-06, "loss": 0.0328, "step": 60 }, { "epoch": 0.00048293514181045486, "grad_norm": 0.0, "learning_rate": 9.995446611520073e-06, "loss": 0.0025, "step": 70 }, { "epoch": 0.0005519258763548055, "grad_norm": 0.0009600712219253182, "learning_rate": 9.99475670417463e-06, "loss": 0.0001, "step": 80 }, { "epoch": 0.0006209166108991562, "grad_norm": 1.0819045215271217e-08, "learning_rate": 9.994066796829188e-06, "loss": 0.0031, "step": 90 }, { "epoch": 0.0006899073454435069, "grad_norm": 0.00022638787049800158, "learning_rate": 9.993376889483744e-06, "loss": 0.0046, "step": 100 }, { "epoch": 0.0007588980799878576, "grad_norm": 1.1406569910832332e-08, "learning_rate": 9.9926869821383e-06, "loss": 0.005, "step": 110 }, { "epoch": 0.0008278888145322083, "grad_norm": 0.0012971981195732951, "learning_rate": 9.991997074792857e-06, "loss": 0.0018, "step": 120 }, { "epoch": 0.000896879549076559, "grad_norm": 1.264641391607313e-09, "learning_rate": 9.991307167447412e-06, "loss": 0.0, "step": 130 }, { "epoch": 0.0009658702836209097, "grad_norm": 4.786876161233522e-05, "learning_rate": 9.990617260101968e-06, "loss": 0.0001, "step": 140 }, { "epoch": 0.0010348610181652605, "grad_norm": 0.001031873282045126, "learning_rate": 9.989927352756525e-06, "loss": 0.0066, "step": 150 }, { "epoch": 0.001103851752709611, "grad_norm": 0.011338403448462486, "learning_rate": 9.989237445411081e-06, "loss": 0.0002, "step": 160 }, { "epoch": 0.001172842487253962, "grad_norm": 2.0826701474518927e-10, "learning_rate": 9.988547538065638e-06, "loss": 0.0013, "step": 170 }, { "epoch": 0.0012418332217983125, "grad_norm": 0.00012302969116717577, "learning_rate": 9.987857630720194e-06, "loss": 0.0, "step": 180 }, { "epoch": 0.0013108239563426633, "grad_norm": 9.173053741455078, "learning_rate": 9.987167723374751e-06, "loss": 0.0025, "step": 190 }, { "epoch": 0.0013798146908870139, "grad_norm": 4.416388037498109e-05, "learning_rate": 9.986477816029309e-06, "loss": 0.0, "step": 200 }, { "epoch": 0.0014488054254313647, "grad_norm": 0.00019524099479895085, "learning_rate": 9.985787908683866e-06, "loss": 0.0001, "step": 210 }, { "epoch": 0.0015177961599757153, "grad_norm": 4.591324806213379, "learning_rate": 9.985098001338422e-06, "loss": 0.001, "step": 220 }, { "epoch": 0.001586786894520066, "grad_norm": 0.0011398019269108772, "learning_rate": 9.984408093992979e-06, "loss": 0.0001, "step": 230 }, { "epoch": 0.0016557776290644166, "grad_norm": 9.500353392866145e-09, "learning_rate": 9.983718186647533e-06, "loss": 0.001, "step": 240 }, { "epoch": 0.0017247683636087674, "grad_norm": 3.374801593736265e-08, "learning_rate": 9.98302827930209e-06, "loss": 0.008, "step": 250 }, { "epoch": 0.001793759098153118, "grad_norm": 1.4565864603355294e-06, "learning_rate": 9.98240736269119e-06, "loss": 0.0563, "step": 260 }, { "epoch": 0.0018627498326974688, "grad_norm": 0.005463605280965567, "learning_rate": 9.981717455345747e-06, "loss": 0.0018, "step": 270 }, { "epoch": 0.0019317405672418194, "grad_norm": 0.0011831159936264157, "learning_rate": 9.981027548000303e-06, "loss": 0.0313, "step": 280 }, { "epoch": 0.0020007313017861702, "grad_norm": 1.5842642824281938e-06, "learning_rate": 9.980337640654862e-06, "loss": 0.0139, "step": 290 }, { "epoch": 0.002069722036330521, "grad_norm": 0.00014429337170440704, "learning_rate": 9.979647733309418e-06, "loss": 0.0001, "step": 300 }, { "epoch": 0.0021387127708748714, "grad_norm": 4.3330678636266384e-06, "learning_rate": 9.978957825963975e-06, "loss": 0.0002, "step": 310 }, { "epoch": 0.002207703505419222, "grad_norm": 3.0802716537436936e-06, "learning_rate": 9.978267918618531e-06, "loss": 0.0011, "step": 320 }, { "epoch": 0.002276694239963573, "grad_norm": 1.4535776376724243, "learning_rate": 9.977647002007632e-06, "loss": 0.264, "step": 330 }, { "epoch": 0.002345684974507924, "grad_norm": 1.2289291589695495e-05, "learning_rate": 9.976957094662187e-06, "loss": 0.0001, "step": 340 }, { "epoch": 0.002414675709052274, "grad_norm": 0.0065452903509140015, "learning_rate": 9.976267187316743e-06, "loss": 0.0, "step": 350 }, { "epoch": 0.002483666443596625, "grad_norm": 0.28994324803352356, "learning_rate": 9.9755772799713e-06, "loss": 0.0004, "step": 360 }, { "epoch": 0.0025526571781409758, "grad_norm": 39.13631057739258, "learning_rate": 9.974887372625858e-06, "loss": 0.3606, "step": 370 }, { "epoch": 0.0026216479126853266, "grad_norm": 0.004843785427510738, "learning_rate": 9.974197465280414e-06, "loss": 0.0, "step": 380 }, { "epoch": 0.002690638647229677, "grad_norm": 0.0035524864215403795, "learning_rate": 9.97350755793497e-06, "loss": 0.0, "step": 390 }, { "epoch": 0.0027596293817740277, "grad_norm": 0.08471328765153885, "learning_rate": 9.972817650589527e-06, "loss": 0.0001, "step": 400 }, { "epoch": 0.0028286201163183785, "grad_norm": 2.6892220006402567e-08, "learning_rate": 9.972127743244084e-06, "loss": 0.0002, "step": 410 }, { "epoch": 0.0028976108508627293, "grad_norm": 0.3459404706954956, "learning_rate": 9.97143783589864e-06, "loss": 0.0015, "step": 420 }, { "epoch": 0.0029666015854070797, "grad_norm": 0.0008964891894720495, "learning_rate": 9.970747928553197e-06, "loss": 0.0004, "step": 430 }, { "epoch": 0.0030355923199514305, "grad_norm": 9.523880180495325e-06, "learning_rate": 9.970058021207753e-06, "loss": 0.0, "step": 440 }, { "epoch": 0.0031045830544957813, "grad_norm": 0.0015833821380510926, "learning_rate": 9.96936811386231e-06, "loss": 0.0, "step": 450 }, { "epoch": 0.003173573789040132, "grad_norm": 6.80257244312088e-06, "learning_rate": 9.968678206516864e-06, "loss": 0.0061, "step": 460 }, { "epoch": 0.0032425645235844825, "grad_norm": 5.697336491472527e-10, "learning_rate": 9.967988299171421e-06, "loss": 0.003, "step": 470 }, { "epoch": 0.0033115552581288333, "grad_norm": 0.0, "learning_rate": 9.967298391825979e-06, "loss": 0.0, "step": 480 }, { "epoch": 0.003380545992673184, "grad_norm": 9.849828508379233e-09, "learning_rate": 9.966608484480536e-06, "loss": 0.0073, "step": 490 }, { "epoch": 0.003449536727217535, "grad_norm": 1.8166218751503038e-07, "learning_rate": 9.965918577135092e-06, "loss": 0.0004, "step": 500 }, { "epoch": 0.0035185274617618853, "grad_norm": 0.0043383194133639336, "learning_rate": 9.965228669789649e-06, "loss": 0.0, "step": 510 }, { "epoch": 0.003587518196306236, "grad_norm": 0.0009673055028542876, "learning_rate": 9.964538762444205e-06, "loss": 0.0026, "step": 520 }, { "epoch": 0.003656508930850587, "grad_norm": 0.0, "learning_rate": 9.963848855098762e-06, "loss": 0.0122, "step": 530 }, { "epoch": 0.0037254996653949377, "grad_norm": 8.976323897513794e-07, "learning_rate": 9.963158947753318e-06, "loss": 0.0002, "step": 540 }, { "epoch": 0.003794490399939288, "grad_norm": 3.026991367340088, "learning_rate": 9.962469040407875e-06, "loss": 0.0009, "step": 550 }, { "epoch": 0.003863481134483639, "grad_norm": 1.153221296235074e-09, "learning_rate": 9.961779133062431e-06, "loss": 0.0003, "step": 560 }, { "epoch": 0.003932471869027989, "grad_norm": 9.897039853967726e-05, "learning_rate": 9.961089225716986e-06, "loss": 0.2975, "step": 570 }, { "epoch": 0.0040014626035723404, "grad_norm": 2.8191266210342292e-08, "learning_rate": 9.960399318371542e-06, "loss": 0.0013, "step": 580 }, { "epoch": 0.004070453338116691, "grad_norm": 0.002458341885358095, "learning_rate": 9.9597094110261e-06, "loss": 0.0018, "step": 590 }, { "epoch": 0.004139444072661042, "grad_norm": 0.01742110401391983, "learning_rate": 9.959019503680657e-06, "loss": 0.0, "step": 600 }, { "epoch": 0.004208434807205392, "grad_norm": 2.6468876512808492e-06, "learning_rate": 9.958329596335214e-06, "loss": 0.0106, "step": 610 }, { "epoch": 0.004277425541749743, "grad_norm": 1.464867409595172e-07, "learning_rate": 9.95763968898977e-06, "loss": 0.0001, "step": 620 }, { "epoch": 0.004346416276294094, "grad_norm": 0.00010619535896694288, "learning_rate": 9.956949781644326e-06, "loss": 0.0, "step": 630 }, { "epoch": 0.004415407010838444, "grad_norm": 1.2944901754963212e-05, "learning_rate": 9.956259874298883e-06, "loss": 0.0, "step": 640 }, { "epoch": 0.004484397745382795, "grad_norm": 6.631881115026772e-05, "learning_rate": 9.95556996695344e-06, "loss": 0.0001, "step": 650 }, { "epoch": 0.004553388479927146, "grad_norm": 0.0, "learning_rate": 9.954880059607996e-06, "loss": 0.0012, "step": 660 }, { "epoch": 0.004622379214471496, "grad_norm": 9.929733641911298e-05, "learning_rate": 9.954190152262552e-06, "loss": 0.0011, "step": 670 }, { "epoch": 0.004691369949015848, "grad_norm": 0.2635310888290405, "learning_rate": 9.953500244917107e-06, "loss": 0.0, "step": 680 }, { "epoch": 0.004760360683560198, "grad_norm": 3.810168323070684e-07, "learning_rate": 9.952810337571664e-06, "loss": 0.0007, "step": 690 }, { "epoch": 0.004829351418104548, "grad_norm": 0.0, "learning_rate": 9.952120430226222e-06, "loss": 0.0, "step": 700 }, { "epoch": 0.0048983421526489, "grad_norm": 5.18747766964367e-10, "learning_rate": 9.951430522880778e-06, "loss": 0.0019, "step": 710 }, { "epoch": 0.00496733288719325, "grad_norm": 1.8703746107462393e-08, "learning_rate": 9.950740615535335e-06, "loss": 0.0, "step": 720 }, { "epoch": 0.0050363236217376, "grad_norm": 0.0, "learning_rate": 9.950050708189891e-06, "loss": 0.0, "step": 730 }, { "epoch": 0.0051053143562819515, "grad_norm": 0.0, "learning_rate": 9.949360800844448e-06, "loss": 0.5871, "step": 740 }, { "epoch": 0.005174305090826302, "grad_norm": 0.0001783966872608289, "learning_rate": 9.948670893499004e-06, "loss": 0.0, "step": 750 }, { "epoch": 0.005243295825370653, "grad_norm": 114.77677917480469, "learning_rate": 9.947980986153561e-06, "loss": 0.0385, "step": 760 }, { "epoch": 0.0053122865599150035, "grad_norm": 7.162038855312858e-07, "learning_rate": 9.947291078808117e-06, "loss": 0.0001, "step": 770 }, { "epoch": 0.005381277294459354, "grad_norm": 0.017689310014247894, "learning_rate": 9.946601171462674e-06, "loss": 0.0, "step": 780 }, { "epoch": 0.005450268029003705, "grad_norm": 5.28181831604968e-10, "learning_rate": 9.94591126411723e-06, "loss": 0.0, "step": 790 }, { "epoch": 0.0055192587635480555, "grad_norm": 3.7083958659422933e-07, "learning_rate": 9.945221356771785e-06, "loss": 0.0, "step": 800 }, { "epoch": 0.005588249498092406, "grad_norm": 1.8758004216579138e-07, "learning_rate": 9.944531449426343e-06, "loss": 0.0003, "step": 810 }, { "epoch": 0.005657240232636757, "grad_norm": 6.176499312005035e-10, "learning_rate": 9.9438415420809e-06, "loss": 0.0, "step": 820 }, { "epoch": 0.0057262309671811075, "grad_norm": 7.015024311840534e-05, "learning_rate": 9.943151634735456e-06, "loss": 0.0, "step": 830 }, { "epoch": 0.005795221701725459, "grad_norm": 2.8280794620513916, "learning_rate": 9.942461727390013e-06, "loss": 0.0005, "step": 840 }, { "epoch": 0.005864212436269809, "grad_norm": 0.0, "learning_rate": 9.94177182004457e-06, "loss": 0.0001, "step": 850 }, { "epoch": 0.005933203170814159, "grad_norm": 0.0, "learning_rate": 9.941081912699126e-06, "loss": 0.0, "step": 860 }, { "epoch": 0.006002193905358511, "grad_norm": 0.00056278525153175, "learning_rate": 9.940392005353682e-06, "loss": 0.0, "step": 870 }, { "epoch": 0.006071184639902861, "grad_norm": 0.0, "learning_rate": 9.939702098008239e-06, "loss": 0.6328, "step": 880 }, { "epoch": 0.006140175374447211, "grad_norm": 0.0, "learning_rate": 9.939012190662795e-06, "loss": 0.0, "step": 890 }, { "epoch": 0.006209166108991563, "grad_norm": 9.825647794059478e-06, "learning_rate": 9.938322283317352e-06, "loss": 0.0, "step": 900 }, { "epoch": 0.006278156843535913, "grad_norm": 0.2298017144203186, "learning_rate": 9.937632375971907e-06, "loss": 0.0029, "step": 910 }, { "epoch": 0.006347147578080264, "grad_norm": 9.138823952525854e-05, "learning_rate": 9.936942468626465e-06, "loss": 0.0, "step": 920 }, { "epoch": 0.006416138312624615, "grad_norm": 0.0308407973498106, "learning_rate": 9.936252561281021e-06, "loss": 0.0, "step": 930 }, { "epoch": 0.006485129047168965, "grad_norm": 4.297233681427315e-05, "learning_rate": 9.935562653935578e-06, "loss": 0.0011, "step": 940 }, { "epoch": 0.006554119781713316, "grad_norm": 2.0112407207489014, "learning_rate": 9.934872746590134e-06, "loss": 0.0334, "step": 950 }, { "epoch": 0.006623110516257667, "grad_norm": 0.31853970885276794, "learning_rate": 9.93418283924469e-06, "loss": 0.0001, "step": 960 }, { "epoch": 0.006692101250802017, "grad_norm": 6.041682154567241e-10, "learning_rate": 9.933492931899247e-06, "loss": 0.0745, "step": 970 }, { "epoch": 0.006761091985346368, "grad_norm": 1.3671727083774954e-09, "learning_rate": 9.932803024553804e-06, "loss": 0.0029, "step": 980 }, { "epoch": 0.0068300827198907186, "grad_norm": 0.00017256668070331216, "learning_rate": 9.93211311720836e-06, "loss": 0.0, "step": 990 }, { "epoch": 0.00689907345443507, "grad_norm": 0.11617961525917053, "learning_rate": 9.931423209862917e-06, "loss": 0.0004, "step": 1000 }, { "epoch": 0.00696806418897942, "grad_norm": 0.0, "learning_rate": 9.930733302517473e-06, "loss": 0.0002, "step": 1010 }, { "epoch": 0.0070370549235237705, "grad_norm": 0.0038849906995892525, "learning_rate": 9.930043395172028e-06, "loss": 0.3742, "step": 1020 }, { "epoch": 0.007106045658068122, "grad_norm": 5.140336489795061e-10, "learning_rate": 9.929353487826584e-06, "loss": 0.0, "step": 1030 }, { "epoch": 0.007175036392612472, "grad_norm": 0.5124749541282654, "learning_rate": 9.928663580481143e-06, "loss": 0.0001, "step": 1040 }, { "epoch": 0.0072440271271568225, "grad_norm": 0.0, "learning_rate": 9.927973673135699e-06, "loss": 0.0, "step": 1050 }, { "epoch": 0.007313017861701174, "grad_norm": 0.00016734229575376958, "learning_rate": 9.927283765790256e-06, "loss": 0.0, "step": 1060 }, { "epoch": 0.007382008596245524, "grad_norm": 9.173207899948466e-07, "learning_rate": 9.926593858444812e-06, "loss": 0.002, "step": 1070 }, { "epoch": 0.007450999330789875, "grad_norm": 0.0, "learning_rate": 9.925903951099369e-06, "loss": 0.0132, "step": 1080 }, { "epoch": 0.007519990065334226, "grad_norm": 3.566971145119169e-07, "learning_rate": 9.925214043753925e-06, "loss": 0.0, "step": 1090 }, { "epoch": 0.007588980799878576, "grad_norm": 0.0, "learning_rate": 9.924524136408482e-06, "loss": 0.0057, "step": 1100 }, { "epoch": 0.007657971534422927, "grad_norm": 0.0016905278898775578, "learning_rate": 9.923834229063038e-06, "loss": 0.0, "step": 1110 }, { "epoch": 0.007726962268967278, "grad_norm": 1.2363362312316895, "learning_rate": 9.923144321717595e-06, "loss": 0.0064, "step": 1120 }, { "epoch": 0.007795953003511628, "grad_norm": 1.342353894706605e-09, "learning_rate": 9.922454414372151e-06, "loss": 0.1642, "step": 1130 }, { "epoch": 0.007864943738055978, "grad_norm": 1.215474185300991e-06, "learning_rate": 9.921764507026706e-06, "loss": 0.0001, "step": 1140 }, { "epoch": 0.00793393447260033, "grad_norm": 0.42875736951828003, "learning_rate": 9.921074599681264e-06, "loss": 0.0001, "step": 1150 }, { "epoch": 0.008002925207144681, "grad_norm": 1.3860927765563247e-06, "learning_rate": 9.92038469233582e-06, "loss": 0.002, "step": 1160 }, { "epoch": 0.008071915941689031, "grad_norm": 1.143737904385489e-07, "learning_rate": 9.919694784990377e-06, "loss": 0.0, "step": 1170 }, { "epoch": 0.008140906676233382, "grad_norm": 0.007383116986602545, "learning_rate": 9.919004877644933e-06, "loss": 0.0002, "step": 1180 }, { "epoch": 0.008209897410777732, "grad_norm": 0.0, "learning_rate": 9.91831497029949e-06, "loss": 0.0286, "step": 1190 }, { "epoch": 0.008278888145322084, "grad_norm": 2.5686742901598336e-06, "learning_rate": 9.917625062954046e-06, "loss": 0.0, "step": 1200 }, { "epoch": 0.008347878879866434, "grad_norm": 5.234279676358256e-10, "learning_rate": 9.916935155608603e-06, "loss": 0.0, "step": 1210 }, { "epoch": 0.008416869614410785, "grad_norm": 5.294443994330322e-08, "learning_rate": 9.91624524826316e-06, "loss": 0.0, "step": 1220 }, { "epoch": 0.008485860348955135, "grad_norm": 2.6260105201458828e-08, "learning_rate": 9.915555340917716e-06, "loss": 0.0002, "step": 1230 }, { "epoch": 0.008554851083499486, "grad_norm": 0.0, "learning_rate": 9.914865433572272e-06, "loss": 0.0486, "step": 1240 }, { "epoch": 0.008623841818043836, "grad_norm": 5.224699998507276e-05, "learning_rate": 9.914175526226827e-06, "loss": 0.0, "step": 1250 }, { "epoch": 0.008692832552588188, "grad_norm": 0.0, "learning_rate": 9.913485618881385e-06, "loss": 0.0006, "step": 1260 }, { "epoch": 0.008761823287132538, "grad_norm": 0.2586248219013214, "learning_rate": 9.912795711535942e-06, "loss": 0.0, "step": 1270 }, { "epoch": 0.008830814021676889, "grad_norm": 0.0006744968122802675, "learning_rate": 9.912105804190498e-06, "loss": 0.0861, "step": 1280 }, { "epoch": 0.00889980475622124, "grad_norm": 0.0, "learning_rate": 9.911415896845055e-06, "loss": 0.0004, "step": 1290 }, { "epoch": 0.00896879549076559, "grad_norm": 0.0036323866806924343, "learning_rate": 9.910725989499611e-06, "loss": 0.0, "step": 1300 }, { "epoch": 0.009037786225309942, "grad_norm": 3.623789072036743, "learning_rate": 9.910036082154168e-06, "loss": 0.0006, "step": 1310 }, { "epoch": 0.009106776959854292, "grad_norm": 1.1184769777017323e-09, "learning_rate": 9.909346174808724e-06, "loss": 0.0, "step": 1320 }, { "epoch": 0.009175767694398642, "grad_norm": 7.058700361994852e-07, "learning_rate": 9.90865626746328e-06, "loss": 0.0, "step": 1330 }, { "epoch": 0.009244758428942993, "grad_norm": 0.0, "learning_rate": 9.907966360117837e-06, "loss": 0.0013, "step": 1340 }, { "epoch": 0.009313749163487343, "grad_norm": 1.382156006002333e-05, "learning_rate": 9.907276452772394e-06, "loss": 0.0, "step": 1350 }, { "epoch": 0.009382739898031695, "grad_norm": 0.0, "learning_rate": 9.906586545426949e-06, "loss": 0.0, "step": 1360 }, { "epoch": 0.009451730632576046, "grad_norm": 1.6501118249578894e-09, "learning_rate": 9.905896638081507e-06, "loss": 0.0, "step": 1370 }, { "epoch": 0.009520721367120396, "grad_norm": 0.0013124076649546623, "learning_rate": 9.905206730736063e-06, "loss": 0.0034, "step": 1380 }, { "epoch": 0.009589712101664746, "grad_norm": 1.388899272569688e-05, "learning_rate": 9.90451682339062e-06, "loss": 0.0, "step": 1390 }, { "epoch": 0.009658702836209097, "grad_norm": 0.0, "learning_rate": 9.903826916045176e-06, "loss": 0.0002, "step": 1400 }, { "epoch": 0.009727693570753447, "grad_norm": 1.2999422871473598e-08, "learning_rate": 9.903137008699733e-06, "loss": 0.0, "step": 1410 }, { "epoch": 0.0097966843052978, "grad_norm": 0.0, "learning_rate": 9.90244710135429e-06, "loss": 0.0, "step": 1420 }, { "epoch": 0.00986567503984215, "grad_norm": 0.0, "learning_rate": 9.901757194008846e-06, "loss": 0.334, "step": 1430 }, { "epoch": 0.0099346657743865, "grad_norm": 0.0, "learning_rate": 9.901067286663402e-06, "loss": 0.0019, "step": 1440 }, { "epoch": 0.01000365650893085, "grad_norm": 0.0, "learning_rate": 9.900377379317959e-06, "loss": 0.0018, "step": 1450 }, { "epoch": 0.0100726472434752, "grad_norm": 9.614233853483256e-10, "learning_rate": 9.899687471972515e-06, "loss": 0.0, "step": 1460 }, { "epoch": 0.010141637978019553, "grad_norm": 1.890835577000871e-08, "learning_rate": 9.898997564627072e-06, "loss": 0.0039, "step": 1470 }, { "epoch": 0.010210628712563903, "grad_norm": 0.0, "learning_rate": 9.898307657281628e-06, "loss": 0.08, "step": 1480 }, { "epoch": 0.010279619447108253, "grad_norm": 0.0, "learning_rate": 9.897617749936185e-06, "loss": 0.0, "step": 1490 }, { "epoch": 0.010348610181652604, "grad_norm": 0.0, "learning_rate": 9.896927842590741e-06, "loss": 0.0001, "step": 1500 }, { "epoch": 0.010417600916196954, "grad_norm": 3.2997333619277924e-05, "learning_rate": 9.896237935245298e-06, "loss": 0.0, "step": 1510 }, { "epoch": 0.010486591650741306, "grad_norm": 1.04883781659737e-06, "learning_rate": 9.895548027899854e-06, "loss": 0.0155, "step": 1520 }, { "epoch": 0.010555582385285657, "grad_norm": 9.53003836912103e-06, "learning_rate": 9.89485812055441e-06, "loss": 0.0032, "step": 1530 }, { "epoch": 0.010624573119830007, "grad_norm": 2.512200891757743e-09, "learning_rate": 9.894168213208967e-06, "loss": 0.0024, "step": 1540 }, { "epoch": 0.010693563854374357, "grad_norm": 0.007814495824277401, "learning_rate": 9.893478305863524e-06, "loss": 0.0, "step": 1550 }, { "epoch": 0.010762554588918708, "grad_norm": 0.00010314479732187465, "learning_rate": 9.89278839851808e-06, "loss": 0.0001, "step": 1560 }, { "epoch": 0.010831545323463058, "grad_norm": 0.0, "learning_rate": 9.892098491172637e-06, "loss": 0.0, "step": 1570 }, { "epoch": 0.01090053605800741, "grad_norm": 0.012568238191306591, "learning_rate": 9.891408583827193e-06, "loss": 0.6735, "step": 1580 }, { "epoch": 0.01096952679255176, "grad_norm": 0.0, "learning_rate": 9.89071867648175e-06, "loss": 0.0, "step": 1590 }, { "epoch": 0.011038517527096111, "grad_norm": 5.0949049182236195e-05, "learning_rate": 9.890028769136306e-06, "loss": 0.0, "step": 1600 }, { "epoch": 0.011107508261640461, "grad_norm": 0.0, "learning_rate": 9.889338861790863e-06, "loss": 0.0, "step": 1610 }, { "epoch": 0.011176498996184812, "grad_norm": 6.443872990757882e-08, "learning_rate": 9.888648954445419e-06, "loss": 0.0, "step": 1620 }, { "epoch": 0.011245489730729164, "grad_norm": 0.0, "learning_rate": 9.887959047099976e-06, "loss": 0.0, "step": 1630 }, { "epoch": 0.011314480465273514, "grad_norm": 0.0, "learning_rate": 9.887269139754532e-06, "loss": 0.0093, "step": 1640 }, { "epoch": 0.011383471199817865, "grad_norm": 0.00022462219931185246, "learning_rate": 9.886579232409089e-06, "loss": 0.0001, "step": 1650 }, { "epoch": 0.011452461934362215, "grad_norm": 0.0007001527701504529, "learning_rate": 9.885889325063645e-06, "loss": 0.0, "step": 1660 }, { "epoch": 0.011521452668906565, "grad_norm": 0.0, "learning_rate": 9.885199417718201e-06, "loss": 0.0, "step": 1670 }, { "epoch": 0.011590443403450917, "grad_norm": 2.575731195975095e-05, "learning_rate": 9.884509510372758e-06, "loss": 0.0, "step": 1680 }, { "epoch": 0.011659434137995268, "grad_norm": 0.0, "learning_rate": 9.883819603027314e-06, "loss": 0.0046, "step": 1690 }, { "epoch": 0.011728424872539618, "grad_norm": 0.0, "learning_rate": 9.883129695681871e-06, "loss": 0.0, "step": 1700 }, { "epoch": 0.011797415607083968, "grad_norm": 4.60377151512148e-07, "learning_rate": 9.882439788336427e-06, "loss": 0.0, "step": 1710 }, { "epoch": 0.011866406341628319, "grad_norm": 0.0, "learning_rate": 9.881749880990984e-06, "loss": 0.0, "step": 1720 }, { "epoch": 0.01193539707617267, "grad_norm": 4.682962351232334e-10, "learning_rate": 9.88105997364554e-06, "loss": 0.0, "step": 1730 }, { "epoch": 0.012004387810717021, "grad_norm": 1.7960427385332878e-06, "learning_rate": 9.880370066300097e-06, "loss": 0.0001, "step": 1740 }, { "epoch": 0.012073378545261372, "grad_norm": 0.0, "learning_rate": 9.879680158954653e-06, "loss": 0.0, "step": 1750 }, { "epoch": 0.012142369279805722, "grad_norm": 0.0, "learning_rate": 9.87899025160921e-06, "loss": 0.0, "step": 1760 }, { "epoch": 0.012211360014350072, "grad_norm": 1.149840682046488e-05, "learning_rate": 9.878300344263766e-06, "loss": 0.0, "step": 1770 }, { "epoch": 0.012280350748894423, "grad_norm": 6.41109954813146e-06, "learning_rate": 9.877610436918323e-06, "loss": 0.0, "step": 1780 }, { "epoch": 0.012349341483438775, "grad_norm": 0.0, "learning_rate": 9.87692052957288e-06, "loss": 0.0, "step": 1790 }, { "epoch": 0.012418332217983125, "grad_norm": 0.0, "learning_rate": 9.876230622227436e-06, "loss": 0.0, "step": 1800 }, { "epoch": 0.012487322952527476, "grad_norm": 0.0, "learning_rate": 9.875540714881992e-06, "loss": 0.0, "step": 1810 }, { "epoch": 0.012556313687071826, "grad_norm": 0.05592973902821541, "learning_rate": 9.874850807536549e-06, "loss": 0.0003, "step": 1820 }, { "epoch": 0.012625304421616176, "grad_norm": 0.0, "learning_rate": 9.874160900191105e-06, "loss": 0.0, "step": 1830 }, { "epoch": 0.012694295156160528, "grad_norm": 8.634918231109623e-06, "learning_rate": 9.873470992845662e-06, "loss": 0.0, "step": 1840 }, { "epoch": 0.012763285890704879, "grad_norm": 2.4053812012425624e-05, "learning_rate": 9.872781085500218e-06, "loss": 0.0212, "step": 1850 }, { "epoch": 0.01283227662524923, "grad_norm": 1.4181620827002916e-05, "learning_rate": 9.872091178154775e-06, "loss": 0.0003, "step": 1860 }, { "epoch": 0.01290126735979358, "grad_norm": 3.870967191232921e-08, "learning_rate": 9.871401270809331e-06, "loss": 0.0, "step": 1870 }, { "epoch": 0.01297025809433793, "grad_norm": 0.2523908019065857, "learning_rate": 9.870711363463888e-06, "loss": 0.0205, "step": 1880 }, { "epoch": 0.01303924882888228, "grad_norm": 23.197324752807617, "learning_rate": 9.870021456118444e-06, "loss": 0.0069, "step": 1890 }, { "epoch": 0.013108239563426632, "grad_norm": 0.00017800922796595842, "learning_rate": 9.869331548773e-06, "loss": 0.0003, "step": 1900 }, { "epoch": 0.013177230297970983, "grad_norm": 0.0, "learning_rate": 9.868641641427557e-06, "loss": 0.0, "step": 1910 }, { "epoch": 0.013246221032515333, "grad_norm": 0.0, "learning_rate": 9.867951734082114e-06, "loss": 0.0, "step": 1920 }, { "epoch": 0.013315211767059684, "grad_norm": 0.0007261246792040765, "learning_rate": 9.86726182673667e-06, "loss": 0.0004, "step": 1930 }, { "epoch": 0.013384202501604034, "grad_norm": 0.007969099096953869, "learning_rate": 9.866571919391227e-06, "loss": 0.0, "step": 1940 }, { "epoch": 0.013453193236148386, "grad_norm": 0.0, "learning_rate": 9.865882012045783e-06, "loss": 0.0862, "step": 1950 }, { "epoch": 0.013522183970692736, "grad_norm": 0.00247814878821373, "learning_rate": 9.86519210470034e-06, "loss": 0.0, "step": 1960 }, { "epoch": 0.013591174705237087, "grad_norm": 0.0, "learning_rate": 9.864502197354896e-06, "loss": 0.0307, "step": 1970 }, { "epoch": 0.013660165439781437, "grad_norm": 0.0, "learning_rate": 9.863812290009453e-06, "loss": 0.0035, "step": 1980 }, { "epoch": 0.013729156174325787, "grad_norm": 177.79531860351562, "learning_rate": 9.86312238266401e-06, "loss": 0.0829, "step": 1990 }, { "epoch": 0.01379814690887014, "grad_norm": 0.0, "learning_rate": 9.862432475318566e-06, "loss": 0.0057, "step": 2000 }, { "epoch": 0.01386713764341449, "grad_norm": 0.0, "learning_rate": 9.861742567973122e-06, "loss": 0.0003, "step": 2010 }, { "epoch": 0.01393612837795884, "grad_norm": 7.60870477733988e-09, "learning_rate": 9.861052660627679e-06, "loss": 0.1116, "step": 2020 }, { "epoch": 0.01400511911250319, "grad_norm": 2.393839849901269e-06, "learning_rate": 9.860362753282235e-06, "loss": 0.057, "step": 2030 }, { "epoch": 0.014074109847047541, "grad_norm": 2.7021491405321285e-05, "learning_rate": 9.859672845936792e-06, "loss": 0.0002, "step": 2040 }, { "epoch": 0.014143100581591891, "grad_norm": 5.14019937725152e-10, "learning_rate": 9.858982938591348e-06, "loss": 0.0, "step": 2050 }, { "epoch": 0.014212091316136244, "grad_norm": 0.0, "learning_rate": 9.858293031245905e-06, "loss": 0.0, "step": 2060 }, { "epoch": 0.014281082050680594, "grad_norm": 0.0, "learning_rate": 9.857603123900461e-06, "loss": 0.0001, "step": 2070 }, { "epoch": 0.014350072785224944, "grad_norm": 0.0026891904417425394, "learning_rate": 9.856913216555018e-06, "loss": 0.0, "step": 2080 }, { "epoch": 0.014419063519769295, "grad_norm": 5.3279332945521674e-08, "learning_rate": 9.856223309209574e-06, "loss": 0.0, "step": 2090 }, { "epoch": 0.014488054254313645, "grad_norm": 0.11718526482582092, "learning_rate": 9.85553340186413e-06, "loss": 0.084, "step": 2100 }, { "epoch": 0.014557044988857997, "grad_norm": 9.937484719557688e-07, "learning_rate": 9.854843494518687e-06, "loss": 0.0, "step": 2110 }, { "epoch": 0.014626035723402347, "grad_norm": 0.007573988754302263, "learning_rate": 9.854153587173244e-06, "loss": 0.0007, "step": 2120 }, { "epoch": 0.014695026457946698, "grad_norm": 0.009642322547733784, "learning_rate": 9.8534636798278e-06, "loss": 0.0, "step": 2130 }, { "epoch": 0.014764017192491048, "grad_norm": 2.301829908901709e-06, "learning_rate": 9.852773772482357e-06, "loss": 0.0, "step": 2140 }, { "epoch": 0.014833007927035399, "grad_norm": 0.0, "learning_rate": 9.852083865136913e-06, "loss": 0.0, "step": 2150 }, { "epoch": 0.01490199866157975, "grad_norm": 0.017099348828196526, "learning_rate": 9.85139395779147e-06, "loss": 0.0, "step": 2160 }, { "epoch": 0.014970989396124101, "grad_norm": 0.6120503544807434, "learning_rate": 9.850704050446026e-06, "loss": 0.0003, "step": 2170 }, { "epoch": 0.015039980130668451, "grad_norm": 2.8864574432373047, "learning_rate": 9.850014143100582e-06, "loss": 0.0009, "step": 2180 }, { "epoch": 0.015108970865212802, "grad_norm": 0.0, "learning_rate": 9.849324235755139e-06, "loss": 0.0, "step": 2190 }, { "epoch": 0.015177961599757152, "grad_norm": 0.0, "learning_rate": 9.848634328409695e-06, "loss": 0.0, "step": 2200 }, { "epoch": 0.015246952334301503, "grad_norm": 0.0, "learning_rate": 9.847944421064252e-06, "loss": 0.0002, "step": 2210 }, { "epoch": 0.015315943068845855, "grad_norm": 0.0, "learning_rate": 9.847254513718808e-06, "loss": 0.0003, "step": 2220 }, { "epoch": 0.015384933803390205, "grad_norm": 246.43312072753906, "learning_rate": 9.846564606373365e-06, "loss": 0.0809, "step": 2230 }, { "epoch": 0.015453924537934555, "grad_norm": 0.0, "learning_rate": 9.845874699027921e-06, "loss": 0.0, "step": 2240 }, { "epoch": 0.015522915272478906, "grad_norm": 0.001674336614087224, "learning_rate": 9.845184791682478e-06, "loss": 0.0, "step": 2250 }, { "epoch": 0.015591906007023256, "grad_norm": 0.6877070665359497, "learning_rate": 9.844494884337034e-06, "loss": 0.0003, "step": 2260 }, { "epoch": 0.015660896741567606, "grad_norm": 0.0, "learning_rate": 9.843804976991591e-06, "loss": 0.0, "step": 2270 }, { "epoch": 0.015729887476111957, "grad_norm": 0.02825876511633396, "learning_rate": 9.843115069646147e-06, "loss": 0.0, "step": 2280 }, { "epoch": 0.015798878210656307, "grad_norm": 6.0335845947265625, "learning_rate": 9.842425162300704e-06, "loss": 0.0012, "step": 2290 }, { "epoch": 0.01586786894520066, "grad_norm": 1.7432723709021047e-08, "learning_rate": 9.84173525495526e-06, "loss": 0.0, "step": 2300 }, { "epoch": 0.01593685967974501, "grad_norm": 0.0, "learning_rate": 9.841045347609817e-06, "loss": 0.0038, "step": 2310 }, { "epoch": 0.016005850414289362, "grad_norm": 0.0, "learning_rate": 9.840355440264373e-06, "loss": 0.0, "step": 2320 }, { "epoch": 0.016074841148833712, "grad_norm": 0.0008846206474117935, "learning_rate": 9.83966553291893e-06, "loss": 0.0, "step": 2330 }, { "epoch": 0.016143831883378063, "grad_norm": 13.107681274414062, "learning_rate": 9.838975625573486e-06, "loss": 0.0026, "step": 2340 }, { "epoch": 0.016212822617922413, "grad_norm": 6.843339974693663e-07, "learning_rate": 9.838285718228043e-06, "loss": 0.0647, "step": 2350 }, { "epoch": 0.016281813352466763, "grad_norm": 0.0, "learning_rate": 9.8375958108826e-06, "loss": 0.0, "step": 2360 }, { "epoch": 0.016350804087011114, "grad_norm": 0.0, "learning_rate": 9.836905903537156e-06, "loss": 0.0, "step": 2370 }, { "epoch": 0.016419794821555464, "grad_norm": 0.0, "learning_rate": 9.836215996191712e-06, "loss": 0.0007, "step": 2380 }, { "epoch": 0.016488785556099814, "grad_norm": 57.82735824584961, "learning_rate": 9.835526088846269e-06, "loss": 0.0168, "step": 2390 }, { "epoch": 0.016557776290644168, "grad_norm": 1.4099719919613563e-05, "learning_rate": 9.834836181500825e-06, "loss": 0.0001, "step": 2400 }, { "epoch": 0.01662676702518852, "grad_norm": 2.7681010261737526e-10, "learning_rate": 9.834146274155382e-06, "loss": 0.0, "step": 2410 }, { "epoch": 0.01669575775973287, "grad_norm": 1.6342320350304362e-06, "learning_rate": 9.833456366809938e-06, "loss": 0.0002, "step": 2420 }, { "epoch": 0.01676474849427722, "grad_norm": 0.0062063587829470634, "learning_rate": 9.832766459464495e-06, "loss": 0.0, "step": 2430 }, { "epoch": 0.01683373922882157, "grad_norm": 0.0, "learning_rate": 9.832076552119051e-06, "loss": 0.0141, "step": 2440 }, { "epoch": 0.01690272996336592, "grad_norm": 7.312730303965509e-05, "learning_rate": 9.831386644773608e-06, "loss": 0.0, "step": 2450 }, { "epoch": 0.01697172069791027, "grad_norm": 0.0, "learning_rate": 9.830696737428164e-06, "loss": 0.0012, "step": 2460 }, { "epoch": 0.01704071143245462, "grad_norm": 2.615517524784394e-10, "learning_rate": 9.83000683008272e-06, "loss": 0.0, "step": 2470 }, { "epoch": 0.01710970216699897, "grad_norm": 3.7908304761913314e-07, "learning_rate": 9.829316922737277e-06, "loss": 0.0002, "step": 2480 }, { "epoch": 0.01717869290154332, "grad_norm": 3.4277763916179538e-06, "learning_rate": 9.828627015391834e-06, "loss": 0.0, "step": 2490 }, { "epoch": 0.017247683636087672, "grad_norm": 4.6413342613682573e-10, "learning_rate": 9.82793710804639e-06, "loss": 0.0, "step": 2500 }, { "epoch": 0.017316674370632026, "grad_norm": 5.556948678986373e-08, "learning_rate": 9.827247200700947e-06, "loss": 0.0039, "step": 2510 }, { "epoch": 0.017385665105176376, "grad_norm": 2.60270555108022e-10, "learning_rate": 9.826557293355503e-06, "loss": 0.0141, "step": 2520 }, { "epoch": 0.017454655839720726, "grad_norm": 0.031119007617235184, "learning_rate": 9.82586738601006e-06, "loss": 0.0, "step": 2530 }, { "epoch": 0.017523646574265077, "grad_norm": 0.0, "learning_rate": 9.825177478664616e-06, "loss": 0.0, "step": 2540 }, { "epoch": 0.017592637308809427, "grad_norm": 5.535717995108769e-10, "learning_rate": 9.824556562053717e-06, "loss": 0.5637, "step": 2550 }, { "epoch": 0.017661628043353778, "grad_norm": 0.0, "learning_rate": 9.823866654708273e-06, "loss": 0.0, "step": 2560 }, { "epoch": 0.017730618777898128, "grad_norm": 0.0, "learning_rate": 9.82317674736283e-06, "loss": 0.001, "step": 2570 }, { "epoch": 0.01779960951244248, "grad_norm": 1.5067802223711624e-06, "learning_rate": 9.822486840017386e-06, "loss": 0.0658, "step": 2580 }, { "epoch": 0.01786860024698683, "grad_norm": 0.001151421689428389, "learning_rate": 9.821796932671943e-06, "loss": 0.001, "step": 2590 }, { "epoch": 0.01793759098153118, "grad_norm": 0.3574353754520416, "learning_rate": 9.8211070253265e-06, "loss": 0.0001, "step": 2600 }, { "epoch": 0.01800658171607553, "grad_norm": 0.0, "learning_rate": 9.820417117981056e-06, "loss": 0.0, "step": 2610 }, { "epoch": 0.018075572450619883, "grad_norm": 0.003819757141172886, "learning_rate": 9.819727210635612e-06, "loss": 0.0, "step": 2620 }, { "epoch": 0.018144563185164234, "grad_norm": 0.0, "learning_rate": 9.819037303290169e-06, "loss": 0.0, "step": 2630 }, { "epoch": 0.018213553919708584, "grad_norm": 0.0005704404320567846, "learning_rate": 9.818347395944725e-06, "loss": 0.0003, "step": 2640 }, { "epoch": 0.018282544654252934, "grad_norm": 3.914003343652439e-08, "learning_rate": 9.817657488599282e-06, "loss": 0.0, "step": 2650 }, { "epoch": 0.018351535388797285, "grad_norm": 0.0, "learning_rate": 9.816967581253838e-06, "loss": 0.5473, "step": 2660 }, { "epoch": 0.018420526123341635, "grad_norm": 0.2177378535270691, "learning_rate": 9.816277673908395e-06, "loss": 0.0, "step": 2670 }, { "epoch": 0.018489516857885985, "grad_norm": 7.915277092251927e-05, "learning_rate": 9.815587766562951e-06, "loss": 0.0, "step": 2680 }, { "epoch": 0.018558507592430336, "grad_norm": 0.10902252793312073, "learning_rate": 9.814897859217508e-06, "loss": 0.0005, "step": 2690 }, { "epoch": 0.018627498326974686, "grad_norm": 8.404355433810906e-09, "learning_rate": 9.814207951872064e-06, "loss": 0.0, "step": 2700 }, { "epoch": 0.018696489061519037, "grad_norm": 4.28242691308256e-10, "learning_rate": 9.81351804452662e-06, "loss": 0.0, "step": 2710 }, { "epoch": 0.01876547979606339, "grad_norm": 0.0, "learning_rate": 9.812828137181177e-06, "loss": 0.0, "step": 2720 }, { "epoch": 0.01883447053060774, "grad_norm": 0.0, "learning_rate": 9.812138229835734e-06, "loss": 0.0, "step": 2730 }, { "epoch": 0.01890346126515209, "grad_norm": 6.023535803478808e-08, "learning_rate": 9.81144832249029e-06, "loss": 0.0, "step": 2740 }, { "epoch": 0.01897245199969644, "grad_norm": 2.1577066945610568e-05, "learning_rate": 9.810758415144847e-06, "loss": 0.0009, "step": 2750 }, { "epoch": 0.019041442734240792, "grad_norm": 0.0007471358985640109, "learning_rate": 9.810068507799403e-06, "loss": 0.0, "step": 2760 }, { "epoch": 0.019110433468785142, "grad_norm": 0.0, "learning_rate": 9.80937860045396e-06, "loss": 0.0008, "step": 2770 }, { "epoch": 0.019179424203329493, "grad_norm": 0.02438054047524929, "learning_rate": 9.808688693108516e-06, "loss": 0.0001, "step": 2780 }, { "epoch": 0.019248414937873843, "grad_norm": 0.0013753767125308514, "learning_rate": 9.807998785763073e-06, "loss": 0.0475, "step": 2790 }, { "epoch": 0.019317405672418193, "grad_norm": 0.0, "learning_rate": 9.807308878417629e-06, "loss": 0.0, "step": 2800 }, { "epoch": 0.019386396406962544, "grad_norm": 0.006645300425589085, "learning_rate": 9.806618971072186e-06, "loss": 0.0027, "step": 2810 }, { "epoch": 0.019455387141506894, "grad_norm": 7.56134976853673e-09, "learning_rate": 9.805929063726742e-06, "loss": 0.0, "step": 2820 }, { "epoch": 0.019524377876051248, "grad_norm": 2.1843302278057308e-08, "learning_rate": 9.805239156381299e-06, "loss": 0.0, "step": 2830 }, { "epoch": 0.0195933686105956, "grad_norm": 0.0, "learning_rate": 9.804549249035855e-06, "loss": 0.0, "step": 2840 }, { "epoch": 0.01966235934513995, "grad_norm": 734.3946533203125, "learning_rate": 9.803859341690412e-06, "loss": 0.6703, "step": 2850 }, { "epoch": 0.0197313500796843, "grad_norm": 0.0, "learning_rate": 9.803169434344968e-06, "loss": 0.0, "step": 2860 }, { "epoch": 0.01980034081422865, "grad_norm": 225.9371337890625, "learning_rate": 9.802479526999525e-06, "loss": 0.0504, "step": 2870 }, { "epoch": 0.019869331548773, "grad_norm": 0.0, "learning_rate": 9.801789619654081e-06, "loss": 0.0, "step": 2880 }, { "epoch": 0.01993832228331735, "grad_norm": 2.3707333873801417e-09, "learning_rate": 9.801099712308637e-06, "loss": 0.0, "step": 2890 }, { "epoch": 0.0200073130178617, "grad_norm": 0.0, "learning_rate": 9.800409804963194e-06, "loss": 0.0, "step": 2900 }, { "epoch": 0.02007630375240605, "grad_norm": 0.0, "learning_rate": 9.79971989761775e-06, "loss": 0.0, "step": 2910 }, { "epoch": 0.0201452944869504, "grad_norm": 0.007832271046936512, "learning_rate": 9.799029990272307e-06, "loss": 0.012, "step": 2920 }, { "epoch": 0.02021428522149475, "grad_norm": 1.1749706345653976e-06, "learning_rate": 9.798340082926863e-06, "loss": 0.0, "step": 2930 }, { "epoch": 0.020283275956039105, "grad_norm": 0.0, "learning_rate": 9.79765017558142e-06, "loss": 0.0028, "step": 2940 }, { "epoch": 0.020352266690583456, "grad_norm": 0.0, "learning_rate": 9.796960268235976e-06, "loss": 0.0002, "step": 2950 }, { "epoch": 0.020421257425127806, "grad_norm": 0.0, "learning_rate": 9.796270360890533e-06, "loss": 0.0, "step": 2960 }, { "epoch": 0.020490248159672157, "grad_norm": 0.0, "learning_rate": 9.79558045354509e-06, "loss": 0.0, "step": 2970 }, { "epoch": 0.020559238894216507, "grad_norm": 0.04446733742952347, "learning_rate": 9.794890546199646e-06, "loss": 0.0002, "step": 2980 }, { "epoch": 0.020628229628760857, "grad_norm": 4.964587629885386e-10, "learning_rate": 9.794200638854202e-06, "loss": 0.0, "step": 2990 }, { "epoch": 0.020697220363305208, "grad_norm": 0.0, "learning_rate": 9.793510731508759e-06, "loss": 0.0418, "step": 3000 }, { "epoch": 0.020766211097849558, "grad_norm": 0.0, "learning_rate": 9.792820824163315e-06, "loss": 0.0, "step": 3010 }, { "epoch": 0.02083520183239391, "grad_norm": 0.0010562562383711338, "learning_rate": 9.792130916817872e-06, "loss": 0.0043, "step": 3020 }, { "epoch": 0.02090419256693826, "grad_norm": 0.0, "learning_rate": 9.791441009472428e-06, "loss": 0.0, "step": 3030 }, { "epoch": 0.020973183301482613, "grad_norm": 1.505286491010338e-06, "learning_rate": 9.790751102126985e-06, "loss": 0.0012, "step": 3040 }, { "epoch": 0.021042174036026963, "grad_norm": 0.0, "learning_rate": 9.790061194781541e-06, "loss": 0.0073, "step": 3050 }, { "epoch": 0.021111164770571313, "grad_norm": 0.0, "learning_rate": 9.789371287436098e-06, "loss": 0.0004, "step": 3060 }, { "epoch": 0.021180155505115664, "grad_norm": 0.0, "learning_rate": 9.788681380090654e-06, "loss": 0.1625, "step": 3070 }, { "epoch": 0.021249146239660014, "grad_norm": 0.0003871853114105761, "learning_rate": 9.78799147274521e-06, "loss": 0.0002, "step": 3080 }, { "epoch": 0.021318136974204364, "grad_norm": 0.0, "learning_rate": 9.787301565399767e-06, "loss": 0.0, "step": 3090 }, { "epoch": 0.021387127708748715, "grad_norm": 7.279188594111474e-07, "learning_rate": 9.786611658054324e-06, "loss": 0.0151, "step": 3100 }, { "epoch": 0.021456118443293065, "grad_norm": 0.0, "learning_rate": 9.78592175070888e-06, "loss": 0.0, "step": 3110 }, { "epoch": 0.021525109177837416, "grad_norm": 75.95635986328125, "learning_rate": 9.785231843363437e-06, "loss": 0.0326, "step": 3120 }, { "epoch": 0.021594099912381766, "grad_norm": 18.149946212768555, "learning_rate": 9.784541936017993e-06, "loss": 0.0039, "step": 3130 }, { "epoch": 0.021663090646926116, "grad_norm": 0.005141737405210733, "learning_rate": 9.78385202867255e-06, "loss": 0.4755, "step": 3140 }, { "epoch": 0.02173208138147047, "grad_norm": 0.0, "learning_rate": 9.783162121327106e-06, "loss": 0.0, "step": 3150 }, { "epoch": 0.02180107211601482, "grad_norm": 1.9227761640649987e-06, "learning_rate": 9.782472213981663e-06, "loss": 0.0015, "step": 3160 }, { "epoch": 0.02187006285055917, "grad_norm": 0.0, "learning_rate": 9.78178230663622e-06, "loss": 0.0, "step": 3170 }, { "epoch": 0.02193905358510352, "grad_norm": 0.0, "learning_rate": 9.781092399290776e-06, "loss": 0.0, "step": 3180 }, { "epoch": 0.02200804431964787, "grad_norm": 0.0, "learning_rate": 9.780402491945332e-06, "loss": 0.0, "step": 3190 }, { "epoch": 0.022077035054192222, "grad_norm": 0.0, "learning_rate": 9.779712584599889e-06, "loss": 0.0014, "step": 3200 }, { "epoch": 0.022146025788736572, "grad_norm": 0.0006382575375027955, "learning_rate": 9.779022677254445e-06, "loss": 0.0, "step": 3210 }, { "epoch": 0.022215016523280923, "grad_norm": 1.0402134656906128, "learning_rate": 9.778332769909002e-06, "loss": 0.0002, "step": 3220 }, { "epoch": 0.022284007257825273, "grad_norm": 5.689083648618976e-10, "learning_rate": 9.777642862563558e-06, "loss": 0.0, "step": 3230 }, { "epoch": 0.022352997992369623, "grad_norm": 0.0, "learning_rate": 9.776952955218115e-06, "loss": 0.0, "step": 3240 }, { "epoch": 0.022421988726913974, "grad_norm": 0.0, "learning_rate": 9.776263047872671e-06, "loss": 0.0, "step": 3250 }, { "epoch": 0.022490979461458328, "grad_norm": 1.0903780101045868e-09, "learning_rate": 9.775573140527228e-06, "loss": 0.0, "step": 3260 }, { "epoch": 0.022559970196002678, "grad_norm": 0.028465013951063156, "learning_rate": 9.774883233181784e-06, "loss": 0.0345, "step": 3270 }, { "epoch": 0.02262896093054703, "grad_norm": 0.0, "learning_rate": 9.77419332583634e-06, "loss": 0.0, "step": 3280 }, { "epoch": 0.02269795166509138, "grad_norm": 6.543196207076107e-09, "learning_rate": 9.773503418490897e-06, "loss": 0.0, "step": 3290 }, { "epoch": 0.02276694239963573, "grad_norm": 0.006624211091548204, "learning_rate": 9.772813511145454e-06, "loss": 0.0, "step": 3300 }, { "epoch": 0.02283593313418008, "grad_norm": 0.0, "learning_rate": 9.77212360380001e-06, "loss": 0.0, "step": 3310 }, { "epoch": 0.02290492386872443, "grad_norm": 0.0, "learning_rate": 9.771433696454567e-06, "loss": 0.0009, "step": 3320 }, { "epoch": 0.02297391460326878, "grad_norm": 0.0, "learning_rate": 9.770743789109123e-06, "loss": 0.0, "step": 3330 }, { "epoch": 0.02304290533781313, "grad_norm": 4.996880687002658e-10, "learning_rate": 9.77005388176368e-06, "loss": 0.0, "step": 3340 }, { "epoch": 0.02311189607235748, "grad_norm": 0.0, "learning_rate": 9.769363974418236e-06, "loss": 0.0, "step": 3350 }, { "epoch": 0.023180886806901835, "grad_norm": 0.0, "learning_rate": 9.768674067072793e-06, "loss": 0.0, "step": 3360 }, { "epoch": 0.023249877541446185, "grad_norm": 0.0, "learning_rate": 9.767984159727349e-06, "loss": 0.0064, "step": 3370 }, { "epoch": 0.023318868275990536, "grad_norm": 0.0, "learning_rate": 9.767294252381906e-06, "loss": 0.001, "step": 3380 }, { "epoch": 0.023387859010534886, "grad_norm": 0.22072994709014893, "learning_rate": 9.766604345036462e-06, "loss": 0.0001, "step": 3390 }, { "epoch": 0.023456849745079236, "grad_norm": 0.0, "learning_rate": 9.765914437691019e-06, "loss": 0.0, "step": 3400 }, { "epoch": 0.023525840479623587, "grad_norm": 0.001880093477666378, "learning_rate": 9.765224530345575e-06, "loss": 0.0, "step": 3410 }, { "epoch": 0.023594831214167937, "grad_norm": 8.820308372570196e-10, "learning_rate": 9.764534623000131e-06, "loss": 0.0, "step": 3420 }, { "epoch": 0.023663821948712287, "grad_norm": 0.0, "learning_rate": 9.763844715654688e-06, "loss": 0.0001, "step": 3430 }, { "epoch": 0.023732812683256638, "grad_norm": 332.05767822265625, "learning_rate": 9.763154808309246e-06, "loss": 0.0845, "step": 3440 }, { "epoch": 0.023801803417800988, "grad_norm": 0.0, "learning_rate": 9.762464900963801e-06, "loss": 0.0064, "step": 3450 }, { "epoch": 0.02387079415234534, "grad_norm": 0.0, "learning_rate": 9.761774993618357e-06, "loss": 0.0, "step": 3460 }, { "epoch": 0.023939784886889692, "grad_norm": 0.0018555239075794816, "learning_rate": 9.761085086272914e-06, "loss": 0.0001, "step": 3470 }, { "epoch": 0.024008775621434043, "grad_norm": 0.1549466997385025, "learning_rate": 9.76039517892747e-06, "loss": 0.7553, "step": 3480 }, { "epoch": 0.024077766355978393, "grad_norm": 4.988789328308485e-07, "learning_rate": 9.759705271582027e-06, "loss": 0.0, "step": 3490 }, { "epoch": 0.024146757090522743, "grad_norm": 4.5243656043858493e-10, "learning_rate": 9.759015364236583e-06, "loss": 0.0, "step": 3500 }, { "epoch": 0.024215747825067094, "grad_norm": 3.2983130040520336e-06, "learning_rate": 9.75832545689114e-06, "loss": 0.0001, "step": 3510 }, { "epoch": 0.024284738559611444, "grad_norm": 0.9399678707122803, "learning_rate": 9.757635549545696e-06, "loss": 0.0002, "step": 3520 }, { "epoch": 0.024353729294155795, "grad_norm": 2.515891901566647e-05, "learning_rate": 9.756945642200253e-06, "loss": 0.1279, "step": 3530 }, { "epoch": 0.024422720028700145, "grad_norm": 4.815369265998015e-06, "learning_rate": 9.75625573485481e-06, "loss": 0.0, "step": 3540 }, { "epoch": 0.024491710763244495, "grad_norm": 0.0, "learning_rate": 9.755565827509368e-06, "loss": 0.0015, "step": 3550 }, { "epoch": 0.024560701497788846, "grad_norm": 8.866207212854249e-10, "learning_rate": 9.754875920163922e-06, "loss": 0.0, "step": 3560 }, { "epoch": 0.024629692232333196, "grad_norm": 8.132279617711902e-06, "learning_rate": 9.754186012818479e-06, "loss": 0.001, "step": 3570 }, { "epoch": 0.02469868296687755, "grad_norm": 1.1343439837219194e-05, "learning_rate": 9.753496105473035e-06, "loss": 0.004, "step": 3580 }, { "epoch": 0.0247676737014219, "grad_norm": 2.4812104015836667e-07, "learning_rate": 9.752806198127592e-06, "loss": 0.0, "step": 3590 }, { "epoch": 0.02483666443596625, "grad_norm": 0.00022196717327460647, "learning_rate": 9.752116290782148e-06, "loss": 0.0001, "step": 3600 }, { "epoch": 0.0249056551705106, "grad_norm": 2.8421964088920504e-06, "learning_rate": 9.751426383436705e-06, "loss": 0.0007, "step": 3610 }, { "epoch": 0.02497464590505495, "grad_norm": 0.0, "learning_rate": 9.750736476091261e-06, "loss": 0.0001, "step": 3620 }, { "epoch": 0.0250436366395993, "grad_norm": 4.301780336390948e-06, "learning_rate": 9.750046568745818e-06, "loss": 0.0026, "step": 3630 }, { "epoch": 0.025112627374143652, "grad_norm": 6.158128144306829e-06, "learning_rate": 9.749356661400374e-06, "loss": 0.0003, "step": 3640 }, { "epoch": 0.025181618108688002, "grad_norm": 0.0, "learning_rate": 9.74866675405493e-06, "loss": 0.0, "step": 3650 }, { "epoch": 0.025250608843232353, "grad_norm": 748.8187255859375, "learning_rate": 9.747976846709489e-06, "loss": 0.334, "step": 3660 }, { "epoch": 0.025319599577776703, "grad_norm": 0.0, "learning_rate": 9.747286939364044e-06, "loss": 0.0, "step": 3670 }, { "epoch": 0.025388590312321057, "grad_norm": 0.16381755471229553, "learning_rate": 9.7465970320186e-06, "loss": 0.0078, "step": 3680 }, { "epoch": 0.025457581046865407, "grad_norm": 5.540079541788145e-07, "learning_rate": 9.745907124673157e-06, "loss": 0.0123, "step": 3690 }, { "epoch": 0.025526571781409758, "grad_norm": 0.00024552951799705625, "learning_rate": 9.745217217327713e-06, "loss": 0.0, "step": 3700 }, { "epoch": 0.025595562515954108, "grad_norm": 3.2780512810859364e-06, "learning_rate": 9.74452730998227e-06, "loss": 0.0101, "step": 3710 }, { "epoch": 0.02566455325049846, "grad_norm": 0.00011297911987639964, "learning_rate": 9.743837402636826e-06, "loss": 0.0, "step": 3720 }, { "epoch": 0.02573354398504281, "grad_norm": 1.1949259715038352e-05, "learning_rate": 9.743147495291383e-06, "loss": 0.7962, "step": 3730 }, { "epoch": 0.02580253471958716, "grad_norm": 2.413791833077994e-07, "learning_rate": 9.74245758794594e-06, "loss": 0.0002, "step": 3740 }, { "epoch": 0.02587152545413151, "grad_norm": 5.291036497823143e-10, "learning_rate": 9.741767680600496e-06, "loss": 0.0, "step": 3750 }, { "epoch": 0.02594051618867586, "grad_norm": 1.3636709539355252e-08, "learning_rate": 9.741077773255052e-06, "loss": 0.0002, "step": 3760 }, { "epoch": 0.02600950692322021, "grad_norm": 0.0, "learning_rate": 9.74038786590961e-06, "loss": 0.0, "step": 3770 }, { "epoch": 0.02607849765776456, "grad_norm": 0.0, "learning_rate": 9.739697958564167e-06, "loss": 0.0, "step": 3780 }, { "epoch": 0.026147488392308915, "grad_norm": 0.0, "learning_rate": 9.739008051218722e-06, "loss": 0.0, "step": 3790 }, { "epoch": 0.026216479126853265, "grad_norm": 0.0, "learning_rate": 9.738318143873278e-06, "loss": 0.2594, "step": 3800 }, { "epoch": 0.026285469861397615, "grad_norm": 1.0561984620238718e-09, "learning_rate": 9.737628236527835e-06, "loss": 0.0019, "step": 3810 }, { "epoch": 0.026354460595941966, "grad_norm": 0.0, "learning_rate": 9.736938329182391e-06, "loss": 0.0001, "step": 3820 }, { "epoch": 0.026423451330486316, "grad_norm": 0.0034547920804470778, "learning_rate": 9.736248421836948e-06, "loss": 0.0, "step": 3830 }, { "epoch": 0.026492442065030666, "grad_norm": 0.0, "learning_rate": 9.735558514491504e-06, "loss": 0.0001, "step": 3840 }, { "epoch": 0.026561432799575017, "grad_norm": 0.0, "learning_rate": 9.73486860714606e-06, "loss": 0.0, "step": 3850 }, { "epoch": 0.026630423534119367, "grad_norm": 0.0, "learning_rate": 9.734178699800617e-06, "loss": 0.0, "step": 3860 }, { "epoch": 0.026699414268663717, "grad_norm": 0.0, "learning_rate": 9.733488792455174e-06, "loss": 0.0092, "step": 3870 }, { "epoch": 0.026768405003208068, "grad_norm": 0.00023974105715751648, "learning_rate": 9.732798885109732e-06, "loss": 0.0002, "step": 3880 }, { "epoch": 0.026837395737752418, "grad_norm": 0.0, "learning_rate": 9.732108977764288e-06, "loss": 0.0001, "step": 3890 }, { "epoch": 0.026906386472296772, "grad_norm": 0.0, "learning_rate": 9.731419070418843e-06, "loss": 0.0, "step": 3900 }, { "epoch": 0.026975377206841122, "grad_norm": 0.0, "learning_rate": 9.7307291630734e-06, "loss": 0.0061, "step": 3910 }, { "epoch": 0.027044367941385473, "grad_norm": 0.0, "learning_rate": 9.730039255727956e-06, "loss": 0.0, "step": 3920 }, { "epoch": 0.027113358675929823, "grad_norm": 0.0, "learning_rate": 9.729349348382512e-06, "loss": 0.0221, "step": 3930 }, { "epoch": 0.027182349410474173, "grad_norm": 0.007771818898618221, "learning_rate": 9.728659441037069e-06, "loss": 0.0, "step": 3940 }, { "epoch": 0.027251340145018524, "grad_norm": 2.0619160068235942e-07, "learning_rate": 9.727969533691625e-06, "loss": 0.0007, "step": 3950 }, { "epoch": 0.027320330879562874, "grad_norm": 7.617491974087898e-06, "learning_rate": 9.727279626346182e-06, "loss": 0.0, "step": 3960 }, { "epoch": 0.027389321614107225, "grad_norm": 1.6165176930371672e-07, "learning_rate": 9.726589719000738e-06, "loss": 0.0806, "step": 3970 }, { "epoch": 0.027458312348651575, "grad_norm": 0.0, "learning_rate": 9.725899811655295e-06, "loss": 0.0, "step": 3980 }, { "epoch": 0.027527303083195925, "grad_norm": 0.0, "learning_rate": 9.725209904309853e-06, "loss": 0.0, "step": 3990 }, { "epoch": 0.02759629381774028, "grad_norm": 0.00930849090218544, "learning_rate": 9.72451999696441e-06, "loss": 0.0, "step": 4000 }, { "epoch": 0.02766528455228463, "grad_norm": 0.0, "learning_rate": 9.723830089618964e-06, "loss": 0.0002, "step": 4010 }, { "epoch": 0.02773427528682898, "grad_norm": 0.04425029456615448, "learning_rate": 9.723140182273521e-06, "loss": 0.0001, "step": 4020 }, { "epoch": 0.02780326602137333, "grad_norm": 0.0, "learning_rate": 9.722450274928077e-06, "loss": 0.0, "step": 4030 }, { "epoch": 0.02787225675591768, "grad_norm": 8.142186743498314e-06, "learning_rate": 9.721760367582634e-06, "loss": 0.0001, "step": 4040 }, { "epoch": 0.02794124749046203, "grad_norm": 4.0958007048175205e-06, "learning_rate": 9.72107046023719e-06, "loss": 0.0003, "step": 4050 }, { "epoch": 0.02801023822500638, "grad_norm": 0.0, "learning_rate": 9.720380552891747e-06, "loss": 0.0005, "step": 4060 }, { "epoch": 0.028079228959550732, "grad_norm": 0.0, "learning_rate": 9.719690645546303e-06, "loss": 0.0, "step": 4070 }, { "epoch": 0.028148219694095082, "grad_norm": 0.0, "learning_rate": 9.71900073820086e-06, "loss": 0.0018, "step": 4080 }, { "epoch": 0.028217210428639432, "grad_norm": 2.0809545198119395e-09, "learning_rate": 9.718310830855416e-06, "loss": 0.0, "step": 4090 }, { "epoch": 0.028286201163183783, "grad_norm": 0.0, "learning_rate": 9.717620923509975e-06, "loss": 0.0001, "step": 4100 }, { "epoch": 0.028355191897728137, "grad_norm": 0.0001445161906303838, "learning_rate": 9.716931016164531e-06, "loss": 0.468, "step": 4110 }, { "epoch": 0.028424182632272487, "grad_norm": 0.0, "learning_rate": 9.716241108819088e-06, "loss": 0.0005, "step": 4120 }, { "epoch": 0.028493173366816837, "grad_norm": 0.0, "learning_rate": 9.715551201473642e-06, "loss": 0.0001, "step": 4130 }, { "epoch": 0.028562164101361188, "grad_norm": 1.503429842841797e-07, "learning_rate": 9.714861294128199e-06, "loss": 0.0, "step": 4140 }, { "epoch": 0.028631154835905538, "grad_norm": 0.0, "learning_rate": 9.714171386782755e-06, "loss": 0.0, "step": 4150 }, { "epoch": 0.02870014557044989, "grad_norm": 0.0, "learning_rate": 9.713481479437312e-06, "loss": 0.0, "step": 4160 }, { "epoch": 0.02876913630499424, "grad_norm": 4.6401057995915096e-10, "learning_rate": 9.712791572091868e-06, "loss": 0.017, "step": 4170 }, { "epoch": 0.02883812703953859, "grad_norm": 0.001769772032275796, "learning_rate": 9.712101664746425e-06, "loss": 0.0, "step": 4180 }, { "epoch": 0.02890711777408294, "grad_norm": 0.0, "learning_rate": 9.711411757400981e-06, "loss": 0.0001, "step": 4190 }, { "epoch": 0.02897610850862729, "grad_norm": 1.992263378269854e-06, "learning_rate": 9.710721850055538e-06, "loss": 0.0, "step": 4200 }, { "epoch": 0.02904509924317164, "grad_norm": 0.0, "learning_rate": 9.710031942710096e-06, "loss": 0.0, "step": 4210 }, { "epoch": 0.029114089977715994, "grad_norm": 0.0, "learning_rate": 9.709342035364652e-06, "loss": 0.0339, "step": 4220 }, { "epoch": 0.029183080712260345, "grad_norm": 0.0, "learning_rate": 9.708652128019209e-06, "loss": 0.0, "step": 4230 }, { "epoch": 0.029252071446804695, "grad_norm": 0.0, "learning_rate": 9.707962220673764e-06, "loss": 0.0, "step": 4240 }, { "epoch": 0.029321062181349045, "grad_norm": 0.0, "learning_rate": 9.70727231332832e-06, "loss": 0.0, "step": 4250 }, { "epoch": 0.029390052915893396, "grad_norm": 0.0, "learning_rate": 9.706582405982877e-06, "loss": 0.0006, "step": 4260 }, { "epoch": 0.029459043650437746, "grad_norm": 0.0, "learning_rate": 9.705892498637433e-06, "loss": 0.0, "step": 4270 }, { "epoch": 0.029528034384982096, "grad_norm": 0.2454601377248764, "learning_rate": 9.70520259129199e-06, "loss": 0.0001, "step": 4280 }, { "epoch": 0.029597025119526447, "grad_norm": 0.0, "learning_rate": 9.704512683946546e-06, "loss": 0.0, "step": 4290 }, { "epoch": 0.029666015854070797, "grad_norm": 0.0, "learning_rate": 9.703822776601103e-06, "loss": 0.0, "step": 4300 }, { "epoch": 0.029735006588615148, "grad_norm": 0.0, "learning_rate": 9.703132869255659e-06, "loss": 0.0, "step": 4310 }, { "epoch": 0.0298039973231595, "grad_norm": 33.98332595825195, "learning_rate": 9.702442961910217e-06, "loss": 0.01, "step": 4320 }, { "epoch": 0.029872988057703852, "grad_norm": 0.00013368955114856362, "learning_rate": 9.701753054564774e-06, "loss": 0.0, "step": 4330 }, { "epoch": 0.029941978792248202, "grad_norm": 0.0, "learning_rate": 9.70106314721933e-06, "loss": 0.0, "step": 4340 }, { "epoch": 0.030010969526792552, "grad_norm": 0.0, "learning_rate": 9.700373239873885e-06, "loss": 0.1638, "step": 4350 }, { "epoch": 0.030079960261336903, "grad_norm": 0.0, "learning_rate": 9.699683332528442e-06, "loss": 0.0, "step": 4360 }, { "epoch": 0.030148950995881253, "grad_norm": 1.900823320966083e-07, "learning_rate": 9.698993425182998e-06, "loss": 0.1563, "step": 4370 }, { "epoch": 0.030217941730425604, "grad_norm": 13.705475807189941, "learning_rate": 9.698303517837555e-06, "loss": 0.0025, "step": 4380 }, { "epoch": 0.030286932464969954, "grad_norm": 0.0, "learning_rate": 9.697613610492111e-06, "loss": 0.0, "step": 4390 }, { "epoch": 0.030355923199514304, "grad_norm": 0.0, "learning_rate": 9.696923703146668e-06, "loss": 0.0, "step": 4400 }, { "epoch": 0.030424913934058655, "grad_norm": 0.0, "learning_rate": 9.696233795801224e-06, "loss": 0.0, "step": 4410 }, { "epoch": 0.030493904668603005, "grad_norm": 0.0, "learning_rate": 9.69554388845578e-06, "loss": 0.407, "step": 4420 }, { "epoch": 0.03056289540314736, "grad_norm": 0.0, "learning_rate": 9.694853981110339e-06, "loss": 0.0, "step": 4430 }, { "epoch": 0.03063188613769171, "grad_norm": 0.0, "learning_rate": 9.694164073764895e-06, "loss": 0.0011, "step": 4440 }, { "epoch": 0.03070087687223606, "grad_norm": 0.0, "learning_rate": 9.693474166419452e-06, "loss": 0.2846, "step": 4450 }, { "epoch": 0.03076986760678041, "grad_norm": 4.794936669938465e-10, "learning_rate": 9.692784259074008e-06, "loss": 0.0295, "step": 4460 }, { "epoch": 0.03083885834132476, "grad_norm": 0.0, "learning_rate": 9.692094351728563e-06, "loss": 0.6908, "step": 4470 }, { "epoch": 0.03090784907586911, "grad_norm": 0.0, "learning_rate": 9.69140444438312e-06, "loss": 0.0008, "step": 4480 }, { "epoch": 0.03097683981041346, "grad_norm": 0.0, "learning_rate": 9.690714537037676e-06, "loss": 0.0, "step": 4490 }, { "epoch": 0.03104583054495781, "grad_norm": 0.0, "learning_rate": 9.690024629692232e-06, "loss": 0.0002, "step": 4500 }, { "epoch": 0.031114821279502162, "grad_norm": 4.078829363152181e-08, "learning_rate": 9.689334722346789e-06, "loss": 0.0, "step": 4510 }, { "epoch": 0.031183812014046512, "grad_norm": 0.0, "learning_rate": 9.688644815001345e-06, "loss": 0.0613, "step": 4520 }, { "epoch": 0.031252802748590866, "grad_norm": 0.0, "learning_rate": 9.687954907655902e-06, "loss": 0.0013, "step": 4530 }, { "epoch": 0.03132179348313521, "grad_norm": 0.0, "learning_rate": 9.68726500031046e-06, "loss": 0.0, "step": 4540 }, { "epoch": 0.03139078421767957, "grad_norm": 0.002421376761049032, "learning_rate": 9.686575092965017e-06, "loss": 0.0, "step": 4550 }, { "epoch": 0.031459774952223914, "grad_norm": 0.0, "learning_rate": 9.685885185619573e-06, "loss": 0.0, "step": 4560 }, { "epoch": 0.03152876568676827, "grad_norm": 0.022848736494779587, "learning_rate": 9.68519527827413e-06, "loss": 0.0001, "step": 4570 }, { "epoch": 0.031597756421312614, "grad_norm": 0.0, "learning_rate": 9.684505370928684e-06, "loss": 0.0024, "step": 4580 }, { "epoch": 0.03166674715585697, "grad_norm": 0.0, "learning_rate": 9.683815463583241e-06, "loss": 0.0009, "step": 4590 }, { "epoch": 0.03173573789040132, "grad_norm": 1.9884506485823294e-08, "learning_rate": 9.683125556237797e-06, "loss": 0.0001, "step": 4600 }, { "epoch": 0.03180472862494567, "grad_norm": 0.0003966403310187161, "learning_rate": 9.682435648892354e-06, "loss": 0.0001, "step": 4610 }, { "epoch": 0.03187371935949002, "grad_norm": 0.0, "learning_rate": 9.68174574154691e-06, "loss": 0.0126, "step": 4620 }, { "epoch": 0.03194271009403437, "grad_norm": 5.939396032772493e-06, "learning_rate": 9.681055834201467e-06, "loss": 0.0012, "step": 4630 }, { "epoch": 0.032011700828578724, "grad_norm": 1.195463028125232e-06, "learning_rate": 9.680365926856023e-06, "loss": 0.016, "step": 4640 }, { "epoch": 0.03208069156312307, "grad_norm": 0.0, "learning_rate": 9.679676019510581e-06, "loss": 0.0031, "step": 4650 }, { "epoch": 0.032149682297667424, "grad_norm": 0.003359755966812372, "learning_rate": 9.678986112165138e-06, "loss": 0.0, "step": 4660 }, { "epoch": 0.03221867303221177, "grad_norm": 0.0, "learning_rate": 9.678296204819694e-06, "loss": 0.0018, "step": 4670 }, { "epoch": 0.032287663766756125, "grad_norm": 0.0, "learning_rate": 9.677606297474251e-06, "loss": 0.0, "step": 4680 }, { "epoch": 0.03235665450130047, "grad_norm": 1.3364478945732117e-05, "learning_rate": 9.676916390128806e-06, "loss": 0.0, "step": 4690 }, { "epoch": 0.032425645235844826, "grad_norm": 1.3066127735328337e-07, "learning_rate": 9.676226482783362e-06, "loss": 0.002, "step": 4700 }, { "epoch": 0.03249463597038918, "grad_norm": 0.0, "learning_rate": 9.675536575437919e-06, "loss": 0.0006, "step": 4710 }, { "epoch": 0.032563626704933527, "grad_norm": 0.0, "learning_rate": 9.674846668092475e-06, "loss": 0.0, "step": 4720 }, { "epoch": 0.03263261743947788, "grad_norm": 0.0, "learning_rate": 9.674156760747032e-06, "loss": 0.0, "step": 4730 }, { "epoch": 0.03270160817402223, "grad_norm": 0.0, "learning_rate": 9.673466853401588e-06, "loss": 0.0, "step": 4740 }, { "epoch": 0.03277059890856658, "grad_norm": 0.00019785226322710514, "learning_rate": 9.672776946056145e-06, "loss": 0.001, "step": 4750 }, { "epoch": 0.03283958964311093, "grad_norm": 0.0, "learning_rate": 9.672087038710703e-06, "loss": 0.0, "step": 4760 }, { "epoch": 0.03290858037765528, "grad_norm": 0.00433505279943347, "learning_rate": 9.67139713136526e-06, "loss": 0.0, "step": 4770 }, { "epoch": 0.03297757111219963, "grad_norm": 0.0, "learning_rate": 9.670707224019816e-06, "loss": 0.0, "step": 4780 }, { "epoch": 0.03304656184674398, "grad_norm": 0.0, "learning_rate": 9.670086307408917e-06, "loss": 0.2395, "step": 4790 }, { "epoch": 0.033115552581288336, "grad_norm": 0.0004065260582137853, "learning_rate": 9.669396400063471e-06, "loss": 0.0, "step": 4800 }, { "epoch": 0.03318454331583268, "grad_norm": 4.916876350513633e-10, "learning_rate": 9.668706492718028e-06, "loss": 0.0, "step": 4810 }, { "epoch": 0.03325353405037704, "grad_norm": 0.0, "learning_rate": 9.668016585372584e-06, "loss": 0.0, "step": 4820 }, { "epoch": 0.033322524784921384, "grad_norm": 0.015064802020788193, "learning_rate": 9.66732667802714e-06, "loss": 0.0005, "step": 4830 }, { "epoch": 0.03339151551946574, "grad_norm": 0.0, "learning_rate": 9.666636770681697e-06, "loss": 0.0, "step": 4840 }, { "epoch": 0.033460506254010085, "grad_norm": 0.0, "learning_rate": 9.665946863336255e-06, "loss": 0.0, "step": 4850 }, { "epoch": 0.03352949698855444, "grad_norm": 0.054494451731443405, "learning_rate": 9.665256955990812e-06, "loss": 0.0, "step": 4860 }, { "epoch": 0.033598487723098786, "grad_norm": 4.759086458250295e-10, "learning_rate": 9.664567048645368e-06, "loss": 0.0, "step": 4870 }, { "epoch": 0.03366747845764314, "grad_norm": 0.5415263175964355, "learning_rate": 9.663877141299925e-06, "loss": 0.0028, "step": 4880 }, { "epoch": 0.033736469192187486, "grad_norm": 0.0, "learning_rate": 9.663187233954481e-06, "loss": 0.0, "step": 4890 }, { "epoch": 0.03380545992673184, "grad_norm": 0.0, "learning_rate": 9.662497326609038e-06, "loss": 0.0, "step": 4900 }, { "epoch": 0.033874450661276194, "grad_norm": 2.639603614807129, "learning_rate": 9.661807419263593e-06, "loss": 0.0007, "step": 4910 }, { "epoch": 0.03394344139582054, "grad_norm": 0.0, "learning_rate": 9.66111751191815e-06, "loss": 0.0, "step": 4920 }, { "epoch": 0.034012432130364895, "grad_norm": 0.0, "learning_rate": 9.660427604572706e-06, "loss": 0.0, "step": 4930 }, { "epoch": 0.03408142286490924, "grad_norm": 8.912328720092773, "learning_rate": 9.659737697227262e-06, "loss": 0.0065, "step": 4940 }, { "epoch": 0.034150413599453595, "grad_norm": 0.011088664643466473, "learning_rate": 9.659047789881819e-06, "loss": 0.0, "step": 4950 }, { "epoch": 0.03421940433399794, "grad_norm": 0.0, "learning_rate": 9.658357882536377e-06, "loss": 0.0011, "step": 4960 }, { "epoch": 0.034288395068542296, "grad_norm": 0.0, "learning_rate": 9.657667975190933e-06, "loss": 0.0, "step": 4970 }, { "epoch": 0.03435738580308664, "grad_norm": 0.0, "learning_rate": 9.65697806784549e-06, "loss": 0.0007, "step": 4980 }, { "epoch": 0.034426376537631, "grad_norm": 0.0, "learning_rate": 9.656288160500046e-06, "loss": 0.0, "step": 4990 }, { "epoch": 0.034495367272175344, "grad_norm": 0.0, "learning_rate": 9.655598253154603e-06, "loss": 0.0, "step": 5000 }, { "epoch": 0.0345643580067197, "grad_norm": 0.0, "learning_rate": 9.65490834580916e-06, "loss": 0.0, "step": 5010 }, { "epoch": 0.03463334874126405, "grad_norm": 0.0032571053598076105, "learning_rate": 9.654218438463716e-06, "loss": 0.0, "step": 5020 }, { "epoch": 0.0347023394758084, "grad_norm": 0.0, "learning_rate": 9.65352853111827e-06, "loss": 0.0, "step": 5030 }, { "epoch": 0.03477133021035275, "grad_norm": 0.0, "learning_rate": 9.652838623772827e-06, "loss": 0.0, "step": 5040 }, { "epoch": 0.0348403209448971, "grad_norm": 0.0, "learning_rate": 9.652148716427384e-06, "loss": 0.0001, "step": 5050 }, { "epoch": 0.03490931167944145, "grad_norm": 0.0, "learning_rate": 9.65145880908194e-06, "loss": 0.0, "step": 5060 }, { "epoch": 0.0349783024139858, "grad_norm": 8.928826567000669e-10, "learning_rate": 9.650768901736498e-06, "loss": 0.0, "step": 5070 }, { "epoch": 0.035047293148530154, "grad_norm": 0.0, "learning_rate": 9.650078994391055e-06, "loss": 0.0, "step": 5080 }, { "epoch": 0.0351162838830745, "grad_norm": 0.0, "learning_rate": 9.649389087045611e-06, "loss": 0.0, "step": 5090 }, { "epoch": 0.035185274617618854, "grad_norm": 0.0, "learning_rate": 9.648699179700168e-06, "loss": 0.0, "step": 5100 }, { "epoch": 0.0352542653521632, "grad_norm": 7.675723552703857, "learning_rate": 9.648009272354724e-06, "loss": 0.0656, "step": 5110 }, { "epoch": 0.035323256086707555, "grad_norm": 0.0, "learning_rate": 9.64731936500928e-06, "loss": 0.0, "step": 5120 }, { "epoch": 0.03539224682125191, "grad_norm": 1.144585371017456, "learning_rate": 9.646629457663837e-06, "loss": 0.3927, "step": 5130 }, { "epoch": 0.035461237555796256, "grad_norm": 4.366060935012683e-08, "learning_rate": 9.645939550318392e-06, "loss": 0.0, "step": 5140 }, { "epoch": 0.03553022829034061, "grad_norm": 0.0, "learning_rate": 9.645249642972948e-06, "loss": 0.0, "step": 5150 }, { "epoch": 0.03559921902488496, "grad_norm": 0.00010898914479184896, "learning_rate": 9.644559735627505e-06, "loss": 0.0023, "step": 5160 }, { "epoch": 0.03566820975942931, "grad_norm": 7.119273170985707e-09, "learning_rate": 9.643869828282061e-06, "loss": 0.0, "step": 5170 }, { "epoch": 0.03573720049397366, "grad_norm": 0.0, "learning_rate": 9.64317992093662e-06, "loss": 0.0001, "step": 5180 }, { "epoch": 0.03580619122851801, "grad_norm": 0.0, "learning_rate": 9.642490013591176e-06, "loss": 0.1288, "step": 5190 }, { "epoch": 0.03587518196306236, "grad_norm": 0.0, "learning_rate": 9.641800106245733e-06, "loss": 0.1805, "step": 5200 }, { "epoch": 0.03594417269760671, "grad_norm": 8.536252607882489e-06, "learning_rate": 9.641110198900289e-06, "loss": 0.0, "step": 5210 }, { "epoch": 0.03601316343215106, "grad_norm": 0.0, "learning_rate": 9.640420291554846e-06, "loss": 0.0005, "step": 5220 }, { "epoch": 0.03608215416669541, "grad_norm": 0.008400880731642246, "learning_rate": 9.639730384209402e-06, "loss": 0.0, "step": 5230 }, { "epoch": 0.036151144901239766, "grad_norm": 0.0, "learning_rate": 9.639040476863959e-06, "loss": 0.3464, "step": 5240 }, { "epoch": 0.03622013563578411, "grad_norm": 0.0, "learning_rate": 9.638350569518513e-06, "loss": 0.011, "step": 5250 }, { "epoch": 0.03628912637032847, "grad_norm": 6.390629181396434e-08, "learning_rate": 9.63766066217307e-06, "loss": 0.0, "step": 5260 }, { "epoch": 0.036358117104872814, "grad_norm": 0.0, "learning_rate": 9.636970754827626e-06, "loss": 0.0, "step": 5270 }, { "epoch": 0.03642710783941717, "grad_norm": 0.0009792015189304948, "learning_rate": 9.636280847482183e-06, "loss": 0.0, "step": 5280 }, { "epoch": 0.036496098573961515, "grad_norm": 0.0, "learning_rate": 9.635590940136741e-06, "loss": 0.0, "step": 5290 }, { "epoch": 0.03656508930850587, "grad_norm": 0.0, "learning_rate": 9.634901032791298e-06, "loss": 0.0001, "step": 5300 }, { "epoch": 0.036634080043050216, "grad_norm": 2.826805484801298e-06, "learning_rate": 9.634211125445854e-06, "loss": 0.0, "step": 5310 }, { "epoch": 0.03670307077759457, "grad_norm": 14.800854682922363, "learning_rate": 9.63352121810041e-06, "loss": 0.0052, "step": 5320 }, { "epoch": 0.036772061512138916, "grad_norm": 1.2943366527906619e-05, "learning_rate": 9.632831310754967e-06, "loss": 0.1085, "step": 5330 }, { "epoch": 0.03684105224668327, "grad_norm": 0.0, "learning_rate": 9.632141403409524e-06, "loss": 0.0044, "step": 5340 }, { "epoch": 0.036910042981227624, "grad_norm": 0.0, "learning_rate": 9.63145149606408e-06, "loss": 0.0005, "step": 5350 }, { "epoch": 0.03697903371577197, "grad_norm": 0.0, "learning_rate": 9.630761588718636e-06, "loss": 0.0002, "step": 5360 }, { "epoch": 0.037048024450316325, "grad_norm": 0.6750802397727966, "learning_rate": 9.630071681373191e-06, "loss": 0.0002, "step": 5370 }, { "epoch": 0.03711701518486067, "grad_norm": 0.0, "learning_rate": 9.629381774027748e-06, "loss": 0.0002, "step": 5380 }, { "epoch": 0.037186005919405025, "grad_norm": 0.0, "learning_rate": 9.628691866682304e-06, "loss": 0.0, "step": 5390 }, { "epoch": 0.03725499665394937, "grad_norm": 0.0, "learning_rate": 9.628001959336862e-06, "loss": 0.0, "step": 5400 }, { "epoch": 0.037323987388493726, "grad_norm": 0.0, "learning_rate": 9.627312051991419e-06, "loss": 0.0, "step": 5410 }, { "epoch": 0.03739297812303807, "grad_norm": 1.7866976520508615e-07, "learning_rate": 9.626622144645975e-06, "loss": 0.0, "step": 5420 }, { "epoch": 0.03746196885758243, "grad_norm": 0.0, "learning_rate": 9.625932237300532e-06, "loss": 0.0, "step": 5430 }, { "epoch": 0.03753095959212678, "grad_norm": 4.721626423176417e-10, "learning_rate": 9.625242329955088e-06, "loss": 0.0001, "step": 5440 }, { "epoch": 0.03759995032667113, "grad_norm": 0.0, "learning_rate": 9.624552422609645e-06, "loss": 0.0, "step": 5450 }, { "epoch": 0.03766894106121548, "grad_norm": 4.6970615130703663e-07, "learning_rate": 9.623862515264201e-06, "loss": 0.0, "step": 5460 }, { "epoch": 0.03773793179575983, "grad_norm": 2.382010535484369e-07, "learning_rate": 9.623172607918758e-06, "loss": 0.0, "step": 5470 }, { "epoch": 0.03780692253030418, "grad_norm": 5.510453759960399e-10, "learning_rate": 9.622482700573313e-06, "loss": 0.0002, "step": 5480 }, { "epoch": 0.03787591326484853, "grad_norm": 5.554588794708252, "learning_rate": 9.62179279322787e-06, "loss": 0.0557, "step": 5490 }, { "epoch": 0.03794490399939288, "grad_norm": 7.04203102941392e-07, "learning_rate": 9.621102885882426e-06, "loss": 0.0, "step": 5500 }, { "epoch": 0.03801389473393723, "grad_norm": 1.1076947450637817, "learning_rate": 9.620412978536984e-06, "loss": 0.0001, "step": 5510 }, { "epoch": 0.038082885468481584, "grad_norm": 5.152320682100253e-06, "learning_rate": 9.61972307119154e-06, "loss": 0.0, "step": 5520 }, { "epoch": 0.03815187620302593, "grad_norm": 0.05701514706015587, "learning_rate": 9.619033163846097e-06, "loss": 0.0, "step": 5530 }, { "epoch": 0.038220866937570284, "grad_norm": 0.0, "learning_rate": 9.618343256500653e-06, "loss": 0.0, "step": 5540 }, { "epoch": 0.03828985767211464, "grad_norm": 4.2681944090183777e-10, "learning_rate": 9.61765334915521e-06, "loss": 0.0004, "step": 5550 }, { "epoch": 0.038358848406658985, "grad_norm": 0.0, "learning_rate": 9.616963441809766e-06, "loss": 0.0051, "step": 5560 }, { "epoch": 0.03842783914120334, "grad_norm": 0.0, "learning_rate": 9.616273534464323e-06, "loss": 0.0001, "step": 5570 }, { "epoch": 0.038496829875747686, "grad_norm": 0.0, "learning_rate": 9.61558362711888e-06, "loss": 0.0, "step": 5580 }, { "epoch": 0.03856582061029204, "grad_norm": 0.005056244321167469, "learning_rate": 9.614893719773434e-06, "loss": 0.0, "step": 5590 }, { "epoch": 0.03863481134483639, "grad_norm": 0.0, "learning_rate": 9.61420381242799e-06, "loss": 0.0, "step": 5600 }, { "epoch": 0.03870380207938074, "grad_norm": 1.372959650325356e-05, "learning_rate": 9.613513905082547e-06, "loss": 0.0, "step": 5610 }, { "epoch": 0.03877279281392509, "grad_norm": 9.360952013537371e-10, "learning_rate": 9.612823997737105e-06, "loss": 0.0, "step": 5620 }, { "epoch": 0.03884178354846944, "grad_norm": 0.0, "learning_rate": 9.612134090391662e-06, "loss": 0.0025, "step": 5630 }, { "epoch": 0.03891077428301379, "grad_norm": 0.047646623104810715, "learning_rate": 9.611444183046218e-06, "loss": 0.0, "step": 5640 }, { "epoch": 0.03897976501755814, "grad_norm": 0.000264080852502957, "learning_rate": 9.610754275700775e-06, "loss": 0.0, "step": 5650 }, { "epoch": 0.039048755752102496, "grad_norm": 0.07369674742221832, "learning_rate": 9.610064368355331e-06, "loss": 0.0861, "step": 5660 }, { "epoch": 0.03911774648664684, "grad_norm": 114.97257995605469, "learning_rate": 9.609374461009888e-06, "loss": 0.1089, "step": 5670 }, { "epoch": 0.0391867372211912, "grad_norm": 0.0, "learning_rate": 9.608684553664444e-06, "loss": 0.0, "step": 5680 }, { "epoch": 0.03925572795573554, "grad_norm": 7.174975991119936e-08, "learning_rate": 9.607994646319e-06, "loss": 0.0, "step": 5690 }, { "epoch": 0.0393247186902799, "grad_norm": 0.0, "learning_rate": 9.607304738973555e-06, "loss": 0.0, "step": 5700 }, { "epoch": 0.039393709424824244, "grad_norm": 0.0019032327691093087, "learning_rate": 9.606614831628112e-06, "loss": 0.0, "step": 5710 }, { "epoch": 0.0394627001593686, "grad_norm": 9.654377297607653e-10, "learning_rate": 9.605924924282668e-06, "loss": 0.0, "step": 5720 }, { "epoch": 0.039531690893912945, "grad_norm": 0.0, "learning_rate": 9.605235016937227e-06, "loss": 0.0004, "step": 5730 }, { "epoch": 0.0396006816284573, "grad_norm": 1.1168336255806821e-09, "learning_rate": 9.604545109591783e-06, "loss": 0.0002, "step": 5740 }, { "epoch": 0.039669672363001646, "grad_norm": 0.0015918408753350377, "learning_rate": 9.60385520224634e-06, "loss": 0.0, "step": 5750 }, { "epoch": 0.039738663097546, "grad_norm": 8.74627148732543e-05, "learning_rate": 9.603165294900896e-06, "loss": 0.0143, "step": 5760 }, { "epoch": 0.03980765383209035, "grad_norm": 0.0065117026679217815, "learning_rate": 9.602475387555453e-06, "loss": 0.0, "step": 5770 }, { "epoch": 0.0398766445666347, "grad_norm": 0.0, "learning_rate": 9.601785480210009e-06, "loss": 0.0062, "step": 5780 }, { "epoch": 0.039945635301179054, "grad_norm": 0.0, "learning_rate": 9.601095572864566e-06, "loss": 0.0003, "step": 5790 }, { "epoch": 0.0400146260357234, "grad_norm": 0.0, "learning_rate": 9.600405665519122e-06, "loss": 0.0, "step": 5800 }, { "epoch": 0.040083616770267755, "grad_norm": 0.0, "learning_rate": 9.599715758173679e-06, "loss": 0.0, "step": 5810 }, { "epoch": 0.0401526075048121, "grad_norm": 0.00011242792243137956, "learning_rate": 9.599025850828233e-06, "loss": 0.0, "step": 5820 }, { "epoch": 0.040221598239356456, "grad_norm": 0.0, "learning_rate": 9.59833594348279e-06, "loss": 0.1506, "step": 5830 }, { "epoch": 0.0402905889739008, "grad_norm": 0.0, "learning_rate": 9.597646036137348e-06, "loss": 0.0, "step": 5840 }, { "epoch": 0.040359579708445156, "grad_norm": 0.012896775268018246, "learning_rate": 9.596956128791905e-06, "loss": 0.0, "step": 5850 }, { "epoch": 0.0404285704429895, "grad_norm": 9.144940360528153e-10, "learning_rate": 9.596266221446461e-06, "loss": 0.0, "step": 5860 }, { "epoch": 0.04049756117753386, "grad_norm": 0.0, "learning_rate": 9.595576314101017e-06, "loss": 0.002, "step": 5870 }, { "epoch": 0.04056655191207821, "grad_norm": 4.3111356151648295e-10, "learning_rate": 9.594886406755574e-06, "loss": 0.0277, "step": 5880 }, { "epoch": 0.04063554264662256, "grad_norm": 0.0, "learning_rate": 9.59419649941013e-06, "loss": 0.1753, "step": 5890 }, { "epoch": 0.04070453338116691, "grad_norm": 0.0, "learning_rate": 9.593506592064687e-06, "loss": 0.0001, "step": 5900 }, { "epoch": 0.04077352411571126, "grad_norm": 0.0, "learning_rate": 9.592816684719243e-06, "loss": 0.0, "step": 5910 }, { "epoch": 0.04084251485025561, "grad_norm": 1.0126439775604013e-07, "learning_rate": 9.5921267773738e-06, "loss": 0.0001, "step": 5920 }, { "epoch": 0.04091150558479996, "grad_norm": 0.0, "learning_rate": 9.591436870028355e-06, "loss": 0.0, "step": 5930 }, { "epoch": 0.04098049631934431, "grad_norm": 0.0, "learning_rate": 9.590746962682911e-06, "loss": 0.0001, "step": 5940 }, { "epoch": 0.04104948705388866, "grad_norm": 0.13898760080337524, "learning_rate": 9.59005705533747e-06, "loss": 0.0, "step": 5950 }, { "epoch": 0.041118477788433014, "grad_norm": 0.0, "learning_rate": 9.589367147992026e-06, "loss": 0.0002, "step": 5960 }, { "epoch": 0.04118746852297736, "grad_norm": 0.0, "learning_rate": 9.588677240646582e-06, "loss": 0.0, "step": 5970 }, { "epoch": 0.041256459257521715, "grad_norm": 0.0, "learning_rate": 9.587987333301139e-06, "loss": 0.0005, "step": 5980 }, { "epoch": 0.04132544999206607, "grad_norm": 0.0, "learning_rate": 9.587297425955695e-06, "loss": 0.0, "step": 5990 }, { "epoch": 0.041394440726610415, "grad_norm": 0.001535198651254177, "learning_rate": 9.586607518610252e-06, "loss": 1.9234, "step": 6000 }, { "epoch": 0.04146343146115477, "grad_norm": 0.0, "learning_rate": 9.585917611264808e-06, "loss": 0.0001, "step": 6010 }, { "epoch": 0.041532422195699116, "grad_norm": 1.0176873729506042e-05, "learning_rate": 9.585227703919365e-06, "loss": 0.0, "step": 6020 }, { "epoch": 0.04160141293024347, "grad_norm": 0.00411896500736475, "learning_rate": 9.584537796573921e-06, "loss": 0.0, "step": 6030 }, { "epoch": 0.04167040366478782, "grad_norm": 0.0, "learning_rate": 9.583847889228476e-06, "loss": 0.0, "step": 6040 }, { "epoch": 0.04173939439933217, "grad_norm": 1.2623466716377152e-08, "learning_rate": 9.583157981883033e-06, "loss": 0.0538, "step": 6050 }, { "epoch": 0.04180838513387652, "grad_norm": 2.3080203845893266e-06, "learning_rate": 9.58246807453759e-06, "loss": 0.0, "step": 6060 }, { "epoch": 0.04187737586842087, "grad_norm": 3.325064596992888e-07, "learning_rate": 9.581778167192147e-06, "loss": 0.0008, "step": 6070 }, { "epoch": 0.041946366602965225, "grad_norm": 0.0031659924425184727, "learning_rate": 9.581088259846704e-06, "loss": 0.0003, "step": 6080 }, { "epoch": 0.04201535733750957, "grad_norm": 0.0, "learning_rate": 9.58039835250126e-06, "loss": 0.0, "step": 6090 }, { "epoch": 0.042084348072053926, "grad_norm": 4.2391895549442893e-10, "learning_rate": 9.579708445155817e-06, "loss": 0.0, "step": 6100 }, { "epoch": 0.04215333880659827, "grad_norm": 0.0009250965085811913, "learning_rate": 9.579018537810373e-06, "loss": 0.0001, "step": 6110 }, { "epoch": 0.04222232954114263, "grad_norm": 8.939063933510738e-10, "learning_rate": 9.57832863046493e-06, "loss": 0.0, "step": 6120 }, { "epoch": 0.042291320275686974, "grad_norm": 5.28221335116541e-06, "learning_rate": 9.577638723119486e-06, "loss": 0.0693, "step": 6130 }, { "epoch": 0.04236031101023133, "grad_norm": 0.0, "learning_rate": 9.576948815774043e-06, "loss": 0.0, "step": 6140 }, { "epoch": 0.042429301744775674, "grad_norm": 0.0, "learning_rate": 9.5762589084286e-06, "loss": 0.0009, "step": 6150 }, { "epoch": 0.04249829247932003, "grad_norm": 0.0, "learning_rate": 9.575569001083154e-06, "loss": 0.0, "step": 6160 }, { "epoch": 0.042567283213864375, "grad_norm": 0.0, "learning_rate": 9.574879093737712e-06, "loss": 0.0011, "step": 6170 }, { "epoch": 0.04263627394840873, "grad_norm": 669.7105102539062, "learning_rate": 9.574189186392269e-06, "loss": 0.1937, "step": 6180 }, { "epoch": 0.04270526468295308, "grad_norm": 0.0, "learning_rate": 9.573499279046825e-06, "loss": 0.0, "step": 6190 }, { "epoch": 0.04277425541749743, "grad_norm": 9.243245585821569e-05, "learning_rate": 9.572809371701382e-06, "loss": 0.0, "step": 6200 }, { "epoch": 0.04284324615204178, "grad_norm": 159.6192169189453, "learning_rate": 9.572119464355938e-06, "loss": 0.0274, "step": 6210 }, { "epoch": 0.04291223688658613, "grad_norm": 0.0, "learning_rate": 9.571429557010495e-06, "loss": 0.0, "step": 6220 }, { "epoch": 0.042981227621130484, "grad_norm": 0.0, "learning_rate": 9.570739649665051e-06, "loss": 0.0, "step": 6230 }, { "epoch": 0.04305021835567483, "grad_norm": 0.0, "learning_rate": 9.570049742319608e-06, "loss": 0.0133, "step": 6240 }, { "epoch": 0.043119209090219185, "grad_norm": 0.01762031577527523, "learning_rate": 9.569359834974164e-06, "loss": 0.0, "step": 6250 }, { "epoch": 0.04318819982476353, "grad_norm": 0.0, "learning_rate": 9.56866992762872e-06, "loss": 0.0, "step": 6260 }, { "epoch": 0.043257190559307886, "grad_norm": 0.000838885607663542, "learning_rate": 9.567980020283275e-06, "loss": 0.0004, "step": 6270 }, { "epoch": 0.04332618129385223, "grad_norm": 0.0, "learning_rate": 9.567290112937834e-06, "loss": 0.0, "step": 6280 }, { "epoch": 0.043395172028396586, "grad_norm": 0.0, "learning_rate": 9.56660020559239e-06, "loss": 0.0, "step": 6290 }, { "epoch": 0.04346416276294094, "grad_norm": 0.0, "learning_rate": 9.565910298246947e-06, "loss": 0.0, "step": 6300 }, { "epoch": 0.04353315349748529, "grad_norm": 0.0, "learning_rate": 9.565220390901503e-06, "loss": 0.0297, "step": 6310 }, { "epoch": 0.04360214423202964, "grad_norm": 0.0, "learning_rate": 9.56453048355606e-06, "loss": 0.0, "step": 6320 }, { "epoch": 0.04367113496657399, "grad_norm": 0.0, "learning_rate": 9.563840576210616e-06, "loss": 0.0, "step": 6330 }, { "epoch": 0.04374012570111834, "grad_norm": 0.0, "learning_rate": 9.563150668865173e-06, "loss": 0.0015, "step": 6340 }, { "epoch": 0.04380911643566269, "grad_norm": 0.0, "learning_rate": 9.562460761519729e-06, "loss": 0.0, "step": 6350 }, { "epoch": 0.04387810717020704, "grad_norm": 0.0, "learning_rate": 9.561770854174286e-06, "loss": 0.0127, "step": 6360 }, { "epoch": 0.04394709790475139, "grad_norm": 1.602687370905187e-05, "learning_rate": 9.561080946828842e-06, "loss": 0.0072, "step": 6370 }, { "epoch": 0.04401608863929574, "grad_norm": 0.00013067858526483178, "learning_rate": 9.560391039483397e-06, "loss": 0.0254, "step": 6380 }, { "epoch": 0.04408507937384009, "grad_norm": 0.0, "learning_rate": 9.559701132137955e-06, "loss": 0.0, "step": 6390 }, { "epoch": 0.044154070108384444, "grad_norm": 0.0, "learning_rate": 9.559011224792511e-06, "loss": 0.0, "step": 6400 }, { "epoch": 0.0442230608429288, "grad_norm": 0.10909099131822586, "learning_rate": 9.558321317447068e-06, "loss": 0.0, "step": 6410 }, { "epoch": 0.044292051577473145, "grad_norm": 0.0, "learning_rate": 9.557631410101624e-06, "loss": 0.1475, "step": 6420 }, { "epoch": 0.0443610423120175, "grad_norm": 0.0, "learning_rate": 9.556941502756181e-06, "loss": 0.0, "step": 6430 }, { "epoch": 0.044430033046561845, "grad_norm": 0.0, "learning_rate": 9.556251595410737e-06, "loss": 0.0537, "step": 6440 }, { "epoch": 0.0444990237811062, "grad_norm": 0.0, "learning_rate": 9.555561688065294e-06, "loss": 0.0002, "step": 6450 }, { "epoch": 0.044568014515650546, "grad_norm": 0.0, "learning_rate": 9.55487178071985e-06, "loss": 0.0024, "step": 6460 }, { "epoch": 0.0446370052501949, "grad_norm": 0.0, "learning_rate": 9.554181873374407e-06, "loss": 0.0, "step": 6470 }, { "epoch": 0.04470599598473925, "grad_norm": 0.0, "learning_rate": 9.553491966028963e-06, "loss": 0.0002, "step": 6480 }, { "epoch": 0.0447749867192836, "grad_norm": 1.6138839721679688, "learning_rate": 9.55280205868352e-06, "loss": 0.0025, "step": 6490 }, { "epoch": 0.04484397745382795, "grad_norm": 9.541729628637086e-10, "learning_rate": 9.552112151338076e-06, "loss": 0.012, "step": 6500 }, { "epoch": 0.0449129681883723, "grad_norm": 0.0, "learning_rate": 9.551422243992633e-06, "loss": 0.0023, "step": 6510 }, { "epoch": 0.044981958922916655, "grad_norm": 5.9090962167829275e-06, "learning_rate": 9.55073233664719e-06, "loss": 0.0123, "step": 6520 }, { "epoch": 0.045050949657461, "grad_norm": 0.0, "learning_rate": 9.550042429301746e-06, "loss": 0.0001, "step": 6530 }, { "epoch": 0.045119940392005356, "grad_norm": 0.0, "learning_rate": 9.549352521956302e-06, "loss": 0.0, "step": 6540 }, { "epoch": 0.0451889311265497, "grad_norm": 0.0, "learning_rate": 9.548662614610859e-06, "loss": 0.0, "step": 6550 }, { "epoch": 0.04525792186109406, "grad_norm": 0.0, "learning_rate": 9.547972707265415e-06, "loss": 0.0, "step": 6560 }, { "epoch": 0.045326912595638404, "grad_norm": 0.0, "learning_rate": 9.547282799919972e-06, "loss": 0.0, "step": 6570 }, { "epoch": 0.04539590333018276, "grad_norm": 0.0, "learning_rate": 9.546592892574528e-06, "loss": 0.0008, "step": 6580 }, { "epoch": 0.045464894064727104, "grad_norm": 0.029501691460609436, "learning_rate": 9.545902985229085e-06, "loss": 0.0, "step": 6590 }, { "epoch": 0.04553388479927146, "grad_norm": 0.28499457240104675, "learning_rate": 9.545213077883641e-06, "loss": 0.0001, "step": 6600 }, { "epoch": 0.045602875533815805, "grad_norm": 0.0, "learning_rate": 9.544523170538198e-06, "loss": 0.0, "step": 6610 }, { "epoch": 0.04567186626836016, "grad_norm": 0.0, "learning_rate": 9.543833263192754e-06, "loss": 0.0392, "step": 6620 }, { "epoch": 0.04574085700290451, "grad_norm": 4.873459968912641e-10, "learning_rate": 9.54314335584731e-06, "loss": 0.0, "step": 6630 }, { "epoch": 0.04580984773744886, "grad_norm": 0.0, "learning_rate": 9.542453448501867e-06, "loss": 0.0, "step": 6640 }, { "epoch": 0.045878838471993214, "grad_norm": 1.0703723773985985e-06, "learning_rate": 9.541763541156424e-06, "loss": 0.0, "step": 6650 }, { "epoch": 0.04594782920653756, "grad_norm": 1.568539664731361e-05, "learning_rate": 9.54107363381098e-06, "loss": 0.0, "step": 6660 }, { "epoch": 0.046016819941081914, "grad_norm": 1.4101584344672347e-09, "learning_rate": 9.540383726465537e-06, "loss": 0.0, "step": 6670 }, { "epoch": 0.04608581067562626, "grad_norm": 0.0, "learning_rate": 9.539693819120093e-06, "loss": 0.0, "step": 6680 }, { "epoch": 0.046154801410170615, "grad_norm": 0.00038418613257817924, "learning_rate": 9.53900391177465e-06, "loss": 0.0, "step": 6690 }, { "epoch": 0.04622379214471496, "grad_norm": 0.0, "learning_rate": 9.538314004429206e-06, "loss": 0.1913, "step": 6700 }, { "epoch": 0.046292782879259316, "grad_norm": 0.0, "learning_rate": 9.537624097083763e-06, "loss": 0.0, "step": 6710 }, { "epoch": 0.04636177361380367, "grad_norm": 0.0, "learning_rate": 9.53693418973832e-06, "loss": 0.0, "step": 6720 }, { "epoch": 0.046430764348348016, "grad_norm": 0.0, "learning_rate": 9.536244282392876e-06, "loss": 0.0, "step": 6730 }, { "epoch": 0.04649975508289237, "grad_norm": 0.0, "learning_rate": 9.535554375047432e-06, "loss": 0.0, "step": 6740 }, { "epoch": 0.04656874581743672, "grad_norm": 0.0, "learning_rate": 9.534864467701989e-06, "loss": 0.0, "step": 6750 }, { "epoch": 0.04663773655198107, "grad_norm": 0.0, "learning_rate": 9.534174560356545e-06, "loss": 0.0089, "step": 6760 }, { "epoch": 0.04670672728652542, "grad_norm": 4.657961483189865e-07, "learning_rate": 9.533484653011102e-06, "loss": 0.0133, "step": 6770 }, { "epoch": 0.04677571802106977, "grad_norm": 0.0, "learning_rate": 9.532794745665658e-06, "loss": 0.0, "step": 6780 }, { "epoch": 0.04684470875561412, "grad_norm": 0.0, "learning_rate": 9.532104838320215e-06, "loss": 0.0, "step": 6790 }, { "epoch": 0.04691369949015847, "grad_norm": 0.0, "learning_rate": 9.531414930974771e-06, "loss": 0.0, "step": 6800 }, { "epoch": 0.04698269022470282, "grad_norm": 0.001140069798566401, "learning_rate": 9.530725023629328e-06, "loss": 0.0248, "step": 6810 }, { "epoch": 0.04705168095924717, "grad_norm": 2.2088217366267315e-10, "learning_rate": 9.530035116283884e-06, "loss": 0.0, "step": 6820 }, { "epoch": 0.04712067169379153, "grad_norm": 1.510604079157929e-06, "learning_rate": 9.52934520893844e-06, "loss": 0.0, "step": 6830 }, { "epoch": 0.047189662428335874, "grad_norm": 0.0, "learning_rate": 9.528655301592997e-06, "loss": 0.0, "step": 6840 }, { "epoch": 0.04725865316288023, "grad_norm": 0.0, "learning_rate": 9.527965394247554e-06, "loss": 0.0, "step": 6850 }, { "epoch": 0.047327643897424575, "grad_norm": 0.0, "learning_rate": 9.52727548690211e-06, "loss": 0.0006, "step": 6860 }, { "epoch": 0.04739663463196893, "grad_norm": 0.0, "learning_rate": 9.526585579556667e-06, "loss": 0.0, "step": 6870 }, { "epoch": 0.047465625366513275, "grad_norm": 0.0, "learning_rate": 9.525895672211223e-06, "loss": 0.0, "step": 6880 }, { "epoch": 0.04753461610105763, "grad_norm": 0.0, "learning_rate": 9.52520576486578e-06, "loss": 0.0, "step": 6890 }, { "epoch": 0.047603606835601976, "grad_norm": 0.0, "learning_rate": 9.524515857520336e-06, "loss": 0.0001, "step": 6900 }, { "epoch": 0.04767259757014633, "grad_norm": 5.076705988926733e-08, "learning_rate": 9.523825950174892e-06, "loss": 0.0, "step": 6910 }, { "epoch": 0.04774158830469068, "grad_norm": 0.0, "learning_rate": 9.523136042829449e-06, "loss": 0.0, "step": 6920 }, { "epoch": 0.04781057903923503, "grad_norm": 2.04520400615138e-08, "learning_rate": 9.522446135484005e-06, "loss": 0.0, "step": 6930 }, { "epoch": 0.047879569773779385, "grad_norm": 0.0, "learning_rate": 9.521756228138562e-06, "loss": 0.0, "step": 6940 }, { "epoch": 0.04794856050832373, "grad_norm": 0.0002855256025213748, "learning_rate": 9.521066320793118e-06, "loss": 0.0089, "step": 6950 }, { "epoch": 0.048017551242868085, "grad_norm": 0.0, "learning_rate": 9.520376413447675e-06, "loss": 0.0, "step": 6960 }, { "epoch": 0.04808654197741243, "grad_norm": 0.0, "learning_rate": 9.519686506102231e-06, "loss": 0.0, "step": 6970 }, { "epoch": 0.048155532711956786, "grad_norm": 0.003345920704305172, "learning_rate": 9.518996598756788e-06, "loss": 0.0, "step": 6980 }, { "epoch": 0.04822452344650113, "grad_norm": 0.0, "learning_rate": 9.518306691411344e-06, "loss": 0.0, "step": 6990 }, { "epoch": 0.04829351418104549, "grad_norm": 0.0, "learning_rate": 9.517616784065901e-06, "loss": 0.0, "step": 7000 }, { "epoch": 0.048362504915589834, "grad_norm": 8.28308998279681e-07, "learning_rate": 9.516926876720457e-06, "loss": 0.0, "step": 7010 }, { "epoch": 0.04843149565013419, "grad_norm": 0.0, "learning_rate": 9.516236969375014e-06, "loss": 0.0, "step": 7020 }, { "epoch": 0.048500486384678534, "grad_norm": 0.0, "learning_rate": 9.51554706202957e-06, "loss": 0.0, "step": 7030 }, { "epoch": 0.04856947711922289, "grad_norm": 0.0, "learning_rate": 9.514857154684127e-06, "loss": 0.0, "step": 7040 }, { "epoch": 0.04863846785376724, "grad_norm": 1.907149282942555e-08, "learning_rate": 9.514167247338683e-06, "loss": 0.0235, "step": 7050 }, { "epoch": 0.04870745858831159, "grad_norm": 5.629592851619236e-05, "learning_rate": 9.51347733999324e-06, "loss": 0.0, "step": 7060 }, { "epoch": 0.04877644932285594, "grad_norm": 1.383468628546325e-07, "learning_rate": 9.51285642338234e-06, "loss": 0.2564, "step": 7070 }, { "epoch": 0.04884544005740029, "grad_norm": 0.0, "learning_rate": 9.512166516036897e-06, "loss": 0.0, "step": 7080 }, { "epoch": 0.048914430791944644, "grad_norm": 0.0, "learning_rate": 9.511476608691454e-06, "loss": 0.0131, "step": 7090 }, { "epoch": 0.04898342152648899, "grad_norm": 0.0, "learning_rate": 9.51078670134601e-06, "loss": 0.0, "step": 7100 }, { "epoch": 0.049052412261033344, "grad_norm": 0.0, "learning_rate": 9.510096794000566e-06, "loss": 0.0, "step": 7110 }, { "epoch": 0.04912140299557769, "grad_norm": 0.0, "learning_rate": 9.509406886655123e-06, "loss": 0.0, "step": 7120 }, { "epoch": 0.049190393730122045, "grad_norm": 0.0, "learning_rate": 9.50871697930968e-06, "loss": 0.0021, "step": 7130 }, { "epoch": 0.04925938446466639, "grad_norm": 0.0, "learning_rate": 9.508027071964236e-06, "loss": 0.0, "step": 7140 }, { "epoch": 0.049328375199210746, "grad_norm": 0.0, "learning_rate": 9.507337164618792e-06, "loss": 0.0, "step": 7150 }, { "epoch": 0.0493973659337551, "grad_norm": 0.0, "learning_rate": 9.506647257273349e-06, "loss": 0.0, "step": 7160 }, { "epoch": 0.04946635666829945, "grad_norm": 0.0, "learning_rate": 9.505957349927905e-06, "loss": 0.0, "step": 7170 }, { "epoch": 0.0495353474028438, "grad_norm": 0.0, "learning_rate": 9.505267442582462e-06, "loss": 0.0, "step": 7180 }, { "epoch": 0.04960433813738815, "grad_norm": 1.5708725452423096, "learning_rate": 9.504577535237018e-06, "loss": 0.0003, "step": 7190 }, { "epoch": 0.0496733288719325, "grad_norm": 0.0, "learning_rate": 9.503887627891575e-06, "loss": 0.0, "step": 7200 }, { "epoch": 0.04974231960647685, "grad_norm": 0.012020227499306202, "learning_rate": 9.503197720546131e-06, "loss": 0.0, "step": 7210 }, { "epoch": 0.0498113103410212, "grad_norm": 0.0, "learning_rate": 9.502507813200688e-06, "loss": 0.0001, "step": 7220 }, { "epoch": 0.04988030107556555, "grad_norm": 0.0, "learning_rate": 9.501817905855244e-06, "loss": 0.0, "step": 7230 }, { "epoch": 0.0499492918101099, "grad_norm": 8.542450302684301e-08, "learning_rate": 9.501127998509801e-06, "loss": 0.0003, "step": 7240 }, { "epoch": 0.05001828254465425, "grad_norm": 0.0, "learning_rate": 9.500438091164357e-06, "loss": 0.0, "step": 7250 }, { "epoch": 0.0500872732791986, "grad_norm": 5.3104047775268555, "learning_rate": 9.499748183818914e-06, "loss": 0.0016, "step": 7260 }, { "epoch": 0.05015626401374296, "grad_norm": 4.8928811224868696e-08, "learning_rate": 9.49905827647347e-06, "loss": 0.0107, "step": 7270 }, { "epoch": 0.050225254748287304, "grad_norm": 0.0, "learning_rate": 9.498368369128027e-06, "loss": 0.0, "step": 7280 }, { "epoch": 0.05029424548283166, "grad_norm": 0.0, "learning_rate": 9.497678461782583e-06, "loss": 0.0, "step": 7290 }, { "epoch": 0.050363236217376005, "grad_norm": 0.0, "learning_rate": 9.49698855443714e-06, "loss": 0.0, "step": 7300 }, { "epoch": 0.05043222695192036, "grad_norm": 0.0, "learning_rate": 9.496298647091696e-06, "loss": 0.0, "step": 7310 }, { "epoch": 0.050501217686464706, "grad_norm": 0.0, "learning_rate": 9.495608739746253e-06, "loss": 0.017, "step": 7320 }, { "epoch": 0.05057020842100906, "grad_norm": 4.380771940759587e-07, "learning_rate": 9.49491883240081e-06, "loss": 0.0, "step": 7330 }, { "epoch": 0.050639199155553406, "grad_norm": 0.0, "learning_rate": 9.494228925055366e-06, "loss": 0.0, "step": 7340 }, { "epoch": 0.05070818989009776, "grad_norm": 0.001265982398763299, "learning_rate": 9.493539017709922e-06, "loss": 0.0, "step": 7350 }, { "epoch": 0.050777180624642114, "grad_norm": 0.0, "learning_rate": 9.492849110364479e-06, "loss": 0.0003, "step": 7360 }, { "epoch": 0.05084617135918646, "grad_norm": 5.745352268218994, "learning_rate": 9.492159203019035e-06, "loss": 0.0009, "step": 7370 }, { "epoch": 0.050915162093730815, "grad_norm": 0.0, "learning_rate": 9.491469295673592e-06, "loss": 0.0, "step": 7380 }, { "epoch": 0.05098415282827516, "grad_norm": 6.952218245714903e-05, "learning_rate": 9.490779388328148e-06, "loss": 0.0017, "step": 7390 }, { "epoch": 0.051053143562819515, "grad_norm": 0.00034228002186864614, "learning_rate": 9.490089480982705e-06, "loss": 0.0, "step": 7400 }, { "epoch": 0.05112213429736386, "grad_norm": 0.0, "learning_rate": 9.489399573637261e-06, "loss": 0.0, "step": 7410 }, { "epoch": 0.051191125031908216, "grad_norm": 1.0179088860695629e-07, "learning_rate": 9.488709666291818e-06, "loss": 0.0137, "step": 7420 }, { "epoch": 0.05126011576645256, "grad_norm": 0.0, "learning_rate": 9.488019758946374e-06, "loss": 0.0005, "step": 7430 }, { "epoch": 0.05132910650099692, "grad_norm": 5.103669709072278e-10, "learning_rate": 9.48732985160093e-06, "loss": 0.058, "step": 7440 }, { "epoch": 0.051398097235541264, "grad_norm": 0.0, "learning_rate": 9.486639944255487e-06, "loss": 0.001, "step": 7450 }, { "epoch": 0.05146708797008562, "grad_norm": 0.0, "learning_rate": 9.485950036910044e-06, "loss": 0.0003, "step": 7460 }, { "epoch": 0.05153607870462997, "grad_norm": 0.0001053997257258743, "learning_rate": 9.4852601295646e-06, "loss": 0.0, "step": 7470 }, { "epoch": 0.05160506943917432, "grad_norm": 0.007867366075515747, "learning_rate": 9.484570222219157e-06, "loss": 0.0, "step": 7480 }, { "epoch": 0.05167406017371867, "grad_norm": 0.0, "learning_rate": 9.483880314873713e-06, "loss": 0.0108, "step": 7490 }, { "epoch": 0.05174305090826302, "grad_norm": 6.209116065747367e-08, "learning_rate": 9.48319040752827e-06, "loss": 0.0007, "step": 7500 }, { "epoch": 0.05181204164280737, "grad_norm": 20.135934829711914, "learning_rate": 9.482500500182826e-06, "loss": 0.0038, "step": 7510 }, { "epoch": 0.05188103237735172, "grad_norm": 0.0, "learning_rate": 9.481810592837383e-06, "loss": 0.0172, "step": 7520 }, { "epoch": 0.051950023111896074, "grad_norm": 0.0, "learning_rate": 9.481120685491939e-06, "loss": 0.0, "step": 7530 }, { "epoch": 0.05201901384644042, "grad_norm": 21.914880752563477, "learning_rate": 9.480430778146496e-06, "loss": 0.007, "step": 7540 }, { "epoch": 0.052088004580984774, "grad_norm": 0.0, "learning_rate": 9.479740870801052e-06, "loss": 0.0226, "step": 7550 }, { "epoch": 0.05215699531552912, "grad_norm": 0.0, "learning_rate": 9.479050963455609e-06, "loss": 0.0002, "step": 7560 }, { "epoch": 0.052225986050073475, "grad_norm": 0.0, "learning_rate": 9.478361056110165e-06, "loss": 0.0, "step": 7570 }, { "epoch": 0.05229497678461783, "grad_norm": 3.8140576918976876e-08, "learning_rate": 9.477671148764722e-06, "loss": 0.0, "step": 7580 }, { "epoch": 0.052363967519162176, "grad_norm": 7.151640613045629e-09, "learning_rate": 9.476981241419278e-06, "loss": 0.0368, "step": 7590 }, { "epoch": 0.05243295825370653, "grad_norm": 0.0, "learning_rate": 9.476291334073835e-06, "loss": 0.0, "step": 7600 }, { "epoch": 0.05250194898825088, "grad_norm": 0.0, "learning_rate": 9.475601426728391e-06, "loss": 0.0131, "step": 7610 }, { "epoch": 0.05257093972279523, "grad_norm": 0.0, "learning_rate": 9.474911519382947e-06, "loss": 0.0, "step": 7620 }, { "epoch": 0.05263993045733958, "grad_norm": 0.0, "learning_rate": 9.474221612037504e-06, "loss": 0.0, "step": 7630 }, { "epoch": 0.05270892119188393, "grad_norm": 0.0, "learning_rate": 9.47353170469206e-06, "loss": 0.0013, "step": 7640 }, { "epoch": 0.05277791192642828, "grad_norm": 0.0, "learning_rate": 9.472841797346617e-06, "loss": 0.0, "step": 7650 }, { "epoch": 0.05284690266097263, "grad_norm": 0.0, "learning_rate": 9.472151890001173e-06, "loss": 0.002, "step": 7660 }, { "epoch": 0.05291589339551698, "grad_norm": 0.0, "learning_rate": 9.47146198265573e-06, "loss": 0.0, "step": 7670 }, { "epoch": 0.05298488413006133, "grad_norm": 0.0, "learning_rate": 9.470772075310286e-06, "loss": 0.0, "step": 7680 }, { "epoch": 0.053053874864605687, "grad_norm": 4.2308642700383814e-10, "learning_rate": 9.470082167964843e-06, "loss": 0.0, "step": 7690 }, { "epoch": 0.05312286559915003, "grad_norm": 0.0, "learning_rate": 9.4693922606194e-06, "loss": 0.0, "step": 7700 }, { "epoch": 0.05319185633369439, "grad_norm": 0.0, "learning_rate": 9.468702353273956e-06, "loss": 0.0, "step": 7710 }, { "epoch": 0.053260847068238734, "grad_norm": 0.0, "learning_rate": 9.468012445928512e-06, "loss": 0.0, "step": 7720 }, { "epoch": 0.05332983780278309, "grad_norm": 9.698508662836502e-09, "learning_rate": 9.467322538583069e-06, "loss": 0.0139, "step": 7730 }, { "epoch": 0.053398828537327435, "grad_norm": 0.0, "learning_rate": 9.466632631237625e-06, "loss": 0.0, "step": 7740 }, { "epoch": 0.05346781927187179, "grad_norm": 0.0, "learning_rate": 9.465942723892182e-06, "loss": 0.0, "step": 7750 }, { "epoch": 0.053536810006416136, "grad_norm": 0.0, "learning_rate": 9.465252816546738e-06, "loss": 0.014, "step": 7760 }, { "epoch": 0.05360580074096049, "grad_norm": 0.0, "learning_rate": 9.464562909201295e-06, "loss": 0.0, "step": 7770 }, { "epoch": 0.053674791475504836, "grad_norm": 0.0, "learning_rate": 9.463873001855851e-06, "loss": 0.0, "step": 7780 }, { "epoch": 0.05374378221004919, "grad_norm": 0.0, "learning_rate": 9.463183094510408e-06, "loss": 0.0, "step": 7790 }, { "epoch": 0.053812772944593544, "grad_norm": 1.6528139894944616e-05, "learning_rate": 9.462493187164964e-06, "loss": 0.0002, "step": 7800 }, { "epoch": 0.05388176367913789, "grad_norm": 0.0, "learning_rate": 9.46180327981952e-06, "loss": 0.0011, "step": 7810 }, { "epoch": 0.053950754413682245, "grad_norm": 0.0, "learning_rate": 9.461113372474077e-06, "loss": 0.0, "step": 7820 }, { "epoch": 0.05401974514822659, "grad_norm": 0.0, "learning_rate": 9.460423465128634e-06, "loss": 0.0003, "step": 7830 }, { "epoch": 0.054088735882770946, "grad_norm": 1.1157307624816895, "learning_rate": 9.45973355778319e-06, "loss": 0.0002, "step": 7840 }, { "epoch": 0.05415772661731529, "grad_norm": 0.0, "learning_rate": 9.459043650437747e-06, "loss": 0.0002, "step": 7850 }, { "epoch": 0.054226717351859646, "grad_norm": 3.301774853525785e-09, "learning_rate": 9.458353743092303e-06, "loss": 0.0, "step": 7860 }, { "epoch": 0.05429570808640399, "grad_norm": 0.0, "learning_rate": 9.45766383574686e-06, "loss": 0.0, "step": 7870 }, { "epoch": 0.05436469882094835, "grad_norm": 0.0, "learning_rate": 9.456973928401416e-06, "loss": 0.0, "step": 7880 }, { "epoch": 0.054433689555492694, "grad_norm": 0.0, "learning_rate": 9.456284021055973e-06, "loss": 0.0001, "step": 7890 }, { "epoch": 0.05450268029003705, "grad_norm": 0.0, "learning_rate": 9.45559411371053e-06, "loss": 0.0005, "step": 7900 }, { "epoch": 0.0545716710245814, "grad_norm": 0.0, "learning_rate": 9.454904206365086e-06, "loss": 0.0002, "step": 7910 }, { "epoch": 0.05464066175912575, "grad_norm": 2.030442090017459e-07, "learning_rate": 9.454214299019642e-06, "loss": 0.0, "step": 7920 }, { "epoch": 0.0547096524936701, "grad_norm": 4.900098105053985e-10, "learning_rate": 9.453524391674199e-06, "loss": 0.226, "step": 7930 }, { "epoch": 0.05477864322821445, "grad_norm": 0.0, "learning_rate": 9.452834484328755e-06, "loss": 0.0, "step": 7940 }, { "epoch": 0.0548476339627588, "grad_norm": 0.0, "learning_rate": 9.452144576983312e-06, "loss": 0.0, "step": 7950 }, { "epoch": 0.05491662469730315, "grad_norm": 0.0, "learning_rate": 9.451454669637868e-06, "loss": 0.0, "step": 7960 }, { "epoch": 0.054985615431847504, "grad_norm": 0.19920563697814941, "learning_rate": 9.450764762292425e-06, "loss": 0.0, "step": 7970 }, { "epoch": 0.05505460616639185, "grad_norm": 0.00010274054511683062, "learning_rate": 9.450074854946981e-06, "loss": 0.0, "step": 7980 }, { "epoch": 0.055123596900936205, "grad_norm": 2.021112413785886e-05, "learning_rate": 9.449384947601538e-06, "loss": 0.0002, "step": 7990 }, { "epoch": 0.05519258763548056, "grad_norm": 0.00023247135686688125, "learning_rate": 9.448695040256094e-06, "loss": 0.0058, "step": 8000 }, { "epoch": 0.055261578370024905, "grad_norm": 211.25997924804688, "learning_rate": 9.44800513291065e-06, "loss": 0.0423, "step": 8010 }, { "epoch": 0.05533056910456926, "grad_norm": 0.0, "learning_rate": 9.447384216299751e-06, "loss": 0.7328, "step": 8020 }, { "epoch": 0.055399559839113606, "grad_norm": 0.0, "learning_rate": 9.446694308954308e-06, "loss": 0.0001, "step": 8030 }, { "epoch": 0.05546855057365796, "grad_norm": 0.0, "learning_rate": 9.446004401608864e-06, "loss": 0.0, "step": 8040 }, { "epoch": 0.05553754130820231, "grad_norm": 4.76493555723323e-09, "learning_rate": 9.44531449426342e-06, "loss": 0.0, "step": 8050 }, { "epoch": 0.05560653204274666, "grad_norm": 9.497167496874681e-10, "learning_rate": 9.444624586917977e-06, "loss": 0.0, "step": 8060 }, { "epoch": 0.05567552277729101, "grad_norm": 0.0023989698383957148, "learning_rate": 9.443934679572534e-06, "loss": 0.0, "step": 8070 }, { "epoch": 0.05574451351183536, "grad_norm": 0.0, "learning_rate": 9.44324477222709e-06, "loss": 0.0, "step": 8080 }, { "epoch": 0.05581350424637971, "grad_norm": 0.0, "learning_rate": 9.442554864881647e-06, "loss": 0.0003, "step": 8090 }, { "epoch": 0.05588249498092406, "grad_norm": 0.009688925929367542, "learning_rate": 9.441864957536203e-06, "loss": 0.0, "step": 8100 }, { "epoch": 0.055951485715468416, "grad_norm": 0.0, "learning_rate": 9.441175050190761e-06, "loss": 0.0, "step": 8110 }, { "epoch": 0.05602047645001276, "grad_norm": 0.0040071685798466206, "learning_rate": 9.440485142845316e-06, "loss": 0.0, "step": 8120 }, { "epoch": 0.05608946718455712, "grad_norm": 0.0, "learning_rate": 9.439795235499873e-06, "loss": 0.0009, "step": 8130 }, { "epoch": 0.056158457919101464, "grad_norm": 0.0, "learning_rate": 9.43910532815443e-06, "loss": 0.0, "step": 8140 }, { "epoch": 0.05622744865364582, "grad_norm": 0.0, "learning_rate": 9.438415420808986e-06, "loss": 0.0, "step": 8150 }, { "epoch": 0.056296439388190164, "grad_norm": 0.0001427438110113144, "learning_rate": 9.437725513463542e-06, "loss": 0.1418, "step": 8160 }, { "epoch": 0.05636543012273452, "grad_norm": 0.0, "learning_rate": 9.437035606118099e-06, "loss": 0.0, "step": 8170 }, { "epoch": 0.056434420857278865, "grad_norm": 0.0, "learning_rate": 9.436345698772655e-06, "loss": 0.0, "step": 8180 }, { "epoch": 0.05650341159182322, "grad_norm": 9.834275260800496e-06, "learning_rate": 9.435655791427212e-06, "loss": 0.0, "step": 8190 }, { "epoch": 0.056572402326367566, "grad_norm": 0.0, "learning_rate": 9.434965884081768e-06, "loss": 0.0, "step": 8200 }, { "epoch": 0.05664139306091192, "grad_norm": 1.7513029888505116e-05, "learning_rate": 9.434275976736325e-06, "loss": 0.0, "step": 8210 }, { "epoch": 0.05671038379545627, "grad_norm": 0.0, "learning_rate": 9.433586069390883e-06, "loss": 0.0304, "step": 8220 }, { "epoch": 0.05677937453000062, "grad_norm": 32.892425537109375, "learning_rate": 9.43289616204544e-06, "loss": 0.0072, "step": 8230 }, { "epoch": 0.056848365264544974, "grad_norm": 0.0, "learning_rate": 9.432206254699994e-06, "loss": 0.0, "step": 8240 }, { "epoch": 0.05691735599908932, "grad_norm": 0.0, "learning_rate": 9.43151634735455e-06, "loss": 0.0, "step": 8250 }, { "epoch": 0.056986346733633675, "grad_norm": 0.04160303622484207, "learning_rate": 9.430826440009107e-06, "loss": 0.0, "step": 8260 }, { "epoch": 0.05705533746817802, "grad_norm": 0.0, "learning_rate": 9.430136532663664e-06, "loss": 0.6965, "step": 8270 }, { "epoch": 0.057124328202722376, "grad_norm": 0.0, "learning_rate": 9.42944662531822e-06, "loss": 0.0, "step": 8280 }, { "epoch": 0.05719331893726672, "grad_norm": 9.750915808126592e-08, "learning_rate": 9.428756717972777e-06, "loss": 0.0, "step": 8290 }, { "epoch": 0.057262309671811076, "grad_norm": 0.0, "learning_rate": 9.428066810627333e-06, "loss": 0.0058, "step": 8300 }, { "epoch": 0.05733130040635542, "grad_norm": 0.0, "learning_rate": 9.42737690328189e-06, "loss": 0.0964, "step": 8310 }, { "epoch": 0.05740029114089978, "grad_norm": 0.0, "learning_rate": 9.426686995936446e-06, "loss": 0.0, "step": 8320 }, { "epoch": 0.05746928187544413, "grad_norm": 0.12781912088394165, "learning_rate": 9.425997088591004e-06, "loss": 0.0, "step": 8330 }, { "epoch": 0.05753827260998848, "grad_norm": 0.0, "learning_rate": 9.42530718124556e-06, "loss": 0.0001, "step": 8340 }, { "epoch": 0.05760726334453283, "grad_norm": 0.0053254892118275166, "learning_rate": 9.424617273900115e-06, "loss": 0.0071, "step": 8350 }, { "epoch": 0.05767625407907718, "grad_norm": 0.0, "learning_rate": 9.423927366554672e-06, "loss": 0.0, "step": 8360 }, { "epoch": 0.05774524481362153, "grad_norm": 0.002254385966807604, "learning_rate": 9.423237459209228e-06, "loss": 0.0, "step": 8370 }, { "epoch": 0.05781423554816588, "grad_norm": 0.0, "learning_rate": 9.422547551863785e-06, "loss": 0.0, "step": 8380 }, { "epoch": 0.05788322628271023, "grad_norm": 1.6992156588457874e-06, "learning_rate": 9.421857644518341e-06, "loss": 0.0, "step": 8390 }, { "epoch": 0.05795221701725458, "grad_norm": 2.8296074816580585e-08, "learning_rate": 9.421167737172898e-06, "loss": 0.0, "step": 8400 }, { "epoch": 0.058021207751798934, "grad_norm": 0.0, "learning_rate": 9.420477829827454e-06, "loss": 0.0754, "step": 8410 }, { "epoch": 0.05809019848634328, "grad_norm": 0.0, "learning_rate": 9.419787922482011e-06, "loss": 0.0, "step": 8420 }, { "epoch": 0.058159189220887635, "grad_norm": 9.419382877240423e-06, "learning_rate": 9.419098015136567e-06, "loss": 0.0, "step": 8430 }, { "epoch": 0.05822817995543199, "grad_norm": 0.0, "learning_rate": 9.418408107791126e-06, "loss": 0.4281, "step": 8440 }, { "epoch": 0.058297170689976335, "grad_norm": 0.0, "learning_rate": 9.417718200445682e-06, "loss": 0.0, "step": 8450 }, { "epoch": 0.05836616142452069, "grad_norm": 0.0, "learning_rate": 9.417028293100237e-06, "loss": 0.004, "step": 8460 }, { "epoch": 0.058435152159065036, "grad_norm": 0.0, "learning_rate": 9.416338385754793e-06, "loss": 0.0, "step": 8470 }, { "epoch": 0.05850414289360939, "grad_norm": 0.0, "learning_rate": 9.41564847840935e-06, "loss": 0.0, "step": 8480 }, { "epoch": 0.05857313362815374, "grad_norm": 0.0010733334347605705, "learning_rate": 9.414958571063906e-06, "loss": 0.0, "step": 8490 }, { "epoch": 0.05864212436269809, "grad_norm": 0.0, "learning_rate": 9.414268663718463e-06, "loss": 0.0267, "step": 8500 }, { "epoch": 0.05871111509724244, "grad_norm": 0.0, "learning_rate": 9.41357875637302e-06, "loss": 0.0, "step": 8510 }, { "epoch": 0.05878010583178679, "grad_norm": 2.7318827051203698e-05, "learning_rate": 9.412888849027576e-06, "loss": 0.0, "step": 8520 }, { "epoch": 0.05884909656633114, "grad_norm": 0.0, "learning_rate": 9.412198941682132e-06, "loss": 0.0007, "step": 8530 }, { "epoch": 0.05891808730087549, "grad_norm": 0.0, "learning_rate": 9.411509034336689e-06, "loss": 0.0, "step": 8540 }, { "epoch": 0.058987078035419846, "grad_norm": 0.0, "learning_rate": 9.410819126991247e-06, "loss": 0.0087, "step": 8550 }, { "epoch": 0.05905606876996419, "grad_norm": 0.0, "learning_rate": 9.410129219645803e-06, "loss": 0.0008, "step": 8560 }, { "epoch": 0.05912505950450855, "grad_norm": 0.0, "learning_rate": 9.40943931230036e-06, "loss": 0.0, "step": 8570 }, { "epoch": 0.059194050239052894, "grad_norm": 0.0, "learning_rate": 9.408749404954915e-06, "loss": 0.0003, "step": 8580 }, { "epoch": 0.05926304097359725, "grad_norm": 9.69321511945509e-10, "learning_rate": 9.408059497609471e-06, "loss": 0.0012, "step": 8590 }, { "epoch": 0.059332031708141594, "grad_norm": 0.00021176903101149946, "learning_rate": 9.407369590264028e-06, "loss": 0.0, "step": 8600 }, { "epoch": 0.05940102244268595, "grad_norm": 3.326289061078569e-06, "learning_rate": 9.406679682918584e-06, "loss": 0.0029, "step": 8610 }, { "epoch": 0.059470013177230295, "grad_norm": 0.0, "learning_rate": 9.40598977557314e-06, "loss": 0.0, "step": 8620 }, { "epoch": 0.05953900391177465, "grad_norm": 0.0, "learning_rate": 9.405299868227697e-06, "loss": 0.0, "step": 8630 }, { "epoch": 0.059607994646319, "grad_norm": 0.0, "learning_rate": 9.404609960882254e-06, "loss": 0.0007, "step": 8640 }, { "epoch": 0.05967698538086335, "grad_norm": 7.834999996703118e-05, "learning_rate": 9.40392005353681e-06, "loss": 0.0, "step": 8650 }, { "epoch": 0.059745976115407703, "grad_norm": 0.0001593024644535035, "learning_rate": 9.403230146191368e-06, "loss": 0.0001, "step": 8660 }, { "epoch": 0.05981496684995205, "grad_norm": 0.0, "learning_rate": 9.402540238845925e-06, "loss": 0.0, "step": 8670 }, { "epoch": 0.059883957584496404, "grad_norm": 1.7578850020072423e-07, "learning_rate": 9.401850331500481e-06, "loss": 0.0, "step": 8680 }, { "epoch": 0.05995294831904075, "grad_norm": 0.013763878494501114, "learning_rate": 9.401160424155036e-06, "loss": 0.0, "step": 8690 }, { "epoch": 0.060021939053585105, "grad_norm": 0.0, "learning_rate": 9.400470516809593e-06, "loss": 0.0001, "step": 8700 }, { "epoch": 0.06009092978812945, "grad_norm": 0.0, "learning_rate": 9.399780609464149e-06, "loss": 0.0, "step": 8710 }, { "epoch": 0.060159920522673806, "grad_norm": 2.6696572604123503e-05, "learning_rate": 9.399090702118706e-06, "loss": 0.0003, "step": 8720 }, { "epoch": 0.06022891125721815, "grad_norm": 0.0005639257724396884, "learning_rate": 9.398400794773262e-06, "loss": 0.0, "step": 8730 }, { "epoch": 0.060297901991762506, "grad_norm": 0.0, "learning_rate": 9.397710887427819e-06, "loss": 0.0, "step": 8740 }, { "epoch": 0.06036689272630686, "grad_norm": 0.0, "learning_rate": 9.397020980082375e-06, "loss": 0.0041, "step": 8750 }, { "epoch": 0.06043588346085121, "grad_norm": 0.0, "learning_rate": 9.396331072736932e-06, "loss": 0.0004, "step": 8760 }, { "epoch": 0.06050487419539556, "grad_norm": 0.029361022636294365, "learning_rate": 9.39564116539149e-06, "loss": 0.0, "step": 8770 }, { "epoch": 0.06057386492993991, "grad_norm": 0.00011345672828610986, "learning_rate": 9.394951258046046e-06, "loss": 0.0, "step": 8780 }, { "epoch": 0.06064285566448426, "grad_norm": 0.0, "learning_rate": 9.394261350700603e-06, "loss": 0.0, "step": 8790 }, { "epoch": 0.06071184639902861, "grad_norm": 0.0, "learning_rate": 9.393571443355158e-06, "loss": 0.0, "step": 8800 }, { "epoch": 0.06078083713357296, "grad_norm": 0.0, "learning_rate": 9.392881536009714e-06, "loss": 0.302, "step": 8810 }, { "epoch": 0.06084982786811731, "grad_norm": 0.0, "learning_rate": 9.39219162866427e-06, "loss": 0.0, "step": 8820 }, { "epoch": 0.06091881860266166, "grad_norm": 0.0, "learning_rate": 9.391501721318827e-06, "loss": 0.0, "step": 8830 }, { "epoch": 0.06098780933720601, "grad_norm": 0.0, "learning_rate": 9.390811813973383e-06, "loss": 0.0, "step": 8840 }, { "epoch": 0.061056800071750364, "grad_norm": 0.0, "learning_rate": 9.39012190662794e-06, "loss": 0.0, "step": 8850 }, { "epoch": 0.06112579080629472, "grad_norm": 0.0, "learning_rate": 9.389431999282496e-06, "loss": 0.0, "step": 8860 }, { "epoch": 0.061194781540839065, "grad_norm": 1.7742786440066993e-05, "learning_rate": 9.388742091937053e-06, "loss": 0.0, "step": 8870 }, { "epoch": 0.06126377227538342, "grad_norm": 0.001497775549069047, "learning_rate": 9.388052184591611e-06, "loss": 0.0232, "step": 8880 }, { "epoch": 0.061332763009927765, "grad_norm": 0.00032529831514693797, "learning_rate": 9.387362277246168e-06, "loss": 0.0004, "step": 8890 }, { "epoch": 0.06140175374447212, "grad_norm": 0.00010866899538086727, "learning_rate": 9.386672369900724e-06, "loss": 0.0021, "step": 8900 }, { "epoch": 0.061470744479016466, "grad_norm": 1.0541617712078732e-07, "learning_rate": 9.38598246255528e-06, "loss": 0.0014, "step": 8910 }, { "epoch": 0.06153973521356082, "grad_norm": 0.0, "learning_rate": 9.385292555209835e-06, "loss": 0.0, "step": 8920 }, { "epoch": 0.06160872594810517, "grad_norm": 0.0, "learning_rate": 9.384602647864392e-06, "loss": 0.0017, "step": 8930 }, { "epoch": 0.06167771668264952, "grad_norm": 0.0, "learning_rate": 9.383912740518948e-06, "loss": 0.0, "step": 8940 }, { "epoch": 0.06174670741719387, "grad_norm": 0.0, "learning_rate": 9.383222833173505e-06, "loss": 0.0014, "step": 8950 }, { "epoch": 0.06181569815173822, "grad_norm": 0.0, "learning_rate": 9.382532925828061e-06, "loss": 0.0, "step": 8960 }, { "epoch": 0.061884688886282575, "grad_norm": 0.0, "learning_rate": 9.381843018482618e-06, "loss": 0.0, "step": 8970 }, { "epoch": 0.06195367962082692, "grad_norm": 0.0, "learning_rate": 9.381153111137174e-06, "loss": 0.0, "step": 8980 }, { "epoch": 0.062022670355371276, "grad_norm": 0.0, "learning_rate": 9.380463203791733e-06, "loss": 0.0, "step": 8990 }, { "epoch": 0.06209166108991562, "grad_norm": 0.0, "learning_rate": 9.379773296446289e-06, "loss": 0.0, "step": 9000 }, { "epoch": 0.06216065182445998, "grad_norm": 0.0, "learning_rate": 9.379083389100846e-06, "loss": 0.0017, "step": 9010 }, { "epoch": 0.062229642559004324, "grad_norm": 3.527745502651669e-05, "learning_rate": 9.378393481755402e-06, "loss": 0.0, "step": 9020 }, { "epoch": 0.06229863329354868, "grad_norm": 0.0, "learning_rate": 9.377703574409957e-06, "loss": 0.0001, "step": 9030 }, { "epoch": 0.062367624028093024, "grad_norm": 0.0, "learning_rate": 9.377013667064513e-06, "loss": 0.0011, "step": 9040 }, { "epoch": 0.06243661476263738, "grad_norm": 0.011230291798710823, "learning_rate": 9.37632375971907e-06, "loss": 0.0011, "step": 9050 }, { "epoch": 0.06250560549718173, "grad_norm": 0.0, "learning_rate": 9.375633852373626e-06, "loss": 0.0, "step": 9060 }, { "epoch": 0.06257459623172608, "grad_norm": 0.0, "learning_rate": 9.374943945028183e-06, "loss": 0.0007, "step": 9070 }, { "epoch": 0.06264358696627043, "grad_norm": 0.0, "learning_rate": 9.37425403768274e-06, "loss": 0.0, "step": 9080 }, { "epoch": 0.06271257770081479, "grad_norm": 0.0, "learning_rate": 9.373564130337296e-06, "loss": 0.0, "step": 9090 }, { "epoch": 0.06278156843535913, "grad_norm": 0.0, "learning_rate": 9.372874222991854e-06, "loss": 0.0, "step": 9100 }, { "epoch": 0.06285055916990348, "grad_norm": 0.0, "learning_rate": 9.37218431564641e-06, "loss": 0.1701, "step": 9110 }, { "epoch": 0.06291954990444783, "grad_norm": 1.9915714233320614e-07, "learning_rate": 9.371494408300967e-06, "loss": 0.0, "step": 9120 }, { "epoch": 0.06298854063899219, "grad_norm": 184.47238159179688, "learning_rate": 9.370804500955523e-06, "loss": 0.0305, "step": 9130 }, { "epoch": 0.06305753137353654, "grad_norm": 0.0001221702987095341, "learning_rate": 9.370114593610078e-06, "loss": 0.0, "step": 9140 }, { "epoch": 0.06312652210808088, "grad_norm": 0.0, "learning_rate": 9.369424686264635e-06, "loss": 0.0, "step": 9150 }, { "epoch": 0.06319551284262523, "grad_norm": 0.0, "learning_rate": 9.368734778919191e-06, "loss": 0.5698, "step": 9160 }, { "epoch": 0.06326450357716959, "grad_norm": 0.0, "learning_rate": 9.368044871573748e-06, "loss": 0.0, "step": 9170 }, { "epoch": 0.06333349431171394, "grad_norm": 0.0, "learning_rate": 9.367354964228304e-06, "loss": 0.0001, "step": 9180 }, { "epoch": 0.06340248504625828, "grad_norm": 0.0, "learning_rate": 9.36666505688286e-06, "loss": 0.0, "step": 9190 }, { "epoch": 0.06347147578080264, "grad_norm": 1.4486658983514644e-08, "learning_rate": 9.365975149537417e-06, "loss": 0.0, "step": 9200 }, { "epoch": 0.06354046651534699, "grad_norm": 0.0, "learning_rate": 9.365285242191975e-06, "loss": 0.0027, "step": 9210 }, { "epoch": 0.06360945724989134, "grad_norm": 0.0004837385204154998, "learning_rate": 9.364595334846532e-06, "loss": 0.0001, "step": 9220 }, { "epoch": 0.06367844798443568, "grad_norm": 0.0, "learning_rate": 9.363905427501088e-06, "loss": 0.0, "step": 9230 }, { "epoch": 0.06374743871898005, "grad_norm": 0.0, "learning_rate": 9.363215520155645e-06, "loss": 0.0, "step": 9240 }, { "epoch": 0.06381642945352439, "grad_norm": 0.0, "learning_rate": 9.362525612810201e-06, "loss": 0.0, "step": 9250 }, { "epoch": 0.06388542018806874, "grad_norm": 1.93648362159729, "learning_rate": 9.361835705464756e-06, "loss": 0.0003, "step": 9260 }, { "epoch": 0.06395441092261309, "grad_norm": 0.0, "learning_rate": 9.361145798119313e-06, "loss": 0.0003, "step": 9270 }, { "epoch": 0.06402340165715745, "grad_norm": 0.0, "learning_rate": 9.360455890773869e-06, "loss": 0.0, "step": 9280 }, { "epoch": 0.0640923923917018, "grad_norm": 0.00012504179903771728, "learning_rate": 9.359765983428426e-06, "loss": 0.0, "step": 9290 }, { "epoch": 0.06416138312624614, "grad_norm": 0.0, "learning_rate": 9.359076076082982e-06, "loss": 0.0, "step": 9300 }, { "epoch": 0.0642303738607905, "grad_norm": 0.0024786265566945076, "learning_rate": 9.358386168737539e-06, "loss": 0.0, "step": 9310 }, { "epoch": 0.06429936459533485, "grad_norm": 0.0, "learning_rate": 9.357696261392097e-06, "loss": 0.0, "step": 9320 }, { "epoch": 0.0643683553298792, "grad_norm": 0.0005138832493685186, "learning_rate": 9.357006354046653e-06, "loss": 0.0002, "step": 9330 }, { "epoch": 0.06443734606442354, "grad_norm": 0.0, "learning_rate": 9.35631644670121e-06, "loss": 0.0001, "step": 9340 }, { "epoch": 0.0645063367989679, "grad_norm": 0.0, "learning_rate": 9.355626539355766e-06, "loss": 0.0033, "step": 9350 }, { "epoch": 0.06457532753351225, "grad_norm": 0.0, "learning_rate": 9.354936632010323e-06, "loss": 0.0, "step": 9360 }, { "epoch": 0.0646443182680566, "grad_norm": 0.0, "learning_rate": 9.354246724664877e-06, "loss": 0.0002, "step": 9370 }, { "epoch": 0.06471330900260094, "grad_norm": 0.0, "learning_rate": 9.353556817319434e-06, "loss": 0.0, "step": 9380 }, { "epoch": 0.0647822997371453, "grad_norm": 0.0, "learning_rate": 9.35286690997399e-06, "loss": 0.0, "step": 9390 }, { "epoch": 0.06485129047168965, "grad_norm": 0.0, "learning_rate": 9.352177002628547e-06, "loss": 0.0, "step": 9400 }, { "epoch": 0.064920281206234, "grad_norm": 0.0, "learning_rate": 9.351487095283103e-06, "loss": 0.0017, "step": 9410 }, { "epoch": 0.06498927194077836, "grad_norm": 0.0, "learning_rate": 9.35079718793766e-06, "loss": 0.0, "step": 9420 }, { "epoch": 0.0650582626753227, "grad_norm": 0.0, "learning_rate": 9.350107280592218e-06, "loss": 0.0, "step": 9430 }, { "epoch": 0.06512725340986705, "grad_norm": 0.0, "learning_rate": 9.349417373246775e-06, "loss": 0.011, "step": 9440 }, { "epoch": 0.0651962441444114, "grad_norm": 84.49259948730469, "learning_rate": 9.348727465901331e-06, "loss": 0.0343, "step": 9450 }, { "epoch": 0.06526523487895576, "grad_norm": 0.8491132259368896, "learning_rate": 9.348037558555888e-06, "loss": 0.0012, "step": 9460 }, { "epoch": 0.06533422561350011, "grad_norm": 0.0, "learning_rate": 9.347347651210444e-06, "loss": 0.0, "step": 9470 }, { "epoch": 0.06540321634804445, "grad_norm": 2.747396683844272e-05, "learning_rate": 9.346657743864999e-06, "loss": 0.0, "step": 9480 }, { "epoch": 0.0654722070825888, "grad_norm": 0.0, "learning_rate": 9.345967836519555e-06, "loss": 0.0005, "step": 9490 }, { "epoch": 0.06554119781713316, "grad_norm": 0.0, "learning_rate": 9.345277929174112e-06, "loss": 0.0001, "step": 9500 }, { "epoch": 0.06561018855167751, "grad_norm": 0.0, "learning_rate": 9.344588021828668e-06, "loss": 0.0131, "step": 9510 }, { "epoch": 0.06567917928622186, "grad_norm": 0.0, "learning_rate": 9.343898114483225e-06, "loss": 0.0107, "step": 9520 }, { "epoch": 0.06574817002076622, "grad_norm": 0.0, "learning_rate": 9.343208207137781e-06, "loss": 0.002, "step": 9530 }, { "epoch": 0.06581716075531056, "grad_norm": 0.0, "learning_rate": 9.34251829979234e-06, "loss": 0.0, "step": 9540 }, { "epoch": 0.06588615148985491, "grad_norm": 0.07264412939548492, "learning_rate": 9.341828392446896e-06, "loss": 0.0, "step": 9550 }, { "epoch": 0.06595514222439926, "grad_norm": 1.20164349937113e-05, "learning_rate": 9.341138485101452e-06, "loss": 0.0, "step": 9560 }, { "epoch": 0.06602413295894362, "grad_norm": 0.0, "learning_rate": 9.340448577756009e-06, "loss": 0.0, "step": 9570 }, { "epoch": 0.06609312369348797, "grad_norm": 0.0, "learning_rate": 9.339758670410565e-06, "loss": 0.0, "step": 9580 }, { "epoch": 0.06616211442803231, "grad_norm": 0.0, "learning_rate": 9.33906876306512e-06, "loss": 0.0, "step": 9590 }, { "epoch": 0.06623110516257667, "grad_norm": 0.0, "learning_rate": 9.338378855719677e-06, "loss": 0.0, "step": 9600 }, { "epoch": 0.06630009589712102, "grad_norm": 0.0, "learning_rate": 9.337688948374233e-06, "loss": 0.0, "step": 9610 }, { "epoch": 0.06636908663166537, "grad_norm": 0.0, "learning_rate": 9.33699904102879e-06, "loss": 0.0, "step": 9620 }, { "epoch": 0.06643807736620971, "grad_norm": 0.0, "learning_rate": 9.336309133683346e-06, "loss": 0.0004, "step": 9630 }, { "epoch": 0.06650706810075407, "grad_norm": 0.0, "learning_rate": 9.335619226337903e-06, "loss": 0.0, "step": 9640 }, { "epoch": 0.06657605883529842, "grad_norm": 0.0, "learning_rate": 9.334929318992461e-06, "loss": 0.0, "step": 9650 }, { "epoch": 0.06664504956984277, "grad_norm": 0.0, "learning_rate": 9.334239411647017e-06, "loss": 0.0, "step": 9660 }, { "epoch": 0.06671404030438711, "grad_norm": 0.0, "learning_rate": 9.333549504301574e-06, "loss": 0.0, "step": 9670 }, { "epoch": 0.06678303103893148, "grad_norm": 0.0, "learning_rate": 9.33285959695613e-06, "loss": 0.0, "step": 9680 }, { "epoch": 0.06685202177347582, "grad_norm": 9.999718167819083e-05, "learning_rate": 9.332169689610687e-06, "loss": 0.0007, "step": 9690 }, { "epoch": 0.06692101250802017, "grad_norm": 0.0, "learning_rate": 9.331479782265243e-06, "loss": 0.0001, "step": 9700 }, { "epoch": 0.06699000324256453, "grad_norm": 0.0, "learning_rate": 9.330789874919798e-06, "loss": 0.0, "step": 9710 }, { "epoch": 0.06705899397710888, "grad_norm": 0.0, "learning_rate": 9.330099967574355e-06, "loss": 0.1551, "step": 9720 }, { "epoch": 0.06712798471165322, "grad_norm": 3.7993876844666374e-07, "learning_rate": 9.329410060228911e-06, "loss": 0.2504, "step": 9730 }, { "epoch": 0.06719697544619757, "grad_norm": 0.0, "learning_rate": 9.328720152883468e-06, "loss": 0.0, "step": 9740 }, { "epoch": 0.06726596618074193, "grad_norm": 0.0, "learning_rate": 9.328030245538024e-06, "loss": 0.0002, "step": 9750 }, { "epoch": 0.06733495691528628, "grad_norm": 1.4668836811893016e-08, "learning_rate": 9.327340338192582e-06, "loss": 0.0418, "step": 9760 }, { "epoch": 0.06740394764983063, "grad_norm": 0.0013452752027660608, "learning_rate": 9.326650430847139e-06, "loss": 0.0, "step": 9770 }, { "epoch": 0.06747293838437497, "grad_norm": 0.0003280251694377512, "learning_rate": 9.325960523501695e-06, "loss": 0.0, "step": 9780 }, { "epoch": 0.06754192911891933, "grad_norm": 0.0004096842894796282, "learning_rate": 9.325270616156252e-06, "loss": 0.0, "step": 9790 }, { "epoch": 0.06761091985346368, "grad_norm": 0.0, "learning_rate": 9.324580708810808e-06, "loss": 0.0, "step": 9800 }, { "epoch": 0.06767991058800803, "grad_norm": 0.0, "learning_rate": 9.323890801465365e-06, "loss": 0.044, "step": 9810 }, { "epoch": 0.06774890132255239, "grad_norm": 0.00012699179933406413, "learning_rate": 9.32320089411992e-06, "loss": 0.0, "step": 9820 }, { "epoch": 0.06781789205709673, "grad_norm": 0.0, "learning_rate": 9.322510986774476e-06, "loss": 0.0108, "step": 9830 }, { "epoch": 0.06788688279164108, "grad_norm": 0.0, "learning_rate": 9.321821079429033e-06, "loss": 0.0003, "step": 9840 }, { "epoch": 0.06795587352618543, "grad_norm": 0.0, "learning_rate": 9.321131172083589e-06, "loss": 0.0, "step": 9850 }, { "epoch": 0.06802486426072979, "grad_norm": 0.0, "learning_rate": 9.320441264738146e-06, "loss": 0.0004, "step": 9860 }, { "epoch": 0.06809385499527414, "grad_norm": 0.0, "learning_rate": 9.319751357392704e-06, "loss": 0.0, "step": 9870 }, { "epoch": 0.06816284572981848, "grad_norm": 9.521896604525182e-10, "learning_rate": 9.31906145004726e-06, "loss": 0.0, "step": 9880 }, { "epoch": 0.06823183646436283, "grad_norm": 0.0, "learning_rate": 9.318371542701817e-06, "loss": 0.0, "step": 9890 }, { "epoch": 0.06830082719890719, "grad_norm": 0.00018718295905273408, "learning_rate": 9.317681635356373e-06, "loss": 0.0216, "step": 9900 }, { "epoch": 0.06836981793345154, "grad_norm": 0.0, "learning_rate": 9.31699172801093e-06, "loss": 0.002, "step": 9910 }, { "epoch": 0.06843880866799588, "grad_norm": 0.0, "learning_rate": 9.316301820665486e-06, "loss": 0.0, "step": 9920 }, { "epoch": 0.06850779940254025, "grad_norm": 73.73307037353516, "learning_rate": 9.315611913320041e-06, "loss": 0.0091, "step": 9930 }, { "epoch": 0.06857679013708459, "grad_norm": 3.7655553342119674e-08, "learning_rate": 9.314922005974597e-06, "loss": 0.0, "step": 9940 }, { "epoch": 0.06864578087162894, "grad_norm": 0.0, "learning_rate": 9.314232098629154e-06, "loss": 0.0081, "step": 9950 }, { "epoch": 0.06871477160617329, "grad_norm": 9.238464437899552e-10, "learning_rate": 9.31354219128371e-06, "loss": 0.0, "step": 9960 }, { "epoch": 0.06878376234071765, "grad_norm": 0.0003441699664108455, "learning_rate": 9.312852283938267e-06, "loss": 0.0, "step": 9970 }, { "epoch": 0.068852753075262, "grad_norm": 4.399174429181585e-07, "learning_rate": 9.312162376592825e-06, "loss": 0.0, "step": 9980 }, { "epoch": 0.06892174380980634, "grad_norm": 0.0, "learning_rate": 9.311472469247382e-06, "loss": 0.0, "step": 9990 }, { "epoch": 0.06899073454435069, "grad_norm": 0.0, "learning_rate": 9.310782561901938e-06, "loss": 0.0, "step": 10000 }, { "epoch": 0.06905972527889505, "grad_norm": 0.0, "learning_rate": 9.310092654556495e-06, "loss": 0.0003, "step": 10010 }, { "epoch": 0.0691287160134394, "grad_norm": 0.0, "learning_rate": 9.309402747211051e-06, "loss": 0.0, "step": 10020 }, { "epoch": 0.06919770674798374, "grad_norm": 0.0, "learning_rate": 9.308712839865608e-06, "loss": 0.0009, "step": 10030 }, { "epoch": 0.0692666974825281, "grad_norm": 0.0, "learning_rate": 9.308022932520164e-06, "loss": 0.0, "step": 10040 }, { "epoch": 0.06933568821707245, "grad_norm": 0.0, "learning_rate": 9.307333025174719e-06, "loss": 0.0, "step": 10050 }, { "epoch": 0.0694046789516168, "grad_norm": 0.0, "learning_rate": 9.306643117829275e-06, "loss": 0.0, "step": 10060 }, { "epoch": 0.06947366968616114, "grad_norm": 5.556299242925888e-07, "learning_rate": 9.305953210483832e-06, "loss": 0.0, "step": 10070 }, { "epoch": 0.0695426604207055, "grad_norm": 0.0, "learning_rate": 9.305263303138388e-06, "loss": 0.0, "step": 10080 }, { "epoch": 0.06961165115524985, "grad_norm": 0.0004480912466533482, "learning_rate": 9.304573395792946e-06, "loss": 0.0002, "step": 10090 }, { "epoch": 0.0696806418897942, "grad_norm": 0.18314163386821747, "learning_rate": 9.303883488447503e-06, "loss": 0.2578, "step": 10100 }, { "epoch": 0.06974963262433855, "grad_norm": 3.770653123069678e-09, "learning_rate": 9.30319358110206e-06, "loss": 0.0, "step": 10110 }, { "epoch": 0.0698186233588829, "grad_norm": 1.0981832474499242e-06, "learning_rate": 9.302503673756616e-06, "loss": 0.0004, "step": 10120 }, { "epoch": 0.06988761409342725, "grad_norm": 0.0, "learning_rate": 9.301813766411172e-06, "loss": 0.0, "step": 10130 }, { "epoch": 0.0699566048279716, "grad_norm": 0.0, "learning_rate": 9.301123859065729e-06, "loss": 0.0001, "step": 10140 }, { "epoch": 0.07002559556251596, "grad_norm": 0.0, "learning_rate": 9.300433951720285e-06, "loss": 0.0, "step": 10150 }, { "epoch": 0.07009458629706031, "grad_norm": 0.0006030003423802555, "learning_rate": 9.29974404437484e-06, "loss": 0.0008, "step": 10160 }, { "epoch": 0.07016357703160465, "grad_norm": 3.748638377487623e-08, "learning_rate": 9.299054137029397e-06, "loss": 0.0001, "step": 10170 }, { "epoch": 0.070232567766149, "grad_norm": 0.0, "learning_rate": 9.298364229683953e-06, "loss": 0.0, "step": 10180 }, { "epoch": 0.07030155850069336, "grad_norm": 1.876984242699109e-05, "learning_rate": 9.29767432233851e-06, "loss": 0.0285, "step": 10190 }, { "epoch": 0.07037054923523771, "grad_norm": 0.0, "learning_rate": 9.296984414993068e-06, "loss": 0.0, "step": 10200 }, { "epoch": 0.07043953996978206, "grad_norm": 0.0, "learning_rate": 9.296294507647624e-06, "loss": 0.0, "step": 10210 }, { "epoch": 0.0705085307043264, "grad_norm": 1.489681289967848e-05, "learning_rate": 9.295604600302181e-06, "loss": 0.0067, "step": 10220 }, { "epoch": 0.07057752143887076, "grad_norm": 2.6676072462095135e-09, "learning_rate": 9.294914692956737e-06, "loss": 0.0, "step": 10230 }, { "epoch": 0.07064651217341511, "grad_norm": 0.00038919990765862167, "learning_rate": 9.294224785611294e-06, "loss": 0.0001, "step": 10240 }, { "epoch": 0.07071550290795946, "grad_norm": 0.0, "learning_rate": 9.29353487826585e-06, "loss": 0.0, "step": 10250 }, { "epoch": 0.07078449364250382, "grad_norm": 0.0, "learning_rate": 9.292844970920407e-06, "loss": 0.0, "step": 10260 }, { "epoch": 0.07085348437704816, "grad_norm": 0.0, "learning_rate": 9.292155063574962e-06, "loss": 0.0001, "step": 10270 }, { "epoch": 0.07092247511159251, "grad_norm": 0.0, "learning_rate": 9.291465156229518e-06, "loss": 0.0, "step": 10280 }, { "epoch": 0.07099146584613686, "grad_norm": 0.0, "learning_rate": 9.290775248884075e-06, "loss": 0.0, "step": 10290 }, { "epoch": 0.07106045658068122, "grad_norm": 8.037535735638812e-05, "learning_rate": 9.290085341538631e-06, "loss": 0.0, "step": 10300 }, { "epoch": 0.07112944731522557, "grad_norm": 1.4089010619500186e-05, "learning_rate": 9.28939543419319e-06, "loss": 0.0, "step": 10310 }, { "epoch": 0.07119843804976991, "grad_norm": 9.730531047580371e-08, "learning_rate": 9.288705526847746e-06, "loss": 0.0266, "step": 10320 }, { "epoch": 0.07126742878431426, "grad_norm": 0.0, "learning_rate": 9.288015619502302e-06, "loss": 0.0139, "step": 10330 }, { "epoch": 0.07133641951885862, "grad_norm": 0.0, "learning_rate": 9.287325712156859e-06, "loss": 0.0, "step": 10340 }, { "epoch": 0.07140541025340297, "grad_norm": 0.0, "learning_rate": 9.286635804811415e-06, "loss": 0.0, "step": 10350 }, { "epoch": 0.07147440098794731, "grad_norm": 0.0, "learning_rate": 9.285945897465972e-06, "loss": 0.0, "step": 10360 }, { "epoch": 0.07154339172249168, "grad_norm": 0.0, "learning_rate": 9.285255990120528e-06, "loss": 0.0, "step": 10370 }, { "epoch": 0.07161238245703602, "grad_norm": 0.10475505143404007, "learning_rate": 9.284566082775085e-06, "loss": 0.0, "step": 10380 }, { "epoch": 0.07168137319158037, "grad_norm": 0.0, "learning_rate": 9.28387617542964e-06, "loss": 0.0, "step": 10390 }, { "epoch": 0.07175036392612472, "grad_norm": 0.0, "learning_rate": 9.283186268084196e-06, "loss": 0.0, "step": 10400 }, { "epoch": 0.07181935466066908, "grad_norm": 0.0, "learning_rate": 9.282496360738752e-06, "loss": 0.0, "step": 10410 }, { "epoch": 0.07188834539521342, "grad_norm": 0.039523545652627945, "learning_rate": 9.28180645339331e-06, "loss": 0.0, "step": 10420 }, { "epoch": 0.07195733612975777, "grad_norm": 0.000720381794963032, "learning_rate": 9.281116546047867e-06, "loss": 0.0001, "step": 10430 }, { "epoch": 0.07202632686430212, "grad_norm": 2.1007210762036266e-06, "learning_rate": 9.280426638702424e-06, "loss": 0.0, "step": 10440 }, { "epoch": 0.07209531759884648, "grad_norm": 0.8526678681373596, "learning_rate": 9.27973673135698e-06, "loss": 0.0002, "step": 10450 }, { "epoch": 0.07216430833339083, "grad_norm": 53.298274993896484, "learning_rate": 9.279046824011537e-06, "loss": 0.0058, "step": 10460 }, { "epoch": 0.07223329906793517, "grad_norm": 2.7131068502939115e-09, "learning_rate": 9.278356916666093e-06, "loss": 0.0, "step": 10470 }, { "epoch": 0.07230228980247953, "grad_norm": 0.0, "learning_rate": 9.27766700932065e-06, "loss": 0.0001, "step": 10480 }, { "epoch": 0.07237128053702388, "grad_norm": 0.00017692340770736337, "learning_rate": 9.276977101975206e-06, "loss": 1.0219, "step": 10490 }, { "epoch": 0.07244027127156823, "grad_norm": 0.06454253196716309, "learning_rate": 9.276287194629761e-06, "loss": 0.0, "step": 10500 }, { "epoch": 0.07250926200611257, "grad_norm": 0.33074289560317993, "learning_rate": 9.275597287284317e-06, "loss": 0.0001, "step": 10510 }, { "epoch": 0.07257825274065693, "grad_norm": 0.0, "learning_rate": 9.274907379938874e-06, "loss": 0.0, "step": 10520 }, { "epoch": 0.07264724347520128, "grad_norm": 0.0, "learning_rate": 9.274217472593432e-06, "loss": 0.0, "step": 10530 }, { "epoch": 0.07271623420974563, "grad_norm": 5.425266014214003e-09, "learning_rate": 9.273527565247989e-06, "loss": 0.1383, "step": 10540 }, { "epoch": 0.07278522494428998, "grad_norm": 0.0, "learning_rate": 9.272837657902545e-06, "loss": 0.0, "step": 10550 }, { "epoch": 0.07285421567883434, "grad_norm": 0.0010381789179518819, "learning_rate": 9.272147750557102e-06, "loss": 0.0, "step": 10560 }, { "epoch": 0.07292320641337868, "grad_norm": 0.0001351818209514022, "learning_rate": 9.271457843211658e-06, "loss": 0.0, "step": 10570 }, { "epoch": 0.07299219714792303, "grad_norm": 125.50298309326172, "learning_rate": 9.270767935866215e-06, "loss": 0.0289, "step": 10580 }, { "epoch": 0.07306118788246739, "grad_norm": 0.00037610583240166306, "learning_rate": 9.270078028520771e-06, "loss": 0.0, "step": 10590 }, { "epoch": 0.07313017861701174, "grad_norm": 0.0, "learning_rate": 9.269388121175327e-06, "loss": 0.0, "step": 10600 }, { "epoch": 0.07319916935155608, "grad_norm": 0.0, "learning_rate": 9.268698213829882e-06, "loss": 0.0, "step": 10610 }, { "epoch": 0.07326816008610043, "grad_norm": 3.6756453482666984e-05, "learning_rate": 9.268008306484439e-06, "loss": 0.0003, "step": 10620 }, { "epoch": 0.07333715082064479, "grad_norm": 0.8746658563613892, "learning_rate": 9.267318399138995e-06, "loss": 0.0014, "step": 10630 }, { "epoch": 0.07340614155518914, "grad_norm": 1.756311576173175e-05, "learning_rate": 9.266628491793553e-06, "loss": 0.0, "step": 10640 }, { "epoch": 0.07347513228973349, "grad_norm": 0.0003972606500610709, "learning_rate": 9.26593858444811e-06, "loss": 0.0024, "step": 10650 }, { "epoch": 0.07354412302427783, "grad_norm": 1.6024492651922628e-05, "learning_rate": 9.265248677102666e-06, "loss": 0.0, "step": 10660 }, { "epoch": 0.0736131137588222, "grad_norm": 3.27560119330883e-05, "learning_rate": 9.264558769757223e-06, "loss": 0.0, "step": 10670 }, { "epoch": 0.07368210449336654, "grad_norm": 0.0, "learning_rate": 9.26386886241178e-06, "loss": 0.0007, "step": 10680 }, { "epoch": 0.07375109522791089, "grad_norm": 3.7856531143188477, "learning_rate": 9.263178955066336e-06, "loss": 0.0015, "step": 10690 }, { "epoch": 0.07382008596245525, "grad_norm": 0.00032519918750040233, "learning_rate": 9.262489047720892e-06, "loss": 0.0, "step": 10700 }, { "epoch": 0.0738890766969996, "grad_norm": 0.0, "learning_rate": 9.261799140375449e-06, "loss": 0.0, "step": 10710 }, { "epoch": 0.07395806743154394, "grad_norm": 0.0, "learning_rate": 9.261109233030005e-06, "loss": 0.4078, "step": 10720 }, { "epoch": 0.07402705816608829, "grad_norm": 2.287138256562571e-09, "learning_rate": 9.26041932568456e-06, "loss": 0.0, "step": 10730 }, { "epoch": 0.07409604890063265, "grad_norm": 0.0, "learning_rate": 9.259729418339117e-06, "loss": 0.0, "step": 10740 }, { "epoch": 0.074165039635177, "grad_norm": 0.0, "learning_rate": 9.259039510993675e-06, "loss": 0.0001, "step": 10750 }, { "epoch": 0.07423403036972134, "grad_norm": 0.0, "learning_rate": 9.258349603648231e-06, "loss": 0.0001, "step": 10760 }, { "epoch": 0.0743030211042657, "grad_norm": 0.00011291095142951235, "learning_rate": 9.257659696302788e-06, "loss": 0.0001, "step": 10770 }, { "epoch": 0.07437201183881005, "grad_norm": 0.07228134572505951, "learning_rate": 9.256969788957344e-06, "loss": 0.0001, "step": 10780 }, { "epoch": 0.0744410025733544, "grad_norm": 15.591343879699707, "learning_rate": 9.2562798816119e-06, "loss": 0.0033, "step": 10790 }, { "epoch": 0.07450999330789874, "grad_norm": 6.5625176429748535, "learning_rate": 9.255589974266457e-06, "loss": 0.0017, "step": 10800 }, { "epoch": 0.0745789840424431, "grad_norm": 0.0, "learning_rate": 9.254900066921014e-06, "loss": 0.0, "step": 10810 }, { "epoch": 0.07464797477698745, "grad_norm": 0.0, "learning_rate": 9.25421015957557e-06, "loss": 0.0, "step": 10820 }, { "epoch": 0.0747169655115318, "grad_norm": 0.0, "learning_rate": 9.253520252230127e-06, "loss": 0.0, "step": 10830 }, { "epoch": 0.07478595624607615, "grad_norm": 0.0, "learning_rate": 9.252830344884682e-06, "loss": 0.0005, "step": 10840 }, { "epoch": 0.07485494698062051, "grad_norm": 1.6427664206730697e-07, "learning_rate": 9.252140437539238e-06, "loss": 0.0, "step": 10850 }, { "epoch": 0.07492393771516485, "grad_norm": 0.00334505969658494, "learning_rate": 9.251450530193796e-06, "loss": 0.0009, "step": 10860 }, { "epoch": 0.0749929284497092, "grad_norm": 0.0, "learning_rate": 9.250760622848353e-06, "loss": 0.0, "step": 10870 }, { "epoch": 0.07506191918425356, "grad_norm": 0.0012655858881771564, "learning_rate": 9.25007071550291e-06, "loss": 0.0044, "step": 10880 }, { "epoch": 0.07513090991879791, "grad_norm": 1.8479919816627444e-08, "learning_rate": 9.249380808157466e-06, "loss": 0.0, "step": 10890 }, { "epoch": 0.07519990065334226, "grad_norm": 0.0, "learning_rate": 9.248690900812022e-06, "loss": 0.0, "step": 10900 }, { "epoch": 0.0752688913878866, "grad_norm": 4.5717191143879177e-10, "learning_rate": 9.248000993466579e-06, "loss": 0.0, "step": 10910 }, { "epoch": 0.07533788212243096, "grad_norm": 0.0, "learning_rate": 9.247311086121135e-06, "loss": 0.168, "step": 10920 }, { "epoch": 0.07540687285697531, "grad_norm": 0.07586784660816193, "learning_rate": 9.246621178775692e-06, "loss": 0.0, "step": 10930 }, { "epoch": 0.07547586359151966, "grad_norm": 0.0, "learning_rate": 9.245931271430248e-06, "loss": 0.0, "step": 10940 }, { "epoch": 0.075544854326064, "grad_norm": 0.0, "learning_rate": 9.245241364084803e-06, "loss": 0.0, "step": 10950 }, { "epoch": 0.07561384506060836, "grad_norm": 0.0, "learning_rate": 9.24455145673936e-06, "loss": 0.0001, "step": 10960 }, { "epoch": 0.07568283579515271, "grad_norm": 0.0, "learning_rate": 9.243861549393918e-06, "loss": 0.0, "step": 10970 }, { "epoch": 0.07575182652969706, "grad_norm": 0.0, "learning_rate": 9.243171642048474e-06, "loss": 0.0004, "step": 10980 }, { "epoch": 0.07582081726424142, "grad_norm": 0.0, "learning_rate": 9.24248173470303e-06, "loss": 0.4141, "step": 10990 }, { "epoch": 0.07588980799878577, "grad_norm": 0.0, "learning_rate": 9.241791827357587e-06, "loss": 0.0, "step": 11000 }, { "epoch": 0.07595879873333011, "grad_norm": 0.0, "learning_rate": 9.241101920012144e-06, "loss": 0.0294, "step": 11010 }, { "epoch": 0.07602778946787446, "grad_norm": 8.191610589847187e-08, "learning_rate": 9.2404120126667e-06, "loss": 0.0, "step": 11020 }, { "epoch": 0.07609678020241882, "grad_norm": 0.0, "learning_rate": 9.239722105321257e-06, "loss": 0.0, "step": 11030 }, { "epoch": 0.07616577093696317, "grad_norm": 0.0, "learning_rate": 9.239032197975813e-06, "loss": 0.0, "step": 11040 }, { "epoch": 0.07623476167150751, "grad_norm": 3.5035125620197505e-05, "learning_rate": 9.23834229063037e-06, "loss": 0.0, "step": 11050 }, { "epoch": 0.07630375240605186, "grad_norm": 0.0, "learning_rate": 9.237652383284924e-06, "loss": 0.0015, "step": 11060 }, { "epoch": 0.07637274314059622, "grad_norm": 0.0, "learning_rate": 9.23696247593948e-06, "loss": 0.0, "step": 11070 }, { "epoch": 0.07644173387514057, "grad_norm": 0.0, "learning_rate": 9.236341559328583e-06, "loss": 0.2465, "step": 11080 }, { "epoch": 0.07651072460968492, "grad_norm": 0.0, "learning_rate": 9.23565165198314e-06, "loss": 0.0, "step": 11090 }, { "epoch": 0.07657971534422928, "grad_norm": 0.07076693326234818, "learning_rate": 9.234961744637696e-06, "loss": 0.0001, "step": 11100 }, { "epoch": 0.07664870607877362, "grad_norm": 0.0, "learning_rate": 9.234271837292253e-06, "loss": 0.0, "step": 11110 }, { "epoch": 0.07671769681331797, "grad_norm": 0.0, "learning_rate": 9.23358192994681e-06, "loss": 0.1383, "step": 11120 }, { "epoch": 0.07678668754786232, "grad_norm": 1.263082822333672e-06, "learning_rate": 9.232892022601366e-06, "loss": 0.0, "step": 11130 }, { "epoch": 0.07685567828240668, "grad_norm": 0.0, "learning_rate": 9.232202115255922e-06, "loss": 0.0, "step": 11140 }, { "epoch": 0.07692466901695102, "grad_norm": 0.0006859014392830431, "learning_rate": 9.231512207910479e-06, "loss": 0.0, "step": 11150 }, { "epoch": 0.07699365975149537, "grad_norm": 208.89547729492188, "learning_rate": 9.230822300565035e-06, "loss": 0.0165, "step": 11160 }, { "epoch": 0.07706265048603972, "grad_norm": 0.0, "learning_rate": 9.230132393219592e-06, "loss": 0.0, "step": 11170 }, { "epoch": 0.07713164122058408, "grad_norm": 0.00013802248577121645, "learning_rate": 9.229442485874148e-06, "loss": 0.0, "step": 11180 }, { "epoch": 0.07720063195512843, "grad_norm": 0.0, "learning_rate": 9.228752578528705e-06, "loss": 0.0241, "step": 11190 }, { "epoch": 0.07726962268967277, "grad_norm": 0.0, "learning_rate": 9.228062671183261e-06, "loss": 0.376, "step": 11200 }, { "epoch": 0.07733861342421713, "grad_norm": 9.614611329311629e-10, "learning_rate": 9.227372763837818e-06, "loss": 0.0, "step": 11210 }, { "epoch": 0.07740760415876148, "grad_norm": 0.0, "learning_rate": 9.226682856492374e-06, "loss": 0.002, "step": 11220 }, { "epoch": 0.07747659489330583, "grad_norm": 0.0, "learning_rate": 9.22599294914693e-06, "loss": 0.0, "step": 11230 }, { "epoch": 0.07754558562785017, "grad_norm": 0.0, "learning_rate": 9.225303041801487e-06, "loss": 0.0, "step": 11240 }, { "epoch": 0.07761457636239454, "grad_norm": 0.0, "learning_rate": 9.224613134456044e-06, "loss": 0.0057, "step": 11250 }, { "epoch": 0.07768356709693888, "grad_norm": 1.3128318786621094, "learning_rate": 9.2239232271106e-06, "loss": 0.0003, "step": 11260 }, { "epoch": 0.07775255783148323, "grad_norm": 0.0, "learning_rate": 9.223233319765157e-06, "loss": 0.0, "step": 11270 }, { "epoch": 0.07782154856602758, "grad_norm": 0.0, "learning_rate": 9.222543412419713e-06, "loss": 0.0, "step": 11280 }, { "epoch": 0.07789053930057194, "grad_norm": 0.0, "learning_rate": 9.22185350507427e-06, "loss": 0.0001, "step": 11290 }, { "epoch": 0.07795953003511628, "grad_norm": 0.0, "learning_rate": 9.221163597728826e-06, "loss": 0.0003, "step": 11300 }, { "epoch": 0.07802852076966063, "grad_norm": 0.0, "learning_rate": 9.220473690383382e-06, "loss": 0.0, "step": 11310 }, { "epoch": 0.07809751150420499, "grad_norm": 710.934814453125, "learning_rate": 9.219783783037939e-06, "loss": 0.281, "step": 11320 }, { "epoch": 0.07816650223874934, "grad_norm": 0.0, "learning_rate": 9.219093875692495e-06, "loss": 0.0023, "step": 11330 }, { "epoch": 0.07823549297329369, "grad_norm": 9.878846629618465e-10, "learning_rate": 9.218403968347052e-06, "loss": 0.0002, "step": 11340 }, { "epoch": 0.07830448370783803, "grad_norm": 0.0, "learning_rate": 9.217714061001608e-06, "loss": 0.0, "step": 11350 }, { "epoch": 0.0783734744423824, "grad_norm": 2.8398371796356514e-06, "learning_rate": 9.217024153656165e-06, "loss": 0.0, "step": 11360 }, { "epoch": 0.07844246517692674, "grad_norm": 0.0021105895284563303, "learning_rate": 9.216334246310721e-06, "loss": 0.0, "step": 11370 }, { "epoch": 0.07851145591147109, "grad_norm": 0.0, "learning_rate": 9.215644338965278e-06, "loss": 0.0009, "step": 11380 }, { "epoch": 0.07858044664601543, "grad_norm": 0.0, "learning_rate": 9.214954431619834e-06, "loss": 0.0, "step": 11390 }, { "epoch": 0.0786494373805598, "grad_norm": 3.435726370071279e-08, "learning_rate": 9.214264524274391e-06, "loss": 0.0, "step": 11400 }, { "epoch": 0.07871842811510414, "grad_norm": 0.0, "learning_rate": 9.213574616928947e-06, "loss": 0.0004, "step": 11410 }, { "epoch": 0.07878741884964849, "grad_norm": 3.9642894989810884e-05, "learning_rate": 9.212884709583504e-06, "loss": 0.0666, "step": 11420 }, { "epoch": 0.07885640958419285, "grad_norm": 0.0, "learning_rate": 9.21219480223806e-06, "loss": 0.0001, "step": 11430 }, { "epoch": 0.0789254003187372, "grad_norm": 0.0, "learning_rate": 9.211504894892617e-06, "loss": 0.1877, "step": 11440 }, { "epoch": 0.07899439105328154, "grad_norm": 0.0, "learning_rate": 9.210814987547173e-06, "loss": 0.0, "step": 11450 }, { "epoch": 0.07906338178782589, "grad_norm": 0.0, "learning_rate": 9.21012508020173e-06, "loss": 0.0, "step": 11460 }, { "epoch": 0.07913237252237025, "grad_norm": 0.0, "learning_rate": 9.209435172856286e-06, "loss": 0.0, "step": 11470 }, { "epoch": 0.0792013632569146, "grad_norm": 0.0, "learning_rate": 9.208745265510843e-06, "loss": 0.0, "step": 11480 }, { "epoch": 0.07927035399145894, "grad_norm": 0.0, "learning_rate": 9.2080553581654e-06, "loss": 0.0001, "step": 11490 }, { "epoch": 0.07933934472600329, "grad_norm": 0.0, "learning_rate": 9.207365450819956e-06, "loss": 0.0011, "step": 11500 }, { "epoch": 0.07940833546054765, "grad_norm": 0.0009103399352170527, "learning_rate": 9.206675543474512e-06, "loss": 0.0004, "step": 11510 }, { "epoch": 0.079477326195092, "grad_norm": 0.0, "learning_rate": 9.205985636129069e-06, "loss": 0.0, "step": 11520 }, { "epoch": 0.07954631692963635, "grad_norm": 0.0, "learning_rate": 9.205295728783625e-06, "loss": 0.0, "step": 11530 }, { "epoch": 0.0796153076641807, "grad_norm": 0.0, "learning_rate": 9.204605821438182e-06, "loss": 0.0, "step": 11540 }, { "epoch": 0.07968429839872505, "grad_norm": 0.00013076679897494614, "learning_rate": 9.203915914092738e-06, "loss": 0.0, "step": 11550 }, { "epoch": 0.0797532891332694, "grad_norm": 0.0, "learning_rate": 9.203226006747295e-06, "loss": 0.0, "step": 11560 }, { "epoch": 0.07982227986781375, "grad_norm": 0.0, "learning_rate": 9.202536099401851e-06, "loss": 0.0, "step": 11570 }, { "epoch": 0.07989127060235811, "grad_norm": 7.369824743364006e-05, "learning_rate": 9.201846192056408e-06, "loss": 0.0, "step": 11580 }, { "epoch": 0.07996026133690246, "grad_norm": 0.0, "learning_rate": 9.201156284710964e-06, "loss": 0.0019, "step": 11590 }, { "epoch": 0.0800292520714468, "grad_norm": 0.0452035553753376, "learning_rate": 9.20046637736552e-06, "loss": 0.069, "step": 11600 }, { "epoch": 0.08009824280599115, "grad_norm": 4.348968474232606e-08, "learning_rate": 9.199776470020077e-06, "loss": 0.0, "step": 11610 }, { "epoch": 0.08016723354053551, "grad_norm": 0.0, "learning_rate": 9.199086562674634e-06, "loss": 0.0, "step": 11620 }, { "epoch": 0.08023622427507986, "grad_norm": 0.0, "learning_rate": 9.19839665532919e-06, "loss": 0.0001, "step": 11630 }, { "epoch": 0.0803052150096242, "grad_norm": 0.00019220069225411862, "learning_rate": 9.197706747983747e-06, "loss": 0.0, "step": 11640 }, { "epoch": 0.08037420574416856, "grad_norm": 8.480259339194163e-08, "learning_rate": 9.197016840638303e-06, "loss": 0.0, "step": 11650 }, { "epoch": 0.08044319647871291, "grad_norm": 0.0, "learning_rate": 9.19632693329286e-06, "loss": 0.0005, "step": 11660 }, { "epoch": 0.08051218721325726, "grad_norm": 0.0007987026474438608, "learning_rate": 9.195637025947416e-06, "loss": 0.0003, "step": 11670 }, { "epoch": 0.0805811779478016, "grad_norm": 0.0, "learning_rate": 9.194947118601973e-06, "loss": 0.0, "step": 11680 }, { "epoch": 0.08065016868234597, "grad_norm": 1.3973802197142504e-05, "learning_rate": 9.194257211256529e-06, "loss": 0.0, "step": 11690 }, { "epoch": 0.08071915941689031, "grad_norm": 0.0, "learning_rate": 9.193567303911086e-06, "loss": 0.0, "step": 11700 }, { "epoch": 0.08078815015143466, "grad_norm": 0.0, "learning_rate": 9.192877396565642e-06, "loss": 0.0, "step": 11710 }, { "epoch": 0.080857140885979, "grad_norm": 0.0, "learning_rate": 9.192187489220199e-06, "loss": 0.0023, "step": 11720 }, { "epoch": 0.08092613162052337, "grad_norm": 0.0, "learning_rate": 9.191497581874755e-06, "loss": 0.0, "step": 11730 }, { "epoch": 0.08099512235506771, "grad_norm": 0.0022115407045930624, "learning_rate": 9.190807674529312e-06, "loss": 0.0, "step": 11740 }, { "epoch": 0.08106411308961206, "grad_norm": 0.0, "learning_rate": 9.190117767183868e-06, "loss": 0.0143, "step": 11750 }, { "epoch": 0.08113310382415642, "grad_norm": 6.433498583646724e-06, "learning_rate": 9.189427859838425e-06, "loss": 0.0066, "step": 11760 }, { "epoch": 0.08120209455870077, "grad_norm": 0.0, "learning_rate": 9.188737952492981e-06, "loss": 0.0, "step": 11770 }, { "epoch": 0.08127108529324512, "grad_norm": 0.0, "learning_rate": 9.188048045147538e-06, "loss": 0.0697, "step": 11780 }, { "epoch": 0.08134007602778946, "grad_norm": 0.001297208247706294, "learning_rate": 9.187358137802094e-06, "loss": 0.0449, "step": 11790 }, { "epoch": 0.08140906676233382, "grad_norm": 0.0, "learning_rate": 9.18666823045665e-06, "loss": 0.0257, "step": 11800 }, { "epoch": 0.08147805749687817, "grad_norm": 0.0, "learning_rate": 9.185978323111207e-06, "loss": 0.2309, "step": 11810 }, { "epoch": 0.08154704823142252, "grad_norm": 0.024593088775873184, "learning_rate": 9.185288415765763e-06, "loss": 0.0, "step": 11820 }, { "epoch": 0.08161603896596686, "grad_norm": 0.0, "learning_rate": 9.18459850842032e-06, "loss": 0.8766, "step": 11830 }, { "epoch": 0.08168502970051122, "grad_norm": 5.3823820053366944e-05, "learning_rate": 9.183908601074876e-06, "loss": 0.6586, "step": 11840 }, { "epoch": 0.08175402043505557, "grad_norm": 0.0, "learning_rate": 9.183218693729433e-06, "loss": 0.2819, "step": 11850 }, { "epoch": 0.08182301116959992, "grad_norm": 0.0, "learning_rate": 9.18252878638399e-06, "loss": 0.0006, "step": 11860 }, { "epoch": 0.08189200190414428, "grad_norm": 0.0, "learning_rate": 9.181838879038546e-06, "loss": 0.0024, "step": 11870 }, { "epoch": 0.08196099263868863, "grad_norm": 0.0, "learning_rate": 9.181148971693102e-06, "loss": 0.0001, "step": 11880 }, { "epoch": 0.08202998337323297, "grad_norm": 0.0, "learning_rate": 9.180459064347659e-06, "loss": 0.0003, "step": 11890 }, { "epoch": 0.08209897410777732, "grad_norm": 0.0, "learning_rate": 9.179769157002215e-06, "loss": 0.0, "step": 11900 }, { "epoch": 0.08216796484232168, "grad_norm": 0.0, "learning_rate": 9.179079249656772e-06, "loss": 0.0, "step": 11910 }, { "epoch": 0.08223695557686603, "grad_norm": 0.0, "learning_rate": 9.178389342311328e-06, "loss": 0.0, "step": 11920 }, { "epoch": 0.08230594631141037, "grad_norm": 0.0040501696057617664, "learning_rate": 9.177699434965885e-06, "loss": 0.0, "step": 11930 }, { "epoch": 0.08237493704595472, "grad_norm": 0.0, "learning_rate": 9.177009527620441e-06, "loss": 0.0, "step": 11940 }, { "epoch": 0.08244392778049908, "grad_norm": 154.3331756591797, "learning_rate": 9.176319620274998e-06, "loss": 0.0255, "step": 11950 }, { "epoch": 0.08251291851504343, "grad_norm": 0.0, "learning_rate": 9.175629712929554e-06, "loss": 0.0003, "step": 11960 }, { "epoch": 0.08258190924958778, "grad_norm": 8.946063889681e-10, "learning_rate": 9.174939805584111e-06, "loss": 0.0848, "step": 11970 }, { "epoch": 0.08265089998413214, "grad_norm": 17.25187110900879, "learning_rate": 9.174249898238667e-06, "loss": 0.0018, "step": 11980 }, { "epoch": 0.08271989071867648, "grad_norm": 0.0, "learning_rate": 9.173559990893224e-06, "loss": 0.0002, "step": 11990 }, { "epoch": 0.08278888145322083, "grad_norm": 0.0, "learning_rate": 9.17287008354778e-06, "loss": 0.0096, "step": 12000 }, { "epoch": 0.08285787218776518, "grad_norm": 0.0, "learning_rate": 9.172180176202337e-06, "loss": 0.0, "step": 12010 }, { "epoch": 0.08292686292230954, "grad_norm": 0.0, "learning_rate": 9.171490268856893e-06, "loss": 0.6281, "step": 12020 }, { "epoch": 0.08299585365685389, "grad_norm": 0.0, "learning_rate": 9.17080036151145e-06, "loss": 0.0003, "step": 12030 }, { "epoch": 0.08306484439139823, "grad_norm": 0.0, "learning_rate": 9.170110454166006e-06, "loss": 0.0, "step": 12040 }, { "epoch": 0.08313383512594259, "grad_norm": 0.0, "learning_rate": 9.169420546820563e-06, "loss": 0.0, "step": 12050 }, { "epoch": 0.08320282586048694, "grad_norm": 0.0, "learning_rate": 9.16873063947512e-06, "loss": 0.0, "step": 12060 }, { "epoch": 0.08327181659503129, "grad_norm": 0.0, "learning_rate": 9.168040732129676e-06, "loss": 0.0064, "step": 12070 }, { "epoch": 0.08334080732957563, "grad_norm": 0.0, "learning_rate": 9.167350824784232e-06, "loss": 0.0004, "step": 12080 }, { "epoch": 0.08340979806412, "grad_norm": 0.05091177672147751, "learning_rate": 9.166660917438789e-06, "loss": 0.0, "step": 12090 }, { "epoch": 0.08347878879866434, "grad_norm": 0.007914368063211441, "learning_rate": 9.165971010093345e-06, "loss": 0.0003, "step": 12100 }, { "epoch": 0.08354777953320869, "grad_norm": 0.0, "learning_rate": 9.165281102747902e-06, "loss": 0.2137, "step": 12110 }, { "epoch": 0.08361677026775304, "grad_norm": 0.0, "learning_rate": 9.164591195402458e-06, "loss": 0.0, "step": 12120 }, { "epoch": 0.0836857610022974, "grad_norm": 0.0, "learning_rate": 9.163901288057015e-06, "loss": 0.0131, "step": 12130 }, { "epoch": 0.08375475173684174, "grad_norm": 0.0020862892270088196, "learning_rate": 9.163211380711571e-06, "loss": 0.0009, "step": 12140 }, { "epoch": 0.08382374247138609, "grad_norm": 0.0, "learning_rate": 9.162521473366128e-06, "loss": 0.0, "step": 12150 }, { "epoch": 0.08389273320593045, "grad_norm": 1.7016177480400074e-06, "learning_rate": 9.161831566020684e-06, "loss": 0.0, "step": 12160 }, { "epoch": 0.0839617239404748, "grad_norm": 0.0, "learning_rate": 9.16114165867524e-06, "loss": 0.001, "step": 12170 }, { "epoch": 0.08403071467501914, "grad_norm": 0.0, "learning_rate": 9.160451751329797e-06, "loss": 0.0001, "step": 12180 }, { "epoch": 0.08409970540956349, "grad_norm": 0.0, "learning_rate": 9.159761843984354e-06, "loss": 0.0001, "step": 12190 }, { "epoch": 0.08416869614410785, "grad_norm": 0.00013891565322410315, "learning_rate": 9.15907193663891e-06, "loss": 0.0, "step": 12200 }, { "epoch": 0.0842376868786522, "grad_norm": 0.0, "learning_rate": 9.158382029293467e-06, "loss": 0.0, "step": 12210 }, { "epoch": 0.08430667761319655, "grad_norm": 0.0, "learning_rate": 9.157692121948023e-06, "loss": 0.0, "step": 12220 }, { "epoch": 0.08437566834774089, "grad_norm": 0.0, "learning_rate": 9.15700221460258e-06, "loss": 0.0, "step": 12230 }, { "epoch": 0.08444465908228525, "grad_norm": 0.0, "learning_rate": 9.156312307257136e-06, "loss": 0.0, "step": 12240 }, { "epoch": 0.0845136498168296, "grad_norm": 0.0, "learning_rate": 9.155622399911693e-06, "loss": 0.0, "step": 12250 }, { "epoch": 0.08458264055137395, "grad_norm": 4.975483580693663e-09, "learning_rate": 9.154932492566249e-06, "loss": 0.1714, "step": 12260 }, { "epoch": 0.08465163128591831, "grad_norm": 0.0, "learning_rate": 9.154242585220806e-06, "loss": 0.0, "step": 12270 }, { "epoch": 0.08472062202046265, "grad_norm": 0.0, "learning_rate": 9.153552677875362e-06, "loss": 0.0, "step": 12280 }, { "epoch": 0.084789612755007, "grad_norm": 0.0, "learning_rate": 9.152862770529919e-06, "loss": 0.0, "step": 12290 }, { "epoch": 0.08485860348955135, "grad_norm": 0.00012580711336340755, "learning_rate": 9.152172863184475e-06, "loss": 0.0061, "step": 12300 }, { "epoch": 0.08492759422409571, "grad_norm": 0.0, "learning_rate": 9.151482955839032e-06, "loss": 0.0, "step": 12310 }, { "epoch": 0.08499658495864006, "grad_norm": 0.0, "learning_rate": 9.150793048493588e-06, "loss": 0.0, "step": 12320 }, { "epoch": 0.0850655756931844, "grad_norm": 0.0, "learning_rate": 9.150103141148145e-06, "loss": 0.0, "step": 12330 }, { "epoch": 0.08513456642772875, "grad_norm": 0.0, "learning_rate": 9.149413233802701e-06, "loss": 0.0, "step": 12340 }, { "epoch": 0.08520355716227311, "grad_norm": 0.0, "learning_rate": 9.148723326457257e-06, "loss": 0.0, "step": 12350 }, { "epoch": 0.08527254789681746, "grad_norm": 0.0011592888040468097, "learning_rate": 9.148033419111814e-06, "loss": 0.0106, "step": 12360 }, { "epoch": 0.0853415386313618, "grad_norm": 0.0, "learning_rate": 9.14734351176637e-06, "loss": 0.0, "step": 12370 }, { "epoch": 0.08541052936590617, "grad_norm": 0.00942144077271223, "learning_rate": 9.146653604420927e-06, "loss": 0.0, "step": 12380 }, { "epoch": 0.08547952010045051, "grad_norm": 3.95666211261414e-06, "learning_rate": 9.145963697075483e-06, "loss": 0.0, "step": 12390 }, { "epoch": 0.08554851083499486, "grad_norm": 0.0, "learning_rate": 9.14527378973004e-06, "loss": 0.0, "step": 12400 }, { "epoch": 0.0856175015695392, "grad_norm": 0.0, "learning_rate": 9.144583882384596e-06, "loss": 0.0008, "step": 12410 }, { "epoch": 0.08568649230408357, "grad_norm": 0.0026933508925139904, "learning_rate": 9.143893975039153e-06, "loss": 0.0001, "step": 12420 }, { "epoch": 0.08575548303862791, "grad_norm": 1.0207188552158186e-06, "learning_rate": 9.14320406769371e-06, "loss": 0.1169, "step": 12430 }, { "epoch": 0.08582447377317226, "grad_norm": 0.0, "learning_rate": 9.142514160348266e-06, "loss": 0.0002, "step": 12440 }, { "epoch": 0.08589346450771661, "grad_norm": 9.92602777749596e-10, "learning_rate": 9.141824253002822e-06, "loss": 0.0, "step": 12450 }, { "epoch": 0.08596245524226097, "grad_norm": 0.0, "learning_rate": 9.141134345657379e-06, "loss": 0.0, "step": 12460 }, { "epoch": 0.08603144597680532, "grad_norm": 9.948919341695728e-07, "learning_rate": 9.140444438311935e-06, "loss": 0.0001, "step": 12470 }, { "epoch": 0.08610043671134966, "grad_norm": 0.0, "learning_rate": 9.139754530966492e-06, "loss": 0.0, "step": 12480 }, { "epoch": 0.08616942744589402, "grad_norm": 0.0, "learning_rate": 9.139064623621048e-06, "loss": 0.0, "step": 12490 }, { "epoch": 0.08623841818043837, "grad_norm": 0.0, "learning_rate": 9.138374716275605e-06, "loss": 0.0, "step": 12500 }, { "epoch": 0.08630740891498272, "grad_norm": 0.0, "learning_rate": 9.137684808930161e-06, "loss": 0.0, "step": 12510 }, { "epoch": 0.08637639964952706, "grad_norm": 0.0015388409374281764, "learning_rate": 9.136994901584718e-06, "loss": 0.0, "step": 12520 }, { "epoch": 0.08644539038407142, "grad_norm": 7.780840678606182e-05, "learning_rate": 9.136304994239274e-06, "loss": 0.0, "step": 12530 }, { "epoch": 0.08651438111861577, "grad_norm": 0.0, "learning_rate": 9.13561508689383e-06, "loss": 0.0, "step": 12540 }, { "epoch": 0.08658337185316012, "grad_norm": 0.08898631483316422, "learning_rate": 9.134925179548387e-06, "loss": 0.0, "step": 12550 }, { "epoch": 0.08665236258770447, "grad_norm": 0.0, "learning_rate": 9.134235272202944e-06, "loss": 0.0, "step": 12560 }, { "epoch": 0.08672135332224883, "grad_norm": 0.0, "learning_rate": 9.1335453648575e-06, "loss": 0.0001, "step": 12570 }, { "epoch": 0.08679034405679317, "grad_norm": 5.562814209270073e-09, "learning_rate": 9.132855457512057e-06, "loss": 0.0005, "step": 12580 }, { "epoch": 0.08685933479133752, "grad_norm": 0.0, "learning_rate": 9.132165550166613e-06, "loss": 0.0889, "step": 12590 }, { "epoch": 0.08692832552588188, "grad_norm": 0.0, "learning_rate": 9.13147564282117e-06, "loss": 0.0, "step": 12600 }, { "epoch": 0.08699731626042623, "grad_norm": 0.0, "learning_rate": 9.130785735475726e-06, "loss": 0.0, "step": 12610 }, { "epoch": 0.08706630699497057, "grad_norm": 5.688956434823922e-07, "learning_rate": 9.130095828130283e-06, "loss": 0.0, "step": 12620 }, { "epoch": 0.08713529772951492, "grad_norm": 0.0, "learning_rate": 9.12940592078484e-06, "loss": 0.0, "step": 12630 }, { "epoch": 0.08720428846405928, "grad_norm": 0.0, "learning_rate": 9.128716013439396e-06, "loss": 0.0, "step": 12640 }, { "epoch": 0.08727327919860363, "grad_norm": 0.0, "learning_rate": 9.128026106093952e-06, "loss": 0.0001, "step": 12650 }, { "epoch": 0.08734226993314798, "grad_norm": 36.286399841308594, "learning_rate": 9.127336198748509e-06, "loss": 0.0075, "step": 12660 }, { "epoch": 0.08741126066769232, "grad_norm": 0.0, "learning_rate": 9.126646291403065e-06, "loss": 0.0, "step": 12670 }, { "epoch": 0.08748025140223668, "grad_norm": 0.0, "learning_rate": 9.125956384057622e-06, "loss": 0.0, "step": 12680 }, { "epoch": 0.08754924213678103, "grad_norm": 0.0, "learning_rate": 9.125266476712178e-06, "loss": 0.0, "step": 12690 }, { "epoch": 0.08761823287132538, "grad_norm": 1.579142292484903e-07, "learning_rate": 9.124576569366735e-06, "loss": 0.0, "step": 12700 }, { "epoch": 0.08768722360586974, "grad_norm": 0.0, "learning_rate": 9.123886662021291e-06, "loss": 0.0, "step": 12710 }, { "epoch": 0.08775621434041408, "grad_norm": 0.0, "learning_rate": 9.123196754675848e-06, "loss": 0.0, "step": 12720 }, { "epoch": 0.08782520507495843, "grad_norm": 0.0, "learning_rate": 9.122506847330404e-06, "loss": 0.0, "step": 12730 }, { "epoch": 0.08789419580950278, "grad_norm": 0.0, "learning_rate": 9.12181693998496e-06, "loss": 0.0, "step": 12740 }, { "epoch": 0.08796318654404714, "grad_norm": 0.0, "learning_rate": 9.121127032639517e-06, "loss": 0.0, "step": 12750 }, { "epoch": 0.08803217727859149, "grad_norm": 8.586281130540385e-10, "learning_rate": 9.120437125294074e-06, "loss": 0.0, "step": 12760 }, { "epoch": 0.08810116801313583, "grad_norm": 0.0, "learning_rate": 9.11974721794863e-06, "loss": 0.0, "step": 12770 }, { "epoch": 0.08817015874768018, "grad_norm": 0.0, "learning_rate": 9.119057310603187e-06, "loss": 0.0, "step": 12780 }, { "epoch": 0.08823914948222454, "grad_norm": 0.0, "learning_rate": 9.118367403257743e-06, "loss": 0.0, "step": 12790 }, { "epoch": 0.08830814021676889, "grad_norm": 0.0, "learning_rate": 9.1176774959123e-06, "loss": 0.0, "step": 12800 }, { "epoch": 0.08837713095131323, "grad_norm": 0.0, "learning_rate": 9.116987588566856e-06, "loss": 0.0, "step": 12810 }, { "epoch": 0.0884461216858576, "grad_norm": 0.0004610455653164536, "learning_rate": 9.116297681221413e-06, "loss": 0.0, "step": 12820 }, { "epoch": 0.08851511242040194, "grad_norm": 0.0, "learning_rate": 9.115607773875969e-06, "loss": 0.0006, "step": 12830 }, { "epoch": 0.08858410315494629, "grad_norm": 0.0, "learning_rate": 9.114917866530526e-06, "loss": 0.0001, "step": 12840 }, { "epoch": 0.08865309388949064, "grad_norm": 1.3961353033664636e-06, "learning_rate": 9.114227959185082e-06, "loss": 0.0, "step": 12850 }, { "epoch": 0.088722084624035, "grad_norm": 0.0, "learning_rate": 9.113538051839638e-06, "loss": 0.0, "step": 12860 }, { "epoch": 0.08879107535857934, "grad_norm": 0.0, "learning_rate": 9.112848144494195e-06, "loss": 0.0, "step": 12870 }, { "epoch": 0.08886006609312369, "grad_norm": 0.0, "learning_rate": 9.112158237148751e-06, "loss": 0.0, "step": 12880 }, { "epoch": 0.08892905682766804, "grad_norm": 8.310395969601814e-06, "learning_rate": 9.111468329803308e-06, "loss": 0.0, "step": 12890 }, { "epoch": 0.0889980475622124, "grad_norm": 0.0, "learning_rate": 9.110778422457864e-06, "loss": 0.0, "step": 12900 }, { "epoch": 0.08906703829675675, "grad_norm": 0.0, "learning_rate": 9.110088515112421e-06, "loss": 0.0, "step": 12910 }, { "epoch": 0.08913602903130109, "grad_norm": 0.0, "learning_rate": 9.109398607766977e-06, "loss": 0.0003, "step": 12920 }, { "epoch": 0.08920501976584545, "grad_norm": 0.0, "learning_rate": 9.108708700421534e-06, "loss": 0.0, "step": 12930 }, { "epoch": 0.0892740105003898, "grad_norm": 2.1934381422283877e-08, "learning_rate": 9.10801879307609e-06, "loss": 0.0, "step": 12940 }, { "epoch": 0.08934300123493415, "grad_norm": 0.0, "learning_rate": 9.107328885730647e-06, "loss": 0.0193, "step": 12950 }, { "epoch": 0.0894119919694785, "grad_norm": 15.994109153747559, "learning_rate": 9.106638978385203e-06, "loss": 0.0037, "step": 12960 }, { "epoch": 0.08948098270402285, "grad_norm": 0.05119892954826355, "learning_rate": 9.10594907103976e-06, "loss": 0.0, "step": 12970 }, { "epoch": 0.0895499734385672, "grad_norm": 2.489688633033893e-08, "learning_rate": 9.105259163694316e-06, "loss": 0.0871, "step": 12980 }, { "epoch": 0.08961896417311155, "grad_norm": 6.265150318540691e-07, "learning_rate": 9.104569256348873e-06, "loss": 0.0, "step": 12990 }, { "epoch": 0.0896879549076559, "grad_norm": 0.0, "learning_rate": 9.10387934900343e-06, "loss": 0.0044, "step": 13000 }, { "epoch": 0.08975694564220026, "grad_norm": 0.0, "learning_rate": 9.103189441657986e-06, "loss": 0.0, "step": 13010 }, { "epoch": 0.0898259363767446, "grad_norm": 0.0, "learning_rate": 9.102499534312542e-06, "loss": 0.0, "step": 13020 }, { "epoch": 0.08989492711128895, "grad_norm": 3.9424712383606675e-09, "learning_rate": 9.101809626967099e-06, "loss": 0.0, "step": 13030 }, { "epoch": 0.08996391784583331, "grad_norm": 0.0, "learning_rate": 9.101119719621655e-06, "loss": 0.0, "step": 13040 }, { "epoch": 0.09003290858037766, "grad_norm": 0.0, "learning_rate": 9.100429812276212e-06, "loss": 0.0007, "step": 13050 }, { "epoch": 0.090101899314922, "grad_norm": 66.62147521972656, "learning_rate": 9.099739904930768e-06, "loss": 0.0115, "step": 13060 }, { "epoch": 0.09017089004946635, "grad_norm": 1.4568859338760376, "learning_rate": 9.099049997585325e-06, "loss": 0.0003, "step": 13070 }, { "epoch": 0.09023988078401071, "grad_norm": 0.0056158327497541904, "learning_rate": 9.098360090239881e-06, "loss": 0.0, "step": 13080 }, { "epoch": 0.09030887151855506, "grad_norm": 0.0, "learning_rate": 9.097670182894438e-06, "loss": 0.0, "step": 13090 }, { "epoch": 0.0903778622530994, "grad_norm": 0.0, "learning_rate": 9.096980275548994e-06, "loss": 0.0, "step": 13100 }, { "epoch": 0.09044685298764375, "grad_norm": 1.6741969375289045e-05, "learning_rate": 9.09629036820355e-06, "loss": 0.0, "step": 13110 }, { "epoch": 0.09051584372218811, "grad_norm": 0.0, "learning_rate": 9.095600460858107e-06, "loss": 0.0003, "step": 13120 }, { "epoch": 0.09058483445673246, "grad_norm": 2.2316659453736065e-07, "learning_rate": 9.094910553512664e-06, "loss": 0.0, "step": 13130 }, { "epoch": 0.09065382519127681, "grad_norm": 0.0, "learning_rate": 9.09422064616722e-06, "loss": 0.0326, "step": 13140 }, { "epoch": 0.09072281592582117, "grad_norm": 100.9626693725586, "learning_rate": 9.093530738821777e-06, "loss": 0.0394, "step": 13150 }, { "epoch": 0.09079180666036551, "grad_norm": 1.0193910071620849e-07, "learning_rate": 9.092840831476333e-06, "loss": 0.0225, "step": 13160 }, { "epoch": 0.09086079739490986, "grad_norm": 0.0, "learning_rate": 9.09215092413089e-06, "loss": 0.0, "step": 13170 }, { "epoch": 0.09092978812945421, "grad_norm": 0.0, "learning_rate": 9.091461016785446e-06, "loss": 0.0005, "step": 13180 }, { "epoch": 0.09099877886399857, "grad_norm": 3.2125100357660585e-09, "learning_rate": 9.090771109440003e-06, "loss": 0.0, "step": 13190 }, { "epoch": 0.09106776959854292, "grad_norm": 8.771229431658867e-07, "learning_rate": 9.09008120209456e-06, "loss": 0.5305, "step": 13200 }, { "epoch": 0.09113676033308726, "grad_norm": 0.011078632436692715, "learning_rate": 9.089391294749116e-06, "loss": 0.0, "step": 13210 }, { "epoch": 0.09120575106763161, "grad_norm": 0.00012009937927359715, "learning_rate": 9.088701387403672e-06, "loss": 0.0, "step": 13220 }, { "epoch": 0.09127474180217597, "grad_norm": 0.0, "learning_rate": 9.088011480058229e-06, "loss": 0.0, "step": 13230 }, { "epoch": 0.09134373253672032, "grad_norm": 0.023638155311346054, "learning_rate": 9.087321572712785e-06, "loss": 0.002, "step": 13240 }, { "epoch": 0.09141272327126466, "grad_norm": 8.871408607724618e-10, "learning_rate": 9.086631665367342e-06, "loss": 0.0002, "step": 13250 }, { "epoch": 0.09148171400580903, "grad_norm": 0.0, "learning_rate": 9.085941758021898e-06, "loss": 0.0, "step": 13260 }, { "epoch": 0.09155070474035337, "grad_norm": 0.0, "learning_rate": 9.085251850676455e-06, "loss": 0.0001, "step": 13270 }, { "epoch": 0.09161969547489772, "grad_norm": 0.0001510678994236514, "learning_rate": 9.084561943331011e-06, "loss": 0.0, "step": 13280 }, { "epoch": 0.09168868620944207, "grad_norm": 0.0, "learning_rate": 9.083872035985568e-06, "loss": 0.0, "step": 13290 }, { "epoch": 0.09175767694398643, "grad_norm": 0.0, "learning_rate": 9.083182128640124e-06, "loss": 0.0, "step": 13300 }, { "epoch": 0.09182666767853077, "grad_norm": 1.1380863725207746e-05, "learning_rate": 9.08249222129468e-06, "loss": 0.0, "step": 13310 }, { "epoch": 0.09189565841307512, "grad_norm": 2.755751302174758e-05, "learning_rate": 9.081802313949237e-06, "loss": 0.0097, "step": 13320 }, { "epoch": 0.09196464914761948, "grad_norm": 0.0005176571430638433, "learning_rate": 9.081112406603794e-06, "loss": 0.0006, "step": 13330 }, { "epoch": 0.09203363988216383, "grad_norm": 0.00021993771952111274, "learning_rate": 9.08042249925835e-06, "loss": 0.0, "step": 13340 }, { "epoch": 0.09210263061670818, "grad_norm": 5.26480370410809e-08, "learning_rate": 9.079732591912907e-06, "loss": 0.0, "step": 13350 }, { "epoch": 0.09217162135125252, "grad_norm": 0.0, "learning_rate": 9.079042684567463e-06, "loss": 0.0, "step": 13360 }, { "epoch": 0.09224061208579688, "grad_norm": 0.0003403661830816418, "learning_rate": 9.078352777222021e-06, "loss": 0.0, "step": 13370 }, { "epoch": 0.09230960282034123, "grad_norm": 9.105828313593634e-10, "learning_rate": 9.077662869876576e-06, "loss": 0.019, "step": 13380 }, { "epoch": 0.09237859355488558, "grad_norm": 0.0, "learning_rate": 9.076972962531132e-06, "loss": 0.0, "step": 13390 }, { "epoch": 0.09244758428942992, "grad_norm": 0.0, "learning_rate": 9.076283055185689e-06, "loss": 0.0, "step": 13400 }, { "epoch": 0.09251657502397428, "grad_norm": 0.0, "learning_rate": 9.075593147840245e-06, "loss": 0.0004, "step": 13410 }, { "epoch": 0.09258556575851863, "grad_norm": 0.0, "learning_rate": 9.074903240494802e-06, "loss": 0.0, "step": 13420 }, { "epoch": 0.09265455649306298, "grad_norm": 0.0, "learning_rate": 9.074213333149358e-06, "loss": 0.0, "step": 13430 }, { "epoch": 0.09272354722760734, "grad_norm": 0.003086378099396825, "learning_rate": 9.073523425803915e-06, "loss": 0.0, "step": 13440 }, { "epoch": 0.09279253796215169, "grad_norm": 0.0, "learning_rate": 9.072833518458471e-06, "loss": 0.0003, "step": 13450 }, { "epoch": 0.09286152869669603, "grad_norm": 0.0, "learning_rate": 9.072143611113028e-06, "loss": 0.0, "step": 13460 }, { "epoch": 0.09293051943124038, "grad_norm": 0.0, "learning_rate": 9.071453703767584e-06, "loss": 0.0001, "step": 13470 }, { "epoch": 0.09299951016578474, "grad_norm": 0.0, "learning_rate": 9.070763796422143e-06, "loss": 0.0014, "step": 13480 }, { "epoch": 0.09306850090032909, "grad_norm": 0.0, "learning_rate": 9.070073889076697e-06, "loss": 0.0, "step": 13490 }, { "epoch": 0.09313749163487343, "grad_norm": 0.0, "learning_rate": 9.069383981731254e-06, "loss": 0.0, "step": 13500 }, { "epoch": 0.09320648236941778, "grad_norm": 0.0, "learning_rate": 9.06869407438581e-06, "loss": 0.0, "step": 13510 }, { "epoch": 0.09327547310396214, "grad_norm": 0.0, "learning_rate": 9.068004167040367e-06, "loss": 0.0, "step": 13520 }, { "epoch": 0.09334446383850649, "grad_norm": 0.0, "learning_rate": 9.067314259694923e-06, "loss": 0.0517, "step": 13530 }, { "epoch": 0.09341345457305084, "grad_norm": 0.0, "learning_rate": 9.06662435234948e-06, "loss": 0.0, "step": 13540 }, { "epoch": 0.0934824453075952, "grad_norm": 0.0, "learning_rate": 9.065934445004036e-06, "loss": 0.0, "step": 13550 }, { "epoch": 0.09355143604213954, "grad_norm": 45.55268859863281, "learning_rate": 9.065244537658593e-06, "loss": 0.0076, "step": 13560 }, { "epoch": 0.09362042677668389, "grad_norm": 5.459155905107593e-10, "learning_rate": 9.06455463031315e-06, "loss": 0.0, "step": 13570 }, { "epoch": 0.09368941751122824, "grad_norm": 0.006124570034444332, "learning_rate": 9.063864722967706e-06, "loss": 0.0, "step": 13580 }, { "epoch": 0.0937584082457726, "grad_norm": 0.0008759878692217171, "learning_rate": 9.063174815622264e-06, "loss": 0.0039, "step": 13590 }, { "epoch": 0.09382739898031695, "grad_norm": 0.0, "learning_rate": 9.062484908276819e-06, "loss": 0.199, "step": 13600 }, { "epoch": 0.09389638971486129, "grad_norm": 0.0, "learning_rate": 9.061795000931375e-06, "loss": 0.0, "step": 13610 }, { "epoch": 0.09396538044940564, "grad_norm": 0.0, "learning_rate": 9.061105093585932e-06, "loss": 0.0, "step": 13620 }, { "epoch": 0.09403437118395, "grad_norm": 0.0, "learning_rate": 9.060415186240488e-06, "loss": 0.0, "step": 13630 }, { "epoch": 0.09410336191849435, "grad_norm": 0.0, "learning_rate": 9.059725278895045e-06, "loss": 0.0, "step": 13640 }, { "epoch": 0.0941723526530387, "grad_norm": 8.18364143371582, "learning_rate": 9.059035371549601e-06, "loss": 0.0018, "step": 13650 }, { "epoch": 0.09424134338758305, "grad_norm": 109.45488739013672, "learning_rate": 9.058345464204158e-06, "loss": 0.0498, "step": 13660 }, { "epoch": 0.0943103341221274, "grad_norm": 7.464707962867578e-09, "learning_rate": 9.057655556858714e-06, "loss": 0.004, "step": 13670 }, { "epoch": 0.09437932485667175, "grad_norm": 0.01129795704036951, "learning_rate": 9.05696564951327e-06, "loss": 0.0041, "step": 13680 }, { "epoch": 0.0944483155912161, "grad_norm": 0.0, "learning_rate": 9.056275742167827e-06, "loss": 0.0, "step": 13690 }, { "epoch": 0.09451730632576046, "grad_norm": 0.0, "learning_rate": 9.055585834822385e-06, "loss": 0.0, "step": 13700 }, { "epoch": 0.0945862970603048, "grad_norm": 2.364405372645706e-05, "learning_rate": 9.05489592747694e-06, "loss": 0.0, "step": 13710 }, { "epoch": 0.09465528779484915, "grad_norm": 0.0, "learning_rate": 9.054206020131497e-06, "loss": 0.0001, "step": 13720 }, { "epoch": 0.0947242785293935, "grad_norm": 0.0004204988945275545, "learning_rate": 9.053516112786053e-06, "loss": 0.0149, "step": 13730 }, { "epoch": 0.09479326926393786, "grad_norm": 1.9861205657889514e-07, "learning_rate": 9.05282620544061e-06, "loss": 0.0, "step": 13740 }, { "epoch": 0.0948622599984822, "grad_norm": 0.0, "learning_rate": 9.052136298095166e-06, "loss": 0.0001, "step": 13750 }, { "epoch": 0.09493125073302655, "grad_norm": 0.0, "learning_rate": 9.051446390749723e-06, "loss": 0.0, "step": 13760 }, { "epoch": 0.09500024146757091, "grad_norm": 3.38435079960675e-09, "learning_rate": 9.050756483404279e-06, "loss": 0.0, "step": 13770 }, { "epoch": 0.09506923220211526, "grad_norm": 0.0006970739341340959, "learning_rate": 9.050066576058836e-06, "loss": 0.0, "step": 13780 }, { "epoch": 0.0951382229366596, "grad_norm": 1.2562564734253101e-05, "learning_rate": 9.049376668713392e-06, "loss": 0.006, "step": 13790 }, { "epoch": 0.09520721367120395, "grad_norm": 4.783676232911205e-10, "learning_rate": 9.048686761367949e-06, "loss": 0.0, "step": 13800 }, { "epoch": 0.09527620440574831, "grad_norm": 0.0, "learning_rate": 9.047996854022507e-06, "loss": 0.0, "step": 13810 }, { "epoch": 0.09534519514029266, "grad_norm": 0.0, "learning_rate": 9.047306946677063e-06, "loss": 0.0, "step": 13820 }, { "epoch": 0.09541418587483701, "grad_norm": 0.0, "learning_rate": 9.046617039331618e-06, "loss": 0.363, "step": 13830 }, { "epoch": 0.09548317660938135, "grad_norm": 0.0, "learning_rate": 9.045927131986175e-06, "loss": 0.0, "step": 13840 }, { "epoch": 0.09555216734392571, "grad_norm": 0.0002969199267681688, "learning_rate": 9.045237224640731e-06, "loss": 0.0, "step": 13850 }, { "epoch": 0.09562115807847006, "grad_norm": 0.0, "learning_rate": 9.044547317295288e-06, "loss": 0.0001, "step": 13860 }, { "epoch": 0.09569014881301441, "grad_norm": 0.0, "learning_rate": 9.043857409949844e-06, "loss": 0.0, "step": 13870 }, { "epoch": 0.09575913954755877, "grad_norm": 0.0, "learning_rate": 9.0431675026044e-06, "loss": 0.0033, "step": 13880 }, { "epoch": 0.09582813028210312, "grad_norm": 0.007406032178550959, "learning_rate": 9.042477595258957e-06, "loss": 0.0003, "step": 13890 }, { "epoch": 0.09589712101664746, "grad_norm": 0.0, "learning_rate": 9.041787687913513e-06, "loss": 0.0, "step": 13900 }, { "epoch": 0.09596611175119181, "grad_norm": 0.0, "learning_rate": 9.04109778056807e-06, "loss": 0.0, "step": 13910 }, { "epoch": 0.09603510248573617, "grad_norm": 6.853615559521131e-07, "learning_rate": 9.040407873222628e-06, "loss": 0.0005, "step": 13920 }, { "epoch": 0.09610409322028052, "grad_norm": 0.0, "learning_rate": 9.039717965877185e-06, "loss": 0.0, "step": 13930 }, { "epoch": 0.09617308395482486, "grad_norm": 0.0, "learning_rate": 9.03902805853174e-06, "loss": 0.0, "step": 13940 }, { "epoch": 0.09624207468936921, "grad_norm": 0.0, "learning_rate": 9.038338151186296e-06, "loss": 0.0002, "step": 13950 }, { "epoch": 0.09631106542391357, "grad_norm": 0.0, "learning_rate": 9.037648243840852e-06, "loss": 0.0, "step": 13960 }, { "epoch": 0.09638005615845792, "grad_norm": 0.0, "learning_rate": 9.036958336495409e-06, "loss": 0.0, "step": 13970 }, { "epoch": 0.09644904689300227, "grad_norm": 0.0, "learning_rate": 9.036268429149965e-06, "loss": 0.0, "step": 13980 }, { "epoch": 0.09651803762754663, "grad_norm": 0.0, "learning_rate": 9.035578521804522e-06, "loss": 0.0, "step": 13990 }, { "epoch": 0.09658702836209097, "grad_norm": 5.598350227842275e-08, "learning_rate": 9.034888614459078e-06, "loss": 0.0018, "step": 14000 }, { "epoch": 0.09665601909663532, "grad_norm": 0.0, "learning_rate": 9.034198707113635e-06, "loss": 0.0, "step": 14010 }, { "epoch": 0.09672500983117967, "grad_norm": 0.00027740251971408725, "learning_rate": 9.033508799768191e-06, "loss": 0.0032, "step": 14020 }, { "epoch": 0.09679400056572403, "grad_norm": 0.0, "learning_rate": 9.03281889242275e-06, "loss": 0.0, "step": 14030 }, { "epoch": 0.09686299130026838, "grad_norm": 0.0, "learning_rate": 9.032128985077306e-06, "loss": 0.0, "step": 14040 }, { "epoch": 0.09693198203481272, "grad_norm": 0.0, "learning_rate": 9.03143907773186e-06, "loss": 0.0, "step": 14050 }, { "epoch": 0.09700097276935707, "grad_norm": 0.002469313330948353, "learning_rate": 9.030749170386417e-06, "loss": 0.0, "step": 14060 }, { "epoch": 0.09706996350390143, "grad_norm": 256.684814453125, "learning_rate": 9.030059263040974e-06, "loss": 0.0274, "step": 14070 }, { "epoch": 0.09713895423844578, "grad_norm": 0.0, "learning_rate": 9.02936935569553e-06, "loss": 0.005, "step": 14080 }, { "epoch": 0.09720794497299012, "grad_norm": 0.0, "learning_rate": 9.028679448350087e-06, "loss": 0.0, "step": 14090 }, { "epoch": 0.09727693570753448, "grad_norm": 0.0, "learning_rate": 9.027989541004643e-06, "loss": 0.0, "step": 14100 }, { "epoch": 0.09734592644207883, "grad_norm": 0.0, "learning_rate": 9.0272996336592e-06, "loss": 0.0, "step": 14110 }, { "epoch": 0.09741491717662318, "grad_norm": 0.0, "learning_rate": 9.026609726313756e-06, "loss": 0.0, "step": 14120 }, { "epoch": 0.09748390791116752, "grad_norm": 3.649547352324589e-06, "learning_rate": 9.025919818968313e-06, "loss": 0.5051, "step": 14130 }, { "epoch": 0.09755289864571189, "grad_norm": 2.96873192695557e-09, "learning_rate": 9.025229911622871e-06, "loss": 0.0102, "step": 14140 }, { "epoch": 0.09762188938025623, "grad_norm": 0.017666010186076164, "learning_rate": 9.024540004277427e-06, "loss": 0.0, "step": 14150 }, { "epoch": 0.09769088011480058, "grad_norm": 0.0, "learning_rate": 9.023850096931984e-06, "loss": 0.0105, "step": 14160 }, { "epoch": 0.09775987084934493, "grad_norm": 0.0, "learning_rate": 9.023160189586539e-06, "loss": 0.0, "step": 14170 }, { "epoch": 0.09782886158388929, "grad_norm": 0.0, "learning_rate": 9.022470282241095e-06, "loss": 0.0003, "step": 14180 }, { "epoch": 0.09789785231843363, "grad_norm": 0.0, "learning_rate": 9.021780374895652e-06, "loss": 0.0007, "step": 14190 }, { "epoch": 0.09796684305297798, "grad_norm": 4.652412066707967e-10, "learning_rate": 9.021090467550208e-06, "loss": 0.0, "step": 14200 }, { "epoch": 0.09803583378752234, "grad_norm": 0.0, "learning_rate": 9.020400560204765e-06, "loss": 0.0, "step": 14210 }, { "epoch": 0.09810482452206669, "grad_norm": 0.0, "learning_rate": 9.019710652859321e-06, "loss": 0.0, "step": 14220 }, { "epoch": 0.09817381525661104, "grad_norm": 0.0, "learning_rate": 9.019020745513878e-06, "loss": 0.0, "step": 14230 }, { "epoch": 0.09824280599115538, "grad_norm": 0.6789950728416443, "learning_rate": 9.018330838168434e-06, "loss": 0.0002, "step": 14240 }, { "epoch": 0.09831179672569974, "grad_norm": 1.3683045096968272e-07, "learning_rate": 9.017640930822992e-06, "loss": 0.0, "step": 14250 }, { "epoch": 0.09838078746024409, "grad_norm": 3.689347849444857e-08, "learning_rate": 9.016951023477549e-06, "loss": 0.0001, "step": 14260 }, { "epoch": 0.09844977819478844, "grad_norm": 0.0, "learning_rate": 9.016261116132105e-06, "loss": 0.0, "step": 14270 }, { "epoch": 0.09851876892933278, "grad_norm": 0.0, "learning_rate": 9.01557120878666e-06, "loss": 0.0, "step": 14280 }, { "epoch": 0.09858775966387714, "grad_norm": 0.17829005420207977, "learning_rate": 9.014881301441217e-06, "loss": 0.011, "step": 14290 }, { "epoch": 0.09865675039842149, "grad_norm": 0.9580913186073303, "learning_rate": 9.014191394095773e-06, "loss": 0.0001, "step": 14300 }, { "epoch": 0.09872574113296584, "grad_norm": 0.0, "learning_rate": 9.01350148675033e-06, "loss": 0.0, "step": 14310 }, { "epoch": 0.0987947318675102, "grad_norm": 0.0, "learning_rate": 9.01288057013943e-06, "loss": 0.9633, "step": 14320 }, { "epoch": 0.09886372260205455, "grad_norm": 0.0, "learning_rate": 9.012190662793987e-06, "loss": 0.0, "step": 14330 }, { "epoch": 0.0989327133365989, "grad_norm": 0.0, "learning_rate": 9.011500755448545e-06, "loss": 0.0, "step": 14340 }, { "epoch": 0.09900170407114324, "grad_norm": 0.0, "learning_rate": 9.010810848103101e-06, "loss": 0.0, "step": 14350 }, { "epoch": 0.0990706948056876, "grad_norm": 0.0, "learning_rate": 9.010120940757658e-06, "loss": 0.0062, "step": 14360 }, { "epoch": 0.09913968554023195, "grad_norm": 0.0, "learning_rate": 9.009431033412214e-06, "loss": 0.0, "step": 14370 }, { "epoch": 0.0992086762747763, "grad_norm": 0.0013788517098873854, "learning_rate": 9.008741126066771e-06, "loss": 0.0, "step": 14380 }, { "epoch": 0.09927766700932064, "grad_norm": 0.009718854911625385, "learning_rate": 9.008051218721326e-06, "loss": 0.0, "step": 14390 }, { "epoch": 0.099346657743865, "grad_norm": 2.6873924525716575e-06, "learning_rate": 9.007361311375882e-06, "loss": 0.0, "step": 14400 }, { "epoch": 0.09941564847840935, "grad_norm": 0.0, "learning_rate": 9.006671404030439e-06, "loss": 0.0, "step": 14410 }, { "epoch": 0.0994846392129537, "grad_norm": 0.0, "learning_rate": 9.005981496684995e-06, "loss": 0.0092, "step": 14420 }, { "epoch": 0.09955362994749806, "grad_norm": 0.0, "learning_rate": 9.005291589339552e-06, "loss": 0.0, "step": 14430 }, { "epoch": 0.0996226206820424, "grad_norm": 0.17684686183929443, "learning_rate": 9.004601681994108e-06, "loss": 0.0, "step": 14440 }, { "epoch": 0.09969161141658675, "grad_norm": 0.0, "learning_rate": 9.003911774648666e-06, "loss": 0.3758, "step": 14450 }, { "epoch": 0.0997606021511311, "grad_norm": 0.0, "learning_rate": 9.003221867303223e-06, "loss": 0.0004, "step": 14460 }, { "epoch": 0.09982959288567546, "grad_norm": 0.0, "learning_rate": 9.00253195995778e-06, "loss": 0.0, "step": 14470 }, { "epoch": 0.0998985836202198, "grad_norm": 7.986645221710205, "learning_rate": 9.001842052612336e-06, "loss": 0.0057, "step": 14480 }, { "epoch": 0.09996757435476415, "grad_norm": 8.702115883352235e-05, "learning_rate": 9.001152145266892e-06, "loss": 0.0, "step": 14490 }, { "epoch": 0.1000365650893085, "grad_norm": 0.0, "learning_rate": 9.000462237921447e-06, "loss": 0.0032, "step": 14500 }, { "epoch": 0.10010555582385286, "grad_norm": 0.0, "learning_rate": 8.999772330576004e-06, "loss": 0.0008, "step": 14510 }, { "epoch": 0.1001745465583972, "grad_norm": 0.0, "learning_rate": 8.99908242323056e-06, "loss": 0.0, "step": 14520 }, { "epoch": 0.10024353729294155, "grad_norm": 0.0, "learning_rate": 8.998392515885117e-06, "loss": 0.0, "step": 14530 }, { "epoch": 0.10031252802748591, "grad_norm": 3.892130848726083e-07, "learning_rate": 8.997702608539673e-06, "loss": 0.0025, "step": 14540 }, { "epoch": 0.10038151876203026, "grad_norm": 0.0, "learning_rate": 8.99701270119423e-06, "loss": 0.1964, "step": 14550 }, { "epoch": 0.10045050949657461, "grad_norm": 0.0, "learning_rate": 8.996322793848788e-06, "loss": 0.0038, "step": 14560 }, { "epoch": 0.10051950023111896, "grad_norm": 0.0, "learning_rate": 8.995632886503344e-06, "loss": 0.0, "step": 14570 }, { "epoch": 0.10058849096566332, "grad_norm": 334.4906311035156, "learning_rate": 8.9949429791579e-06, "loss": 0.1375, "step": 14580 }, { "epoch": 0.10065748170020766, "grad_norm": 1.3659666776657104, "learning_rate": 8.994253071812457e-06, "loss": 0.0003, "step": 14590 }, { "epoch": 0.10072647243475201, "grad_norm": 0.004187623504549265, "learning_rate": 8.993563164467014e-06, "loss": 2.1156, "step": 14600 }, { "epoch": 0.10079546316929637, "grad_norm": 0.0, "learning_rate": 8.99287325712157e-06, "loss": 0.0, "step": 14610 }, { "epoch": 0.10086445390384072, "grad_norm": 0.00265240459702909, "learning_rate": 8.992183349776125e-06, "loss": 0.0, "step": 14620 }, { "epoch": 0.10093344463838506, "grad_norm": 1.7600315871391103e-08, "learning_rate": 8.991493442430681e-06, "loss": 0.0156, "step": 14630 }, { "epoch": 0.10100243537292941, "grad_norm": 0.0, "learning_rate": 8.990803535085238e-06, "loss": 0.6057, "step": 14640 }, { "epoch": 0.10107142610747377, "grad_norm": 1.6776948541519232e-07, "learning_rate": 8.990113627739794e-06, "loss": 0.1937, "step": 14650 }, { "epoch": 0.10114041684201812, "grad_norm": 0.0, "learning_rate": 8.989423720394351e-06, "loss": 0.0, "step": 14660 }, { "epoch": 0.10120940757656247, "grad_norm": 1.028349757194519, "learning_rate": 8.988733813048909e-06, "loss": 0.0169, "step": 14670 }, { "epoch": 0.10127839831110681, "grad_norm": 0.0, "learning_rate": 8.988043905703466e-06, "loss": 0.0, "step": 14680 }, { "epoch": 0.10134738904565117, "grad_norm": 0.0011982250725850463, "learning_rate": 8.987353998358022e-06, "loss": 0.0, "step": 14690 }, { "epoch": 0.10141637978019552, "grad_norm": 0.0, "learning_rate": 8.986664091012579e-06, "loss": 0.0, "step": 14700 }, { "epoch": 0.10148537051473987, "grad_norm": 0.0, "learning_rate": 8.985974183667135e-06, "loss": 0.359, "step": 14710 }, { "epoch": 0.10155436124928423, "grad_norm": 0.0, "learning_rate": 8.985284276321692e-06, "loss": 0.0, "step": 14720 }, { "epoch": 0.10162335198382857, "grad_norm": 0.0, "learning_rate": 8.984594368976246e-06, "loss": 0.0016, "step": 14730 }, { "epoch": 0.10169234271837292, "grad_norm": 0.0, "learning_rate": 8.983904461630803e-06, "loss": 0.0008, "step": 14740 }, { "epoch": 0.10176133345291727, "grad_norm": 0.0, "learning_rate": 8.98321455428536e-06, "loss": 0.0015, "step": 14750 }, { "epoch": 0.10183032418746163, "grad_norm": 53.06954574584961, "learning_rate": 8.982524646939916e-06, "loss": 0.0094, "step": 14760 }, { "epoch": 0.10189931492200598, "grad_norm": 0.0, "learning_rate": 8.981834739594472e-06, "loss": 0.0002, "step": 14770 }, { "epoch": 0.10196830565655032, "grad_norm": 0.0, "learning_rate": 8.98114483224903e-06, "loss": 0.0, "step": 14780 }, { "epoch": 0.10203729639109467, "grad_norm": 0.0, "learning_rate": 8.980454924903587e-06, "loss": 0.2904, "step": 14790 }, { "epoch": 0.10210628712563903, "grad_norm": 0.0007439965265803039, "learning_rate": 8.979765017558143e-06, "loss": 0.0, "step": 14800 }, { "epoch": 0.10217527786018338, "grad_norm": 0.0, "learning_rate": 8.9790751102127e-06, "loss": 0.0, "step": 14810 }, { "epoch": 0.10224426859472772, "grad_norm": 0.6486353278160095, "learning_rate": 8.978385202867256e-06, "loss": 0.0001, "step": 14820 }, { "epoch": 0.10231325932927209, "grad_norm": 0.0, "learning_rate": 8.977695295521813e-06, "loss": 0.0001, "step": 14830 }, { "epoch": 0.10238225006381643, "grad_norm": 0.0, "learning_rate": 8.977005388176368e-06, "loss": 0.0, "step": 14840 }, { "epoch": 0.10245124079836078, "grad_norm": 20.890771865844727, "learning_rate": 8.976315480830924e-06, "loss": 0.8796, "step": 14850 }, { "epoch": 0.10252023153290513, "grad_norm": 0.0, "learning_rate": 8.97562557348548e-06, "loss": 0.0, "step": 14860 }, { "epoch": 0.10258922226744949, "grad_norm": 0.0, "learning_rate": 8.974935666140037e-06, "loss": 0.0, "step": 14870 }, { "epoch": 0.10265821300199383, "grad_norm": 21.972917556762695, "learning_rate": 8.974245758794594e-06, "loss": 0.0038, "step": 14880 }, { "epoch": 0.10272720373653818, "grad_norm": 0.0, "learning_rate": 8.973555851449152e-06, "loss": 0.7, "step": 14890 }, { "epoch": 0.10279619447108253, "grad_norm": 2.8864340251288922e-08, "learning_rate": 8.972865944103708e-06, "loss": 0.0005, "step": 14900 }, { "epoch": 0.10286518520562689, "grad_norm": 0.0, "learning_rate": 8.972176036758265e-06, "loss": 0.0006, "step": 14910 }, { "epoch": 0.10293417594017124, "grad_norm": 0.0, "learning_rate": 8.971486129412821e-06, "loss": 0.0, "step": 14920 }, { "epoch": 0.10300316667471558, "grad_norm": 0.0, "learning_rate": 8.970796222067378e-06, "loss": 0.0, "step": 14930 }, { "epoch": 0.10307215740925994, "grad_norm": 0.0, "learning_rate": 8.970106314721934e-06, "loss": 0.0044, "step": 14940 }, { "epoch": 0.10314114814380429, "grad_norm": 0.0, "learning_rate": 8.96941640737649e-06, "loss": 0.0, "step": 14950 }, { "epoch": 0.10321013887834864, "grad_norm": 0.0, "learning_rate": 8.968726500031046e-06, "loss": 0.0015, "step": 14960 }, { "epoch": 0.10327912961289298, "grad_norm": 0.0, "learning_rate": 8.968036592685602e-06, "loss": 0.0, "step": 14970 }, { "epoch": 0.10334812034743734, "grad_norm": 0.11133863776922226, "learning_rate": 8.967346685340159e-06, "loss": 0.0005, "step": 14980 }, { "epoch": 0.10341711108198169, "grad_norm": 0.0, "learning_rate": 8.966656777994715e-06, "loss": 0.0002, "step": 14990 }, { "epoch": 0.10348610181652604, "grad_norm": 0.0, "learning_rate": 8.965966870649273e-06, "loss": 0.0001, "step": 15000 }, { "epoch": 0.10355509255107039, "grad_norm": 0.0, "learning_rate": 8.96527696330383e-06, "loss": 0.0, "step": 15010 }, { "epoch": 0.10362408328561475, "grad_norm": 0.0, "learning_rate": 8.964587055958386e-06, "loss": 0.0, "step": 15020 }, { "epoch": 0.10369307402015909, "grad_norm": 0.0, "learning_rate": 8.963897148612943e-06, "loss": 0.0004, "step": 15030 }, { "epoch": 0.10376206475470344, "grad_norm": 0.0, "learning_rate": 8.9632072412675e-06, "loss": 0.0, "step": 15040 }, { "epoch": 0.1038310554892478, "grad_norm": 0.0, "learning_rate": 8.962517333922056e-06, "loss": 0.0002, "step": 15050 }, { "epoch": 0.10390004622379215, "grad_norm": 0.0, "learning_rate": 8.961827426576612e-06, "loss": 0.0, "step": 15060 }, { "epoch": 0.1039690369583365, "grad_norm": 1.1922156772925518e-07, "learning_rate": 8.961137519231167e-06, "loss": 0.0, "step": 15070 }, { "epoch": 0.10403802769288084, "grad_norm": 0.007567184045910835, "learning_rate": 8.960447611885724e-06, "loss": 0.0115, "step": 15080 }, { "epoch": 0.1041070184274252, "grad_norm": 0.0, "learning_rate": 8.95975770454028e-06, "loss": 0.0031, "step": 15090 }, { "epoch": 0.10417600916196955, "grad_norm": 0.0, "learning_rate": 8.959067797194837e-06, "loss": 0.0, "step": 15100 }, { "epoch": 0.1042449998965139, "grad_norm": 0.0, "learning_rate": 8.958377889849395e-06, "loss": 0.0, "step": 15110 }, { "epoch": 0.10431399063105824, "grad_norm": 0.0, "learning_rate": 8.957687982503951e-06, "loss": 0.0, "step": 15120 }, { "epoch": 0.1043829813656026, "grad_norm": 0.00018382474081590772, "learning_rate": 8.956998075158508e-06, "loss": 0.075, "step": 15130 }, { "epoch": 0.10445197210014695, "grad_norm": 0.0, "learning_rate": 8.956308167813064e-06, "loss": 0.107, "step": 15140 }, { "epoch": 0.1045209628346913, "grad_norm": 6.728784001097665e-07, "learning_rate": 8.95561826046762e-06, "loss": 0.0, "step": 15150 }, { "epoch": 0.10458995356923566, "grad_norm": 0.0, "learning_rate": 8.954928353122177e-06, "loss": 0.0, "step": 15160 }, { "epoch": 0.10465894430378, "grad_norm": 1.3022390604019165, "learning_rate": 8.954238445776734e-06, "loss": 0.0124, "step": 15170 }, { "epoch": 0.10472793503832435, "grad_norm": 0.0, "learning_rate": 8.953548538431288e-06, "loss": 0.0014, "step": 15180 }, { "epoch": 0.1047969257728687, "grad_norm": 0.0, "learning_rate": 8.952858631085845e-06, "loss": 0.4665, "step": 15190 }, { "epoch": 0.10486591650741306, "grad_norm": 0.000387391890399158, "learning_rate": 8.952168723740401e-06, "loss": 0.0002, "step": 15200 }, { "epoch": 0.1049349072419574, "grad_norm": 0.0, "learning_rate": 8.951478816394958e-06, "loss": 0.0, "step": 15210 }, { "epoch": 0.10500389797650175, "grad_norm": 0.0, "learning_rate": 8.950788909049516e-06, "loss": 0.2579, "step": 15220 }, { "epoch": 0.1050728887110461, "grad_norm": 6.467067237281299e-08, "learning_rate": 8.950099001704073e-06, "loss": 0.0, "step": 15230 }, { "epoch": 0.10514187944559046, "grad_norm": 0.0, "learning_rate": 8.949409094358629e-06, "loss": 0.0, "step": 15240 }, { "epoch": 0.10521087018013481, "grad_norm": 95.11103057861328, "learning_rate": 8.948719187013186e-06, "loss": 0.0141, "step": 15250 }, { "epoch": 0.10527986091467915, "grad_norm": 0.0, "learning_rate": 8.948029279667742e-06, "loss": 0.0012, "step": 15260 }, { "epoch": 0.10534885164922352, "grad_norm": 0.0, "learning_rate": 8.947339372322299e-06, "loss": 0.0219, "step": 15270 }, { "epoch": 0.10541784238376786, "grad_norm": 7.990665471879765e-05, "learning_rate": 8.946649464976855e-06, "loss": 0.0353, "step": 15280 }, { "epoch": 0.10548683311831221, "grad_norm": 0.0, "learning_rate": 8.94595955763141e-06, "loss": 0.0, "step": 15290 }, { "epoch": 0.10555582385285656, "grad_norm": 0.01425836980342865, "learning_rate": 8.945269650285966e-06, "loss": 0.0, "step": 15300 }, { "epoch": 0.10562481458740092, "grad_norm": 0.0, "learning_rate": 8.944579742940523e-06, "loss": 0.0001, "step": 15310 }, { "epoch": 0.10569380532194526, "grad_norm": 0.0, "learning_rate": 8.94388983559508e-06, "loss": 0.0011, "step": 15320 }, { "epoch": 0.10576279605648961, "grad_norm": 0.0, "learning_rate": 8.943199928249637e-06, "loss": 0.0001, "step": 15330 }, { "epoch": 0.10583178679103396, "grad_norm": 0.0, "learning_rate": 8.942510020904194e-06, "loss": 0.0, "step": 15340 }, { "epoch": 0.10590077752557832, "grad_norm": 0.0, "learning_rate": 8.94182011355875e-06, "loss": 0.0, "step": 15350 }, { "epoch": 0.10596976826012267, "grad_norm": 0.0, "learning_rate": 8.941130206213307e-06, "loss": 0.0, "step": 15360 }, { "epoch": 0.10603875899466701, "grad_norm": 0.0, "learning_rate": 8.940440298867863e-06, "loss": 0.0, "step": 15370 }, { "epoch": 0.10610774972921137, "grad_norm": 0.0, "learning_rate": 8.93975039152242e-06, "loss": 0.0003, "step": 15380 }, { "epoch": 0.10617674046375572, "grad_norm": 0.0014280946925282478, "learning_rate": 8.939060484176976e-06, "loss": 0.0, "step": 15390 }, { "epoch": 0.10624573119830007, "grad_norm": 0.0, "learning_rate": 8.938370576831533e-06, "loss": 0.0, "step": 15400 }, { "epoch": 0.10631472193284441, "grad_norm": 0.0, "learning_rate": 8.937680669486088e-06, "loss": 0.0, "step": 15410 }, { "epoch": 0.10638371266738877, "grad_norm": 0.0, "learning_rate": 8.936990762140644e-06, "loss": 0.0003, "step": 15420 }, { "epoch": 0.10645270340193312, "grad_norm": 0.48397955298423767, "learning_rate": 8.9363008547952e-06, "loss": 0.0014, "step": 15430 }, { "epoch": 0.10652169413647747, "grad_norm": 4.2676206248870585e-06, "learning_rate": 8.935610947449759e-06, "loss": 0.0, "step": 15440 }, { "epoch": 0.10659068487102182, "grad_norm": 0.0, "learning_rate": 8.934921040104315e-06, "loss": 0.0718, "step": 15450 }, { "epoch": 0.10665967560556618, "grad_norm": 0.0, "learning_rate": 8.934231132758872e-06, "loss": 0.0001, "step": 15460 }, { "epoch": 0.10672866634011052, "grad_norm": 0.03356366232037544, "learning_rate": 8.933541225413428e-06, "loss": 0.0, "step": 15470 }, { "epoch": 0.10679765707465487, "grad_norm": 0.004412492737174034, "learning_rate": 8.932851318067985e-06, "loss": 0.0, "step": 15480 }, { "epoch": 0.10686664780919923, "grad_norm": 0.0, "learning_rate": 8.932161410722541e-06, "loss": 0.0, "step": 15490 }, { "epoch": 0.10693563854374358, "grad_norm": 0.0, "learning_rate": 8.931471503377098e-06, "loss": 0.0, "step": 15500 }, { "epoch": 0.10700462927828792, "grad_norm": 0.0, "learning_rate": 8.930781596031654e-06, "loss": 0.0, "step": 15510 }, { "epoch": 0.10707362001283227, "grad_norm": 0.0, "learning_rate": 8.930091688686209e-06, "loss": 0.0001, "step": 15520 }, { "epoch": 0.10714261074737663, "grad_norm": 1.216089273725629e-08, "learning_rate": 8.929401781340766e-06, "loss": 0.0, "step": 15530 }, { "epoch": 0.10721160148192098, "grad_norm": 0.0, "learning_rate": 8.928711873995322e-06, "loss": 0.0, "step": 15540 }, { "epoch": 0.10728059221646533, "grad_norm": 0.005477714352309704, "learning_rate": 8.92802196664988e-06, "loss": 0.0, "step": 15550 }, { "epoch": 0.10734958295100967, "grad_norm": 0.0, "learning_rate": 8.927332059304437e-06, "loss": 0.0, "step": 15560 }, { "epoch": 0.10741857368555403, "grad_norm": 0.002477702684700489, "learning_rate": 8.926642151958993e-06, "loss": 0.0, "step": 15570 }, { "epoch": 0.10748756442009838, "grad_norm": 5.71883845168486e-07, "learning_rate": 8.92595224461355e-06, "loss": 0.0, "step": 15580 }, { "epoch": 0.10755655515464273, "grad_norm": 0.0, "learning_rate": 8.925262337268106e-06, "loss": 0.1838, "step": 15590 }, { "epoch": 0.10762554588918709, "grad_norm": 0.005755654536187649, "learning_rate": 8.924572429922663e-06, "loss": 0.0001, "step": 15600 }, { "epoch": 0.10769453662373143, "grad_norm": 0.0, "learning_rate": 8.92388252257722e-06, "loss": 0.0005, "step": 15610 }, { "epoch": 0.10776352735827578, "grad_norm": 0.0, "learning_rate": 8.923192615231776e-06, "loss": 0.0, "step": 15620 }, { "epoch": 0.10783251809282013, "grad_norm": 0.0015298360958695412, "learning_rate": 8.92250270788633e-06, "loss": 0.0, "step": 15630 }, { "epoch": 0.10790150882736449, "grad_norm": 0.0, "learning_rate": 8.921812800540887e-06, "loss": 0.0, "step": 15640 }, { "epoch": 0.10797049956190884, "grad_norm": 0.0, "learning_rate": 8.921122893195443e-06, "loss": 0.0, "step": 15650 }, { "epoch": 0.10803949029645318, "grad_norm": 0.0, "learning_rate": 8.920432985850002e-06, "loss": 0.0, "step": 15660 }, { "epoch": 0.10810848103099753, "grad_norm": 2.5580108697909054e-08, "learning_rate": 8.919743078504558e-06, "loss": 0.0001, "step": 15670 }, { "epoch": 0.10817747176554189, "grad_norm": 0.0, "learning_rate": 8.919053171159115e-06, "loss": 0.5555, "step": 15680 }, { "epoch": 0.10824646250008624, "grad_norm": 0.0, "learning_rate": 8.918363263813671e-06, "loss": 0.0, "step": 15690 }, { "epoch": 0.10831545323463058, "grad_norm": 0.45190346240997314, "learning_rate": 8.917673356468228e-06, "loss": 0.0001, "step": 15700 }, { "epoch": 0.10838444396917495, "grad_norm": 0.0, "learning_rate": 8.916983449122784e-06, "loss": 0.0, "step": 15710 }, { "epoch": 0.10845343470371929, "grad_norm": 0.0002167319762520492, "learning_rate": 8.91629354177734e-06, "loss": 0.0083, "step": 15720 }, { "epoch": 0.10852242543826364, "grad_norm": 0.0, "learning_rate": 8.915603634431897e-06, "loss": 0.0121, "step": 15730 }, { "epoch": 0.10859141617280799, "grad_norm": 0.0, "learning_rate": 8.914913727086454e-06, "loss": 0.0, "step": 15740 }, { "epoch": 0.10866040690735235, "grad_norm": 0.0015758657827973366, "learning_rate": 8.914223819741008e-06, "loss": 0.0094, "step": 15750 }, { "epoch": 0.1087293976418967, "grad_norm": 0.0, "learning_rate": 8.913533912395565e-06, "loss": 0.0001, "step": 15760 }, { "epoch": 0.10879838837644104, "grad_norm": 0.0, "learning_rate": 8.912844005050123e-06, "loss": 0.0, "step": 15770 }, { "epoch": 0.10886737911098539, "grad_norm": 0.0, "learning_rate": 8.91215409770468e-06, "loss": 0.0004, "step": 15780 }, { "epoch": 0.10893636984552975, "grad_norm": 0.0, "learning_rate": 8.911464190359236e-06, "loss": 0.0, "step": 15790 }, { "epoch": 0.1090053605800741, "grad_norm": 0.0, "learning_rate": 8.910774283013793e-06, "loss": 0.0, "step": 15800 }, { "epoch": 0.10907435131461844, "grad_norm": 1.0306755939382128e-05, "learning_rate": 8.910084375668349e-06, "loss": 0.0, "step": 15810 }, { "epoch": 0.1091433420491628, "grad_norm": 0.0, "learning_rate": 8.909394468322906e-06, "loss": 0.0, "step": 15820 }, { "epoch": 0.10921233278370715, "grad_norm": 0.0, "learning_rate": 8.908704560977462e-06, "loss": 0.0, "step": 15830 }, { "epoch": 0.1092813235182515, "grad_norm": 3.588132244658482e-07, "learning_rate": 8.908014653632018e-06, "loss": 0.0, "step": 15840 }, { "epoch": 0.10935031425279584, "grad_norm": 141.24696350097656, "learning_rate": 8.907324746286575e-06, "loss": 0.0244, "step": 15850 }, { "epoch": 0.1094193049873402, "grad_norm": 0.03340895101428032, "learning_rate": 8.90663483894113e-06, "loss": 0.0, "step": 15860 }, { "epoch": 0.10948829572188455, "grad_norm": 0.0, "learning_rate": 8.905944931595686e-06, "loss": 0.2949, "step": 15870 }, { "epoch": 0.1095572864564289, "grad_norm": 8.070172043517232e-05, "learning_rate": 8.905255024250244e-06, "loss": 0.0889, "step": 15880 }, { "epoch": 0.10962627719097326, "grad_norm": 0.0, "learning_rate": 8.904565116904801e-06, "loss": 0.0, "step": 15890 }, { "epoch": 0.1096952679255176, "grad_norm": 4.5419182777404785, "learning_rate": 8.903875209559357e-06, "loss": 0.0013, "step": 15900 }, { "epoch": 0.10976425866006195, "grad_norm": 0.0, "learning_rate": 8.903185302213914e-06, "loss": 0.0, "step": 15910 }, { "epoch": 0.1098332493946063, "grad_norm": 0.0, "learning_rate": 8.90249539486847e-06, "loss": 0.0, "step": 15920 }, { "epoch": 0.10990224012915066, "grad_norm": 5.2723315491221e-08, "learning_rate": 8.901805487523027e-06, "loss": 0.0, "step": 15930 }, { "epoch": 0.10997123086369501, "grad_norm": 9.635378006578321e-08, "learning_rate": 8.901115580177583e-06, "loss": 0.107, "step": 15940 }, { "epoch": 0.11004022159823935, "grad_norm": 0.0, "learning_rate": 8.90042567283214e-06, "loss": 0.0001, "step": 15950 }, { "epoch": 0.1101092123327837, "grad_norm": 0.0, "learning_rate": 8.899735765486696e-06, "loss": 0.0, "step": 15960 }, { "epoch": 0.11017820306732806, "grad_norm": 0.0, "learning_rate": 8.899045858141251e-06, "loss": 0.0008, "step": 15970 }, { "epoch": 0.11024719380187241, "grad_norm": 0.0, "learning_rate": 8.898355950795808e-06, "loss": 0.0, "step": 15980 }, { "epoch": 0.11031618453641676, "grad_norm": 0.0, "learning_rate": 8.897666043450366e-06, "loss": 0.0, "step": 15990 }, { "epoch": 0.11038517527096112, "grad_norm": 0.0, "learning_rate": 8.896976136104922e-06, "loss": 0.0, "step": 16000 }, { "epoch": 0.11045416600550546, "grad_norm": 1.895869493484497, "learning_rate": 8.896286228759479e-06, "loss": 0.0004, "step": 16010 }, { "epoch": 0.11052315674004981, "grad_norm": 0.0, "learning_rate": 8.895596321414035e-06, "loss": 0.0, "step": 16020 }, { "epoch": 0.11059214747459416, "grad_norm": 0.0, "learning_rate": 8.894906414068592e-06, "loss": 0.0, "step": 16030 }, { "epoch": 0.11066113820913852, "grad_norm": 0.0001389685639878735, "learning_rate": 8.894216506723148e-06, "loss": 0.0001, "step": 16040 }, { "epoch": 0.11073012894368287, "grad_norm": 0.0, "learning_rate": 8.893526599377705e-06, "loss": 0.0, "step": 16050 }, { "epoch": 0.11079911967822721, "grad_norm": 0.0, "learning_rate": 8.892836692032261e-06, "loss": 0.0, "step": 16060 }, { "epoch": 0.11086811041277156, "grad_norm": 0.0, "learning_rate": 8.892146784686818e-06, "loss": 0.0, "step": 16070 }, { "epoch": 0.11093710114731592, "grad_norm": 0.0, "learning_rate": 8.891456877341374e-06, "loss": 0.0, "step": 16080 }, { "epoch": 0.11100609188186027, "grad_norm": 0.2615267038345337, "learning_rate": 8.890766969995929e-06, "loss": 0.0, "step": 16090 }, { "epoch": 0.11107508261640461, "grad_norm": 0.0004013115249108523, "learning_rate": 8.890077062650487e-06, "loss": 0.0, "step": 16100 }, { "epoch": 0.11114407335094897, "grad_norm": 0.0, "learning_rate": 8.889387155305044e-06, "loss": 0.0036, "step": 16110 }, { "epoch": 0.11121306408549332, "grad_norm": 0.0, "learning_rate": 8.8886972479596e-06, "loss": 0.0, "step": 16120 }, { "epoch": 0.11128205482003767, "grad_norm": 0.0, "learning_rate": 8.888007340614157e-06, "loss": 0.0, "step": 16130 }, { "epoch": 0.11135104555458201, "grad_norm": 3.3078337846603745e-09, "learning_rate": 8.887317433268713e-06, "loss": 0.0, "step": 16140 }, { "epoch": 0.11142003628912638, "grad_norm": 0.0035774235147982836, "learning_rate": 8.88662752592327e-06, "loss": 0.0, "step": 16150 }, { "epoch": 0.11148902702367072, "grad_norm": 0.0, "learning_rate": 8.885937618577826e-06, "loss": 0.0, "step": 16160 }, { "epoch": 0.11155801775821507, "grad_norm": 0.0, "learning_rate": 8.885247711232383e-06, "loss": 0.0, "step": 16170 }, { "epoch": 0.11162700849275942, "grad_norm": 0.0, "learning_rate": 8.88455780388694e-06, "loss": 0.0, "step": 16180 }, { "epoch": 0.11169599922730378, "grad_norm": 0.0, "learning_rate": 8.883867896541496e-06, "loss": 0.0, "step": 16190 }, { "epoch": 0.11176498996184812, "grad_norm": 0.0, "learning_rate": 8.88317798919605e-06, "loss": 0.0001, "step": 16200 }, { "epoch": 0.11183398069639247, "grad_norm": 0.0, "learning_rate": 8.882488081850609e-06, "loss": 0.0, "step": 16210 }, { "epoch": 0.11190297143093683, "grad_norm": 0.0002957073738798499, "learning_rate": 8.881798174505165e-06, "loss": 0.0, "step": 16220 }, { "epoch": 0.11197196216548118, "grad_norm": 0.0, "learning_rate": 8.881108267159722e-06, "loss": 0.0, "step": 16230 }, { "epoch": 0.11204095290002553, "grad_norm": 9.604125079931691e-05, "learning_rate": 8.880418359814278e-06, "loss": 0.0, "step": 16240 }, { "epoch": 0.11210994363456987, "grad_norm": 9.580921611629378e-10, "learning_rate": 8.879728452468835e-06, "loss": 0.0, "step": 16250 }, { "epoch": 0.11217893436911423, "grad_norm": 0.0, "learning_rate": 8.879038545123391e-06, "loss": 0.002, "step": 16260 }, { "epoch": 0.11224792510365858, "grad_norm": 8.946687279909327e-10, "learning_rate": 8.878348637777948e-06, "loss": 0.0, "step": 16270 }, { "epoch": 0.11231691583820293, "grad_norm": 0.0, "learning_rate": 8.877658730432504e-06, "loss": 0.0, "step": 16280 }, { "epoch": 0.11238590657274727, "grad_norm": 0.00015398212417494506, "learning_rate": 8.87696882308706e-06, "loss": 0.0005, "step": 16290 }, { "epoch": 0.11245489730729163, "grad_norm": 0.0, "learning_rate": 8.876278915741617e-06, "loss": 0.0, "step": 16300 }, { "epoch": 0.11252388804183598, "grad_norm": 0.0, "learning_rate": 8.875589008396172e-06, "loss": 0.0, "step": 16310 }, { "epoch": 0.11259287877638033, "grad_norm": 2.7241250677434437e-07, "learning_rate": 8.87489910105073e-06, "loss": 0.0011, "step": 16320 }, { "epoch": 0.11266186951092469, "grad_norm": 0.0, "learning_rate": 8.874209193705287e-06, "loss": 0.0, "step": 16330 }, { "epoch": 0.11273086024546904, "grad_norm": 0.0, "learning_rate": 8.873519286359843e-06, "loss": 0.0, "step": 16340 }, { "epoch": 0.11279985098001338, "grad_norm": 0.0, "learning_rate": 8.8728293790144e-06, "loss": 0.0344, "step": 16350 }, { "epoch": 0.11286884171455773, "grad_norm": 0.0, "learning_rate": 8.872139471668956e-06, "loss": 0.0, "step": 16360 }, { "epoch": 0.11293783244910209, "grad_norm": 1.253390382771613e-05, "learning_rate": 8.871449564323512e-06, "loss": 0.0, "step": 16370 }, { "epoch": 0.11300682318364644, "grad_norm": 0.0, "learning_rate": 8.870759656978069e-06, "loss": 0.0, "step": 16380 }, { "epoch": 0.11307581391819078, "grad_norm": 1.9625547793111764e-05, "learning_rate": 8.870069749632625e-06, "loss": 0.0026, "step": 16390 }, { "epoch": 0.11314480465273513, "grad_norm": 1.0650897941388848e-09, "learning_rate": 8.869379842287182e-06, "loss": 0.0185, "step": 16400 }, { "epoch": 0.11321379538727949, "grad_norm": 0.0, "learning_rate": 8.868689934941738e-06, "loss": 0.0, "step": 16410 }, { "epoch": 0.11328278612182384, "grad_norm": 0.0, "learning_rate": 8.868000027596293e-06, "loss": 0.0084, "step": 16420 }, { "epoch": 0.11335177685636819, "grad_norm": 0.0, "learning_rate": 8.867310120250851e-06, "loss": 0.0, "step": 16430 }, { "epoch": 0.11342076759091255, "grad_norm": 1.967303433048073e-06, "learning_rate": 8.866620212905408e-06, "loss": 0.0, "step": 16440 }, { "epoch": 0.1134897583254569, "grad_norm": 0.0, "learning_rate": 8.865930305559964e-06, "loss": 0.0, "step": 16450 }, { "epoch": 0.11355874906000124, "grad_norm": 1.1420366519132585e-07, "learning_rate": 8.865240398214521e-06, "loss": 0.0, "step": 16460 }, { "epoch": 0.11362773979454559, "grad_norm": 0.0, "learning_rate": 8.864550490869077e-06, "loss": 0.0, "step": 16470 }, { "epoch": 0.11369673052908995, "grad_norm": 8.06945317890495e-05, "learning_rate": 8.863860583523634e-06, "loss": 0.0, "step": 16480 }, { "epoch": 0.1137657212636343, "grad_norm": 0.0, "learning_rate": 8.86317067617819e-06, "loss": 0.0, "step": 16490 }, { "epoch": 0.11383471199817864, "grad_norm": 0.013719487935304642, "learning_rate": 8.862480768832747e-06, "loss": 0.0, "step": 16500 }, { "epoch": 0.11390370273272299, "grad_norm": 0.03387153893709183, "learning_rate": 8.861790861487303e-06, "loss": 0.0003, "step": 16510 }, { "epoch": 0.11397269346726735, "grad_norm": 0.0, "learning_rate": 8.86110095414186e-06, "loss": 0.0, "step": 16520 }, { "epoch": 0.1140416842018117, "grad_norm": 0.0, "learning_rate": 8.860411046796416e-06, "loss": 0.0001, "step": 16530 }, { "epoch": 0.11411067493635604, "grad_norm": 0.0, "learning_rate": 8.859721139450973e-06, "loss": 0.0, "step": 16540 }, { "epoch": 0.1141796656709004, "grad_norm": 0.0, "learning_rate": 8.85903123210553e-06, "loss": 0.0, "step": 16550 }, { "epoch": 0.11424865640544475, "grad_norm": 0.0, "learning_rate": 8.858341324760086e-06, "loss": 0.0, "step": 16560 }, { "epoch": 0.1143176471399891, "grad_norm": 0.0, "learning_rate": 8.857651417414642e-06, "loss": 0.0, "step": 16570 }, { "epoch": 0.11438663787453345, "grad_norm": 0.0, "learning_rate": 8.856961510069199e-06, "loss": 0.0, "step": 16580 }, { "epoch": 0.1144556286090778, "grad_norm": 0.0367586687207222, "learning_rate": 8.856271602723755e-06, "loss": 0.0, "step": 16590 }, { "epoch": 0.11452461934362215, "grad_norm": 0.0, "learning_rate": 8.855581695378312e-06, "loss": 0.0, "step": 16600 }, { "epoch": 0.1145936100781665, "grad_norm": 0.0, "learning_rate": 8.854891788032868e-06, "loss": 0.0001, "step": 16610 }, { "epoch": 0.11466260081271085, "grad_norm": 9.291002411870863e-10, "learning_rate": 8.854201880687425e-06, "loss": 0.0, "step": 16620 }, { "epoch": 0.11473159154725521, "grad_norm": 0.0, "learning_rate": 8.853511973341981e-06, "loss": 0.0, "step": 16630 }, { "epoch": 0.11480058228179955, "grad_norm": 0.0, "learning_rate": 8.852822065996538e-06, "loss": 0.0, "step": 16640 }, { "epoch": 0.1148695730163439, "grad_norm": 0.0, "learning_rate": 8.852132158651094e-06, "loss": 0.0, "step": 16650 }, { "epoch": 0.11493856375088826, "grad_norm": 0.0, "learning_rate": 8.85144225130565e-06, "loss": 0.0, "step": 16660 }, { "epoch": 0.11500755448543261, "grad_norm": 0.0, "learning_rate": 8.850752343960207e-06, "loss": 0.0192, "step": 16670 }, { "epoch": 0.11507654521997696, "grad_norm": 0.0, "learning_rate": 8.850062436614764e-06, "loss": 0.0, "step": 16680 }, { "epoch": 0.1151455359545213, "grad_norm": 1.0388605531375106e-09, "learning_rate": 8.84937252926932e-06, "loss": 0.0, "step": 16690 }, { "epoch": 0.11521452668906566, "grad_norm": 0.0, "learning_rate": 8.848682621923877e-06, "loss": 0.0, "step": 16700 }, { "epoch": 0.11528351742361001, "grad_norm": 0.0, "learning_rate": 8.847992714578433e-06, "loss": 0.0, "step": 16710 }, { "epoch": 0.11535250815815436, "grad_norm": 0.0, "learning_rate": 8.84730280723299e-06, "loss": 0.0018, "step": 16720 }, { "epoch": 0.1154214988926987, "grad_norm": 0.0, "learning_rate": 8.846612899887546e-06, "loss": 0.0003, "step": 16730 }, { "epoch": 0.11549048962724306, "grad_norm": 0.0, "learning_rate": 8.845922992542103e-06, "loss": 0.0, "step": 16740 }, { "epoch": 0.11555948036178741, "grad_norm": 0.0, "learning_rate": 8.845233085196659e-06, "loss": 0.0, "step": 16750 }, { "epoch": 0.11562847109633176, "grad_norm": 0.0, "learning_rate": 8.844543177851216e-06, "loss": 0.0, "step": 16760 }, { "epoch": 0.11569746183087612, "grad_norm": 0.023466231301426888, "learning_rate": 8.843922261240316e-06, "loss": 0.6711, "step": 16770 }, { "epoch": 0.11576645256542047, "grad_norm": 9.900904115056619e-05, "learning_rate": 8.843232353894873e-06, "loss": 0.0, "step": 16780 }, { "epoch": 0.11583544329996481, "grad_norm": 0.0, "learning_rate": 8.84254244654943e-06, "loss": 0.0, "step": 16790 }, { "epoch": 0.11590443403450916, "grad_norm": 0.0, "learning_rate": 8.841852539203986e-06, "loss": 0.0, "step": 16800 }, { "epoch": 0.11597342476905352, "grad_norm": 0.0, "learning_rate": 8.841162631858542e-06, "loss": 0.0012, "step": 16810 }, { "epoch": 0.11604241550359787, "grad_norm": 0.0, "learning_rate": 8.840472724513099e-06, "loss": 0.0, "step": 16820 }, { "epoch": 0.11611140623814221, "grad_norm": 0.0, "learning_rate": 8.839782817167655e-06, "loss": 0.0011, "step": 16830 }, { "epoch": 0.11618039697268656, "grad_norm": 6.143480959508452e-07, "learning_rate": 8.839092909822212e-06, "loss": 0.0, "step": 16840 }, { "epoch": 0.11624938770723092, "grad_norm": 0.000599917839281261, "learning_rate": 8.838403002476768e-06, "loss": 0.011, "step": 16850 }, { "epoch": 0.11631837844177527, "grad_norm": 0.0, "learning_rate": 8.837713095131325e-06, "loss": 0.0, "step": 16860 }, { "epoch": 0.11638736917631962, "grad_norm": 0.0, "learning_rate": 8.837023187785881e-06, "loss": 0.0, "step": 16870 }, { "epoch": 0.11645635991086398, "grad_norm": 0.0, "learning_rate": 8.836333280440438e-06, "loss": 0.0002, "step": 16880 }, { "epoch": 0.11652535064540832, "grad_norm": 0.00526492390781641, "learning_rate": 8.835643373094994e-06, "loss": 0.0, "step": 16890 }, { "epoch": 0.11659434137995267, "grad_norm": 0.0, "learning_rate": 8.83495346574955e-06, "loss": 0.0001, "step": 16900 }, { "epoch": 0.11666333211449702, "grad_norm": 1.077324673914859e-09, "learning_rate": 8.834263558404107e-06, "loss": 0.0898, "step": 16910 }, { "epoch": 0.11673232284904138, "grad_norm": 0.0, "learning_rate": 8.833573651058664e-06, "loss": 0.0, "step": 16920 }, { "epoch": 0.11680131358358573, "grad_norm": 0.0, "learning_rate": 8.83288374371322e-06, "loss": 0.0008, "step": 16930 }, { "epoch": 0.11687030431813007, "grad_norm": 0.0, "learning_rate": 8.832193836367777e-06, "loss": 0.0108, "step": 16940 }, { "epoch": 0.11693929505267442, "grad_norm": 0.752956211566925, "learning_rate": 8.831503929022333e-06, "loss": 0.0001, "step": 16950 }, { "epoch": 0.11700828578721878, "grad_norm": 0.0, "learning_rate": 8.83081402167689e-06, "loss": 0.0, "step": 16960 }, { "epoch": 0.11707727652176313, "grad_norm": 2.4848066004778957e-06, "learning_rate": 8.830124114331446e-06, "loss": 0.0, "step": 16970 }, { "epoch": 0.11714626725630747, "grad_norm": 0.0, "learning_rate": 8.829434206986003e-06, "loss": 0.0, "step": 16980 }, { "epoch": 0.11721525799085183, "grad_norm": 0.0, "learning_rate": 8.828744299640559e-06, "loss": 0.0, "step": 16990 }, { "epoch": 0.11728424872539618, "grad_norm": 0.0, "learning_rate": 8.828054392295116e-06, "loss": 0.0, "step": 17000 }, { "epoch": 0.11735323945994053, "grad_norm": 0.0, "learning_rate": 8.827364484949672e-06, "loss": 0.0001, "step": 17010 }, { "epoch": 0.11742223019448488, "grad_norm": 0.0, "learning_rate": 8.826674577604229e-06, "loss": 0.3004, "step": 17020 }, { "epoch": 0.11749122092902924, "grad_norm": 0.0, "learning_rate": 8.825984670258785e-06, "loss": 0.0, "step": 17030 }, { "epoch": 0.11756021166357358, "grad_norm": 0.0, "learning_rate": 8.825294762913342e-06, "loss": 0.0, "step": 17040 }, { "epoch": 0.11762920239811793, "grad_norm": 3.680776834487915, "learning_rate": 8.824604855567898e-06, "loss": 0.0008, "step": 17050 }, { "epoch": 0.11769819313266228, "grad_norm": 3.346624311006963e-08, "learning_rate": 8.823914948222454e-06, "loss": 0.0, "step": 17060 }, { "epoch": 0.11776718386720664, "grad_norm": 0.0, "learning_rate": 8.823225040877011e-06, "loss": 0.0, "step": 17070 }, { "epoch": 0.11783617460175098, "grad_norm": 0.0, "learning_rate": 8.822535133531567e-06, "loss": 0.0, "step": 17080 }, { "epoch": 0.11790516533629533, "grad_norm": 0.0, "learning_rate": 8.821845226186124e-06, "loss": 0.0, "step": 17090 }, { "epoch": 0.11797415607083969, "grad_norm": 224.3075408935547, "learning_rate": 8.82115531884068e-06, "loss": 0.0576, "step": 17100 }, { "epoch": 0.11804314680538404, "grad_norm": 0.0, "learning_rate": 8.820465411495237e-06, "loss": 0.0011, "step": 17110 }, { "epoch": 0.11811213753992839, "grad_norm": 0.0, "learning_rate": 8.819775504149793e-06, "loss": 0.0051, "step": 17120 }, { "epoch": 0.11818112827447273, "grad_norm": 0.0003071391547564417, "learning_rate": 8.81908559680435e-06, "loss": 0.0, "step": 17130 }, { "epoch": 0.1182501190090171, "grad_norm": 0.05422855168581009, "learning_rate": 8.818395689458906e-06, "loss": 0.0, "step": 17140 }, { "epoch": 0.11831910974356144, "grad_norm": 0.00013419974129647017, "learning_rate": 8.817705782113463e-06, "loss": 0.0, "step": 17150 }, { "epoch": 0.11838810047810579, "grad_norm": 0.0, "learning_rate": 8.81701587476802e-06, "loss": 0.0, "step": 17160 }, { "epoch": 0.11845709121265015, "grad_norm": 7.59888716856949e-05, "learning_rate": 8.816325967422576e-06, "loss": 0.0001, "step": 17170 }, { "epoch": 0.1185260819471945, "grad_norm": 1.506629843106566e-08, "learning_rate": 8.815636060077132e-06, "loss": 0.1693, "step": 17180 }, { "epoch": 0.11859507268173884, "grad_norm": 0.0, "learning_rate": 8.814946152731689e-06, "loss": 0.001, "step": 17190 }, { "epoch": 0.11866406341628319, "grad_norm": 0.0009273168398067355, "learning_rate": 8.814256245386245e-06, "loss": 0.0, "step": 17200 }, { "epoch": 0.11873305415082755, "grad_norm": 0.0, "learning_rate": 8.813566338040802e-06, "loss": 0.0001, "step": 17210 }, { "epoch": 0.1188020448853719, "grad_norm": 0.0, "learning_rate": 8.812876430695358e-06, "loss": 0.0, "step": 17220 }, { "epoch": 0.11887103561991624, "grad_norm": 0.0, "learning_rate": 8.812186523349915e-06, "loss": 0.0, "step": 17230 }, { "epoch": 0.11894002635446059, "grad_norm": 0.0, "learning_rate": 8.811496616004471e-06, "loss": 0.0001, "step": 17240 }, { "epoch": 0.11900901708900495, "grad_norm": 0.0, "learning_rate": 8.810806708659028e-06, "loss": 0.0, "step": 17250 }, { "epoch": 0.1190780078235493, "grad_norm": 0.0, "learning_rate": 8.810116801313584e-06, "loss": 0.0, "step": 17260 }, { "epoch": 0.11914699855809364, "grad_norm": 0.0, "learning_rate": 8.80942689396814e-06, "loss": 0.0, "step": 17270 }, { "epoch": 0.119215989292638, "grad_norm": 0.0, "learning_rate": 8.808736986622697e-06, "loss": 0.0289, "step": 17280 }, { "epoch": 0.11928498002718235, "grad_norm": 0.0, "learning_rate": 8.808047079277254e-06, "loss": 0.0, "step": 17290 }, { "epoch": 0.1193539707617267, "grad_norm": 0.0, "learning_rate": 8.80735717193181e-06, "loss": 0.0, "step": 17300 }, { "epoch": 0.11942296149627105, "grad_norm": 0.0, "learning_rate": 8.806667264586367e-06, "loss": 0.0, "step": 17310 }, { "epoch": 0.11949195223081541, "grad_norm": 0.0, "learning_rate": 8.805977357240923e-06, "loss": 0.0001, "step": 17320 }, { "epoch": 0.11956094296535975, "grad_norm": 0.0004927848931401968, "learning_rate": 8.80528744989548e-06, "loss": 0.0, "step": 17330 }, { "epoch": 0.1196299336999041, "grad_norm": 0.0, "learning_rate": 8.804597542550036e-06, "loss": 0.0, "step": 17340 }, { "epoch": 0.11969892443444845, "grad_norm": 0.0, "learning_rate": 8.803907635204593e-06, "loss": 0.0055, "step": 17350 }, { "epoch": 0.11976791516899281, "grad_norm": 0.0, "learning_rate": 8.80321772785915e-06, "loss": 0.0048, "step": 17360 }, { "epoch": 0.11983690590353716, "grad_norm": 0.0, "learning_rate": 8.802527820513706e-06, "loss": 0.0, "step": 17370 }, { "epoch": 0.1199058966380815, "grad_norm": 0.0, "learning_rate": 8.801837913168262e-06, "loss": 0.0, "step": 17380 }, { "epoch": 0.11997488737262586, "grad_norm": 9.931895306181104e-10, "learning_rate": 8.801148005822819e-06, "loss": 0.0, "step": 17390 }, { "epoch": 0.12004387810717021, "grad_norm": 0.00020726821094285697, "learning_rate": 8.800458098477375e-06, "loss": 0.2492, "step": 17400 }, { "epoch": 0.12011286884171456, "grad_norm": 0.00019490814884193242, "learning_rate": 8.799768191131932e-06, "loss": 0.0, "step": 17410 }, { "epoch": 0.1201818595762589, "grad_norm": 0.0, "learning_rate": 8.799078283786488e-06, "loss": 0.0007, "step": 17420 }, { "epoch": 0.12025085031080326, "grad_norm": 0.0, "learning_rate": 8.798388376441045e-06, "loss": 0.0, "step": 17430 }, { "epoch": 0.12031984104534761, "grad_norm": 9.201019945948019e-10, "learning_rate": 8.797698469095601e-06, "loss": 0.0, "step": 17440 }, { "epoch": 0.12038883177989196, "grad_norm": 0.0, "learning_rate": 8.797008561750158e-06, "loss": 0.2594, "step": 17450 }, { "epoch": 0.1204578225144363, "grad_norm": 0.0, "learning_rate": 8.796318654404714e-06, "loss": 0.0, "step": 17460 }, { "epoch": 0.12052681324898067, "grad_norm": 0.0, "learning_rate": 8.79562874705927e-06, "loss": 0.0804, "step": 17470 }, { "epoch": 0.12059580398352501, "grad_norm": 1.2530210646843898e-08, "learning_rate": 8.794938839713827e-06, "loss": 0.0001, "step": 17480 }, { "epoch": 0.12066479471806936, "grad_norm": 0.0, "learning_rate": 8.794248932368384e-06, "loss": 0.0, "step": 17490 }, { "epoch": 0.12073378545261372, "grad_norm": 1.2081340551376343, "learning_rate": 8.79355902502294e-06, "loss": 0.0003, "step": 17500 }, { "epoch": 0.12080277618715807, "grad_norm": 0.0, "learning_rate": 8.792869117677497e-06, "loss": 0.0002, "step": 17510 }, { "epoch": 0.12087176692170241, "grad_norm": 0.0, "learning_rate": 8.792179210332053e-06, "loss": 0.0003, "step": 17520 }, { "epoch": 0.12094075765624676, "grad_norm": 2.628364370593772e-07, "learning_rate": 8.79148930298661e-06, "loss": 0.0, "step": 17530 }, { "epoch": 0.12100974839079112, "grad_norm": 0.0, "learning_rate": 8.790799395641166e-06, "loss": 0.0008, "step": 17540 }, { "epoch": 0.12107873912533547, "grad_norm": 9.124003554461524e-05, "learning_rate": 8.790109488295723e-06, "loss": 0.0, "step": 17550 }, { "epoch": 0.12114772985987982, "grad_norm": 0.0013275255914777517, "learning_rate": 8.789419580950279e-06, "loss": 0.0028, "step": 17560 }, { "epoch": 0.12121672059442416, "grad_norm": 0.18724779784679413, "learning_rate": 8.788729673604836e-06, "loss": 0.0, "step": 17570 }, { "epoch": 0.12128571132896852, "grad_norm": 0.0, "learning_rate": 8.788039766259392e-06, "loss": 0.0, "step": 17580 }, { "epoch": 0.12135470206351287, "grad_norm": 4.172948138148058e-07, "learning_rate": 8.787349858913948e-06, "loss": 0.0, "step": 17590 }, { "epoch": 0.12142369279805722, "grad_norm": 0.0, "learning_rate": 8.786659951568505e-06, "loss": 0.0, "step": 17600 }, { "epoch": 0.12149268353260158, "grad_norm": 0.0, "learning_rate": 8.785970044223061e-06, "loss": 0.0, "step": 17610 }, { "epoch": 0.12156167426714592, "grad_norm": 0.0, "learning_rate": 8.785280136877618e-06, "loss": 0.0, "step": 17620 }, { "epoch": 0.12163066500169027, "grad_norm": 0.0, "learning_rate": 8.784590229532174e-06, "loss": 0.0, "step": 17630 }, { "epoch": 0.12169965573623462, "grad_norm": 0.0, "learning_rate": 8.783900322186731e-06, "loss": 0.0, "step": 17640 }, { "epoch": 0.12176864647077898, "grad_norm": 0.004924447275698185, "learning_rate": 8.783210414841287e-06, "loss": 0.0, "step": 17650 }, { "epoch": 0.12183763720532333, "grad_norm": 8.936698603356774e-10, "learning_rate": 8.782520507495844e-06, "loss": 0.0248, "step": 17660 }, { "epoch": 0.12190662793986767, "grad_norm": 2.3284343114937656e-05, "learning_rate": 8.7818306001504e-06, "loss": 0.0022, "step": 17670 }, { "epoch": 0.12197561867441202, "grad_norm": 1.5944105768994632e-07, "learning_rate": 8.781140692804957e-06, "loss": 0.0, "step": 17680 }, { "epoch": 0.12204460940895638, "grad_norm": 5.706326192012057e-05, "learning_rate": 8.780450785459513e-06, "loss": 0.0, "step": 17690 }, { "epoch": 0.12211360014350073, "grad_norm": 0.0, "learning_rate": 8.77976087811407e-06, "loss": 0.3295, "step": 17700 }, { "epoch": 0.12218259087804507, "grad_norm": 0.0, "learning_rate": 8.779070970768626e-06, "loss": 0.0, "step": 17710 }, { "epoch": 0.12225158161258944, "grad_norm": 0.0, "learning_rate": 8.778381063423183e-06, "loss": 0.04, "step": 17720 }, { "epoch": 0.12232057234713378, "grad_norm": 0.011400078423321247, "learning_rate": 8.77769115607774e-06, "loss": 0.0002, "step": 17730 }, { "epoch": 0.12238956308167813, "grad_norm": 0.0, "learning_rate": 8.777001248732296e-06, "loss": 0.0, "step": 17740 }, { "epoch": 0.12245855381622248, "grad_norm": 0.0, "learning_rate": 8.776311341386852e-06, "loss": 0.0006, "step": 17750 }, { "epoch": 0.12252754455076684, "grad_norm": 0.0, "learning_rate": 8.775621434041409e-06, "loss": 0.0, "step": 17760 }, { "epoch": 0.12259653528531118, "grad_norm": 0.0, "learning_rate": 8.774931526695965e-06, "loss": 0.0, "step": 17770 }, { "epoch": 0.12266552601985553, "grad_norm": 0.30097076296806335, "learning_rate": 8.774241619350522e-06, "loss": 0.002, "step": 17780 }, { "epoch": 0.12273451675439988, "grad_norm": 0.018053485080599785, "learning_rate": 8.773551712005078e-06, "loss": 0.0001, "step": 17790 }, { "epoch": 0.12280350748894424, "grad_norm": 0.0, "learning_rate": 8.772861804659635e-06, "loss": 0.1314, "step": 17800 }, { "epoch": 0.12287249822348859, "grad_norm": 0.0, "learning_rate": 8.772171897314191e-06, "loss": 0.0, "step": 17810 }, { "epoch": 0.12294148895803293, "grad_norm": 0.0, "learning_rate": 8.771481989968748e-06, "loss": 0.0, "step": 17820 }, { "epoch": 0.1230104796925773, "grad_norm": 0.0, "learning_rate": 8.770792082623304e-06, "loss": 0.0, "step": 17830 }, { "epoch": 0.12307947042712164, "grad_norm": 0.0, "learning_rate": 8.77010217527786e-06, "loss": 0.0283, "step": 17840 }, { "epoch": 0.12314846116166599, "grad_norm": 0.0, "learning_rate": 8.769412267932417e-06, "loss": 0.0, "step": 17850 }, { "epoch": 0.12321745189621033, "grad_norm": 0.002129849512130022, "learning_rate": 8.768722360586974e-06, "loss": 0.0, "step": 17860 }, { "epoch": 0.1232864426307547, "grad_norm": 0.0, "learning_rate": 8.76803245324153e-06, "loss": 0.0, "step": 17870 }, { "epoch": 0.12335543336529904, "grad_norm": 8.501918613568193e-10, "learning_rate": 8.767342545896087e-06, "loss": 0.0, "step": 17880 }, { "epoch": 0.12342442409984339, "grad_norm": 0.0, "learning_rate": 8.766652638550643e-06, "loss": 0.0, "step": 17890 }, { "epoch": 0.12349341483438774, "grad_norm": 0.00872646551579237, "learning_rate": 8.7659627312052e-06, "loss": 0.0, "step": 17900 }, { "epoch": 0.1235624055689321, "grad_norm": 0.0, "learning_rate": 8.765272823859756e-06, "loss": 0.0, "step": 17910 }, { "epoch": 0.12363139630347644, "grad_norm": 0.5586554408073425, "learning_rate": 8.764582916514313e-06, "loss": 0.0001, "step": 17920 }, { "epoch": 0.12370038703802079, "grad_norm": 0.7736665606498718, "learning_rate": 8.76389300916887e-06, "loss": 0.0003, "step": 17930 }, { "epoch": 0.12376937777256515, "grad_norm": 0.0, "learning_rate": 8.763203101823426e-06, "loss": 0.0, "step": 17940 }, { "epoch": 0.1238383685071095, "grad_norm": 0.0, "learning_rate": 8.762513194477982e-06, "loss": 0.0088, "step": 17950 }, { "epoch": 0.12390735924165384, "grad_norm": 4.878197845670229e-09, "learning_rate": 8.761823287132539e-06, "loss": 0.0, "step": 17960 }, { "epoch": 0.12397634997619819, "grad_norm": 0.0, "learning_rate": 8.761133379787095e-06, "loss": 0.0, "step": 17970 }, { "epoch": 0.12404534071074255, "grad_norm": 0.0, "learning_rate": 8.760443472441652e-06, "loss": 0.0, "step": 17980 }, { "epoch": 0.1241143314452869, "grad_norm": 0.0, "learning_rate": 8.759753565096208e-06, "loss": 0.0, "step": 17990 }, { "epoch": 0.12418332217983125, "grad_norm": 0.0, "learning_rate": 8.759063657750765e-06, "loss": 0.0099, "step": 18000 }, { "epoch": 0.12425231291437559, "grad_norm": 0.0, "learning_rate": 8.758373750405321e-06, "loss": 0.0, "step": 18010 }, { "epoch": 0.12432130364891995, "grad_norm": 0.0, "learning_rate": 8.757683843059878e-06, "loss": 0.0, "step": 18020 }, { "epoch": 0.1243902943834643, "grad_norm": 0.0, "learning_rate": 8.756993935714434e-06, "loss": 0.0, "step": 18030 }, { "epoch": 0.12445928511800865, "grad_norm": 0.0, "learning_rate": 8.75630402836899e-06, "loss": 0.0, "step": 18040 }, { "epoch": 0.12452827585255301, "grad_norm": 0.0, "learning_rate": 8.755614121023547e-06, "loss": 0.0, "step": 18050 }, { "epoch": 0.12459726658709736, "grad_norm": 5.171454517949314e-07, "learning_rate": 8.754924213678104e-06, "loss": 0.0, "step": 18060 }, { "epoch": 0.1246662573216417, "grad_norm": 0.0, "learning_rate": 8.75423430633266e-06, "loss": 0.0008, "step": 18070 }, { "epoch": 0.12473524805618605, "grad_norm": 7.356138667091727e-05, "learning_rate": 8.753544398987217e-06, "loss": 0.0007, "step": 18080 }, { "epoch": 0.12480423879073041, "grad_norm": 415.2155456542969, "learning_rate": 8.752854491641773e-06, "loss": 0.2725, "step": 18090 }, { "epoch": 0.12487322952527476, "grad_norm": 0.0, "learning_rate": 8.75216458429633e-06, "loss": 0.0, "step": 18100 }, { "epoch": 0.1249422202598191, "grad_norm": 8.613947888314044e-10, "learning_rate": 8.751474676950886e-06, "loss": 1.3031, "step": 18110 }, { "epoch": 0.12501121099436346, "grad_norm": 5.406222314263687e-08, "learning_rate": 8.750784769605442e-06, "loss": 0.0573, "step": 18120 }, { "epoch": 0.1250802017289078, "grad_norm": 0.0011144365416839719, "learning_rate": 8.750094862259999e-06, "loss": 0.0, "step": 18130 }, { "epoch": 0.12514919246345216, "grad_norm": 0.0, "learning_rate": 8.749404954914555e-06, "loss": 0.1248, "step": 18140 }, { "epoch": 0.12521818319799652, "grad_norm": 0.0, "learning_rate": 8.748715047569112e-06, "loss": 0.0005, "step": 18150 }, { "epoch": 0.12528717393254085, "grad_norm": 0.0, "learning_rate": 8.748025140223668e-06, "loss": 0.0, "step": 18160 }, { "epoch": 0.1253561646670852, "grad_norm": 0.0, "learning_rate": 8.747335232878225e-06, "loss": 0.0, "step": 18170 }, { "epoch": 0.12542515540162957, "grad_norm": 0.00011594170791795477, "learning_rate": 8.746645325532781e-06, "loss": 0.0, "step": 18180 }, { "epoch": 0.1254941461361739, "grad_norm": 0.0, "learning_rate": 8.745955418187338e-06, "loss": 0.0, "step": 18190 }, { "epoch": 0.12556313687071827, "grad_norm": 0.0, "learning_rate": 8.745265510841894e-06, "loss": 0.0088, "step": 18200 }, { "epoch": 0.1256321276052626, "grad_norm": 9.818076351919558e-10, "learning_rate": 8.744575603496451e-06, "loss": 0.0003, "step": 18210 }, { "epoch": 0.12570111833980696, "grad_norm": 0.0, "learning_rate": 8.743885696151007e-06, "loss": 0.0001, "step": 18220 }, { "epoch": 0.12577010907435132, "grad_norm": 0.0, "learning_rate": 8.743195788805564e-06, "loss": 0.0, "step": 18230 }, { "epoch": 0.12583909980889565, "grad_norm": 0.0, "learning_rate": 8.74250588146012e-06, "loss": 0.0, "step": 18240 }, { "epoch": 0.12590809054344002, "grad_norm": 0.0, "learning_rate": 8.741815974114677e-06, "loss": 0.0, "step": 18250 }, { "epoch": 0.12597708127798438, "grad_norm": 0.0, "learning_rate": 8.741126066769233e-06, "loss": 0.0, "step": 18260 }, { "epoch": 0.1260460720125287, "grad_norm": 0.0, "learning_rate": 8.74043615942379e-06, "loss": 0.0, "step": 18270 }, { "epoch": 0.12611506274707307, "grad_norm": 0.0, "learning_rate": 8.739746252078346e-06, "loss": 0.0, "step": 18280 }, { "epoch": 0.12618405348161743, "grad_norm": 0.0, "learning_rate": 8.739056344732903e-06, "loss": 0.0, "step": 18290 }, { "epoch": 0.12625304421616176, "grad_norm": 0.0, "learning_rate": 8.73836643738746e-06, "loss": 0.0, "step": 18300 }, { "epoch": 0.12632203495070612, "grad_norm": 0.0, "learning_rate": 8.737676530042016e-06, "loss": 0.0092, "step": 18310 }, { "epoch": 0.12639102568525046, "grad_norm": 0.0, "learning_rate": 8.736986622696572e-06, "loss": 0.0, "step": 18320 }, { "epoch": 0.12646001641979482, "grad_norm": 0.0, "learning_rate": 8.736296715351129e-06, "loss": 0.0, "step": 18330 }, { "epoch": 0.12652900715433918, "grad_norm": 2.6131767299375497e-05, "learning_rate": 8.735606808005685e-06, "loss": 0.0, "step": 18340 }, { "epoch": 0.1265979978888835, "grad_norm": 9.021263394970447e-06, "learning_rate": 8.734916900660242e-06, "loss": 0.0, "step": 18350 }, { "epoch": 0.12666698862342787, "grad_norm": 0.0, "learning_rate": 8.734226993314798e-06, "loss": 0.0, "step": 18360 }, { "epoch": 0.12673597935797223, "grad_norm": 2.1192390704527497e-06, "learning_rate": 8.733537085969355e-06, "loss": 0.0, "step": 18370 }, { "epoch": 0.12680497009251657, "grad_norm": 0.0, "learning_rate": 8.732847178623911e-06, "loss": 0.0, "step": 18380 }, { "epoch": 0.12687396082706093, "grad_norm": 0.0, "learning_rate": 8.732157271278468e-06, "loss": 0.0001, "step": 18390 }, { "epoch": 0.1269429515616053, "grad_norm": 0.0, "learning_rate": 8.731467363933024e-06, "loss": 0.0006, "step": 18400 }, { "epoch": 0.12701194229614962, "grad_norm": 0.0, "learning_rate": 8.73077745658758e-06, "loss": 0.0, "step": 18410 }, { "epoch": 0.12708093303069398, "grad_norm": 0.0, "learning_rate": 8.730087549242137e-06, "loss": 0.0, "step": 18420 }, { "epoch": 0.12714992376523832, "grad_norm": 0.0, "learning_rate": 8.729397641896694e-06, "loss": 0.0917, "step": 18430 }, { "epoch": 0.12721891449978268, "grad_norm": 0.0, "learning_rate": 8.72870773455125e-06, "loss": 0.0, "step": 18440 }, { "epoch": 0.12728790523432704, "grad_norm": 0.0, "learning_rate": 8.728017827205807e-06, "loss": 0.0, "step": 18450 }, { "epoch": 0.12735689596887137, "grad_norm": 0.0, "learning_rate": 8.727327919860363e-06, "loss": 0.0014, "step": 18460 }, { "epoch": 0.12742588670341573, "grad_norm": 0.0, "learning_rate": 8.72663801251492e-06, "loss": 0.0, "step": 18470 }, { "epoch": 0.1274948774379601, "grad_norm": 8.226572390412912e-05, "learning_rate": 8.725948105169476e-06, "loss": 0.0, "step": 18480 }, { "epoch": 0.12756386817250442, "grad_norm": 0.0, "learning_rate": 8.725258197824033e-06, "loss": 0.0, "step": 18490 }, { "epoch": 0.12763285890704879, "grad_norm": 0.0, "learning_rate": 8.724568290478589e-06, "loss": 0.0, "step": 18500 }, { "epoch": 0.12770184964159315, "grad_norm": 0.0, "learning_rate": 8.723878383133146e-06, "loss": 0.0, "step": 18510 }, { "epoch": 0.12777084037613748, "grad_norm": 0.0, "learning_rate": 8.723188475787702e-06, "loss": 0.0, "step": 18520 }, { "epoch": 0.12783983111068184, "grad_norm": 0.0, "learning_rate": 8.722498568442259e-06, "loss": 0.0, "step": 18530 }, { "epoch": 0.12790882184522617, "grad_norm": 0.0, "learning_rate": 8.721808661096815e-06, "loss": 0.0, "step": 18540 }, { "epoch": 0.12797781257977053, "grad_norm": 0.0, "learning_rate": 8.721118753751372e-06, "loss": 0.0, "step": 18550 }, { "epoch": 0.1280468033143149, "grad_norm": 0.0, "learning_rate": 8.720428846405928e-06, "loss": 0.0001, "step": 18560 }, { "epoch": 0.12811579404885923, "grad_norm": 0.0, "learning_rate": 8.719738939060485e-06, "loss": 0.0, "step": 18570 }, { "epoch": 0.1281847847834036, "grad_norm": 1.5707319889202154e-09, "learning_rate": 8.719049031715041e-06, "loss": 0.0145, "step": 18580 }, { "epoch": 0.12825377551794795, "grad_norm": 0.0, "learning_rate": 8.718359124369598e-06, "loss": 0.0, "step": 18590 }, { "epoch": 0.12832276625249228, "grad_norm": 0.0, "learning_rate": 8.717669217024154e-06, "loss": 0.0, "step": 18600 }, { "epoch": 0.12839175698703664, "grad_norm": 0.0, "learning_rate": 8.71697930967871e-06, "loss": 0.3123, "step": 18610 }, { "epoch": 0.128460747721581, "grad_norm": 0.0, "learning_rate": 8.716289402333267e-06, "loss": 0.0, "step": 18620 }, { "epoch": 0.12852973845612534, "grad_norm": 8.372144089108247e-10, "learning_rate": 8.715599494987823e-06, "loss": 0.0, "step": 18630 }, { "epoch": 0.1285987291906697, "grad_norm": 0.0, "learning_rate": 8.71490958764238e-06, "loss": 0.0, "step": 18640 }, { "epoch": 0.12866771992521403, "grad_norm": 0.5077363848686218, "learning_rate": 8.714219680296936e-06, "loss": 0.0002, "step": 18650 }, { "epoch": 0.1287367106597584, "grad_norm": 0.0, "learning_rate": 8.713529772951493e-06, "loss": 0.0001, "step": 18660 }, { "epoch": 0.12880570139430275, "grad_norm": 0.0, "learning_rate": 8.71283986560605e-06, "loss": 0.0201, "step": 18670 }, { "epoch": 0.12887469212884708, "grad_norm": 0.0, "learning_rate": 8.712149958260606e-06, "loss": 0.0, "step": 18680 }, { "epoch": 0.12894368286339145, "grad_norm": 0.060998305678367615, "learning_rate": 8.711460050915162e-06, "loss": 0.0, "step": 18690 }, { "epoch": 0.1290126735979358, "grad_norm": 0.0, "learning_rate": 8.710770143569719e-06, "loss": 0.0002, "step": 18700 }, { "epoch": 0.12908166433248014, "grad_norm": 1.6868391036987305, "learning_rate": 8.710080236224275e-06, "loss": 0.3038, "step": 18710 }, { "epoch": 0.1291506550670245, "grad_norm": 0.0, "learning_rate": 8.709390328878832e-06, "loss": 0.0, "step": 18720 }, { "epoch": 0.12921964580156886, "grad_norm": 0.0, "learning_rate": 8.70870042153339e-06, "loss": 0.0, "step": 18730 }, { "epoch": 0.1292886365361132, "grad_norm": 0.18403001129627228, "learning_rate": 8.708010514187945e-06, "loss": 0.0, "step": 18740 }, { "epoch": 0.12935762727065755, "grad_norm": 0.0, "learning_rate": 8.707320606842501e-06, "loss": 0.0015, "step": 18750 }, { "epoch": 0.1294266180052019, "grad_norm": 0.0006686258711852133, "learning_rate": 8.706630699497058e-06, "loss": 0.0, "step": 18760 }, { "epoch": 0.12949560873974625, "grad_norm": 0.0, "learning_rate": 8.705940792151614e-06, "loss": 0.0, "step": 18770 }, { "epoch": 0.1295645994742906, "grad_norm": 0.0, "learning_rate": 8.70525088480617e-06, "loss": 0.0, "step": 18780 }, { "epoch": 0.12963359020883494, "grad_norm": 0.0, "learning_rate": 8.704560977460727e-06, "loss": 0.0142, "step": 18790 }, { "epoch": 0.1297025809433793, "grad_norm": 4.698492706012303e-10, "learning_rate": 8.703871070115284e-06, "loss": 0.0, "step": 18800 }, { "epoch": 0.12977157167792366, "grad_norm": 0.0, "learning_rate": 8.70318116276984e-06, "loss": 0.0001, "step": 18810 }, { "epoch": 0.129840562412468, "grad_norm": 0.00023342363419942558, "learning_rate": 8.702491255424397e-06, "loss": 0.0, "step": 18820 }, { "epoch": 0.12990955314701236, "grad_norm": 0.0, "learning_rate": 8.701801348078953e-06, "loss": 0.0, "step": 18830 }, { "epoch": 0.12997854388155672, "grad_norm": 0.0, "learning_rate": 8.701111440733511e-06, "loss": 0.0, "step": 18840 }, { "epoch": 0.13004753461610105, "grad_norm": 0.0, "learning_rate": 8.700421533388066e-06, "loss": 0.001, "step": 18850 }, { "epoch": 0.1301165253506454, "grad_norm": 0.4395293891429901, "learning_rate": 8.699731626042623e-06, "loss": 0.0001, "step": 18860 }, { "epoch": 0.13018551608518975, "grad_norm": 0.0008274639840237796, "learning_rate": 8.69904171869718e-06, "loss": 0.0002, "step": 18870 }, { "epoch": 0.1302545068197341, "grad_norm": 7.702266202613828e-07, "learning_rate": 8.698351811351736e-06, "loss": 0.0, "step": 18880 }, { "epoch": 0.13032349755427847, "grad_norm": 2.2250200345297344e-05, "learning_rate": 8.697661904006292e-06, "loss": 0.0, "step": 18890 }, { "epoch": 0.1303924882888228, "grad_norm": 0.0, "learning_rate": 8.696971996660849e-06, "loss": 0.0, "step": 18900 }, { "epoch": 0.13046147902336716, "grad_norm": 1.3947058505436871e-05, "learning_rate": 8.696282089315405e-06, "loss": 0.0, "step": 18910 }, { "epoch": 0.13053046975791152, "grad_norm": 0.0, "learning_rate": 8.695592181969962e-06, "loss": 0.0, "step": 18920 }, { "epoch": 0.13059946049245585, "grad_norm": 6.807110912632197e-05, "learning_rate": 8.694902274624518e-06, "loss": 0.0, "step": 18930 }, { "epoch": 0.13066845122700022, "grad_norm": 0.0, "learning_rate": 8.694212367279075e-06, "loss": 0.0, "step": 18940 }, { "epoch": 0.13073744196154458, "grad_norm": 0.0, "learning_rate": 8.693522459933633e-06, "loss": 0.0047, "step": 18950 }, { "epoch": 0.1308064326960889, "grad_norm": 0.0002660874743014574, "learning_rate": 8.692832552588188e-06, "loss": 0.0, "step": 18960 }, { "epoch": 0.13087542343063327, "grad_norm": 6.707267061756284e-07, "learning_rate": 8.692142645242744e-06, "loss": 0.0418, "step": 18970 }, { "epoch": 0.1309444141651776, "grad_norm": 0.0, "learning_rate": 8.6914527378973e-06, "loss": 0.0, "step": 18980 }, { "epoch": 0.13101340489972196, "grad_norm": 0.0022617836948484182, "learning_rate": 8.690762830551857e-06, "loss": 0.0072, "step": 18990 }, { "epoch": 0.13108239563426632, "grad_norm": 0.0, "learning_rate": 8.690072923206414e-06, "loss": 0.0095, "step": 19000 }, { "epoch": 0.13115138636881066, "grad_norm": 0.001342968549579382, "learning_rate": 8.68938301586097e-06, "loss": 0.0, "step": 19010 }, { "epoch": 0.13122037710335502, "grad_norm": 0.0, "learning_rate": 8.688693108515527e-06, "loss": 0.0001, "step": 19020 }, { "epoch": 0.13128936783789938, "grad_norm": 0.0, "learning_rate": 8.688003201170083e-06, "loss": 0.007, "step": 19030 }, { "epoch": 0.1313583585724437, "grad_norm": 0.0, "learning_rate": 8.68731329382464e-06, "loss": 0.0, "step": 19040 }, { "epoch": 0.13142734930698807, "grad_norm": 0.0, "learning_rate": 8.686623386479196e-06, "loss": 0.0, "step": 19050 }, { "epoch": 0.13149634004153243, "grad_norm": 5.441776465886505e-06, "learning_rate": 8.685933479133754e-06, "loss": 0.0, "step": 19060 }, { "epoch": 0.13156533077607677, "grad_norm": 0.0, "learning_rate": 8.685243571788309e-06, "loss": 0.0, "step": 19070 }, { "epoch": 0.13163432151062113, "grad_norm": 0.0, "learning_rate": 8.684553664442866e-06, "loss": 0.0003, "step": 19080 }, { "epoch": 0.1317033122451655, "grad_norm": 2.34255912801018e-05, "learning_rate": 8.683863757097422e-06, "loss": 0.0, "step": 19090 }, { "epoch": 0.13177230297970982, "grad_norm": 0.0007809360395185649, "learning_rate": 8.683173849751979e-06, "loss": 0.0, "step": 19100 }, { "epoch": 0.13184129371425418, "grad_norm": 1.6401661468989914e-06, "learning_rate": 8.682483942406535e-06, "loss": 0.0, "step": 19110 }, { "epoch": 0.13191028444879851, "grad_norm": 0.0, "learning_rate": 8.681794035061092e-06, "loss": 0.0, "step": 19120 }, { "epoch": 0.13197927518334288, "grad_norm": 0.0, "learning_rate": 8.681104127715648e-06, "loss": 0.0014, "step": 19130 }, { "epoch": 0.13204826591788724, "grad_norm": 0.0, "learning_rate": 8.680414220370204e-06, "loss": 0.0054, "step": 19140 }, { "epoch": 0.13211725665243157, "grad_norm": 0.0, "learning_rate": 8.679724313024761e-06, "loss": 0.0, "step": 19150 }, { "epoch": 0.13218624738697593, "grad_norm": 1.6095175681130058e-07, "learning_rate": 8.679034405679317e-06, "loss": 0.0, "step": 19160 }, { "epoch": 0.1322552381215203, "grad_norm": 0.0, "learning_rate": 8.678344498333876e-06, "loss": 0.0001, "step": 19170 }, { "epoch": 0.13232422885606462, "grad_norm": 0.0, "learning_rate": 8.677654590988432e-06, "loss": 0.0001, "step": 19180 }, { "epoch": 0.13239321959060898, "grad_norm": 0.0, "learning_rate": 8.676964683642987e-06, "loss": 0.0, "step": 19190 }, { "epoch": 0.13246221032515335, "grad_norm": 0.0, "learning_rate": 8.676274776297543e-06, "loss": 0.0, "step": 19200 }, { "epoch": 0.13253120105969768, "grad_norm": 0.0, "learning_rate": 8.6755848689521e-06, "loss": 0.0, "step": 19210 }, { "epoch": 0.13260019179424204, "grad_norm": 0.0, "learning_rate": 8.674894961606656e-06, "loss": 0.0, "step": 19220 }, { "epoch": 0.13266918252878637, "grad_norm": 0.0, "learning_rate": 8.674205054261213e-06, "loss": 0.0, "step": 19230 }, { "epoch": 0.13273817326333073, "grad_norm": 0.0, "learning_rate": 8.67351514691577e-06, "loss": 0.0, "step": 19240 }, { "epoch": 0.1328071639978751, "grad_norm": 0.0, "learning_rate": 8.672825239570326e-06, "loss": 0.0, "step": 19250 }, { "epoch": 0.13287615473241943, "grad_norm": 0.0, "learning_rate": 8.672135332224882e-06, "loss": 0.0, "step": 19260 }, { "epoch": 0.1329451454669638, "grad_norm": 0.0, "learning_rate": 8.671445424879439e-06, "loss": 0.0, "step": 19270 }, { "epoch": 0.13301413620150815, "grad_norm": 0.0, "learning_rate": 8.670755517533997e-06, "loss": 0.0, "step": 19280 }, { "epoch": 0.13308312693605248, "grad_norm": 0.0, "learning_rate": 8.670065610188554e-06, "loss": 0.0, "step": 19290 }, { "epoch": 0.13315211767059684, "grad_norm": 0.00677242036908865, "learning_rate": 8.669375702843108e-06, "loss": 0.0, "step": 19300 }, { "epoch": 0.1332211084051412, "grad_norm": 0.0, "learning_rate": 8.668685795497665e-06, "loss": 0.0, "step": 19310 }, { "epoch": 0.13329009913968554, "grad_norm": 0.0, "learning_rate": 8.667995888152221e-06, "loss": 0.0, "step": 19320 }, { "epoch": 0.1333590898742299, "grad_norm": 0.0006715624476782978, "learning_rate": 8.667305980806778e-06, "loss": 0.0, "step": 19330 }, { "epoch": 0.13342808060877423, "grad_norm": 0.0, "learning_rate": 8.666616073461334e-06, "loss": 0.0, "step": 19340 }, { "epoch": 0.1334970713433186, "grad_norm": 1.0780977222069055e-09, "learning_rate": 8.66592616611589e-06, "loss": 0.0, "step": 19350 }, { "epoch": 0.13356606207786295, "grad_norm": 3.8380069732666016, "learning_rate": 8.665236258770447e-06, "loss": 0.0006, "step": 19360 }, { "epoch": 0.13363505281240728, "grad_norm": 0.0, "learning_rate": 8.664546351425004e-06, "loss": 0.0, "step": 19370 }, { "epoch": 0.13370404354695165, "grad_norm": 3.7773427266074577e-06, "learning_rate": 8.66385644407956e-06, "loss": 0.0, "step": 19380 }, { "epoch": 0.133773034281496, "grad_norm": 0.0, "learning_rate": 8.663166536734118e-06, "loss": 0.0, "step": 19390 }, { "epoch": 0.13384202501604034, "grad_norm": 0.0, "learning_rate": 8.662476629388675e-06, "loss": 0.0, "step": 19400 }, { "epoch": 0.1339110157505847, "grad_norm": 0.005529284942895174, "learning_rate": 8.66178672204323e-06, "loss": 0.0, "step": 19410 }, { "epoch": 0.13398000648512906, "grad_norm": 2.355862125114072e-05, "learning_rate": 8.661096814697786e-06, "loss": 0.0, "step": 19420 }, { "epoch": 0.1340489972196734, "grad_norm": 0.0, "learning_rate": 8.660406907352343e-06, "loss": 0.0, "step": 19430 }, { "epoch": 0.13411798795421775, "grad_norm": 0.0, "learning_rate": 8.6597170000069e-06, "loss": 0.0001, "step": 19440 }, { "epoch": 0.1341869786887621, "grad_norm": 0.0, "learning_rate": 8.659027092661456e-06, "loss": 0.0015, "step": 19450 }, { "epoch": 0.13425596942330645, "grad_norm": 9.999886970035732e-05, "learning_rate": 8.658337185316012e-06, "loss": 0.0, "step": 19460 }, { "epoch": 0.1343249601578508, "grad_norm": 0.0, "learning_rate": 8.657647277970569e-06, "loss": 0.0, "step": 19470 }, { "epoch": 0.13439395089239514, "grad_norm": 0.0, "learning_rate": 8.656957370625125e-06, "loss": 0.0, "step": 19480 }, { "epoch": 0.1344629416269395, "grad_norm": 0.0002451766049489379, "learning_rate": 8.656267463279682e-06, "loss": 0.0024, "step": 19490 }, { "epoch": 0.13453193236148386, "grad_norm": 0.0, "learning_rate": 8.65557755593424e-06, "loss": 0.0, "step": 19500 }, { "epoch": 0.1346009230960282, "grad_norm": 0.0, "learning_rate": 8.654887648588796e-06, "loss": 0.0, "step": 19510 }, { "epoch": 0.13466991383057256, "grad_norm": 0.0, "learning_rate": 8.654197741243353e-06, "loss": 0.0, "step": 19520 }, { "epoch": 0.13473890456511692, "grad_norm": 0.0, "learning_rate": 8.653507833897908e-06, "loss": 0.0005, "step": 19530 }, { "epoch": 0.13480789529966125, "grad_norm": 0.0, "learning_rate": 8.652817926552464e-06, "loss": 0.0001, "step": 19540 }, { "epoch": 0.1348768860342056, "grad_norm": 428.6827697753906, "learning_rate": 8.65212801920702e-06, "loss": 1.448, "step": 19550 }, { "epoch": 0.13494587676874994, "grad_norm": 0.0, "learning_rate": 8.651438111861577e-06, "loss": 0.0, "step": 19560 }, { "epoch": 0.1350148675032943, "grad_norm": 0.0, "learning_rate": 8.650748204516134e-06, "loss": 0.0, "step": 19570 }, { "epoch": 0.13508385823783867, "grad_norm": 0.0, "learning_rate": 8.65005829717069e-06, "loss": 0.0002, "step": 19580 }, { "epoch": 0.135152848972383, "grad_norm": 0.0, "learning_rate": 8.649368389825247e-06, "loss": 0.0001, "step": 19590 }, { "epoch": 0.13522183970692736, "grad_norm": 0.3019979000091553, "learning_rate": 8.648678482479803e-06, "loss": 0.0001, "step": 19600 }, { "epoch": 0.13529083044147172, "grad_norm": 8.601876470493153e-05, "learning_rate": 8.647988575134361e-06, "loss": 0.2959, "step": 19610 }, { "epoch": 0.13535982117601605, "grad_norm": 0.0, "learning_rate": 8.647298667788918e-06, "loss": 0.0, "step": 19620 }, { "epoch": 0.13542881191056041, "grad_norm": 6.097987004238803e-09, "learning_rate": 8.646608760443474e-06, "loss": 0.0002, "step": 19630 }, { "epoch": 0.13549780264510478, "grad_norm": 0.0003549469984136522, "learning_rate": 8.645918853098029e-06, "loss": 0.0115, "step": 19640 }, { "epoch": 0.1355667933796491, "grad_norm": 0.0, "learning_rate": 8.645228945752585e-06, "loss": 0.0, "step": 19650 }, { "epoch": 0.13563578411419347, "grad_norm": 0.0, "learning_rate": 8.644539038407142e-06, "loss": 0.0058, "step": 19660 }, { "epoch": 0.1357047748487378, "grad_norm": 0.0, "learning_rate": 8.643849131061698e-06, "loss": 0.0, "step": 19670 }, { "epoch": 0.13577376558328216, "grad_norm": 0.0, "learning_rate": 8.643159223716255e-06, "loss": 0.0004, "step": 19680 }, { "epoch": 0.13584275631782652, "grad_norm": 3.587937116622925, "learning_rate": 8.642469316370811e-06, "loss": 0.0006, "step": 19690 }, { "epoch": 0.13591174705237086, "grad_norm": 6.730941581878369e-09, "learning_rate": 8.641779409025368e-06, "loss": 0.0009, "step": 19700 }, { "epoch": 0.13598073778691522, "grad_norm": 0.0, "learning_rate": 8.641089501679924e-06, "loss": 0.0013, "step": 19710 }, { "epoch": 0.13604972852145958, "grad_norm": 0.0, "learning_rate": 8.640399594334483e-06, "loss": 0.0, "step": 19720 }, { "epoch": 0.1361187192560039, "grad_norm": 0.0, "learning_rate": 8.639709686989039e-06, "loss": 0.0, "step": 19730 }, { "epoch": 0.13618770999054827, "grad_norm": 4.829764366220957e-10, "learning_rate": 8.639019779643596e-06, "loss": 0.0077, "step": 19740 }, { "epoch": 0.13625670072509263, "grad_norm": 0.0, "learning_rate": 8.63832987229815e-06, "loss": 0.0001, "step": 19750 }, { "epoch": 0.13632569145963697, "grad_norm": 0.0, "learning_rate": 8.637639964952707e-06, "loss": 0.6105, "step": 19760 }, { "epoch": 0.13639468219418133, "grad_norm": 0.0, "learning_rate": 8.636950057607263e-06, "loss": 0.0, "step": 19770 }, { "epoch": 0.13646367292872566, "grad_norm": 0.0, "learning_rate": 8.63626015026182e-06, "loss": 0.0, "step": 19780 }, { "epoch": 0.13653266366327002, "grad_norm": 0.0, "learning_rate": 8.635570242916376e-06, "loss": 0.0067, "step": 19790 }, { "epoch": 0.13660165439781438, "grad_norm": 0.0, "learning_rate": 8.634880335570933e-06, "loss": 0.0, "step": 19800 }, { "epoch": 0.13667064513235871, "grad_norm": 0.0, "learning_rate": 8.63419042822549e-06, "loss": 0.0, "step": 19810 }, { "epoch": 0.13673963586690308, "grad_norm": 0.0, "learning_rate": 8.633500520880046e-06, "loss": 0.0331, "step": 19820 }, { "epoch": 0.13680862660144744, "grad_norm": 0.0, "learning_rate": 8.632810613534604e-06, "loss": 0.0, "step": 19830 }, { "epoch": 0.13687761733599177, "grad_norm": 0.0, "learning_rate": 8.63212070618916e-06, "loss": 0.0, "step": 19840 }, { "epoch": 0.13694660807053613, "grad_norm": 377.914306640625, "learning_rate": 8.631430798843717e-06, "loss": 0.0543, "step": 19850 }, { "epoch": 0.1370155988050805, "grad_norm": 0.0, "learning_rate": 8.630740891498273e-06, "loss": 0.0, "step": 19860 }, { "epoch": 0.13708458953962482, "grad_norm": 0.28941604495048523, "learning_rate": 8.630050984152828e-06, "loss": 0.0, "step": 19870 }, { "epoch": 0.13715358027416918, "grad_norm": 0.0, "learning_rate": 8.629361076807385e-06, "loss": 0.0, "step": 19880 }, { "epoch": 0.13722257100871352, "grad_norm": 0.0, "learning_rate": 8.628671169461941e-06, "loss": 0.0, "step": 19890 }, { "epoch": 0.13729156174325788, "grad_norm": 3.1171734917734284e-06, "learning_rate": 8.627981262116498e-06, "loss": 0.0, "step": 19900 }, { "epoch": 0.13736055247780224, "grad_norm": 2.208297100736445e-09, "learning_rate": 8.627291354771054e-06, "loss": 0.0, "step": 19910 }, { "epoch": 0.13742954321234657, "grad_norm": 0.0, "learning_rate": 8.62660144742561e-06, "loss": 0.0, "step": 19920 }, { "epoch": 0.13749853394689093, "grad_norm": 0.000125538426800631, "learning_rate": 8.625911540080167e-06, "loss": 0.0, "step": 19930 }, { "epoch": 0.1375675246814353, "grad_norm": 0.0, "learning_rate": 8.625221632734725e-06, "loss": 0.365, "step": 19940 }, { "epoch": 0.13763651541597963, "grad_norm": 0.0, "learning_rate": 8.624531725389282e-06, "loss": 0.0, "step": 19950 }, { "epoch": 0.137705506150524, "grad_norm": 0.0, "learning_rate": 8.623841818043838e-06, "loss": 0.0, "step": 19960 }, { "epoch": 0.13777449688506835, "grad_norm": 5.0932342787746165e-08, "learning_rate": 8.623151910698395e-06, "loss": 0.0, "step": 19970 }, { "epoch": 0.13784348761961268, "grad_norm": 0.0, "learning_rate": 8.62246200335295e-06, "loss": 0.0, "step": 19980 }, { "epoch": 0.13791247835415704, "grad_norm": 0.0, "learning_rate": 8.621772096007506e-06, "loss": 0.0005, "step": 19990 }, { "epoch": 0.13798146908870138, "grad_norm": 0.0, "learning_rate": 8.621082188662063e-06, "loss": 0.0009, "step": 20000 }, { "epoch": 0.13805045982324574, "grad_norm": 4.4332235120236874e-05, "learning_rate": 8.620392281316619e-06, "loss": 0.0, "step": 20010 }, { "epoch": 0.1381194505577901, "grad_norm": 0.03322438895702362, "learning_rate": 8.619702373971176e-06, "loss": 0.0, "step": 20020 }, { "epoch": 0.13818844129233443, "grad_norm": 0.0, "learning_rate": 8.619012466625732e-06, "loss": 0.0019, "step": 20030 }, { "epoch": 0.1382574320268788, "grad_norm": 1.6360729205189273e-05, "learning_rate": 8.618322559280289e-06, "loss": 0.0002, "step": 20040 }, { "epoch": 0.13832642276142315, "grad_norm": 0.0, "learning_rate": 8.617632651934847e-06, "loss": 0.0, "step": 20050 }, { "epoch": 0.13839541349596748, "grad_norm": 2.6920284312836884e-07, "learning_rate": 8.616942744589403e-06, "loss": 0.875, "step": 20060 }, { "epoch": 0.13846440423051184, "grad_norm": 0.06351414322853088, "learning_rate": 8.61625283724396e-06, "loss": 0.0, "step": 20070 }, { "epoch": 0.1385333949650562, "grad_norm": 0.0, "learning_rate": 8.615562929898516e-06, "loss": 0.0, "step": 20080 }, { "epoch": 0.13860238569960054, "grad_norm": 4.050568591651427e-08, "learning_rate": 8.614873022553071e-06, "loss": 0.0, "step": 20090 }, { "epoch": 0.1386713764341449, "grad_norm": 0.04281700402498245, "learning_rate": 8.614183115207628e-06, "loss": 0.0001, "step": 20100 }, { "epoch": 0.13874036716868923, "grad_norm": 2.1143573576409835e-06, "learning_rate": 8.613493207862184e-06, "loss": 0.0125, "step": 20110 }, { "epoch": 0.1388093579032336, "grad_norm": 0.0, "learning_rate": 8.61280330051674e-06, "loss": 0.0, "step": 20120 }, { "epoch": 0.13887834863777795, "grad_norm": 0.0, "learning_rate": 8.612113393171297e-06, "loss": 0.0, "step": 20130 }, { "epoch": 0.1389473393723223, "grad_norm": 0.0, "learning_rate": 8.611423485825854e-06, "loss": 0.0, "step": 20140 }, { "epoch": 0.13901633010686665, "grad_norm": 0.0, "learning_rate": 8.61073357848041e-06, "loss": 0.0, "step": 20150 }, { "epoch": 0.139085320841411, "grad_norm": 0.0, "learning_rate": 8.610043671134968e-06, "loss": 0.0, "step": 20160 }, { "epoch": 0.13915431157595534, "grad_norm": 0.0, "learning_rate": 8.609353763789525e-06, "loss": 0.0, "step": 20170 }, { "epoch": 0.1392233023104997, "grad_norm": 0.0, "learning_rate": 8.608663856444081e-06, "loss": 0.0002, "step": 20180 }, { "epoch": 0.13929229304504406, "grad_norm": 4.659440056009601e-10, "learning_rate": 8.607973949098638e-06, "loss": 0.0, "step": 20190 }, { "epoch": 0.1393612837795884, "grad_norm": 4.7986690333345905e-05, "learning_rate": 8.607284041753194e-06, "loss": 0.0, "step": 20200 }, { "epoch": 0.13943027451413276, "grad_norm": 2.532990484027664e-09, "learning_rate": 8.606594134407749e-06, "loss": 0.2168, "step": 20210 }, { "epoch": 0.1394992652486771, "grad_norm": 0.0, "learning_rate": 8.605904227062305e-06, "loss": 0.0, "step": 20220 }, { "epoch": 0.13956825598322145, "grad_norm": 0.0, "learning_rate": 8.605214319716862e-06, "loss": 0.3484, "step": 20230 }, { "epoch": 0.1396372467177658, "grad_norm": 0.0, "learning_rate": 8.604524412371418e-06, "loss": 0.0003, "step": 20240 }, { "epoch": 0.13970623745231014, "grad_norm": 0.0, "learning_rate": 8.603834505025975e-06, "loss": 0.0, "step": 20250 }, { "epoch": 0.1397752281868545, "grad_norm": 4.6949327980883027e-08, "learning_rate": 8.603144597680531e-06, "loss": 0.0, "step": 20260 }, { "epoch": 0.13984421892139887, "grad_norm": 0.0, "learning_rate": 8.60245469033509e-06, "loss": 0.0, "step": 20270 }, { "epoch": 0.1399132096559432, "grad_norm": 0.0, "learning_rate": 8.601764782989646e-06, "loss": 0.0, "step": 20280 }, { "epoch": 0.13998220039048756, "grad_norm": 0.0, "learning_rate": 8.601074875644203e-06, "loss": 0.0, "step": 20290 }, { "epoch": 0.14005119112503192, "grad_norm": 0.0011996901594102383, "learning_rate": 8.600384968298759e-06, "loss": 0.0003, "step": 20300 }, { "epoch": 0.14012018185957625, "grad_norm": 7.967141613107742e-08, "learning_rate": 8.599695060953316e-06, "loss": 0.0911, "step": 20310 }, { "epoch": 0.14018917259412061, "grad_norm": 0.0, "learning_rate": 8.59900515360787e-06, "loss": 0.0, "step": 20320 }, { "epoch": 0.14025816332866495, "grad_norm": 0.0, "learning_rate": 8.598315246262427e-06, "loss": 0.0005, "step": 20330 }, { "epoch": 0.1403271540632093, "grad_norm": 0.0, "learning_rate": 8.597625338916983e-06, "loss": 0.0001, "step": 20340 }, { "epoch": 0.14039614479775367, "grad_norm": 0.0, "learning_rate": 8.59693543157154e-06, "loss": 0.0, "step": 20350 }, { "epoch": 0.140465135532298, "grad_norm": 0.0, "learning_rate": 8.596245524226096e-06, "loss": 0.0002, "step": 20360 }, { "epoch": 0.14053412626684236, "grad_norm": 0.0, "learning_rate": 8.595555616880653e-06, "loss": 0.0, "step": 20370 }, { "epoch": 0.14060311700138672, "grad_norm": 0.0, "learning_rate": 8.594865709535211e-06, "loss": 0.5828, "step": 20380 }, { "epoch": 0.14067210773593106, "grad_norm": 145.4329071044922, "learning_rate": 8.594175802189767e-06, "loss": 0.0276, "step": 20390 }, { "epoch": 0.14074109847047542, "grad_norm": 0.0, "learning_rate": 8.593485894844324e-06, "loss": 0.0, "step": 20400 }, { "epoch": 0.14081008920501978, "grad_norm": 0.0, "learning_rate": 8.59279598749888e-06, "loss": 0.0006, "step": 20410 }, { "epoch": 0.1408790799395641, "grad_norm": 0.0, "learning_rate": 8.592106080153437e-06, "loss": 0.0011, "step": 20420 }, { "epoch": 0.14094807067410847, "grad_norm": 0.0, "learning_rate": 8.591416172807992e-06, "loss": 0.0, "step": 20430 }, { "epoch": 0.1410170614086528, "grad_norm": 0.0, "learning_rate": 8.590726265462548e-06, "loss": 0.0197, "step": 20440 }, { "epoch": 0.14108605214319717, "grad_norm": 9.441091970074922e-05, "learning_rate": 8.590036358117105e-06, "loss": 0.0, "step": 20450 }, { "epoch": 0.14115504287774153, "grad_norm": 0.31755271553993225, "learning_rate": 8.589346450771661e-06, "loss": 0.0001, "step": 20460 }, { "epoch": 0.14122403361228586, "grad_norm": 0.0, "learning_rate": 8.588656543426218e-06, "loss": 0.0, "step": 20470 }, { "epoch": 0.14129302434683022, "grad_norm": 0.0, "learning_rate": 8.587966636080774e-06, "loss": 0.0, "step": 20480 }, { "epoch": 0.14136201508137458, "grad_norm": 0.0, "learning_rate": 8.587276728735332e-06, "loss": 0.0, "step": 20490 }, { "epoch": 0.14143100581591891, "grad_norm": 0.0, "learning_rate": 8.586586821389889e-06, "loss": 0.0, "step": 20500 }, { "epoch": 0.14149999655046328, "grad_norm": 0.0, "learning_rate": 8.585896914044445e-06, "loss": 0.0, "step": 20510 }, { "epoch": 0.14156898728500764, "grad_norm": 0.0, "learning_rate": 8.585207006699002e-06, "loss": 0.0, "step": 20520 }, { "epoch": 0.14163797801955197, "grad_norm": 0.0, "learning_rate": 8.584517099353558e-06, "loss": 0.0, "step": 20530 }, { "epoch": 0.14170696875409633, "grad_norm": 0.0026347588282078505, "learning_rate": 8.583827192008113e-06, "loss": 0.6192, "step": 20540 }, { "epoch": 0.14177595948864066, "grad_norm": 3.6158878803253174, "learning_rate": 8.58313728466267e-06, "loss": 0.0008, "step": 20550 }, { "epoch": 0.14184495022318502, "grad_norm": 0.0014746921369805932, "learning_rate": 8.582447377317226e-06, "loss": 0.5042, "step": 20560 }, { "epoch": 0.14191394095772938, "grad_norm": 0.0, "learning_rate": 8.581757469971783e-06, "loss": 0.0, "step": 20570 }, { "epoch": 0.14198293169227372, "grad_norm": 0.0, "learning_rate": 8.581067562626339e-06, "loss": 0.0, "step": 20580 }, { "epoch": 0.14205192242681808, "grad_norm": 0.0, "learning_rate": 8.580377655280896e-06, "loss": 0.0, "step": 20590 }, { "epoch": 0.14212091316136244, "grad_norm": 0.0, "learning_rate": 8.579687747935454e-06, "loss": 0.0, "step": 20600 }, { "epoch": 0.14218990389590677, "grad_norm": 0.22815673053264618, "learning_rate": 8.57899784059001e-06, "loss": 0.0, "step": 20610 }, { "epoch": 0.14225889463045113, "grad_norm": 4.310106160865246e-10, "learning_rate": 8.578307933244567e-06, "loss": 0.0, "step": 20620 }, { "epoch": 0.1423278853649955, "grad_norm": 0.0, "learning_rate": 8.577618025899123e-06, "loss": 0.0, "step": 20630 }, { "epoch": 0.14239687609953983, "grad_norm": 0.0, "learning_rate": 8.57692811855368e-06, "loss": 0.0, "step": 20640 }, { "epoch": 0.1424658668340842, "grad_norm": 0.0, "learning_rate": 8.576238211208236e-06, "loss": 0.0002, "step": 20650 }, { "epoch": 0.14253485756862852, "grad_norm": 0.0, "learning_rate": 8.575548303862791e-06, "loss": 0.0, "step": 20660 }, { "epoch": 0.14260384830317288, "grad_norm": 0.0, "learning_rate": 8.574858396517348e-06, "loss": 0.0001, "step": 20670 }, { "epoch": 0.14267283903771724, "grad_norm": 0.0, "learning_rate": 8.574168489171904e-06, "loss": 0.0, "step": 20680 }, { "epoch": 0.14274182977226157, "grad_norm": 0.0023758942261338234, "learning_rate": 8.57347858182646e-06, "loss": 0.0002, "step": 20690 }, { "epoch": 0.14281082050680594, "grad_norm": 0.0, "learning_rate": 8.572788674481017e-06, "loss": 0.1063, "step": 20700 }, { "epoch": 0.1428798112413503, "grad_norm": 0.0, "learning_rate": 8.572098767135575e-06, "loss": 0.0, "step": 20710 }, { "epoch": 0.14294880197589463, "grad_norm": 1.6173250116935378e-07, "learning_rate": 8.571408859790132e-06, "loss": 0.0, "step": 20720 }, { "epoch": 0.143017792710439, "grad_norm": 0.0, "learning_rate": 8.570718952444688e-06, "loss": 0.0, "step": 20730 }, { "epoch": 0.14308678344498335, "grad_norm": 0.0, "learning_rate": 8.570029045099245e-06, "loss": 0.0, "step": 20740 }, { "epoch": 0.14315577417952768, "grad_norm": 0.0, "learning_rate": 8.569339137753801e-06, "loss": 0.0003, "step": 20750 }, { "epoch": 0.14322476491407204, "grad_norm": 67.3569564819336, "learning_rate": 8.568649230408358e-06, "loss": 0.0189, "step": 20760 }, { "epoch": 0.14329375564861638, "grad_norm": 2.327387865763697e-10, "learning_rate": 8.567959323062912e-06, "loss": 0.0, "step": 20770 }, { "epoch": 0.14336274638316074, "grad_norm": 0.0, "learning_rate": 8.567269415717469e-06, "loss": 0.0, "step": 20780 }, { "epoch": 0.1434317371177051, "grad_norm": 4.710614575742511e-06, "learning_rate": 8.566579508372025e-06, "loss": 0.0, "step": 20790 }, { "epoch": 0.14350072785224943, "grad_norm": 0.0, "learning_rate": 8.565889601026582e-06, "loss": 0.0, "step": 20800 }, { "epoch": 0.1435697185867938, "grad_norm": 0.0010542693780735135, "learning_rate": 8.565199693681138e-06, "loss": 0.0, "step": 20810 }, { "epoch": 0.14363870932133815, "grad_norm": 0.0, "learning_rate": 8.564509786335697e-06, "loss": 0.0, "step": 20820 }, { "epoch": 0.1437077000558825, "grad_norm": 0.0, "learning_rate": 8.563819878990253e-06, "loss": 0.0, "step": 20830 }, { "epoch": 0.14377669079042685, "grad_norm": 4.367059773358051e-06, "learning_rate": 8.56312997164481e-06, "loss": 0.0006, "step": 20840 }, { "epoch": 0.1438456815249712, "grad_norm": 0.0, "learning_rate": 8.562440064299366e-06, "loss": 0.0, "step": 20850 }, { "epoch": 0.14391467225951554, "grad_norm": 0.0, "learning_rate": 8.561750156953923e-06, "loss": 0.0143, "step": 20860 }, { "epoch": 0.1439836629940599, "grad_norm": 0.0, "learning_rate": 8.561129240343023e-06, "loss": 0.1563, "step": 20870 }, { "epoch": 0.14405265372860424, "grad_norm": 0.000269328651484102, "learning_rate": 8.560439332997578e-06, "loss": 0.0, "step": 20880 }, { "epoch": 0.1441216444631486, "grad_norm": 0.0, "learning_rate": 8.559749425652134e-06, "loss": 0.0, "step": 20890 }, { "epoch": 0.14419063519769296, "grad_norm": 0.0, "learning_rate": 8.559059518306691e-06, "loss": 0.0, "step": 20900 }, { "epoch": 0.1442596259322373, "grad_norm": 0.0, "learning_rate": 8.55836961096125e-06, "loss": 0.0, "step": 20910 }, { "epoch": 0.14432861666678165, "grad_norm": 0.0, "learning_rate": 8.557679703615806e-06, "loss": 0.0, "step": 20920 }, { "epoch": 0.144397607401326, "grad_norm": 0.0, "learning_rate": 8.556989796270362e-06, "loss": 0.0, "step": 20930 }, { "epoch": 0.14446659813587034, "grad_norm": 0.0, "learning_rate": 8.556299888924919e-06, "loss": 0.0, "step": 20940 }, { "epoch": 0.1445355888704147, "grad_norm": 0.00013134221080690622, "learning_rate": 8.555609981579475e-06, "loss": 0.0029, "step": 20950 }, { "epoch": 0.14460457960495907, "grad_norm": 0.0, "learning_rate": 8.554920074234032e-06, "loss": 0.0001, "step": 20960 }, { "epoch": 0.1446735703395034, "grad_norm": 0.1842164695262909, "learning_rate": 8.554230166888588e-06, "loss": 0.7594, "step": 20970 }, { "epoch": 0.14474256107404776, "grad_norm": 0.01287443470209837, "learning_rate": 8.553540259543145e-06, "loss": 0.0, "step": 20980 }, { "epoch": 0.1448115518085921, "grad_norm": 0.0, "learning_rate": 8.5528503521977e-06, "loss": 0.0, "step": 20990 }, { "epoch": 0.14488054254313645, "grad_norm": 0.0, "learning_rate": 8.552160444852256e-06, "loss": 0.0005, "step": 21000 }, { "epoch": 0.14494953327768081, "grad_norm": 0.0, "learning_rate": 8.551470537506812e-06, "loss": 0.0, "step": 21010 }, { "epoch": 0.14501852401222515, "grad_norm": 7.392631687253015e-06, "learning_rate": 8.55078063016137e-06, "loss": 0.0022, "step": 21020 }, { "epoch": 0.1450875147467695, "grad_norm": 0.0, "learning_rate": 8.550090722815927e-06, "loss": 0.0064, "step": 21030 }, { "epoch": 0.14515650548131387, "grad_norm": 4.6237408346527786e-10, "learning_rate": 8.549400815470484e-06, "loss": 0.0, "step": 21040 }, { "epoch": 0.1452254962158582, "grad_norm": 0.3659661114215851, "learning_rate": 8.54871090812504e-06, "loss": 0.0001, "step": 21050 }, { "epoch": 0.14529448695040256, "grad_norm": 9.910163800697092e-10, "learning_rate": 8.548021000779597e-06, "loss": 0.0001, "step": 21060 }, { "epoch": 0.14536347768494692, "grad_norm": 0.0, "learning_rate": 8.547331093434153e-06, "loss": 0.0, "step": 21070 }, { "epoch": 0.14543246841949126, "grad_norm": 3.528532943164464e-06, "learning_rate": 8.54664118608871e-06, "loss": 0.0, "step": 21080 }, { "epoch": 0.14550145915403562, "grad_norm": 0.00026312697445973754, "learning_rate": 8.545951278743266e-06, "loss": 0.0001, "step": 21090 }, { "epoch": 0.14557044988857995, "grad_norm": 6.645448991093872e-08, "learning_rate": 8.545261371397822e-06, "loss": 0.0048, "step": 21100 }, { "epoch": 0.1456394406231243, "grad_norm": 0.0, "learning_rate": 8.544571464052377e-06, "loss": 0.0, "step": 21110 }, { "epoch": 0.14570843135766867, "grad_norm": 0.0008390983566641808, "learning_rate": 8.543881556706934e-06, "loss": 0.0, "step": 21120 }, { "epoch": 0.145777422092213, "grad_norm": 0.0, "learning_rate": 8.543191649361492e-06, "loss": 0.007, "step": 21130 }, { "epoch": 0.14584641282675737, "grad_norm": 0.0, "learning_rate": 8.542501742016048e-06, "loss": 0.0, "step": 21140 }, { "epoch": 0.14591540356130173, "grad_norm": 4.408148015500046e-06, "learning_rate": 8.541811834670605e-06, "loss": 0.0147, "step": 21150 }, { "epoch": 0.14598439429584606, "grad_norm": 4.568366795965062e-10, "learning_rate": 8.541121927325161e-06, "loss": 0.0003, "step": 21160 }, { "epoch": 0.14605338503039042, "grad_norm": 0.01356740202754736, "learning_rate": 8.540432019979718e-06, "loss": 0.0, "step": 21170 }, { "epoch": 0.14612237576493478, "grad_norm": 0.0, "learning_rate": 8.539742112634274e-06, "loss": 0.0, "step": 21180 }, { "epoch": 0.14619136649947911, "grad_norm": 0.0, "learning_rate": 8.539052205288831e-06, "loss": 0.0, "step": 21190 }, { "epoch": 0.14626035723402347, "grad_norm": 0.0, "learning_rate": 8.538362297943387e-06, "loss": 0.0, "step": 21200 }, { "epoch": 0.1463293479685678, "grad_norm": 6.831362406956032e-05, "learning_rate": 8.537672390597944e-06, "loss": 0.0, "step": 21210 }, { "epoch": 0.14639833870311217, "grad_norm": 4.554460701911012e-06, "learning_rate": 8.536982483252499e-06, "loss": 0.1257, "step": 21220 }, { "epoch": 0.14646732943765653, "grad_norm": 0.024184178560972214, "learning_rate": 8.536292575907055e-06, "loss": 0.0, "step": 21230 }, { "epoch": 0.14653632017220086, "grad_norm": 1.8706403352553025e-05, "learning_rate": 8.535602668561613e-06, "loss": 0.0001, "step": 21240 }, { "epoch": 0.14660531090674522, "grad_norm": 10.950136184692383, "learning_rate": 8.53491276121617e-06, "loss": 0.0029, "step": 21250 }, { "epoch": 0.14667430164128958, "grad_norm": 0.0, "learning_rate": 8.534222853870726e-06, "loss": 0.0, "step": 21260 }, { "epoch": 0.14674329237583392, "grad_norm": 0.0, "learning_rate": 8.533532946525283e-06, "loss": 0.0, "step": 21270 }, { "epoch": 0.14681228311037828, "grad_norm": 4.391468022557632e-10, "learning_rate": 8.53284303917984e-06, "loss": 0.0, "step": 21280 }, { "epoch": 0.14688127384492264, "grad_norm": 0.03587803989648819, "learning_rate": 8.532153131834396e-06, "loss": 0.0, "step": 21290 }, { "epoch": 0.14695026457946697, "grad_norm": 0.0002556056424509734, "learning_rate": 8.531463224488952e-06, "loss": 0.0, "step": 21300 }, { "epoch": 0.14701925531401133, "grad_norm": 0.0011786416871473193, "learning_rate": 8.530773317143509e-06, "loss": 0.0, "step": 21310 }, { "epoch": 0.14708824604855567, "grad_norm": 0.0024628208484500647, "learning_rate": 8.530083409798065e-06, "loss": 0.0, "step": 21320 }, { "epoch": 0.14715723678310003, "grad_norm": 4.728093472294859e-10, "learning_rate": 8.52939350245262e-06, "loss": 0.0001, "step": 21330 }, { "epoch": 0.1472262275176444, "grad_norm": 0.0, "learning_rate": 8.528703595107177e-06, "loss": 0.0, "step": 21340 }, { "epoch": 0.14729521825218872, "grad_norm": 0.0, "learning_rate": 8.528013687761735e-06, "loss": 0.0, "step": 21350 }, { "epoch": 0.14736420898673308, "grad_norm": 0.0, "learning_rate": 8.527323780416291e-06, "loss": 0.0839, "step": 21360 }, { "epoch": 0.14743319972127744, "grad_norm": 0.0, "learning_rate": 8.526633873070848e-06, "loss": 0.0, "step": 21370 }, { "epoch": 0.14750219045582177, "grad_norm": 5.044664130871013e-10, "learning_rate": 8.525943965725404e-06, "loss": 0.0, "step": 21380 }, { "epoch": 0.14757118119036614, "grad_norm": 9.083923941943794e-07, "learning_rate": 8.52525405837996e-06, "loss": 0.0, "step": 21390 }, { "epoch": 0.1476401719249105, "grad_norm": 9.69434913713485e-05, "learning_rate": 8.524564151034517e-06, "loss": 0.0, "step": 21400 }, { "epoch": 0.14770916265945483, "grad_norm": 0.0, "learning_rate": 8.523874243689074e-06, "loss": 0.0, "step": 21410 }, { "epoch": 0.1477781533939992, "grad_norm": 1.9816625118255615, "learning_rate": 8.52318433634363e-06, "loss": 0.0003, "step": 21420 }, { "epoch": 0.14784714412854352, "grad_norm": 0.0006800815463066101, "learning_rate": 8.522494428998187e-06, "loss": 0.0, "step": 21430 }, { "epoch": 0.14791613486308788, "grad_norm": 2.3139521090342896e-06, "learning_rate": 8.521804521652743e-06, "loss": 0.0, "step": 21440 }, { "epoch": 0.14798512559763224, "grad_norm": 1.0483397261396021e-08, "learning_rate": 8.521114614307298e-06, "loss": 0.0, "step": 21450 }, { "epoch": 0.14805411633217658, "grad_norm": 0.0, "learning_rate": 8.520424706961856e-06, "loss": 0.0, "step": 21460 }, { "epoch": 0.14812310706672094, "grad_norm": 1.1231684027279698e-08, "learning_rate": 8.519734799616413e-06, "loss": 0.0006, "step": 21470 }, { "epoch": 0.1481920978012653, "grad_norm": 3.9493808223767246e-10, "learning_rate": 8.519044892270969e-06, "loss": 0.0, "step": 21480 }, { "epoch": 0.14826108853580963, "grad_norm": 0.0, "learning_rate": 8.518354984925526e-06, "loss": 0.0, "step": 21490 }, { "epoch": 0.148330079270354, "grad_norm": 0.0, "learning_rate": 8.517665077580082e-06, "loss": 0.0, "step": 21500 }, { "epoch": 0.14839907000489835, "grad_norm": 4.80211981290779e-10, "learning_rate": 8.516975170234639e-06, "loss": 0.0003, "step": 21510 }, { "epoch": 0.1484680607394427, "grad_norm": 1.0378970216606831e-07, "learning_rate": 8.516285262889195e-06, "loss": 0.0, "step": 21520 }, { "epoch": 0.14853705147398705, "grad_norm": 0.0, "learning_rate": 8.515595355543752e-06, "loss": 0.0009, "step": 21530 }, { "epoch": 0.1486060422085314, "grad_norm": 0.0, "learning_rate": 8.514905448198308e-06, "loss": 0.0, "step": 21540 }, { "epoch": 0.14867503294307574, "grad_norm": 0.0, "learning_rate": 8.514215540852865e-06, "loss": 0.0005, "step": 21550 }, { "epoch": 0.1487440236776201, "grad_norm": 0.0, "learning_rate": 8.51352563350742e-06, "loss": 0.0424, "step": 21560 }, { "epoch": 0.14881301441216443, "grad_norm": 0.0, "learning_rate": 8.512835726161978e-06, "loss": 0.2916, "step": 21570 }, { "epoch": 0.1488820051467088, "grad_norm": 0.0, "learning_rate": 8.512145818816534e-06, "loss": 0.0, "step": 21580 }, { "epoch": 0.14895099588125316, "grad_norm": 0.0, "learning_rate": 8.51145591147109e-06, "loss": 0.0, "step": 21590 }, { "epoch": 0.1490199866157975, "grad_norm": 0.0, "learning_rate": 8.510766004125647e-06, "loss": 0.0, "step": 21600 }, { "epoch": 0.14908897735034185, "grad_norm": 0.0, "learning_rate": 8.510076096780203e-06, "loss": 0.0, "step": 21610 }, { "epoch": 0.1491579680848862, "grad_norm": 2.675690302567091e-05, "learning_rate": 8.50938618943476e-06, "loss": 0.0, "step": 21620 }, { "epoch": 0.14922695881943054, "grad_norm": 0.0, "learning_rate": 8.508696282089316e-06, "loss": 0.0, "step": 21630 }, { "epoch": 0.1492959495539749, "grad_norm": 0.0017321071354672313, "learning_rate": 8.508006374743873e-06, "loss": 0.0, "step": 21640 }, { "epoch": 0.14936494028851927, "grad_norm": 0.0, "learning_rate": 8.50731646739843e-06, "loss": 0.0, "step": 21650 }, { "epoch": 0.1494339310230636, "grad_norm": 0.00024256906181108207, "learning_rate": 8.506626560052986e-06, "loss": 0.0, "step": 21660 }, { "epoch": 0.14950292175760796, "grad_norm": 0.0, "learning_rate": 8.50593665270754e-06, "loss": 0.0, "step": 21670 }, { "epoch": 0.1495719124921523, "grad_norm": 0.19439753890037537, "learning_rate": 8.505246745362099e-06, "loss": 0.0, "step": 21680 }, { "epoch": 0.14964090322669665, "grad_norm": 0.0, "learning_rate": 8.504556838016655e-06, "loss": 0.003, "step": 21690 }, { "epoch": 0.14970989396124101, "grad_norm": 0.0, "learning_rate": 8.503866930671212e-06, "loss": 0.0, "step": 21700 }, { "epoch": 0.14977888469578535, "grad_norm": 0.0, "learning_rate": 8.503177023325768e-06, "loss": 0.0, "step": 21710 }, { "epoch": 0.1498478754303297, "grad_norm": 0.0, "learning_rate": 8.502487115980325e-06, "loss": 0.0, "step": 21720 }, { "epoch": 0.14991686616487407, "grad_norm": 0.0, "learning_rate": 8.501797208634881e-06, "loss": 0.0, "step": 21730 }, { "epoch": 0.1499858568994184, "grad_norm": 0.0, "learning_rate": 8.501107301289438e-06, "loss": 0.0, "step": 21740 }, { "epoch": 0.15005484763396276, "grad_norm": 2.5094188458751887e-05, "learning_rate": 8.500417393943994e-06, "loss": 0.0011, "step": 21750 }, { "epoch": 0.15012383836850712, "grad_norm": 2.246944745820656e-07, "learning_rate": 8.49972748659855e-06, "loss": 0.0, "step": 21760 }, { "epoch": 0.15019282910305146, "grad_norm": 0.0, "learning_rate": 8.499037579253107e-06, "loss": 0.0003, "step": 21770 }, { "epoch": 0.15026181983759582, "grad_norm": 0.0, "learning_rate": 8.498347671907662e-06, "loss": 0.0, "step": 21780 }, { "epoch": 0.15033081057214015, "grad_norm": 0.1592770665884018, "learning_rate": 8.49765776456222e-06, "loss": 0.0, "step": 21790 }, { "epoch": 0.1503998013066845, "grad_norm": 0.0, "learning_rate": 8.496967857216777e-06, "loss": 0.0661, "step": 21800 }, { "epoch": 0.15046879204122887, "grad_norm": 2.2893316745758057, "learning_rate": 8.496277949871333e-06, "loss": 0.0045, "step": 21810 }, { "epoch": 0.1505377827757732, "grad_norm": 8.9533195932745e-06, "learning_rate": 8.49558804252589e-06, "loss": 0.0015, "step": 21820 }, { "epoch": 0.15060677351031757, "grad_norm": 0.00012073614198016003, "learning_rate": 8.494898135180446e-06, "loss": 0.0, "step": 21830 }, { "epoch": 0.15067576424486193, "grad_norm": 0.0, "learning_rate": 8.494208227835003e-06, "loss": 0.0007, "step": 21840 }, { "epoch": 0.15074475497940626, "grad_norm": 0.0, "learning_rate": 8.49351832048956e-06, "loss": 0.0, "step": 21850 }, { "epoch": 0.15081374571395062, "grad_norm": 9.611086170480121e-06, "learning_rate": 8.492828413144116e-06, "loss": 0.0007, "step": 21860 }, { "epoch": 0.15088273644849498, "grad_norm": 0.0, "learning_rate": 8.492138505798672e-06, "loss": 0.0917, "step": 21870 }, { "epoch": 0.1509517271830393, "grad_norm": 1.0724800176831195e-06, "learning_rate": 8.491448598453229e-06, "loss": 0.0019, "step": 21880 }, { "epoch": 0.15102071791758367, "grad_norm": 0.0, "learning_rate": 8.490758691107785e-06, "loss": 0.0, "step": 21890 }, { "epoch": 0.151089708652128, "grad_norm": 2.9375782105489634e-05, "learning_rate": 8.490068783762342e-06, "loss": 0.0, "step": 21900 }, { "epoch": 0.15115869938667237, "grad_norm": 4.700917988209596e-10, "learning_rate": 8.489378876416898e-06, "loss": 0.0, "step": 21910 }, { "epoch": 0.15122769012121673, "grad_norm": 9.998324390636526e-10, "learning_rate": 8.488688969071455e-06, "loss": 0.0, "step": 21920 }, { "epoch": 0.15129668085576106, "grad_norm": 0.0, "learning_rate": 8.487999061726011e-06, "loss": 0.1684, "step": 21930 }, { "epoch": 0.15136567159030542, "grad_norm": 0.0, "learning_rate": 8.487309154380568e-06, "loss": 0.0, "step": 21940 }, { "epoch": 0.15143466232484978, "grad_norm": 8.427773479091627e-10, "learning_rate": 8.486619247035124e-06, "loss": 0.0, "step": 21950 }, { "epoch": 0.15150365305939412, "grad_norm": 5.873551845550537, "learning_rate": 8.48592933968968e-06, "loss": 0.0007, "step": 21960 }, { "epoch": 0.15157264379393848, "grad_norm": 0.0, "learning_rate": 8.485239432344237e-06, "loss": 0.0, "step": 21970 }, { "epoch": 0.15164163452848284, "grad_norm": 0.0, "learning_rate": 8.484549524998794e-06, "loss": 0.0, "step": 21980 }, { "epoch": 0.15171062526302717, "grad_norm": 0.0, "learning_rate": 8.48385961765335e-06, "loss": 0.0, "step": 21990 }, { "epoch": 0.15177961599757153, "grad_norm": 0.0, "learning_rate": 8.483169710307907e-06, "loss": 0.0, "step": 22000 }, { "epoch": 0.15184860673211587, "grad_norm": 0.0, "learning_rate": 8.482479802962463e-06, "loss": 0.0, "step": 22010 }, { "epoch": 0.15191759746666023, "grad_norm": 0.0, "learning_rate": 8.48178989561702e-06, "loss": 0.0, "step": 22020 }, { "epoch": 0.1519865882012046, "grad_norm": 4.358558177947998, "learning_rate": 8.481099988271576e-06, "loss": 0.0009, "step": 22030 }, { "epoch": 0.15205557893574892, "grad_norm": 2.074382791761309e-06, "learning_rate": 8.480410080926133e-06, "loss": 0.0, "step": 22040 }, { "epoch": 0.15212456967029328, "grad_norm": 0.0, "learning_rate": 8.479720173580689e-06, "loss": 0.0, "step": 22050 }, { "epoch": 0.15219356040483764, "grad_norm": 0.0, "learning_rate": 8.479030266235246e-06, "loss": 0.0, "step": 22060 }, { "epoch": 0.15226255113938197, "grad_norm": 0.0, "learning_rate": 8.478340358889802e-06, "loss": 0.0103, "step": 22070 }, { "epoch": 0.15233154187392633, "grad_norm": 0.0, "learning_rate": 8.477650451544359e-06, "loss": 0.0906, "step": 22080 }, { "epoch": 0.1524005326084707, "grad_norm": 8.175796210707631e-06, "learning_rate": 8.476960544198915e-06, "loss": 0.0007, "step": 22090 }, { "epoch": 0.15246952334301503, "grad_norm": 0.0, "learning_rate": 8.476270636853472e-06, "loss": 0.2031, "step": 22100 }, { "epoch": 0.1525385140775594, "grad_norm": 0.0, "learning_rate": 8.475580729508028e-06, "loss": 0.0, "step": 22110 }, { "epoch": 0.15260750481210372, "grad_norm": 0.0, "learning_rate": 8.474890822162584e-06, "loss": 0.1562, "step": 22120 }, { "epoch": 0.15267649554664808, "grad_norm": 0.4623240828514099, "learning_rate": 8.474200914817141e-06, "loss": 0.0001, "step": 22130 }, { "epoch": 0.15274548628119244, "grad_norm": 0.0, "learning_rate": 8.473511007471697e-06, "loss": 0.0447, "step": 22140 }, { "epoch": 0.15281447701573678, "grad_norm": 0.0, "learning_rate": 8.472821100126254e-06, "loss": 0.0, "step": 22150 }, { "epoch": 0.15288346775028114, "grad_norm": 29.423892974853516, "learning_rate": 8.47213119278081e-06, "loss": 0.0059, "step": 22160 }, { "epoch": 0.1529524584848255, "grad_norm": 0.0, "learning_rate": 8.471441285435367e-06, "loss": 0.0, "step": 22170 }, { "epoch": 0.15302144921936983, "grad_norm": 0.0, "learning_rate": 8.470751378089923e-06, "loss": 0.0637, "step": 22180 }, { "epoch": 0.1530904399539142, "grad_norm": 0.0, "learning_rate": 8.47006147074448e-06, "loss": 0.0, "step": 22190 }, { "epoch": 0.15315943068845855, "grad_norm": 0.0, "learning_rate": 8.469371563399036e-06, "loss": 0.002, "step": 22200 }, { "epoch": 0.1532284214230029, "grad_norm": 0.06562628597021103, "learning_rate": 8.468750646788137e-06, "loss": 0.8662, "step": 22210 }, { "epoch": 0.15329741215754725, "grad_norm": 4.369831458461704e-06, "learning_rate": 8.468060739442694e-06, "loss": 0.0, "step": 22220 }, { "epoch": 0.15336640289209158, "grad_norm": 0.0, "learning_rate": 8.46737083209725e-06, "loss": 0.0013, "step": 22230 }, { "epoch": 0.15343539362663594, "grad_norm": 1.7202839899255196e-06, "learning_rate": 8.466680924751807e-06, "loss": 0.021, "step": 22240 }, { "epoch": 0.1535043843611803, "grad_norm": 0.0, "learning_rate": 8.465991017406363e-06, "loss": 0.0, "step": 22250 }, { "epoch": 0.15357337509572463, "grad_norm": 1.1882375478744507, "learning_rate": 8.46530111006092e-06, "loss": 0.0002, "step": 22260 }, { "epoch": 0.153642365830269, "grad_norm": 4.670365427728029e-09, "learning_rate": 8.464611202715476e-06, "loss": 0.0045, "step": 22270 }, { "epoch": 0.15371135656481336, "grad_norm": 0.0, "learning_rate": 8.463921295370033e-06, "loss": 0.0, "step": 22280 }, { "epoch": 0.1537803472993577, "grad_norm": 0.0, "learning_rate": 8.463231388024589e-06, "loss": 0.0, "step": 22290 }, { "epoch": 0.15384933803390205, "grad_norm": 0.0, "learning_rate": 8.462541480679146e-06, "loss": 0.0289, "step": 22300 }, { "epoch": 0.1539183287684464, "grad_norm": 0.0, "learning_rate": 8.461851573333702e-06, "loss": 0.1851, "step": 22310 }, { "epoch": 0.15398731950299074, "grad_norm": 0.0, "learning_rate": 8.461161665988258e-06, "loss": 0.0, "step": 22320 }, { "epoch": 0.1540563102375351, "grad_norm": 0.0, "learning_rate": 8.460471758642815e-06, "loss": 0.0, "step": 22330 }, { "epoch": 0.15412530097207944, "grad_norm": 1.896715366456192e-05, "learning_rate": 8.459781851297371e-06, "loss": 0.0, "step": 22340 }, { "epoch": 0.1541942917066238, "grad_norm": 0.0, "learning_rate": 8.459091943951928e-06, "loss": 0.0, "step": 22350 }, { "epoch": 0.15426328244116816, "grad_norm": 4.8361735343933105, "learning_rate": 8.458402036606484e-06, "loss": 0.0007, "step": 22360 }, { "epoch": 0.1543322731757125, "grad_norm": 0.0, "learning_rate": 8.457712129261041e-06, "loss": 0.0, "step": 22370 }, { "epoch": 0.15440126391025685, "grad_norm": 0.0, "learning_rate": 8.457022221915597e-06, "loss": 0.0001, "step": 22380 }, { "epoch": 0.1544702546448012, "grad_norm": 0.0, "learning_rate": 8.456332314570154e-06, "loss": 0.0004, "step": 22390 }, { "epoch": 0.15453924537934555, "grad_norm": 0.0, "learning_rate": 8.45564240722471e-06, "loss": 0.0001, "step": 22400 }, { "epoch": 0.1546082361138899, "grad_norm": 0.0, "learning_rate": 8.454952499879267e-06, "loss": 0.0, "step": 22410 }, { "epoch": 0.15467722684843427, "grad_norm": 0.8087576627731323, "learning_rate": 8.454262592533823e-06, "loss": 0.0001, "step": 22420 }, { "epoch": 0.1547462175829786, "grad_norm": 0.0, "learning_rate": 8.45357268518838e-06, "loss": 0.0, "step": 22430 }, { "epoch": 0.15481520831752296, "grad_norm": 0.0, "learning_rate": 8.452882777842936e-06, "loss": 0.0002, "step": 22440 }, { "epoch": 0.1548841990520673, "grad_norm": 0.0, "learning_rate": 8.452192870497493e-06, "loss": 0.0, "step": 22450 }, { "epoch": 0.15495318978661166, "grad_norm": 0.0, "learning_rate": 8.45150296315205e-06, "loss": 0.0332, "step": 22460 }, { "epoch": 0.15502218052115602, "grad_norm": 0.0, "learning_rate": 8.450813055806606e-06, "loss": 0.0, "step": 22470 }, { "epoch": 0.15509117125570035, "grad_norm": 0.0, "learning_rate": 8.450123148461162e-06, "loss": 0.0, "step": 22480 }, { "epoch": 0.1551601619902447, "grad_norm": 0.00048648554366081953, "learning_rate": 8.449433241115719e-06, "loss": 0.0, "step": 22490 }, { "epoch": 0.15522915272478907, "grad_norm": 0.0, "learning_rate": 8.448743333770275e-06, "loss": 0.0, "step": 22500 }, { "epoch": 0.1552981434593334, "grad_norm": 0.0, "learning_rate": 8.448053426424832e-06, "loss": 0.0001, "step": 22510 }, { "epoch": 0.15536713419387777, "grad_norm": 8.714773116480501e-07, "learning_rate": 8.447363519079388e-06, "loss": 0.0, "step": 22520 }, { "epoch": 0.15543612492842213, "grad_norm": 0.0, "learning_rate": 8.446673611733945e-06, "loss": 0.0, "step": 22530 }, { "epoch": 0.15550511566296646, "grad_norm": 0.0, "learning_rate": 8.445983704388501e-06, "loss": 0.1804, "step": 22540 }, { "epoch": 0.15557410639751082, "grad_norm": 0.0, "learning_rate": 8.445293797043058e-06, "loss": 0.0001, "step": 22550 }, { "epoch": 0.15564309713205515, "grad_norm": 0.0, "learning_rate": 8.444603889697614e-06, "loss": 0.0, "step": 22560 }, { "epoch": 0.1557120878665995, "grad_norm": 0.0, "learning_rate": 8.44391398235217e-06, "loss": 0.0, "step": 22570 }, { "epoch": 0.15578107860114387, "grad_norm": 0.0, "learning_rate": 8.443224075006727e-06, "loss": 0.0, "step": 22580 }, { "epoch": 0.1558500693356882, "grad_norm": 0.0, "learning_rate": 8.442534167661284e-06, "loss": 0.0094, "step": 22590 }, { "epoch": 0.15591906007023257, "grad_norm": 0.0, "learning_rate": 8.44184426031584e-06, "loss": 0.0, "step": 22600 }, { "epoch": 0.15598805080477693, "grad_norm": 0.0, "learning_rate": 8.441154352970397e-06, "loss": 0.0, "step": 22610 }, { "epoch": 0.15605704153932126, "grad_norm": 0.0, "learning_rate": 8.440464445624953e-06, "loss": 0.0, "step": 22620 }, { "epoch": 0.15612603227386562, "grad_norm": 0.37789469957351685, "learning_rate": 8.43977453827951e-06, "loss": 0.0013, "step": 22630 }, { "epoch": 0.15619502300840998, "grad_norm": 2.2981808456279396e-07, "learning_rate": 8.439084630934066e-06, "loss": 0.0, "step": 22640 }, { "epoch": 0.15626401374295432, "grad_norm": 0.0, "learning_rate": 8.438394723588623e-06, "loss": 0.0, "step": 22650 }, { "epoch": 0.15633300447749868, "grad_norm": 0.0, "learning_rate": 8.437704816243179e-06, "loss": 0.0003, "step": 22660 }, { "epoch": 0.156401995212043, "grad_norm": 6.841717549832538e-05, "learning_rate": 8.437014908897736e-06, "loss": 0.0, "step": 22670 }, { "epoch": 0.15647098594658737, "grad_norm": 0.0, "learning_rate": 8.436325001552292e-06, "loss": 0.3749, "step": 22680 }, { "epoch": 0.15653997668113173, "grad_norm": 0.0, "learning_rate": 8.435635094206849e-06, "loss": 0.0003, "step": 22690 }, { "epoch": 0.15660896741567606, "grad_norm": 0.0, "learning_rate": 8.434945186861405e-06, "loss": 0.0001, "step": 22700 }, { "epoch": 0.15667795815022043, "grad_norm": 0.0, "learning_rate": 8.434255279515962e-06, "loss": 0.0, "step": 22710 }, { "epoch": 0.1567469488847648, "grad_norm": 0.0, "learning_rate": 8.433565372170518e-06, "loss": 0.0, "step": 22720 }, { "epoch": 0.15681593961930912, "grad_norm": 0.0, "learning_rate": 8.432875464825075e-06, "loss": 0.0008, "step": 22730 }, { "epoch": 0.15688493035385348, "grad_norm": 0.0, "learning_rate": 8.432185557479631e-06, "loss": 0.0, "step": 22740 }, { "epoch": 0.15695392108839784, "grad_norm": 0.0, "learning_rate": 8.431495650134188e-06, "loss": 0.0, "step": 22750 }, { "epoch": 0.15702291182294217, "grad_norm": 0.0, "learning_rate": 8.430805742788744e-06, "loss": 0.0001, "step": 22760 }, { "epoch": 0.15709190255748653, "grad_norm": 1.8915983446277096e-06, "learning_rate": 8.4301158354433e-06, "loss": 0.0, "step": 22770 }, { "epoch": 0.15716089329203087, "grad_norm": 0.0, "learning_rate": 8.429425928097857e-06, "loss": 0.0186, "step": 22780 }, { "epoch": 0.15722988402657523, "grad_norm": 3.625578756327741e-06, "learning_rate": 8.428736020752414e-06, "loss": 0.0002, "step": 22790 }, { "epoch": 0.1572988747611196, "grad_norm": 1.0459139110352567e-09, "learning_rate": 8.42804611340697e-06, "loss": 0.0, "step": 22800 }, { "epoch": 0.15736786549566392, "grad_norm": 2.289341449737549, "learning_rate": 8.427356206061527e-06, "loss": 0.0007, "step": 22810 }, { "epoch": 0.15743685623020828, "grad_norm": 0.0, "learning_rate": 8.426666298716083e-06, "loss": 0.0003, "step": 22820 }, { "epoch": 0.15750584696475264, "grad_norm": 0.0, "learning_rate": 8.42597639137064e-06, "loss": 0.0, "step": 22830 }, { "epoch": 0.15757483769929698, "grad_norm": 1.2251314407407676e-09, "learning_rate": 8.425286484025196e-06, "loss": 0.0, "step": 22840 }, { "epoch": 0.15764382843384134, "grad_norm": 0.0, "learning_rate": 8.424596576679752e-06, "loss": 0.0, "step": 22850 }, { "epoch": 0.1577128191683857, "grad_norm": 0.0, "learning_rate": 8.423906669334309e-06, "loss": 0.0, "step": 22860 }, { "epoch": 0.15778180990293003, "grad_norm": 0.0, "learning_rate": 8.423216761988865e-06, "loss": 0.0, "step": 22870 }, { "epoch": 0.1578508006374744, "grad_norm": 1.9375049760128604e-06, "learning_rate": 8.422526854643422e-06, "loss": 0.001, "step": 22880 }, { "epoch": 0.15791979137201873, "grad_norm": 0.0, "learning_rate": 8.421836947297978e-06, "loss": 0.1014, "step": 22890 }, { "epoch": 0.15798878210656309, "grad_norm": 0.0, "learning_rate": 8.421147039952535e-06, "loss": 0.0, "step": 22900 }, { "epoch": 0.15805777284110745, "grad_norm": 0.17923089861869812, "learning_rate": 8.420457132607091e-06, "loss": 0.0, "step": 22910 }, { "epoch": 0.15812676357565178, "grad_norm": 0.013974872417747974, "learning_rate": 8.419767225261648e-06, "loss": 0.0, "step": 22920 }, { "epoch": 0.15819575431019614, "grad_norm": 0.0, "learning_rate": 8.419077317916204e-06, "loss": 0.0, "step": 22930 }, { "epoch": 0.1582647450447405, "grad_norm": 0.0, "learning_rate": 8.418387410570761e-06, "loss": 0.0207, "step": 22940 }, { "epoch": 0.15833373577928483, "grad_norm": 0.0, "learning_rate": 8.417697503225317e-06, "loss": 0.0, "step": 22950 }, { "epoch": 0.1584027265138292, "grad_norm": 9.797831435065518e-10, "learning_rate": 8.417007595879874e-06, "loss": 0.0, "step": 22960 }, { "epoch": 0.15847171724837356, "grad_norm": 0.0, "learning_rate": 8.41631768853443e-06, "loss": 0.0001, "step": 22970 }, { "epoch": 0.1585407079829179, "grad_norm": 0.0, "learning_rate": 8.415627781188987e-06, "loss": 0.0, "step": 22980 }, { "epoch": 0.15860969871746225, "grad_norm": 5.27865813637618e-06, "learning_rate": 8.414937873843543e-06, "loss": 0.0, "step": 22990 }, { "epoch": 0.15867868945200658, "grad_norm": 0.0015955582493916154, "learning_rate": 8.4142479664981e-06, "loss": 0.0, "step": 23000 }, { "epoch": 0.15874768018655094, "grad_norm": 2.0678694312437074e-09, "learning_rate": 8.413558059152656e-06, "loss": 0.0001, "step": 23010 }, { "epoch": 0.1588166709210953, "grad_norm": 0.0, "learning_rate": 8.412868151807213e-06, "loss": 0.0, "step": 23020 }, { "epoch": 0.15888566165563964, "grad_norm": 7.874262337281834e-06, "learning_rate": 8.41217824446177e-06, "loss": 0.0, "step": 23030 }, { "epoch": 0.158954652390184, "grad_norm": 0.0, "learning_rate": 8.411488337116326e-06, "loss": 0.0, "step": 23040 }, { "epoch": 0.15902364312472836, "grad_norm": 0.0, "learning_rate": 8.410798429770882e-06, "loss": 0.0, "step": 23050 }, { "epoch": 0.1590926338592727, "grad_norm": 0.0, "learning_rate": 8.410108522425439e-06, "loss": 0.0, "step": 23060 }, { "epoch": 0.15916162459381705, "grad_norm": 0.00026652630185708404, "learning_rate": 8.409418615079995e-06, "loss": 0.0058, "step": 23070 }, { "epoch": 0.1592306153283614, "grad_norm": 0.0, "learning_rate": 8.408728707734552e-06, "loss": 0.0, "step": 23080 }, { "epoch": 0.15929960606290575, "grad_norm": 0.0004675178788602352, "learning_rate": 8.408038800389108e-06, "loss": 0.0001, "step": 23090 }, { "epoch": 0.1593685967974501, "grad_norm": 0.0, "learning_rate": 8.407348893043665e-06, "loss": 0.0, "step": 23100 }, { "epoch": 0.15943758753199444, "grad_norm": 0.0, "learning_rate": 8.406658985698221e-06, "loss": 0.0065, "step": 23110 }, { "epoch": 0.1595065782665388, "grad_norm": 0.0, "learning_rate": 8.406038069087322e-06, "loss": 0.8562, "step": 23120 }, { "epoch": 0.15957556900108316, "grad_norm": 0.0008137800032272935, "learning_rate": 8.405348161741878e-06, "loss": 0.0, "step": 23130 }, { "epoch": 0.1596445597356275, "grad_norm": 0.0, "learning_rate": 8.404658254396435e-06, "loss": 0.0005, "step": 23140 }, { "epoch": 0.15971355047017186, "grad_norm": 0.0, "learning_rate": 8.403968347050991e-06, "loss": 0.0, "step": 23150 }, { "epoch": 0.15978254120471622, "grad_norm": 586.30029296875, "learning_rate": 8.403278439705548e-06, "loss": 0.1831, "step": 23160 }, { "epoch": 0.15985153193926055, "grad_norm": 0.0, "learning_rate": 8.402588532360104e-06, "loss": 0.001, "step": 23170 }, { "epoch": 0.1599205226738049, "grad_norm": 0.0002095679083140567, "learning_rate": 8.401898625014663e-06, "loss": 0.0, "step": 23180 }, { "epoch": 0.15998951340834927, "grad_norm": 0.0, "learning_rate": 8.401208717669217e-06, "loss": 0.0, "step": 23190 }, { "epoch": 0.1600585041428936, "grad_norm": 7.866688974900171e-05, "learning_rate": 8.400518810323774e-06, "loss": 0.0007, "step": 23200 }, { "epoch": 0.16012749487743796, "grad_norm": 0.00010515480971662328, "learning_rate": 8.39982890297833e-06, "loss": 0.0, "step": 23210 }, { "epoch": 0.1601964856119823, "grad_norm": 0.0, "learning_rate": 8.399138995632887e-06, "loss": 0.0, "step": 23220 }, { "epoch": 0.16026547634652666, "grad_norm": 0.148875892162323, "learning_rate": 8.398449088287443e-06, "loss": 0.0, "step": 23230 }, { "epoch": 0.16033446708107102, "grad_norm": 0.0, "learning_rate": 8.397759180942e-06, "loss": 0.1063, "step": 23240 }, { "epoch": 0.16040345781561535, "grad_norm": 0.0, "learning_rate": 8.397069273596556e-06, "loss": 0.0, "step": 23250 }, { "epoch": 0.1604724485501597, "grad_norm": 0.0, "learning_rate": 8.396379366251113e-06, "loss": 0.0, "step": 23260 }, { "epoch": 0.16054143928470407, "grad_norm": 0.0, "learning_rate": 8.39568945890567e-06, "loss": 0.0, "step": 23270 }, { "epoch": 0.1606104300192484, "grad_norm": 0.0, "learning_rate": 8.394999551560226e-06, "loss": 0.0019, "step": 23280 }, { "epoch": 0.16067942075379277, "grad_norm": 0.0, "learning_rate": 8.394309644214784e-06, "loss": 0.0, "step": 23290 }, { "epoch": 0.16074841148833713, "grad_norm": 0.0, "learning_rate": 8.393619736869339e-06, "loss": 0.0, "step": 23300 }, { "epoch": 0.16081740222288146, "grad_norm": 3.7193925717105003e-09, "learning_rate": 8.392929829523895e-06, "loss": 0.0, "step": 23310 }, { "epoch": 0.16088639295742582, "grad_norm": 0.0, "learning_rate": 8.392239922178452e-06, "loss": 0.0, "step": 23320 }, { "epoch": 0.16095538369197016, "grad_norm": 0.0, "learning_rate": 8.391550014833008e-06, "loss": 0.0, "step": 23330 }, { "epoch": 0.16102437442651452, "grad_norm": 0.0, "learning_rate": 8.390860107487565e-06, "loss": 0.0, "step": 23340 }, { "epoch": 0.16109336516105888, "grad_norm": 0.0, "learning_rate": 8.390170200142121e-06, "loss": 0.0, "step": 23350 }, { "epoch": 0.1611623558956032, "grad_norm": 0.0, "learning_rate": 8.389480292796678e-06, "loss": 0.1134, "step": 23360 }, { "epoch": 0.16123134663014757, "grad_norm": 0.0, "learning_rate": 8.388790385451234e-06, "loss": 0.0134, "step": 23370 }, { "epoch": 0.16130033736469193, "grad_norm": 0.0, "learning_rate": 8.38810047810579e-06, "loss": 0.0152, "step": 23380 }, { "epoch": 0.16136932809923626, "grad_norm": 0.03064868599176407, "learning_rate": 8.387410570760347e-06, "loss": 0.007, "step": 23390 }, { "epoch": 0.16143831883378063, "grad_norm": 0.0, "learning_rate": 8.386720663414905e-06, "loss": 0.0, "step": 23400 }, { "epoch": 0.16150730956832499, "grad_norm": 0.0, "learning_rate": 8.38603075606946e-06, "loss": 0.0, "step": 23410 }, { "epoch": 0.16157630030286932, "grad_norm": 0.0, "learning_rate": 8.385340848724017e-06, "loss": 0.0, "step": 23420 }, { "epoch": 0.16164529103741368, "grad_norm": 0.0, "learning_rate": 8.384650941378573e-06, "loss": 0.0002, "step": 23430 }, { "epoch": 0.161714281771958, "grad_norm": 9.744911949383095e-05, "learning_rate": 8.38396103403313e-06, "loss": 0.0001, "step": 23440 }, { "epoch": 0.16178327250650237, "grad_norm": 0.0, "learning_rate": 8.383271126687686e-06, "loss": 0.0038, "step": 23450 }, { "epoch": 0.16185226324104673, "grad_norm": 0.0, "learning_rate": 8.382581219342243e-06, "loss": 0.0, "step": 23460 }, { "epoch": 0.16192125397559107, "grad_norm": 0.0, "learning_rate": 8.381891311996799e-06, "loss": 0.0006, "step": 23470 }, { "epoch": 0.16199024471013543, "grad_norm": 0.0, "learning_rate": 8.381201404651356e-06, "loss": 0.0001, "step": 23480 }, { "epoch": 0.1620592354446798, "grad_norm": 53.06589889526367, "learning_rate": 8.380511497305912e-06, "loss": 0.0054, "step": 23490 }, { "epoch": 0.16212822617922412, "grad_norm": 7.495381851185812e-06, "learning_rate": 8.379821589960469e-06, "loss": 0.0027, "step": 23500 }, { "epoch": 0.16219721691376848, "grad_norm": 0.0, "learning_rate": 8.379131682615027e-06, "loss": 0.0009, "step": 23510 }, { "epoch": 0.16226620764831284, "grad_norm": 0.0, "learning_rate": 8.378441775269583e-06, "loss": 1.0734, "step": 23520 }, { "epoch": 0.16233519838285718, "grad_norm": 0.0, "learning_rate": 8.377751867924138e-06, "loss": 0.0, "step": 23530 }, { "epoch": 0.16240418911740154, "grad_norm": 0.0, "learning_rate": 8.377061960578694e-06, "loss": 0.0, "step": 23540 }, { "epoch": 0.16247317985194587, "grad_norm": 0.0004498373600654304, "learning_rate": 8.376372053233251e-06, "loss": 0.0, "step": 23550 }, { "epoch": 0.16254217058649023, "grad_norm": 0.0, "learning_rate": 8.375682145887807e-06, "loss": 0.0, "step": 23560 }, { "epoch": 0.1626111613210346, "grad_norm": 0.0, "learning_rate": 8.374992238542364e-06, "loss": 0.0004, "step": 23570 }, { "epoch": 0.16268015205557892, "grad_norm": 0.0, "learning_rate": 8.37430233119692e-06, "loss": 0.425, "step": 23580 }, { "epoch": 0.16274914279012329, "grad_norm": 0.0, "learning_rate": 8.373612423851477e-06, "loss": 0.0, "step": 23590 }, { "epoch": 0.16281813352466765, "grad_norm": 0.0, "learning_rate": 8.372922516506033e-06, "loss": 0.0, "step": 23600 }, { "epoch": 0.16288712425921198, "grad_norm": 0.0, "learning_rate": 8.37223260916059e-06, "loss": 0.0, "step": 23610 }, { "epoch": 0.16295611499375634, "grad_norm": 0.0, "learning_rate": 8.371542701815148e-06, "loss": 0.0033, "step": 23620 }, { "epoch": 0.1630251057283007, "grad_norm": 7.632367487531155e-05, "learning_rate": 8.370852794469705e-06, "loss": 0.0, "step": 23630 }, { "epoch": 0.16309409646284503, "grad_norm": 0.0, "learning_rate": 8.37016288712426e-06, "loss": 0.0004, "step": 23640 }, { "epoch": 0.1631630871973894, "grad_norm": 0.0, "learning_rate": 8.369472979778816e-06, "loss": 0.0, "step": 23650 }, { "epoch": 0.16323207793193373, "grad_norm": 0.0, "learning_rate": 8.368783072433372e-06, "loss": 0.0, "step": 23660 }, { "epoch": 0.1633010686664781, "grad_norm": 0.0, "learning_rate": 8.368093165087929e-06, "loss": 0.0024, "step": 23670 }, { "epoch": 0.16337005940102245, "grad_norm": 0.0, "learning_rate": 8.367403257742485e-06, "loss": 0.0, "step": 23680 }, { "epoch": 0.16343905013556678, "grad_norm": 0.0, "learning_rate": 8.366713350397042e-06, "loss": 0.0, "step": 23690 }, { "epoch": 0.16350804087011114, "grad_norm": 0.024429699406027794, "learning_rate": 8.366023443051598e-06, "loss": 0.1677, "step": 23700 }, { "epoch": 0.1635770316046555, "grad_norm": 1.4550627724929655e-07, "learning_rate": 8.365333535706155e-06, "loss": 0.0002, "step": 23710 }, { "epoch": 0.16364602233919984, "grad_norm": 0.0, "learning_rate": 8.364643628360711e-06, "loss": 0.0281, "step": 23720 }, { "epoch": 0.1637150130737442, "grad_norm": 0.003637194400653243, "learning_rate": 8.36395372101527e-06, "loss": 0.0, "step": 23730 }, { "epoch": 0.16378400380828856, "grad_norm": 0.0, "learning_rate": 8.363263813669826e-06, "loss": 0.0, "step": 23740 }, { "epoch": 0.1638529945428329, "grad_norm": 1.1732846161294219e-07, "learning_rate": 8.36257390632438e-06, "loss": 0.0, "step": 23750 }, { "epoch": 0.16392198527737725, "grad_norm": 0.12413500994443893, "learning_rate": 8.361883998978937e-06, "loss": 0.0, "step": 23760 }, { "epoch": 0.16399097601192159, "grad_norm": 0.0, "learning_rate": 8.361194091633494e-06, "loss": 0.003, "step": 23770 }, { "epoch": 0.16405996674646595, "grad_norm": 3.282538414001465, "learning_rate": 8.36050418428805e-06, "loss": 0.0004, "step": 23780 }, { "epoch": 0.1641289574810103, "grad_norm": 0.0, "learning_rate": 8.359814276942607e-06, "loss": 0.0, "step": 23790 }, { "epoch": 0.16419794821555464, "grad_norm": 0.0, "learning_rate": 8.359124369597163e-06, "loss": 0.0006, "step": 23800 }, { "epoch": 0.164266938950099, "grad_norm": 0.0, "learning_rate": 8.358503452986264e-06, "loss": 0.399, "step": 23810 }, { "epoch": 0.16433592968464336, "grad_norm": 0.0, "learning_rate": 8.357813545640822e-06, "loss": 0.0001, "step": 23820 }, { "epoch": 0.1644049204191877, "grad_norm": 0.0, "learning_rate": 8.357123638295379e-06, "loss": 0.0, "step": 23830 }, { "epoch": 0.16447391115373206, "grad_norm": 0.0, "learning_rate": 8.356433730949935e-06, "loss": 0.0006, "step": 23840 }, { "epoch": 0.16454290188827642, "grad_norm": 0.0, "learning_rate": 8.355743823604492e-06, "loss": 0.0001, "step": 23850 }, { "epoch": 0.16461189262282075, "grad_norm": 0.0, "learning_rate": 8.355053916259046e-06, "loss": 0.0002, "step": 23860 }, { "epoch": 0.1646808833573651, "grad_norm": 0.0, "learning_rate": 8.354364008913603e-06, "loss": 0.0, "step": 23870 }, { "epoch": 0.16474987409190944, "grad_norm": 0.0, "learning_rate": 8.35367410156816e-06, "loss": 0.021, "step": 23880 }, { "epoch": 0.1648188648264538, "grad_norm": 0.0, "learning_rate": 8.352984194222716e-06, "loss": 0.0, "step": 23890 }, { "epoch": 0.16488785556099816, "grad_norm": 0.019565938040614128, "learning_rate": 8.352294286877272e-06, "loss": 0.0, "step": 23900 }, { "epoch": 0.1649568462955425, "grad_norm": 0.0, "learning_rate": 8.351604379531829e-06, "loss": 0.0, "step": 23910 }, { "epoch": 0.16502583703008686, "grad_norm": 0.0, "learning_rate": 8.350914472186385e-06, "loss": 0.0, "step": 23920 }, { "epoch": 0.16509482776463122, "grad_norm": 0.0, "learning_rate": 8.350224564840943e-06, "loss": 0.0, "step": 23930 }, { "epoch": 0.16516381849917555, "grad_norm": 0.1406061202287674, "learning_rate": 8.3495346574955e-06, "loss": 0.0026, "step": 23940 }, { "epoch": 0.1652328092337199, "grad_norm": 6.7976855255835744e-09, "learning_rate": 8.348844750150056e-06, "loss": 0.0118, "step": 23950 }, { "epoch": 0.16530179996826427, "grad_norm": 0.0, "learning_rate": 8.348154842804613e-06, "loss": 0.0, "step": 23960 }, { "epoch": 0.1653707907028086, "grad_norm": 0.0, "learning_rate": 8.347464935459168e-06, "loss": 0.0001, "step": 23970 }, { "epoch": 0.16543978143735297, "grad_norm": 0.0, "learning_rate": 8.346775028113724e-06, "loss": 0.0, "step": 23980 }, { "epoch": 0.1655087721718973, "grad_norm": 10.209993362426758, "learning_rate": 8.34608512076828e-06, "loss": 0.0019, "step": 23990 }, { "epoch": 0.16557776290644166, "grad_norm": 0.0, "learning_rate": 8.345395213422837e-06, "loss": 0.0, "step": 24000 }, { "epoch": 0.16564675364098602, "grad_norm": 0.11810226738452911, "learning_rate": 8.344705306077394e-06, "loss": 0.0024, "step": 24010 }, { "epoch": 0.16571574437553035, "grad_norm": 0.0, "learning_rate": 8.34401539873195e-06, "loss": 0.0, "step": 24020 }, { "epoch": 0.16578473511007472, "grad_norm": 11.925437927246094, "learning_rate": 8.343325491386507e-06, "loss": 0.0029, "step": 24030 }, { "epoch": 0.16585372584461908, "grad_norm": 0.0, "learning_rate": 8.342635584041065e-06, "loss": 0.0, "step": 24040 }, { "epoch": 0.1659227165791634, "grad_norm": 0.0, "learning_rate": 8.341945676695621e-06, "loss": 0.0, "step": 24050 }, { "epoch": 0.16599170731370777, "grad_norm": 0.0, "learning_rate": 8.341255769350178e-06, "loss": 0.0, "step": 24060 }, { "epoch": 0.16606069804825213, "grad_norm": 0.00028354235109873116, "learning_rate": 8.340565862004734e-06, "loss": 0.0051, "step": 24070 }, { "epoch": 0.16612968878279646, "grad_norm": 0.0, "learning_rate": 8.33987595465929e-06, "loss": 0.0, "step": 24080 }, { "epoch": 0.16619867951734082, "grad_norm": 0.0, "learning_rate": 8.339186047313846e-06, "loss": 0.0, "step": 24090 }, { "epoch": 0.16626767025188519, "grad_norm": 1.2160699952801224e-05, "learning_rate": 8.338496139968402e-06, "loss": 0.0, "step": 24100 }, { "epoch": 0.16633666098642952, "grad_norm": 0.5098678469657898, "learning_rate": 8.337806232622959e-06, "loss": 0.0001, "step": 24110 }, { "epoch": 0.16640565172097388, "grad_norm": 1.0296205488202759e-07, "learning_rate": 8.337116325277515e-06, "loss": 0.0, "step": 24120 }, { "epoch": 0.1664746424555182, "grad_norm": 0.0, "learning_rate": 8.336426417932072e-06, "loss": 0.0, "step": 24130 }, { "epoch": 0.16654363319006257, "grad_norm": 0.0, "learning_rate": 8.335736510586628e-06, "loss": 0.0, "step": 24140 }, { "epoch": 0.16661262392460693, "grad_norm": 0.0, "learning_rate": 8.335046603241186e-06, "loss": 0.0039, "step": 24150 }, { "epoch": 0.16668161465915127, "grad_norm": 0.0, "learning_rate": 8.334356695895743e-06, "loss": 0.0, "step": 24160 }, { "epoch": 0.16675060539369563, "grad_norm": 0.0, "learning_rate": 8.3336667885503e-06, "loss": 0.0, "step": 24170 }, { "epoch": 0.16681959612824, "grad_norm": 0.0, "learning_rate": 8.332976881204856e-06, "loss": 0.0, "step": 24180 }, { "epoch": 0.16688858686278432, "grad_norm": 0.001259670709259808, "learning_rate": 8.332286973859412e-06, "loss": 0.0, "step": 24190 }, { "epoch": 0.16695757759732868, "grad_norm": 0.0, "learning_rate": 8.331597066513967e-06, "loss": 0.0, "step": 24200 }, { "epoch": 0.16702656833187304, "grad_norm": 0.0, "learning_rate": 8.330907159168524e-06, "loss": 0.1286, "step": 24210 }, { "epoch": 0.16709555906641738, "grad_norm": 0.0, "learning_rate": 8.33021725182308e-06, "loss": 0.0008, "step": 24220 }, { "epoch": 0.16716454980096174, "grad_norm": 0.0, "learning_rate": 8.329527344477637e-06, "loss": 0.0, "step": 24230 }, { "epoch": 0.16723354053550607, "grad_norm": 0.0, "learning_rate": 8.328837437132193e-06, "loss": 0.0, "step": 24240 }, { "epoch": 0.16730253127005043, "grad_norm": 0.0, "learning_rate": 8.32814752978675e-06, "loss": 0.0, "step": 24250 }, { "epoch": 0.1673715220045948, "grad_norm": 0.0, "learning_rate": 8.327457622441308e-06, "loss": 0.0, "step": 24260 }, { "epoch": 0.16744051273913912, "grad_norm": 0.0, "learning_rate": 8.326767715095864e-06, "loss": 0.0, "step": 24270 }, { "epoch": 0.16750950347368349, "grad_norm": 0.0, "learning_rate": 8.32607780775042e-06, "loss": 0.0451, "step": 24280 }, { "epoch": 0.16757849420822785, "grad_norm": 0.0, "learning_rate": 8.325387900404977e-06, "loss": 0.0, "step": 24290 }, { "epoch": 0.16764748494277218, "grad_norm": 0.0005077415844425559, "learning_rate": 8.324697993059534e-06, "loss": 0.0, "step": 24300 }, { "epoch": 0.16771647567731654, "grad_norm": 0.0, "learning_rate": 8.324008085714088e-06, "loss": 0.0002, "step": 24310 }, { "epoch": 0.1677854664118609, "grad_norm": 5.61315346203628e-07, "learning_rate": 8.323318178368645e-06, "loss": 0.0003, "step": 24320 }, { "epoch": 0.16785445714640523, "grad_norm": 127.95923614501953, "learning_rate": 8.322628271023201e-06, "loss": 0.1063, "step": 24330 }, { "epoch": 0.1679234478809496, "grad_norm": 0.0, "learning_rate": 8.321938363677758e-06, "loss": 0.0, "step": 24340 }, { "epoch": 0.16799243861549393, "grad_norm": 0.0, "learning_rate": 8.321248456332314e-06, "loss": 0.0, "step": 24350 }, { "epoch": 0.1680614293500383, "grad_norm": 0.09342078864574432, "learning_rate": 8.320558548986871e-06, "loss": 0.0, "step": 24360 }, { "epoch": 0.16813042008458265, "grad_norm": 0.0, "learning_rate": 8.319868641641429e-06, "loss": 1.486, "step": 24370 }, { "epoch": 0.16819941081912698, "grad_norm": 0.0, "learning_rate": 8.319178734295986e-06, "loss": 0.0, "step": 24380 }, { "epoch": 0.16826840155367134, "grad_norm": 0.0, "learning_rate": 8.318488826950542e-06, "loss": 0.0, "step": 24390 }, { "epoch": 0.1683373922882157, "grad_norm": 0.0, "learning_rate": 8.317798919605099e-06, "loss": 0.0, "step": 24400 }, { "epoch": 0.16840638302276004, "grad_norm": 0.0, "learning_rate": 8.317109012259655e-06, "loss": 0.8625, "step": 24410 }, { "epoch": 0.1684753737573044, "grad_norm": 0.0, "learning_rate": 8.316419104914212e-06, "loss": 0.0529, "step": 24420 }, { "epoch": 0.16854436449184876, "grad_norm": 0.0, "learning_rate": 8.315729197568766e-06, "loss": 0.0, "step": 24430 }, { "epoch": 0.1686133552263931, "grad_norm": 0.0, "learning_rate": 8.315039290223323e-06, "loss": 0.0, "step": 24440 }, { "epoch": 0.16868234596093745, "grad_norm": 0.0, "learning_rate": 8.31434938287788e-06, "loss": 0.0032, "step": 24450 }, { "epoch": 0.16875133669548179, "grad_norm": 5.762419732491253e-06, "learning_rate": 8.313659475532436e-06, "loss": 0.0, "step": 24460 }, { "epoch": 0.16882032743002615, "grad_norm": 0.0, "learning_rate": 8.312969568186992e-06, "loss": 0.0, "step": 24470 }, { "epoch": 0.1688893181645705, "grad_norm": 0.0, "learning_rate": 8.31227966084155e-06, "loss": 0.0002, "step": 24480 }, { "epoch": 0.16895830889911484, "grad_norm": 0.0, "learning_rate": 8.311589753496107e-06, "loss": 0.0, "step": 24490 }, { "epoch": 0.1690272996336592, "grad_norm": 0.0, "learning_rate": 8.310899846150663e-06, "loss": 0.0001, "step": 24500 }, { "epoch": 0.16909629036820356, "grad_norm": 0.0, "learning_rate": 8.31020993880522e-06, "loss": 0.0, "step": 24510 }, { "epoch": 0.1691652811027479, "grad_norm": 0.0, "learning_rate": 8.309520031459776e-06, "loss": 0.0, "step": 24520 }, { "epoch": 0.16923427183729225, "grad_norm": 0.0, "learning_rate": 8.308830124114333e-06, "loss": 0.0, "step": 24530 }, { "epoch": 0.16930326257183662, "grad_norm": 0.0, "learning_rate": 8.308140216768888e-06, "loss": 0.0, "step": 24540 }, { "epoch": 0.16937225330638095, "grad_norm": 0.0, "learning_rate": 8.307450309423444e-06, "loss": 0.6671, "step": 24550 }, { "epoch": 0.1694412440409253, "grad_norm": 0.0, "learning_rate": 8.306760402078e-06, "loss": 0.0, "step": 24560 }, { "epoch": 0.16951023477546964, "grad_norm": 0.00019171614258084446, "learning_rate": 8.306070494732557e-06, "loss": 0.0006, "step": 24570 }, { "epoch": 0.169579225510014, "grad_norm": 0.0, "learning_rate": 8.305380587387114e-06, "loss": 0.0001, "step": 24580 }, { "epoch": 0.16964821624455836, "grad_norm": 0.0, "learning_rate": 8.304690680041672e-06, "loss": 0.0, "step": 24590 }, { "epoch": 0.1697172069791027, "grad_norm": 0.0, "learning_rate": 8.304000772696228e-06, "loss": 0.0001, "step": 24600 }, { "epoch": 0.16978619771364706, "grad_norm": 0.0, "learning_rate": 8.303310865350785e-06, "loss": 0.0, "step": 24610 }, { "epoch": 0.16985518844819142, "grad_norm": 0.0, "learning_rate": 8.302620958005341e-06, "loss": 0.0, "step": 24620 }, { "epoch": 0.16992417918273575, "grad_norm": 0.0, "learning_rate": 8.301931050659898e-06, "loss": 0.0, "step": 24630 }, { "epoch": 0.1699931699172801, "grad_norm": 212.1542205810547, "learning_rate": 8.301241143314454e-06, "loss": 0.0169, "step": 24640 }, { "epoch": 0.17006216065182447, "grad_norm": 0.0, "learning_rate": 8.300551235969009e-06, "loss": 0.0, "step": 24650 }, { "epoch": 0.1701311513863688, "grad_norm": 0.0, "learning_rate": 8.299861328623566e-06, "loss": 0.0021, "step": 24660 }, { "epoch": 0.17020014212091317, "grad_norm": 0.0, "learning_rate": 8.299171421278122e-06, "loss": 0.0, "step": 24670 }, { "epoch": 0.1702691328554575, "grad_norm": 0.0, "learning_rate": 8.298481513932679e-06, "loss": 0.7152, "step": 24680 }, { "epoch": 0.17033812359000186, "grad_norm": 0.0, "learning_rate": 8.297791606587235e-06, "loss": 0.0001, "step": 24690 }, { "epoch": 0.17040711432454622, "grad_norm": 1.1909532560139269e-07, "learning_rate": 8.297101699241793e-06, "loss": 0.0008, "step": 24700 }, { "epoch": 0.17047610505909055, "grad_norm": 1.1512995115481317e-05, "learning_rate": 8.29641179189635e-06, "loss": 0.0, "step": 24710 }, { "epoch": 0.17054509579363492, "grad_norm": 0.03892756626009941, "learning_rate": 8.295721884550906e-06, "loss": 0.0, "step": 24720 }, { "epoch": 0.17061408652817928, "grad_norm": 0.0, "learning_rate": 8.295031977205463e-06, "loss": 0.0001, "step": 24730 }, { "epoch": 0.1706830772627236, "grad_norm": 0.0, "learning_rate": 8.29434206986002e-06, "loss": 0.0, "step": 24740 }, { "epoch": 0.17075206799726797, "grad_norm": 0.0, "learning_rate": 8.293652162514576e-06, "loss": 0.0, "step": 24750 }, { "epoch": 0.17082105873181233, "grad_norm": 0.0, "learning_rate": 8.29296225516913e-06, "loss": 0.0, "step": 24760 }, { "epoch": 0.17089004946635666, "grad_norm": 0.0, "learning_rate": 8.292272347823687e-06, "loss": 0.0, "step": 24770 }, { "epoch": 0.17095904020090102, "grad_norm": 7.119715883163735e-05, "learning_rate": 8.291582440478243e-06, "loss": 0.0, "step": 24780 }, { "epoch": 0.17102803093544536, "grad_norm": 0.0, "learning_rate": 8.2908925331328e-06, "loss": 0.0005, "step": 24790 }, { "epoch": 0.17109702166998972, "grad_norm": 0.0, "learning_rate": 8.290202625787356e-06, "loss": 0.0868, "step": 24800 }, { "epoch": 0.17116601240453408, "grad_norm": 0.0, "learning_rate": 8.289512718441915e-06, "loss": 0.0085, "step": 24810 }, { "epoch": 0.1712350031390784, "grad_norm": 0.0, "learning_rate": 8.288822811096471e-06, "loss": 0.0, "step": 24820 }, { "epoch": 0.17130399387362277, "grad_norm": 0.0, "learning_rate": 8.288132903751028e-06, "loss": 0.0, "step": 24830 }, { "epoch": 0.17137298460816713, "grad_norm": 0.0, "learning_rate": 8.287442996405584e-06, "loss": 0.0002, "step": 24840 }, { "epoch": 0.17144197534271147, "grad_norm": 0.0029003354720771313, "learning_rate": 8.28675308906014e-06, "loss": 0.0001, "step": 24850 }, { "epoch": 0.17151096607725583, "grad_norm": 0.0, "learning_rate": 8.286063181714697e-06, "loss": 0.0002, "step": 24860 }, { "epoch": 0.1715799568118002, "grad_norm": 0.0, "learning_rate": 8.285373274369254e-06, "loss": 0.0, "step": 24870 }, { "epoch": 0.17164894754634452, "grad_norm": 0.6882894039154053, "learning_rate": 8.284683367023808e-06, "loss": 0.0279, "step": 24880 }, { "epoch": 0.17171793828088888, "grad_norm": 0.0, "learning_rate": 8.283993459678365e-06, "loss": 0.0002, "step": 24890 }, { "epoch": 0.17178692901543322, "grad_norm": 5.751724785341139e-08, "learning_rate": 8.283303552332921e-06, "loss": 0.0, "step": 24900 }, { "epoch": 0.17185591974997758, "grad_norm": 0.0, "learning_rate": 8.282613644987478e-06, "loss": 0.0, "step": 24910 }, { "epoch": 0.17192491048452194, "grad_norm": 0.0, "learning_rate": 8.281923737642036e-06, "loss": 0.0032, "step": 24920 }, { "epoch": 0.17199390121906627, "grad_norm": 0.006061329040676355, "learning_rate": 8.281233830296593e-06, "loss": 0.0, "step": 24930 }, { "epoch": 0.17206289195361063, "grad_norm": 0.0, "learning_rate": 8.280543922951149e-06, "loss": 0.0008, "step": 24940 }, { "epoch": 0.172131882688155, "grad_norm": 0.0, "learning_rate": 8.279854015605706e-06, "loss": 0.0, "step": 24950 }, { "epoch": 0.17220087342269932, "grad_norm": 0.0, "learning_rate": 8.279164108260262e-06, "loss": 0.0, "step": 24960 }, { "epoch": 0.17226986415724369, "grad_norm": 0.0, "learning_rate": 8.278474200914818e-06, "loss": 0.0045, "step": 24970 }, { "epoch": 0.17233885489178805, "grad_norm": 0.0, "learning_rate": 8.277784293569375e-06, "loss": 0.0, "step": 24980 }, { "epoch": 0.17240784562633238, "grad_norm": 0.0, "learning_rate": 8.27709438622393e-06, "loss": 0.0, "step": 24990 }, { "epoch": 0.17247683636087674, "grad_norm": 0.0, "learning_rate": 8.276404478878486e-06, "loss": 0.0, "step": 25000 }, { "epoch": 0.17254582709542107, "grad_norm": 0.0016783939208835363, "learning_rate": 8.275714571533043e-06, "loss": 0.0, "step": 25010 }, { "epoch": 0.17261481782996543, "grad_norm": 0.0, "learning_rate": 8.2750246641876e-06, "loss": 0.0, "step": 25020 }, { "epoch": 0.1726838085645098, "grad_norm": 0.0, "learning_rate": 8.274334756842157e-06, "loss": 0.0, "step": 25030 }, { "epoch": 0.17275279929905413, "grad_norm": 0.0, "learning_rate": 8.273644849496714e-06, "loss": 0.0, "step": 25040 }, { "epoch": 0.1728217900335985, "grad_norm": 0.0, "learning_rate": 8.27295494215127e-06, "loss": 0.0002, "step": 25050 }, { "epoch": 0.17289078076814285, "grad_norm": 0.0, "learning_rate": 8.272265034805827e-06, "loss": 0.0, "step": 25060 }, { "epoch": 0.17295977150268718, "grad_norm": 0.0, "learning_rate": 8.271575127460383e-06, "loss": 0.0, "step": 25070 }, { "epoch": 0.17302876223723154, "grad_norm": 0.0, "learning_rate": 8.27088522011494e-06, "loss": 0.0001, "step": 25080 }, { "epoch": 0.1730977529717759, "grad_norm": 0.0, "learning_rate": 8.270195312769496e-06, "loss": 0.0014, "step": 25090 }, { "epoch": 0.17316674370632024, "grad_norm": 0.0, "learning_rate": 8.269505405424051e-06, "loss": 0.0, "step": 25100 }, { "epoch": 0.1732357344408646, "grad_norm": 0.0, "learning_rate": 8.268815498078608e-06, "loss": 0.0, "step": 25110 }, { "epoch": 0.17330472517540893, "grad_norm": 0.0, "learning_rate": 8.268125590733164e-06, "loss": 0.0, "step": 25120 }, { "epoch": 0.1733737159099533, "grad_norm": 9.67517371464055e-06, "learning_rate": 8.26743568338772e-06, "loss": 0.0121, "step": 25130 }, { "epoch": 0.17344270664449765, "grad_norm": 4.8759222437411154e-08, "learning_rate": 8.266745776042279e-06, "loss": 0.0, "step": 25140 }, { "epoch": 0.17351169737904198, "grad_norm": 2.282831337652169e-05, "learning_rate": 8.266055868696835e-06, "loss": 0.0013, "step": 25150 }, { "epoch": 0.17358068811358635, "grad_norm": 0.0008630091324448586, "learning_rate": 8.265365961351392e-06, "loss": 0.0003, "step": 25160 }, { "epoch": 0.1736496788481307, "grad_norm": 1.5896519656166674e-08, "learning_rate": 8.264676054005948e-06, "loss": 0.0, "step": 25170 }, { "epoch": 0.17371866958267504, "grad_norm": 0.0, "learning_rate": 8.263986146660505e-06, "loss": 0.0, "step": 25180 }, { "epoch": 0.1737876603172194, "grad_norm": 0.0, "learning_rate": 8.263296239315061e-06, "loss": 0.0, "step": 25190 }, { "epoch": 0.17385665105176376, "grad_norm": 0.0, "learning_rate": 8.262606331969618e-06, "loss": 0.0, "step": 25200 }, { "epoch": 0.1739256417863081, "grad_norm": 483.7286682128906, "learning_rate": 8.261916424624174e-06, "loss": 0.2293, "step": 25210 }, { "epoch": 0.17399463252085245, "grad_norm": 0.0, "learning_rate": 8.261226517278729e-06, "loss": 0.0, "step": 25220 }, { "epoch": 0.1740636232553968, "grad_norm": 0.0, "learning_rate": 8.260536609933286e-06, "loss": 0.0, "step": 25230 }, { "epoch": 0.17413261398994115, "grad_norm": 8.166391580743948e-07, "learning_rate": 8.259846702587842e-06, "loss": 0.0, "step": 25240 }, { "epoch": 0.1742016047244855, "grad_norm": 1.974263312831681e-07, "learning_rate": 8.2591567952424e-06, "loss": 0.0002, "step": 25250 }, { "epoch": 0.17427059545902984, "grad_norm": 0.0, "learning_rate": 8.258466887896957e-06, "loss": 0.0, "step": 25260 }, { "epoch": 0.1743395861935742, "grad_norm": 0.0, "learning_rate": 8.257776980551513e-06, "loss": 0.0, "step": 25270 }, { "epoch": 0.17440857692811856, "grad_norm": 0.0, "learning_rate": 8.25708707320607e-06, "loss": 0.0, "step": 25280 }, { "epoch": 0.1744775676626629, "grad_norm": 0.0, "learning_rate": 8.256397165860626e-06, "loss": 0.0, "step": 25290 }, { "epoch": 0.17454655839720726, "grad_norm": 0.0, "learning_rate": 8.255707258515183e-06, "loss": 0.001, "step": 25300 }, { "epoch": 0.17461554913175162, "grad_norm": 0.0, "learning_rate": 8.25501735116974e-06, "loss": 0.0, "step": 25310 }, { "epoch": 0.17468453986629595, "grad_norm": 0.0, "learning_rate": 8.254327443824296e-06, "loss": 0.0, "step": 25320 }, { "epoch": 0.1747535306008403, "grad_norm": 0.0, "learning_rate": 8.25363753647885e-06, "loss": 0.0, "step": 25330 }, { "epoch": 0.17482252133538465, "grad_norm": 0.0, "learning_rate": 8.252947629133407e-06, "loss": 0.0, "step": 25340 }, { "epoch": 0.174891512069929, "grad_norm": 0.0, "learning_rate": 8.252257721787963e-06, "loss": 0.0001, "step": 25350 }, { "epoch": 0.17496050280447337, "grad_norm": 0.0, "learning_rate": 8.251567814442522e-06, "loss": 0.0, "step": 25360 }, { "epoch": 0.1750294935390177, "grad_norm": 0.0, "learning_rate": 8.250877907097078e-06, "loss": 0.0, "step": 25370 }, { "epoch": 0.17509848427356206, "grad_norm": 7.346125698859396e-07, "learning_rate": 8.250187999751635e-06, "loss": 0.0, "step": 25380 }, { "epoch": 0.17516747500810642, "grad_norm": 0.0, "learning_rate": 8.249498092406191e-06, "loss": 0.0, "step": 25390 }, { "epoch": 0.17523646574265075, "grad_norm": 807.4644165039062, "learning_rate": 8.248808185060748e-06, "loss": 0.2996, "step": 25400 }, { "epoch": 0.17530545647719512, "grad_norm": 0.0, "learning_rate": 8.248118277715304e-06, "loss": 0.0, "step": 25410 }, { "epoch": 0.17537444721173948, "grad_norm": 9.436121035832912e-05, "learning_rate": 8.24742837036986e-06, "loss": 0.0196, "step": 25420 }, { "epoch": 0.1754434379462838, "grad_norm": 1.77166275534546e-05, "learning_rate": 8.246738463024417e-06, "loss": 0.0, "step": 25430 }, { "epoch": 0.17551242868082817, "grad_norm": 0.0, "learning_rate": 8.246048555678972e-06, "loss": 1.0488, "step": 25440 }, { "epoch": 0.1755814194153725, "grad_norm": 0.0, "learning_rate": 8.245358648333528e-06, "loss": 0.4918, "step": 25450 }, { "epoch": 0.17565041014991686, "grad_norm": 1.7942100384971127e-05, "learning_rate": 8.244668740988085e-06, "loss": 0.0, "step": 25460 }, { "epoch": 0.17571940088446122, "grad_norm": 0.0, "learning_rate": 8.243978833642643e-06, "loss": 0.0, "step": 25470 }, { "epoch": 0.17578839161900556, "grad_norm": 0.0007773078978061676, "learning_rate": 8.2432889262972e-06, "loss": 0.0, "step": 25480 }, { "epoch": 0.17585738235354992, "grad_norm": 0.0, "learning_rate": 8.242599018951756e-06, "loss": 0.0004, "step": 25490 }, { "epoch": 0.17592637308809428, "grad_norm": 0.0, "learning_rate": 8.241909111606312e-06, "loss": 0.0, "step": 25500 }, { "epoch": 0.1759953638226386, "grad_norm": 0.0, "learning_rate": 8.241219204260869e-06, "loss": 0.0, "step": 25510 }, { "epoch": 0.17606435455718297, "grad_norm": 0.0, "learning_rate": 8.240529296915425e-06, "loss": 0.0015, "step": 25520 }, { "epoch": 0.17613334529172733, "grad_norm": 0.4363812804222107, "learning_rate": 8.239839389569982e-06, "loss": 0.0001, "step": 25530 }, { "epoch": 0.17620233602627167, "grad_norm": 3.2861378063131497e-09, "learning_rate": 8.239149482224538e-06, "loss": 0.0002, "step": 25540 }, { "epoch": 0.17627132676081603, "grad_norm": 0.0, "learning_rate": 8.238459574879095e-06, "loss": 0.0, "step": 25550 }, { "epoch": 0.17634031749536036, "grad_norm": 0.0, "learning_rate": 8.23776966753365e-06, "loss": 0.0, "step": 25560 }, { "epoch": 0.17640930822990472, "grad_norm": 0.0, "learning_rate": 8.237079760188206e-06, "loss": 0.0, "step": 25570 }, { "epoch": 0.17647829896444908, "grad_norm": 0.0, "learning_rate": 8.236389852842764e-06, "loss": 0.0, "step": 25580 }, { "epoch": 0.17654728969899341, "grad_norm": 3.523582563502714e-05, "learning_rate": 8.235699945497321e-06, "loss": 0.4141, "step": 25590 }, { "epoch": 0.17661628043353778, "grad_norm": 0.0, "learning_rate": 8.235010038151877e-06, "loss": 0.0, "step": 25600 }, { "epoch": 0.17668527116808214, "grad_norm": 0.0, "learning_rate": 8.234320130806434e-06, "loss": 0.0001, "step": 25610 }, { "epoch": 0.17675426190262647, "grad_norm": 0.0, "learning_rate": 8.23363022346099e-06, "loss": 0.0, "step": 25620 }, { "epoch": 0.17682325263717083, "grad_norm": 0.0, "learning_rate": 8.232940316115547e-06, "loss": 0.0, "step": 25630 }, { "epoch": 0.1768922433717152, "grad_norm": 0.0004252506187185645, "learning_rate": 8.232250408770103e-06, "loss": 0.008, "step": 25640 }, { "epoch": 0.17696123410625952, "grad_norm": 0.0, "learning_rate": 8.23156050142466e-06, "loss": 0.0, "step": 25650 }, { "epoch": 0.17703022484080388, "grad_norm": 0.0005287486128509045, "learning_rate": 8.230870594079216e-06, "loss": 0.0, "step": 25660 }, { "epoch": 0.17709921557534822, "grad_norm": 0.0, "learning_rate": 8.230180686733771e-06, "loss": 0.0377, "step": 25670 }, { "epoch": 0.17716820630989258, "grad_norm": 0.0, "learning_rate": 8.229490779388328e-06, "loss": 0.0, "step": 25680 }, { "epoch": 0.17723719704443694, "grad_norm": 0.0, "learning_rate": 8.228800872042886e-06, "loss": 0.0, "step": 25690 }, { "epoch": 0.17730618777898127, "grad_norm": 0.0, "learning_rate": 8.228110964697442e-06, "loss": 0.0, "step": 25700 }, { "epoch": 0.17737517851352563, "grad_norm": 0.0002452524204272777, "learning_rate": 8.227421057351999e-06, "loss": 0.1014, "step": 25710 }, { "epoch": 0.17744416924807, "grad_norm": 0.0, "learning_rate": 8.226731150006555e-06, "loss": 0.0058, "step": 25720 }, { "epoch": 0.17751315998261433, "grad_norm": 7.203065251815133e-06, "learning_rate": 8.226041242661112e-06, "loss": 0.0001, "step": 25730 }, { "epoch": 0.1775821507171587, "grad_norm": 0.0, "learning_rate": 8.225351335315668e-06, "loss": 0.0, "step": 25740 }, { "epoch": 0.17765114145170305, "grad_norm": 0.0, "learning_rate": 8.224661427970225e-06, "loss": 0.0, "step": 25750 }, { "epoch": 0.17772013218624738, "grad_norm": 34.71978759765625, "learning_rate": 8.223971520624781e-06, "loss": 0.0087, "step": 25760 }, { "epoch": 0.17778912292079174, "grad_norm": 1.0024551784226787e-06, "learning_rate": 8.223281613279338e-06, "loss": 0.0, "step": 25770 }, { "epoch": 0.17785811365533608, "grad_norm": 0.0, "learning_rate": 8.222591705933893e-06, "loss": 0.0, "step": 25780 }, { "epoch": 0.17792710438988044, "grad_norm": 0.0, "learning_rate": 8.221901798588449e-06, "loss": 0.0, "step": 25790 }, { "epoch": 0.1779960951244248, "grad_norm": 0.0, "learning_rate": 8.221211891243007e-06, "loss": 0.0, "step": 25800 }, { "epoch": 0.17806508585896913, "grad_norm": 0.0, "learning_rate": 8.220521983897564e-06, "loss": 0.0006, "step": 25810 }, { "epoch": 0.1781340765935135, "grad_norm": 4.79025885624651e-08, "learning_rate": 8.21983207655212e-06, "loss": 0.1689, "step": 25820 }, { "epoch": 0.17820306732805785, "grad_norm": 0.0, "learning_rate": 8.219142169206677e-06, "loss": 0.0, "step": 25830 }, { "epoch": 0.17827205806260218, "grad_norm": 0.0, "learning_rate": 8.218452261861233e-06, "loss": 0.0, "step": 25840 }, { "epoch": 0.17834104879714655, "grad_norm": 0.0, "learning_rate": 8.21776235451579e-06, "loss": 0.0027, "step": 25850 }, { "epoch": 0.1784100395316909, "grad_norm": 1.7561869736226754e-09, "learning_rate": 8.217072447170346e-06, "loss": 0.0026, "step": 25860 }, { "epoch": 0.17847903026623524, "grad_norm": 0.0, "learning_rate": 8.216382539824903e-06, "loss": 0.0, "step": 25870 }, { "epoch": 0.1785480210007796, "grad_norm": 2.0229863650911284e-07, "learning_rate": 8.215692632479459e-06, "loss": 0.0, "step": 25880 }, { "epoch": 0.17861701173532393, "grad_norm": 0.0, "learning_rate": 8.215002725134016e-06, "loss": 0.0, "step": 25890 }, { "epoch": 0.1786860024698683, "grad_norm": 0.0, "learning_rate": 8.21431281778857e-06, "loss": 0.0, "step": 25900 }, { "epoch": 0.17875499320441265, "grad_norm": 0.0, "learning_rate": 8.213622910443129e-06, "loss": 0.0001, "step": 25910 }, { "epoch": 0.178823983938957, "grad_norm": 0.0, "learning_rate": 8.212933003097685e-06, "loss": 0.0, "step": 25920 }, { "epoch": 0.17889297467350135, "grad_norm": 3.810760151878867e-09, "learning_rate": 8.212243095752242e-06, "loss": 0.0, "step": 25930 }, { "epoch": 0.1789619654080457, "grad_norm": 0.0, "learning_rate": 8.211553188406798e-06, "loss": 0.0, "step": 25940 }, { "epoch": 0.17903095614259004, "grad_norm": 0.0, "learning_rate": 8.210863281061355e-06, "loss": 0.0, "step": 25950 }, { "epoch": 0.1790999468771344, "grad_norm": 0.0, "learning_rate": 8.210173373715911e-06, "loss": 0.0, "step": 25960 }, { "epoch": 0.17916893761167876, "grad_norm": 0.007863602600991726, "learning_rate": 8.209483466370468e-06, "loss": 0.0, "step": 25970 }, { "epoch": 0.1792379283462231, "grad_norm": 0.0, "learning_rate": 8.208793559025024e-06, "loss": 0.0, "step": 25980 }, { "epoch": 0.17930691908076746, "grad_norm": 3.773438450593858e-08, "learning_rate": 8.20810365167958e-06, "loss": 0.0016, "step": 25990 }, { "epoch": 0.1793759098153118, "grad_norm": 0.0, "learning_rate": 8.207413744334137e-06, "loss": 0.0274, "step": 26000 }, { "epoch": 0.17944490054985615, "grad_norm": 1.6173011374576163e-07, "learning_rate": 8.206723836988692e-06, "loss": 0.9105, "step": 26010 }, { "epoch": 0.1795138912844005, "grad_norm": 0.0, "learning_rate": 8.20603392964325e-06, "loss": 0.048, "step": 26020 }, { "epoch": 0.17958288201894484, "grad_norm": 0.0, "learning_rate": 8.205344022297806e-06, "loss": 0.0056, "step": 26030 }, { "epoch": 0.1796518727534892, "grad_norm": 0.0, "learning_rate": 8.204654114952363e-06, "loss": 0.0, "step": 26040 }, { "epoch": 0.17972086348803357, "grad_norm": 0.0, "learning_rate": 8.20396420760692e-06, "loss": 0.0014, "step": 26050 }, { "epoch": 0.1797898542225779, "grad_norm": 0.0, "learning_rate": 8.203274300261476e-06, "loss": 0.0006, "step": 26060 }, { "epoch": 0.17985884495712226, "grad_norm": 0.0, "learning_rate": 8.202584392916032e-06, "loss": 0.0, "step": 26070 }, { "epoch": 0.17992783569166662, "grad_norm": 1.7319604639354225e-09, "learning_rate": 8.201894485570589e-06, "loss": 0.0005, "step": 26080 }, { "epoch": 0.17999682642621095, "grad_norm": 0.0, "learning_rate": 8.201204578225145e-06, "loss": 0.0, "step": 26090 }, { "epoch": 0.18006581716075531, "grad_norm": 1.5374783668420378e-09, "learning_rate": 8.200514670879702e-06, "loss": 0.4062, "step": 26100 }, { "epoch": 0.18013480789529965, "grad_norm": 0.0, "learning_rate": 8.199824763534258e-06, "loss": 0.0, "step": 26110 }, { "epoch": 0.180203798629844, "grad_norm": 0.0, "learning_rate": 8.199134856188813e-06, "loss": 0.0, "step": 26120 }, { "epoch": 0.18027278936438837, "grad_norm": 0.0, "learning_rate": 8.198444948843371e-06, "loss": 0.0, "step": 26130 }, { "epoch": 0.1803417800989327, "grad_norm": 0.0, "learning_rate": 8.197755041497928e-06, "loss": 0.0, "step": 26140 }, { "epoch": 0.18041077083347706, "grad_norm": 0.0, "learning_rate": 8.197065134152484e-06, "loss": 0.0, "step": 26150 }, { "epoch": 0.18047976156802142, "grad_norm": 0.0, "learning_rate": 8.19637522680704e-06, "loss": 0.01, "step": 26160 }, { "epoch": 0.18054875230256576, "grad_norm": 0.00018162050400860608, "learning_rate": 8.195685319461597e-06, "loss": 0.0, "step": 26170 }, { "epoch": 0.18061774303711012, "grad_norm": 1.9468604506300835e-09, "learning_rate": 8.194995412116154e-06, "loss": 0.0, "step": 26180 }, { "epoch": 0.18068673377165448, "grad_norm": 0.0, "learning_rate": 8.19430550477071e-06, "loss": 0.0, "step": 26190 }, { "epoch": 0.1807557245061988, "grad_norm": 0.36995938420295715, "learning_rate": 8.193615597425267e-06, "loss": 0.0, "step": 26200 }, { "epoch": 0.18082471524074317, "grad_norm": 0.0, "learning_rate": 8.192925690079823e-06, "loss": 0.276, "step": 26210 }, { "epoch": 0.1808937059752875, "grad_norm": 0.0, "learning_rate": 8.19223578273438e-06, "loss": 0.0, "step": 26220 }, { "epoch": 0.18096269670983187, "grad_norm": 0.0, "learning_rate": 8.191545875388936e-06, "loss": 0.0, "step": 26230 }, { "epoch": 0.18103168744437623, "grad_norm": 0.0, "learning_rate": 8.190855968043493e-06, "loss": 0.0, "step": 26240 }, { "epoch": 0.18110067817892056, "grad_norm": 0.0, "learning_rate": 8.19016606069805e-06, "loss": 0.0, "step": 26250 }, { "epoch": 0.18116966891346492, "grad_norm": 0.0, "learning_rate": 8.189476153352606e-06, "loss": 0.0, "step": 26260 }, { "epoch": 0.18123865964800928, "grad_norm": 0.0, "learning_rate": 8.188786246007162e-06, "loss": 0.2093, "step": 26270 }, { "epoch": 0.18130765038255361, "grad_norm": 0.0, "learning_rate": 8.188096338661719e-06, "loss": 0.0, "step": 26280 }, { "epoch": 0.18137664111709798, "grad_norm": 0.0, "learning_rate": 8.187406431316275e-06, "loss": 0.0, "step": 26290 }, { "epoch": 0.18144563185164234, "grad_norm": 0.0, "learning_rate": 8.186716523970832e-06, "loss": 0.0, "step": 26300 }, { "epoch": 0.18151462258618667, "grad_norm": 0.0, "learning_rate": 8.186026616625388e-06, "loss": 0.0, "step": 26310 }, { "epoch": 0.18158361332073103, "grad_norm": 0.0, "learning_rate": 8.185336709279945e-06, "loss": 0.1025, "step": 26320 }, { "epoch": 0.18165260405527536, "grad_norm": 0.0, "learning_rate": 8.184646801934501e-06, "loss": 0.0, "step": 26330 }, { "epoch": 0.18172159478981972, "grad_norm": 0.0, "learning_rate": 8.183956894589058e-06, "loss": 0.0, "step": 26340 }, { "epoch": 0.18179058552436408, "grad_norm": 0.0, "learning_rate": 8.183266987243614e-06, "loss": 0.0, "step": 26350 }, { "epoch": 0.18185957625890842, "grad_norm": 0.0, "learning_rate": 8.18257707989817e-06, "loss": 0.0, "step": 26360 }, { "epoch": 0.18192856699345278, "grad_norm": 3.433952677767138e-09, "learning_rate": 8.181887172552727e-06, "loss": 0.0, "step": 26370 }, { "epoch": 0.18199755772799714, "grad_norm": 0.0, "learning_rate": 8.181197265207284e-06, "loss": 0.0, "step": 26380 }, { "epoch": 0.18206654846254147, "grad_norm": 0.00016772629169281572, "learning_rate": 8.18050735786184e-06, "loss": 0.0, "step": 26390 }, { "epoch": 0.18213553919708583, "grad_norm": 4.427766597814298e-08, "learning_rate": 8.179817450516397e-06, "loss": 0.1638, "step": 26400 }, { "epoch": 0.1822045299316302, "grad_norm": 7.976954918831325e-08, "learning_rate": 8.179127543170953e-06, "loss": 0.0004, "step": 26410 }, { "epoch": 0.18227352066617453, "grad_norm": 0.0, "learning_rate": 8.17843763582551e-06, "loss": 0.0, "step": 26420 }, { "epoch": 0.1823425114007189, "grad_norm": 0.0, "learning_rate": 8.177747728480066e-06, "loss": 0.0, "step": 26430 }, { "epoch": 0.18241150213526322, "grad_norm": 0.0, "learning_rate": 8.177057821134623e-06, "loss": 0.0003, "step": 26440 }, { "epoch": 0.18248049286980758, "grad_norm": 1.7809059619903564, "learning_rate": 8.176367913789179e-06, "loss": 0.0169, "step": 26450 }, { "epoch": 0.18254948360435194, "grad_norm": 0.0, "learning_rate": 8.175678006443736e-06, "loss": 0.0, "step": 26460 }, { "epoch": 0.18261847433889628, "grad_norm": 0.0, "learning_rate": 8.174988099098292e-06, "loss": 0.0, "step": 26470 }, { "epoch": 0.18268746507344064, "grad_norm": 0.0, "learning_rate": 8.174298191752849e-06, "loss": 0.0001, "step": 26480 }, { "epoch": 0.182756455807985, "grad_norm": 0.0, "learning_rate": 8.173608284407405e-06, "loss": 0.0014, "step": 26490 }, { "epoch": 0.18282544654252933, "grad_norm": 0.5432473421096802, "learning_rate": 8.172918377061962e-06, "loss": 0.0001, "step": 26500 }, { "epoch": 0.1828944372770737, "grad_norm": 0.0, "learning_rate": 8.172228469716518e-06, "loss": 0.0001, "step": 26510 }, { "epoch": 0.18296342801161805, "grad_norm": 0.0, "learning_rate": 8.171538562371074e-06, "loss": 0.0025, "step": 26520 }, { "epoch": 0.18303241874616238, "grad_norm": 0.0, "learning_rate": 8.170848655025631e-06, "loss": 0.0035, "step": 26530 }, { "epoch": 0.18310140948070674, "grad_norm": 0.007199286948889494, "learning_rate": 8.170158747680187e-06, "loss": 0.3424, "step": 26540 }, { "epoch": 0.18317040021525108, "grad_norm": 0.0, "learning_rate": 8.169468840334744e-06, "loss": 0.0, "step": 26550 }, { "epoch": 0.18323939094979544, "grad_norm": 0.0, "learning_rate": 8.1687789329893e-06, "loss": 0.0, "step": 26560 }, { "epoch": 0.1833083816843398, "grad_norm": 9.50700996327214e-06, "learning_rate": 8.168089025643857e-06, "loss": 0.001, "step": 26570 }, { "epoch": 0.18337737241888413, "grad_norm": 0.0, "learning_rate": 8.167399118298413e-06, "loss": 0.0, "step": 26580 }, { "epoch": 0.1834463631534285, "grad_norm": 0.0, "learning_rate": 8.16670921095297e-06, "loss": 0.0001, "step": 26590 }, { "epoch": 0.18351535388797285, "grad_norm": 0.0, "learning_rate": 8.166019303607526e-06, "loss": 0.0, "step": 26600 }, { "epoch": 0.1835843446225172, "grad_norm": 0.0, "learning_rate": 8.165329396262083e-06, "loss": 0.0, "step": 26610 }, { "epoch": 0.18365333535706155, "grad_norm": 0.0, "learning_rate": 8.16463948891664e-06, "loss": 0.0006, "step": 26620 }, { "epoch": 0.1837223260916059, "grad_norm": 0.0, "learning_rate": 8.163949581571196e-06, "loss": 0.0, "step": 26630 }, { "epoch": 0.18379131682615024, "grad_norm": 0.0, "learning_rate": 8.163259674225752e-06, "loss": 0.0, "step": 26640 }, { "epoch": 0.1838603075606946, "grad_norm": 0.00027806765865534544, "learning_rate": 8.162569766880309e-06, "loss": 0.0, "step": 26650 }, { "epoch": 0.18392929829523896, "grad_norm": 0.0001901968935271725, "learning_rate": 8.161879859534865e-06, "loss": 0.0, "step": 26660 }, { "epoch": 0.1839982890297833, "grad_norm": 2.6538904762674065e-07, "learning_rate": 8.161189952189422e-06, "loss": 0.0, "step": 26670 }, { "epoch": 0.18406727976432766, "grad_norm": 0.0, "learning_rate": 8.160500044843978e-06, "loss": 0.0, "step": 26680 }, { "epoch": 0.184136270498872, "grad_norm": 1.838854291058567e-09, "learning_rate": 8.159810137498535e-06, "loss": 0.0, "step": 26690 }, { "epoch": 0.18420526123341635, "grad_norm": 0.0, "learning_rate": 8.159120230153091e-06, "loss": 0.0006, "step": 26700 }, { "epoch": 0.1842742519679607, "grad_norm": 0.0, "learning_rate": 8.158430322807648e-06, "loss": 0.0, "step": 26710 }, { "epoch": 0.18434324270250504, "grad_norm": 4.077456026152504e-07, "learning_rate": 8.157740415462204e-06, "loss": 0.0, "step": 26720 }, { "epoch": 0.1844122334370494, "grad_norm": 0.0, "learning_rate": 8.15705050811676e-06, "loss": 0.0809, "step": 26730 }, { "epoch": 0.18448122417159377, "grad_norm": 0.0, "learning_rate": 8.156360600771317e-06, "loss": 0.0, "step": 26740 }, { "epoch": 0.1845502149061381, "grad_norm": 0.0, "learning_rate": 8.155670693425874e-06, "loss": 0.0013, "step": 26750 }, { "epoch": 0.18461920564068246, "grad_norm": 0.03360108286142349, "learning_rate": 8.15498078608043e-06, "loss": 0.0, "step": 26760 }, { "epoch": 0.18468819637522682, "grad_norm": 0.0, "learning_rate": 8.154290878734987e-06, "loss": 0.0, "step": 26770 }, { "epoch": 0.18475718710977115, "grad_norm": 3.782386848172337e-09, "learning_rate": 8.153600971389543e-06, "loss": 0.0, "step": 26780 }, { "epoch": 0.18482617784431551, "grad_norm": 0.0, "learning_rate": 8.1529110640441e-06, "loss": 0.0012, "step": 26790 }, { "epoch": 0.18489516857885985, "grad_norm": 0.0, "learning_rate": 8.152221156698656e-06, "loss": 0.0033, "step": 26800 }, { "epoch": 0.1849641593134042, "grad_norm": 3.249361384405347e-07, "learning_rate": 8.151531249353213e-06, "loss": 0.0004, "step": 26810 }, { "epoch": 0.18503315004794857, "grad_norm": 0.0, "learning_rate": 8.15084134200777e-06, "loss": 0.0, "step": 26820 }, { "epoch": 0.1851021407824929, "grad_norm": 0.0, "learning_rate": 8.150151434662326e-06, "loss": 0.0, "step": 26830 }, { "epoch": 0.18517113151703726, "grad_norm": 0.0, "learning_rate": 8.149461527316882e-06, "loss": 0.0, "step": 26840 }, { "epoch": 0.18524012225158162, "grad_norm": 0.0, "learning_rate": 8.148771619971439e-06, "loss": 0.0, "step": 26850 }, { "epoch": 0.18530911298612596, "grad_norm": 0.0, "learning_rate": 8.148081712625995e-06, "loss": 0.0, "step": 26860 }, { "epoch": 0.18537810372067032, "grad_norm": 0.0, "learning_rate": 8.147391805280552e-06, "loss": 0.0019, "step": 26870 }, { "epoch": 0.18544709445521468, "grad_norm": 0.0, "learning_rate": 8.146701897935108e-06, "loss": 0.0, "step": 26880 }, { "epoch": 0.185516085189759, "grad_norm": 0.0, "learning_rate": 8.146011990589665e-06, "loss": 0.0001, "step": 26890 }, { "epoch": 0.18558507592430337, "grad_norm": 0.0, "learning_rate": 8.145322083244221e-06, "loss": 0.0, "step": 26900 }, { "epoch": 0.1856540666588477, "grad_norm": 0.0, "learning_rate": 8.144632175898778e-06, "loss": 0.0, "step": 26910 }, { "epoch": 0.18572305739339207, "grad_norm": 0.0, "learning_rate": 8.143942268553334e-06, "loss": 0.1884, "step": 26920 }, { "epoch": 0.18579204812793643, "grad_norm": 0.0, "learning_rate": 8.14325236120789e-06, "loss": 0.0, "step": 26930 }, { "epoch": 0.18586103886248076, "grad_norm": 0.9882470965385437, "learning_rate": 8.142562453862447e-06, "loss": 0.0009, "step": 26940 }, { "epoch": 0.18593002959702512, "grad_norm": 0.0, "learning_rate": 8.141872546517004e-06, "loss": 0.0, "step": 26950 }, { "epoch": 0.18599902033156948, "grad_norm": 0.0, "learning_rate": 8.14118263917156e-06, "loss": 0.0, "step": 26960 }, { "epoch": 0.18606801106611381, "grad_norm": 0.0002240832254756242, "learning_rate": 8.140492731826117e-06, "loss": 0.0, "step": 26970 }, { "epoch": 0.18613700180065818, "grad_norm": 0.0, "learning_rate": 8.139802824480673e-06, "loss": 0.0935, "step": 26980 }, { "epoch": 0.18620599253520254, "grad_norm": 0.0, "learning_rate": 8.13911291713523e-06, "loss": 0.0, "step": 26990 }, { "epoch": 0.18627498326974687, "grad_norm": 0.0, "learning_rate": 8.138423009789786e-06, "loss": 0.0, "step": 27000 }, { "epoch": 0.18634397400429123, "grad_norm": 0.012020009569823742, "learning_rate": 8.137733102444343e-06, "loss": 0.0, "step": 27010 }, { "epoch": 0.18641296473883556, "grad_norm": 0.0, "learning_rate": 8.137043195098899e-06, "loss": 0.0, "step": 27020 }, { "epoch": 0.18648195547337992, "grad_norm": 0.00021059042774140835, "learning_rate": 8.136353287753455e-06, "loss": 0.0, "step": 27030 }, { "epoch": 0.18655094620792428, "grad_norm": 0.0, "learning_rate": 8.135663380408012e-06, "loss": 0.0539, "step": 27040 }, { "epoch": 0.18661993694246862, "grad_norm": 0.0, "learning_rate": 8.134973473062568e-06, "loss": 0.0, "step": 27050 }, { "epoch": 0.18668892767701298, "grad_norm": 0.0, "learning_rate": 8.134283565717125e-06, "loss": 0.0, "step": 27060 }, { "epoch": 0.18675791841155734, "grad_norm": 0.0, "learning_rate": 8.133593658371681e-06, "loss": 0.0418, "step": 27070 }, { "epoch": 0.18682690914610167, "grad_norm": 5.648707428917987e-06, "learning_rate": 8.132903751026238e-06, "loss": 0.0536, "step": 27080 }, { "epoch": 0.18689589988064603, "grad_norm": 0.021818069741129875, "learning_rate": 8.132213843680794e-06, "loss": 0.0, "step": 27090 }, { "epoch": 0.1869648906151904, "grad_norm": 0.0, "learning_rate": 8.131523936335351e-06, "loss": 0.0001, "step": 27100 }, { "epoch": 0.18703388134973473, "grad_norm": 0.0, "learning_rate": 8.130834028989907e-06, "loss": 0.0027, "step": 27110 }, { "epoch": 0.1871028720842791, "grad_norm": 0.0, "learning_rate": 8.130144121644464e-06, "loss": 0.0023, "step": 27120 }, { "epoch": 0.18717186281882342, "grad_norm": 0.0, "learning_rate": 8.12945421429902e-06, "loss": 0.0, "step": 27130 }, { "epoch": 0.18724085355336778, "grad_norm": 1.878991497505922e-05, "learning_rate": 8.128764306953577e-06, "loss": 0.0, "step": 27140 }, { "epoch": 0.18730984428791214, "grad_norm": 0.0, "learning_rate": 8.128074399608133e-06, "loss": 0.0032, "step": 27150 }, { "epoch": 0.18737883502245647, "grad_norm": 0.0, "learning_rate": 8.12738449226269e-06, "loss": 0.6387, "step": 27160 }, { "epoch": 0.18744782575700084, "grad_norm": 0.0, "learning_rate": 8.126694584917246e-06, "loss": 0.0, "step": 27170 }, { "epoch": 0.1875168164915452, "grad_norm": 0.0, "learning_rate": 8.126004677571803e-06, "loss": 0.0, "step": 27180 }, { "epoch": 0.18758580722608953, "grad_norm": 0.0, "learning_rate": 8.12531477022636e-06, "loss": 0.1798, "step": 27190 }, { "epoch": 0.1876547979606339, "grad_norm": 0.0, "learning_rate": 8.124624862880916e-06, "loss": 0.0, "step": 27200 }, { "epoch": 0.18772378869517825, "grad_norm": 0.0, "learning_rate": 8.123934955535472e-06, "loss": 0.0, "step": 27210 }, { "epoch": 0.18779277942972258, "grad_norm": 0.0, "learning_rate": 8.123245048190029e-06, "loss": 0.4548, "step": 27220 }, { "epoch": 0.18786177016426694, "grad_norm": 0.0, "learning_rate": 8.122555140844585e-06, "loss": 0.0024, "step": 27230 }, { "epoch": 0.18793076089881128, "grad_norm": 0.0, "learning_rate": 8.121865233499142e-06, "loss": 0.0, "step": 27240 }, { "epoch": 0.18799975163335564, "grad_norm": 3.6567162631229166e-09, "learning_rate": 8.121175326153698e-06, "loss": 0.0007, "step": 27250 }, { "epoch": 0.1880687423679, "grad_norm": 0.0, "learning_rate": 8.120485418808255e-06, "loss": 0.0182, "step": 27260 }, { "epoch": 0.18813773310244433, "grad_norm": 0.0, "learning_rate": 8.119795511462811e-06, "loss": 0.0, "step": 27270 }, { "epoch": 0.1882067238369887, "grad_norm": 0.0, "learning_rate": 8.119105604117368e-06, "loss": 0.0, "step": 27280 }, { "epoch": 0.18827571457153305, "grad_norm": 0.0, "learning_rate": 8.118415696771924e-06, "loss": 0.0, "step": 27290 }, { "epoch": 0.1883447053060774, "grad_norm": 0.0, "learning_rate": 8.11772578942648e-06, "loss": 0.0, "step": 27300 }, { "epoch": 0.18841369604062175, "grad_norm": 0.0, "learning_rate": 8.117035882081037e-06, "loss": 0.0, "step": 27310 }, { "epoch": 0.1884826867751661, "grad_norm": 3.6656784274669008e-09, "learning_rate": 8.116345974735594e-06, "loss": 0.0043, "step": 27320 }, { "epoch": 0.18855167750971044, "grad_norm": 0.0, "learning_rate": 8.11565606739015e-06, "loss": 0.0, "step": 27330 }, { "epoch": 0.1886206682442548, "grad_norm": 0.0, "learning_rate": 8.114966160044707e-06, "loss": 0.0, "step": 27340 }, { "epoch": 0.18868965897879914, "grad_norm": 0.0, "learning_rate": 8.114276252699263e-06, "loss": 0.5191, "step": 27350 }, { "epoch": 0.1887586497133435, "grad_norm": 0.0, "learning_rate": 8.11358634535382e-06, "loss": 0.0001, "step": 27360 }, { "epoch": 0.18882764044788786, "grad_norm": 0.0, "learning_rate": 8.112896438008376e-06, "loss": 0.0, "step": 27370 }, { "epoch": 0.1888966311824322, "grad_norm": 0.0, "learning_rate": 8.112206530662933e-06, "loss": 0.0, "step": 27380 }, { "epoch": 0.18896562191697655, "grad_norm": 0.0005686933291144669, "learning_rate": 8.111516623317489e-06, "loss": 0.0001, "step": 27390 }, { "epoch": 0.1890346126515209, "grad_norm": 0.0, "learning_rate": 8.110826715972046e-06, "loss": 0.0003, "step": 27400 }, { "epoch": 0.18910360338606524, "grad_norm": 0.0, "learning_rate": 8.110136808626602e-06, "loss": 0.0003, "step": 27410 }, { "epoch": 0.1891725941206096, "grad_norm": 0.0, "learning_rate": 8.109446901281159e-06, "loss": 0.0001, "step": 27420 }, { "epoch": 0.18924158485515397, "grad_norm": 1.7458006595916231e-06, "learning_rate": 8.108756993935715e-06, "loss": 0.0013, "step": 27430 }, { "epoch": 0.1893105755896983, "grad_norm": 0.0026687169447541237, "learning_rate": 8.108067086590272e-06, "loss": 0.0, "step": 27440 }, { "epoch": 0.18937956632424266, "grad_norm": 0.0, "learning_rate": 8.107377179244828e-06, "loss": 0.0, "step": 27450 }, { "epoch": 0.189448557058787, "grad_norm": 0.0, "learning_rate": 8.106687271899385e-06, "loss": 0.0, "step": 27460 }, { "epoch": 0.18951754779333135, "grad_norm": 3.820101119345054e-05, "learning_rate": 8.105997364553941e-06, "loss": 0.0, "step": 27470 }, { "epoch": 0.18958653852787571, "grad_norm": 1.2474804407247575e-06, "learning_rate": 8.105307457208498e-06, "loss": 0.0, "step": 27480 }, { "epoch": 0.18965552926242005, "grad_norm": 1.9436503517766823e-09, "learning_rate": 8.104617549863054e-06, "loss": 0.0021, "step": 27490 }, { "epoch": 0.1897245199969644, "grad_norm": 0.0, "learning_rate": 8.10392764251761e-06, "loss": 0.0, "step": 27500 }, { "epoch": 0.18979351073150877, "grad_norm": 0.0, "learning_rate": 8.103237735172167e-06, "loss": 0.0, "step": 27510 }, { "epoch": 0.1898625014660531, "grad_norm": 0.0, "learning_rate": 8.102547827826724e-06, "loss": 0.0, "step": 27520 }, { "epoch": 0.18993149220059746, "grad_norm": 0.0, "learning_rate": 8.10185792048128e-06, "loss": 0.0, "step": 27530 }, { "epoch": 0.19000048293514182, "grad_norm": 0.0, "learning_rate": 8.101168013135837e-06, "loss": 0.0, "step": 27540 }, { "epoch": 0.19006947366968616, "grad_norm": 0.0, "learning_rate": 8.100478105790393e-06, "loss": 0.0, "step": 27550 }, { "epoch": 0.19013846440423052, "grad_norm": 0.0, "learning_rate": 8.09978819844495e-06, "loss": 0.0, "step": 27560 }, { "epoch": 0.19020745513877485, "grad_norm": 8.863953127047353e-08, "learning_rate": 8.099098291099506e-06, "loss": 0.003, "step": 27570 }, { "epoch": 0.1902764458733192, "grad_norm": 0.0, "learning_rate": 8.098408383754062e-06, "loss": 0.0, "step": 27580 }, { "epoch": 0.19034543660786357, "grad_norm": 0.0, "learning_rate": 8.097718476408619e-06, "loss": 0.0, "step": 27590 }, { "epoch": 0.1904144273424079, "grad_norm": 0.0, "learning_rate": 8.097028569063175e-06, "loss": 0.0002, "step": 27600 }, { "epoch": 0.19048341807695227, "grad_norm": 0.0, "learning_rate": 8.096338661717732e-06, "loss": 0.0093, "step": 27610 }, { "epoch": 0.19055240881149663, "grad_norm": 0.0, "learning_rate": 8.095648754372288e-06, "loss": 0.3045, "step": 27620 }, { "epoch": 0.19062139954604096, "grad_norm": 0.0, "learning_rate": 8.094958847026845e-06, "loss": 0.0001, "step": 27630 }, { "epoch": 0.19069039028058532, "grad_norm": 1.0063113222713582e-06, "learning_rate": 8.094268939681401e-06, "loss": 0.0, "step": 27640 }, { "epoch": 0.19075938101512968, "grad_norm": 0.0, "learning_rate": 8.093579032335958e-06, "loss": 0.0, "step": 27650 }, { "epoch": 0.19082837174967401, "grad_norm": 0.0, "learning_rate": 8.092889124990514e-06, "loss": 0.0, "step": 27660 }, { "epoch": 0.19089736248421837, "grad_norm": 0.0, "learning_rate": 8.092199217645071e-06, "loss": 0.0, "step": 27670 }, { "epoch": 0.1909663532187627, "grad_norm": 0.0, "learning_rate": 8.091509310299627e-06, "loss": 0.0, "step": 27680 }, { "epoch": 0.19103534395330707, "grad_norm": 0.0, "learning_rate": 8.090819402954184e-06, "loss": 0.0, "step": 27690 }, { "epoch": 0.19110433468785143, "grad_norm": 0.10012844949960709, "learning_rate": 8.09012949560874e-06, "loss": 0.0, "step": 27700 }, { "epoch": 0.19117332542239576, "grad_norm": 662.7427368164062, "learning_rate": 8.089439588263297e-06, "loss": 0.4266, "step": 27710 }, { "epoch": 0.19124231615694012, "grad_norm": 0.0, "learning_rate": 8.088749680917853e-06, "loss": 0.0, "step": 27720 }, { "epoch": 0.19131130689148448, "grad_norm": 0.0, "learning_rate": 8.08805977357241e-06, "loss": 0.0, "step": 27730 }, { "epoch": 0.19138029762602882, "grad_norm": 99.03844451904297, "learning_rate": 8.087369866226966e-06, "loss": 0.0156, "step": 27740 }, { "epoch": 0.19144928836057318, "grad_norm": 0.07214906066656113, "learning_rate": 8.086679958881523e-06, "loss": 0.0, "step": 27750 }, { "epoch": 0.19151827909511754, "grad_norm": 0.029231760650873184, "learning_rate": 8.08599005153608e-06, "loss": 0.0, "step": 27760 }, { "epoch": 0.19158726982966187, "grad_norm": 0.0, "learning_rate": 8.085300144190636e-06, "loss": 0.001, "step": 27770 }, { "epoch": 0.19165626056420623, "grad_norm": 0.0, "learning_rate": 8.084610236845192e-06, "loss": 0.0005, "step": 27780 }, { "epoch": 0.19172525129875057, "grad_norm": 9.33956544031389e-05, "learning_rate": 8.083920329499749e-06, "loss": 0.6906, "step": 27790 }, { "epoch": 0.19179424203329493, "grad_norm": 0.0, "learning_rate": 8.083230422154305e-06, "loss": 0.0, "step": 27800 }, { "epoch": 0.1918632327678393, "grad_norm": 0.2688226103782654, "learning_rate": 8.082540514808862e-06, "loss": 0.0001, "step": 27810 }, { "epoch": 0.19193222350238362, "grad_norm": 2.284170705024735e-06, "learning_rate": 8.081850607463418e-06, "loss": 0.0, "step": 27820 }, { "epoch": 0.19200121423692798, "grad_norm": 0.0, "learning_rate": 8.081160700117975e-06, "loss": 0.0, "step": 27830 }, { "epoch": 0.19207020497147234, "grad_norm": 7.018526275714976e-07, "learning_rate": 8.080470792772531e-06, "loss": 0.0001, "step": 27840 }, { "epoch": 0.19213919570601667, "grad_norm": 0.0, "learning_rate": 8.079780885427088e-06, "loss": 0.0278, "step": 27850 }, { "epoch": 0.19220818644056104, "grad_norm": 0.0002476059889886528, "learning_rate": 8.079090978081644e-06, "loss": 0.0, "step": 27860 }, { "epoch": 0.1922771771751054, "grad_norm": 0.0, "learning_rate": 8.0784010707362e-06, "loss": 0.0, "step": 27870 }, { "epoch": 0.19234616790964973, "grad_norm": 0.0, "learning_rate": 8.077711163390757e-06, "loss": 0.0, "step": 27880 }, { "epoch": 0.1924151586441941, "grad_norm": 0.0, "learning_rate": 8.077021256045314e-06, "loss": 0.0002, "step": 27890 }, { "epoch": 0.19248414937873842, "grad_norm": 0.0, "learning_rate": 8.07633134869987e-06, "loss": 0.0004, "step": 27900 }, { "epoch": 0.19255314011328278, "grad_norm": 0.0, "learning_rate": 8.075641441354427e-06, "loss": 0.0, "step": 27910 }, { "epoch": 0.19262213084782714, "grad_norm": 0.0, "learning_rate": 8.074951534008983e-06, "loss": 0.0, "step": 27920 }, { "epoch": 0.19269112158237148, "grad_norm": 0.0, "learning_rate": 8.07426162666354e-06, "loss": 0.5188, "step": 27930 }, { "epoch": 0.19276011231691584, "grad_norm": 0.0, "learning_rate": 8.073571719318096e-06, "loss": 0.0, "step": 27940 }, { "epoch": 0.1928291030514602, "grad_norm": 0.9008578658103943, "learning_rate": 8.072881811972653e-06, "loss": 0.0002, "step": 27950 }, { "epoch": 0.19289809378600453, "grad_norm": 7.989507139427587e-05, "learning_rate": 8.072191904627209e-06, "loss": 0.0001, "step": 27960 }, { "epoch": 0.1929670845205489, "grad_norm": 0.0, "learning_rate": 8.071501997281766e-06, "loss": 0.0001, "step": 27970 }, { "epoch": 0.19303607525509325, "grad_norm": 0.0, "learning_rate": 8.070812089936322e-06, "loss": 0.0001, "step": 27980 }, { "epoch": 0.1931050659896376, "grad_norm": 0.0, "learning_rate": 8.070122182590879e-06, "loss": 0.0, "step": 27990 }, { "epoch": 0.19317405672418195, "grad_norm": 0.0, "learning_rate": 8.069432275245435e-06, "loss": 0.0, "step": 28000 }, { "epoch": 0.19324304745872628, "grad_norm": 5.028121449868195e-06, "learning_rate": 8.068742367899992e-06, "loss": 0.0, "step": 28010 }, { "epoch": 0.19331203819327064, "grad_norm": 0.0, "learning_rate": 8.068052460554548e-06, "loss": 0.0001, "step": 28020 }, { "epoch": 0.193381028927815, "grad_norm": 0.0, "learning_rate": 8.067362553209105e-06, "loss": 0.0, "step": 28030 }, { "epoch": 0.19345001966235933, "grad_norm": 0.0, "learning_rate": 8.066672645863661e-06, "loss": 0.0, "step": 28040 }, { "epoch": 0.1935190103969037, "grad_norm": 0.0, "learning_rate": 8.065982738518218e-06, "loss": 0.0, "step": 28050 }, { "epoch": 0.19358800113144806, "grad_norm": 0.0, "learning_rate": 8.065292831172774e-06, "loss": 0.0, "step": 28060 }, { "epoch": 0.1936569918659924, "grad_norm": 1.2853544149038498e-06, "learning_rate": 8.06460292382733e-06, "loss": 0.0, "step": 28070 }, { "epoch": 0.19372598260053675, "grad_norm": 0.006968051660805941, "learning_rate": 8.063913016481887e-06, "loss": 0.0, "step": 28080 }, { "epoch": 0.1937949733350811, "grad_norm": 0.0, "learning_rate": 8.063223109136443e-06, "loss": 0.4004, "step": 28090 }, { "epoch": 0.19386396406962544, "grad_norm": 0.006582055706530809, "learning_rate": 8.062533201791e-06, "loss": 0.0, "step": 28100 }, { "epoch": 0.1939329548041698, "grad_norm": 0.0, "learning_rate": 8.061843294445556e-06, "loss": 0.0179, "step": 28110 }, { "epoch": 0.19400194553871414, "grad_norm": 0.0, "learning_rate": 8.061153387100113e-06, "loss": 0.0283, "step": 28120 }, { "epoch": 0.1940709362732585, "grad_norm": 0.0, "learning_rate": 8.06046347975467e-06, "loss": 0.0, "step": 28130 }, { "epoch": 0.19413992700780286, "grad_norm": 0.0, "learning_rate": 8.059773572409226e-06, "loss": 0.0, "step": 28140 }, { "epoch": 0.1942089177423472, "grad_norm": 0.0, "learning_rate": 8.059083665063782e-06, "loss": 0.0, "step": 28150 }, { "epoch": 0.19427790847689155, "grad_norm": 3.1908464431762695, "learning_rate": 8.058393757718339e-06, "loss": 0.0015, "step": 28160 }, { "epoch": 0.19434689921143591, "grad_norm": 0.20757558941841125, "learning_rate": 8.057703850372895e-06, "loss": 0.0001, "step": 28170 }, { "epoch": 0.19441588994598025, "grad_norm": 0.0, "learning_rate": 8.057013943027452e-06, "loss": 0.021, "step": 28180 }, { "epoch": 0.1944848806805246, "grad_norm": 0.0, "learning_rate": 8.056324035682008e-06, "loss": 0.0, "step": 28190 }, { "epoch": 0.19455387141506897, "grad_norm": 0.0, "learning_rate": 8.055634128336565e-06, "loss": 0.0, "step": 28200 }, { "epoch": 0.1946228621496133, "grad_norm": 0.0, "learning_rate": 8.054944220991121e-06, "loss": 0.0, "step": 28210 }, { "epoch": 0.19469185288415766, "grad_norm": 0.0, "learning_rate": 8.054254313645678e-06, "loss": 0.0, "step": 28220 }, { "epoch": 0.194760843618702, "grad_norm": 0.0, "learning_rate": 8.053564406300234e-06, "loss": 0.0013, "step": 28230 }, { "epoch": 0.19482983435324636, "grad_norm": 0.0, "learning_rate": 8.052943489689335e-06, "loss": 1.6985, "step": 28240 }, { "epoch": 0.19489882508779072, "grad_norm": 0.0, "learning_rate": 8.052253582343891e-06, "loss": 0.0, "step": 28250 }, { "epoch": 0.19496781582233505, "grad_norm": 0.0, "learning_rate": 8.051563674998448e-06, "loss": 0.0, "step": 28260 }, { "epoch": 0.1950368065568794, "grad_norm": 0.0, "learning_rate": 8.050873767653004e-06, "loss": 0.0, "step": 28270 }, { "epoch": 0.19510579729142377, "grad_norm": 0.0, "learning_rate": 8.050183860307561e-06, "loss": 0.0, "step": 28280 }, { "epoch": 0.1951747880259681, "grad_norm": 0.0, "learning_rate": 8.049493952962117e-06, "loss": 0.473, "step": 28290 }, { "epoch": 0.19524377876051247, "grad_norm": 32.63266372680664, "learning_rate": 8.048804045616674e-06, "loss": 0.007, "step": 28300 }, { "epoch": 0.19531276949505683, "grad_norm": 0.0, "learning_rate": 8.048114138271232e-06, "loss": 0.0, "step": 28310 }, { "epoch": 0.19538176022960116, "grad_norm": 2.072190909530036e-06, "learning_rate": 8.047424230925787e-06, "loss": 0.0, "step": 28320 }, { "epoch": 0.19545075096414552, "grad_norm": 0.0, "learning_rate": 8.046734323580343e-06, "loss": 0.1123, "step": 28330 }, { "epoch": 0.19551974169868985, "grad_norm": 1.80930851456651e-06, "learning_rate": 8.0460444162349e-06, "loss": 0.0, "step": 28340 }, { "epoch": 0.1955887324332342, "grad_norm": 0.0, "learning_rate": 8.045354508889456e-06, "loss": 0.0, "step": 28350 }, { "epoch": 0.19565772316777857, "grad_norm": 0.0, "learning_rate": 8.044664601544013e-06, "loss": 0.0, "step": 28360 }, { "epoch": 0.1957267139023229, "grad_norm": 0.0, "learning_rate": 8.04397469419857e-06, "loss": 0.0039, "step": 28370 }, { "epoch": 0.19579570463686727, "grad_norm": 0.0, "learning_rate": 8.043284786853126e-06, "loss": 0.0, "step": 28380 }, { "epoch": 0.19586469537141163, "grad_norm": 0.0, "learning_rate": 8.042594879507682e-06, "loss": 0.0, "step": 28390 }, { "epoch": 0.19593368610595596, "grad_norm": 0.00044247324694879353, "learning_rate": 8.041904972162239e-06, "loss": 0.0, "step": 28400 }, { "epoch": 0.19600267684050032, "grad_norm": 0.0, "learning_rate": 8.041215064816795e-06, "loss": 0.0, "step": 28410 }, { "epoch": 0.19607166757504468, "grad_norm": 0.0, "learning_rate": 8.040525157471354e-06, "loss": 0.0, "step": 28420 }, { "epoch": 0.19614065830958902, "grad_norm": 1.752103031549268e-07, "learning_rate": 8.039835250125908e-06, "loss": 0.0, "step": 28430 }, { "epoch": 0.19620964904413338, "grad_norm": 2.3830211830500048e-06, "learning_rate": 8.039145342780465e-06, "loss": 0.0, "step": 28440 }, { "epoch": 0.1962786397786777, "grad_norm": 0.0, "learning_rate": 8.038455435435021e-06, "loss": 0.0, "step": 28450 }, { "epoch": 0.19634763051322207, "grad_norm": 0.0, "learning_rate": 8.037765528089578e-06, "loss": 0.0, "step": 28460 }, { "epoch": 0.19641662124776643, "grad_norm": 0.0, "learning_rate": 8.037075620744134e-06, "loss": 0.0, "step": 28470 }, { "epoch": 0.19648561198231076, "grad_norm": 0.0, "learning_rate": 8.03638571339869e-06, "loss": 0.0, "step": 28480 }, { "epoch": 0.19655460271685513, "grad_norm": 0.0, "learning_rate": 8.035695806053247e-06, "loss": 0.0083, "step": 28490 }, { "epoch": 0.1966235934513995, "grad_norm": 0.0, "learning_rate": 8.035005898707804e-06, "loss": 0.0004, "step": 28500 }, { "epoch": 0.19669258418594382, "grad_norm": 5.137346192896075e-08, "learning_rate": 8.03431599136236e-06, "loss": 0.0, "step": 28510 }, { "epoch": 0.19676157492048818, "grad_norm": 0.0, "learning_rate": 8.033626084016917e-06, "loss": 0.0, "step": 28520 }, { "epoch": 0.19683056565503254, "grad_norm": 0.0, "learning_rate": 8.032936176671475e-06, "loss": 0.0, "step": 28530 }, { "epoch": 0.19689955638957687, "grad_norm": 0.0, "learning_rate": 8.032246269326031e-06, "loss": 0.001, "step": 28540 }, { "epoch": 0.19696854712412123, "grad_norm": 0.0, "learning_rate": 8.031556361980586e-06, "loss": 0.0, "step": 28550 }, { "epoch": 0.19703753785866557, "grad_norm": 0.0, "learning_rate": 8.030866454635143e-06, "loss": 0.0, "step": 28560 }, { "epoch": 0.19710652859320993, "grad_norm": 2.1289222786435857e-05, "learning_rate": 8.0301765472897e-06, "loss": 0.001, "step": 28570 }, { "epoch": 0.1971755193277543, "grad_norm": 0.0, "learning_rate": 8.029486639944256e-06, "loss": 0.0, "step": 28580 }, { "epoch": 0.19724451006229862, "grad_norm": 0.0, "learning_rate": 8.028796732598812e-06, "loss": 0.0861, "step": 28590 }, { "epoch": 0.19731350079684298, "grad_norm": 0.0, "learning_rate": 8.028106825253369e-06, "loss": 0.0001, "step": 28600 }, { "epoch": 0.19738249153138734, "grad_norm": 0.0, "learning_rate": 8.027416917907925e-06, "loss": 0.0012, "step": 28610 }, { "epoch": 0.19745148226593168, "grad_norm": 0.0, "learning_rate": 8.026727010562482e-06, "loss": 0.2949, "step": 28620 }, { "epoch": 0.19752047300047604, "grad_norm": 0.0, "learning_rate": 8.026037103217038e-06, "loss": 0.0, "step": 28630 }, { "epoch": 0.1975894637350204, "grad_norm": 3.5208393001084914e-06, "learning_rate": 8.025347195871596e-06, "loss": 0.0001, "step": 28640 }, { "epoch": 0.19765845446956473, "grad_norm": 0.0, "learning_rate": 8.024657288526153e-06, "loss": 0.0, "step": 28650 }, { "epoch": 0.1977274452041091, "grad_norm": 10.6176118850708, "learning_rate": 8.023967381180708e-06, "loss": 0.0025, "step": 28660 }, { "epoch": 0.19779643593865343, "grad_norm": 0.0, "learning_rate": 8.023277473835264e-06, "loss": 0.0, "step": 28670 }, { "epoch": 0.1978654266731978, "grad_norm": 0.0, "learning_rate": 8.02258756648982e-06, "loss": 0.0, "step": 28680 }, { "epoch": 0.19793441740774215, "grad_norm": 0.0, "learning_rate": 8.021897659144377e-06, "loss": 0.0029, "step": 28690 }, { "epoch": 0.19800340814228648, "grad_norm": 0.0, "learning_rate": 8.021207751798934e-06, "loss": 0.0007, "step": 28700 }, { "epoch": 0.19807239887683084, "grad_norm": 0.0, "learning_rate": 8.02051784445349e-06, "loss": 0.0, "step": 28710 }, { "epoch": 0.1981413896113752, "grad_norm": 0.0002310090058017522, "learning_rate": 8.019827937108047e-06, "loss": 0.0, "step": 28720 }, { "epoch": 0.19821038034591953, "grad_norm": 0.0, "learning_rate": 8.019138029762603e-06, "loss": 0.0001, "step": 28730 }, { "epoch": 0.1982793710804639, "grad_norm": 0.0, "learning_rate": 8.01844812241716e-06, "loss": 0.0, "step": 28740 }, { "epoch": 0.19834836181500826, "grad_norm": 0.0, "learning_rate": 8.017758215071718e-06, "loss": 0.0, "step": 28750 }, { "epoch": 0.1984173525495526, "grad_norm": 0.0, "learning_rate": 8.017068307726274e-06, "loss": 0.0, "step": 28760 }, { "epoch": 0.19848634328409695, "grad_norm": 0.0, "learning_rate": 8.016378400380829e-06, "loss": 0.0, "step": 28770 }, { "epoch": 0.19855533401864128, "grad_norm": 1.0427751817587705e-07, "learning_rate": 8.015688493035385e-06, "loss": 0.0116, "step": 28780 }, { "epoch": 0.19862432475318564, "grad_norm": 0.0, "learning_rate": 8.014998585689942e-06, "loss": 0.0, "step": 28790 }, { "epoch": 0.19869331548773, "grad_norm": 0.0, "learning_rate": 8.014308678344498e-06, "loss": 0.2629, "step": 28800 }, { "epoch": 0.19876230622227434, "grad_norm": 0.0, "learning_rate": 8.013618770999055e-06, "loss": 0.0, "step": 28810 }, { "epoch": 0.1988312969568187, "grad_norm": 0.0, "learning_rate": 8.012928863653611e-06, "loss": 0.0, "step": 28820 }, { "epoch": 0.19890028769136306, "grad_norm": 4.594663778334507e-07, "learning_rate": 8.012238956308168e-06, "loss": 0.0002, "step": 28830 }, { "epoch": 0.1989692784259074, "grad_norm": 0.0002869892923627049, "learning_rate": 8.011549048962724e-06, "loss": 0.0232, "step": 28840 }, { "epoch": 0.19903826916045175, "grad_norm": 0.0, "learning_rate": 8.010859141617281e-06, "loss": 0.119, "step": 28850 }, { "epoch": 0.1991072598949961, "grad_norm": 2.7629999749478884e-05, "learning_rate": 8.010169234271839e-06, "loss": 0.0, "step": 28860 }, { "epoch": 0.19917625062954045, "grad_norm": 0.019157318398356438, "learning_rate": 8.009479326926396e-06, "loss": 0.0, "step": 28870 }, { "epoch": 0.1992452413640848, "grad_norm": 0.0, "learning_rate": 8.008789419580952e-06, "loss": 0.0001, "step": 28880 }, { "epoch": 0.19931423209862914, "grad_norm": 0.0, "learning_rate": 8.008099512235507e-06, "loss": 0.0, "step": 28890 }, { "epoch": 0.1993832228331735, "grad_norm": 0.0, "learning_rate": 8.007409604890063e-06, "loss": 0.0, "step": 28900 }, { "epoch": 0.19945221356771786, "grad_norm": 0.0, "learning_rate": 8.00671969754462e-06, "loss": 0.0, "step": 28910 }, { "epoch": 0.1995212043022622, "grad_norm": 20.248844146728516, "learning_rate": 8.006029790199176e-06, "loss": 0.0042, "step": 28920 }, { "epoch": 0.19959019503680656, "grad_norm": 0.0, "learning_rate": 8.005339882853733e-06, "loss": 0.0064, "step": 28930 }, { "epoch": 0.19965918577135092, "grad_norm": 0.0, "learning_rate": 8.00464997550829e-06, "loss": 0.0194, "step": 28940 }, { "epoch": 0.19972817650589525, "grad_norm": 0.0, "learning_rate": 8.003960068162846e-06, "loss": 0.0, "step": 28950 }, { "epoch": 0.1997971672404396, "grad_norm": 1.489995497649943e-06, "learning_rate": 8.003270160817402e-06, "loss": 0.0024, "step": 28960 }, { "epoch": 0.19986615797498397, "grad_norm": 0.0, "learning_rate": 8.00258025347196e-06, "loss": 0.0457, "step": 28970 }, { "epoch": 0.1999351487095283, "grad_norm": 0.0, "learning_rate": 8.001890346126517e-06, "loss": 0.0004, "step": 28980 }, { "epoch": 0.20000413944407266, "grad_norm": 0.0, "learning_rate": 8.001200438781073e-06, "loss": 0.0, "step": 28990 }, { "epoch": 0.200073130178617, "grad_norm": 0.00046699633821845055, "learning_rate": 8.000510531435628e-06, "loss": 0.0001, "step": 29000 }, { "epoch": 0.20014212091316136, "grad_norm": 0.0, "learning_rate": 7.999820624090185e-06, "loss": 0.0511, "step": 29010 }, { "epoch": 0.20021111164770572, "grad_norm": 0.0, "learning_rate": 7.999130716744741e-06, "loss": 0.0, "step": 29020 }, { "epoch": 0.20028010238225005, "grad_norm": 0.0, "learning_rate": 7.998440809399298e-06, "loss": 0.0, "step": 29030 }, { "epoch": 0.2003490931167944, "grad_norm": 0.0, "learning_rate": 7.997750902053854e-06, "loss": 0.0, "step": 29040 }, { "epoch": 0.20041808385133877, "grad_norm": 0.0, "learning_rate": 7.99706099470841e-06, "loss": 0.0977, "step": 29050 }, { "epoch": 0.2004870745858831, "grad_norm": 0.0, "learning_rate": 7.996371087362967e-06, "loss": 0.0, "step": 29060 }, { "epoch": 0.20055606532042747, "grad_norm": 0.0001643638388486579, "learning_rate": 7.995681180017524e-06, "loss": 0.548, "step": 29070 }, { "epoch": 0.20062505605497183, "grad_norm": 0.0, "learning_rate": 7.994991272672082e-06, "loss": 0.0, "step": 29080 }, { "epoch": 0.20069404678951616, "grad_norm": 0.006344607565551996, "learning_rate": 7.994301365326638e-06, "loss": 0.0, "step": 29090 }, { "epoch": 0.20076303752406052, "grad_norm": 0.0, "learning_rate": 7.993611457981195e-06, "loss": 0.0, "step": 29100 }, { "epoch": 0.20083202825860486, "grad_norm": 5.787363079434726e-06, "learning_rate": 7.99292155063575e-06, "loss": 0.0207, "step": 29110 }, { "epoch": 0.20090101899314922, "grad_norm": 0.0, "learning_rate": 7.992231643290306e-06, "loss": 0.0, "step": 29120 }, { "epoch": 0.20097000972769358, "grad_norm": 0.0, "learning_rate": 7.991541735944863e-06, "loss": 0.0, "step": 29130 }, { "epoch": 0.2010390004622379, "grad_norm": 0.0, "learning_rate": 7.990851828599419e-06, "loss": 0.0001, "step": 29140 }, { "epoch": 0.20110799119678227, "grad_norm": 7.075677422108129e-05, "learning_rate": 7.990161921253976e-06, "loss": 0.0, "step": 29150 }, { "epoch": 0.20117698193132663, "grad_norm": 0.0, "learning_rate": 7.989472013908532e-06, "loss": 0.0, "step": 29160 }, { "epoch": 0.20124597266587096, "grad_norm": 0.0, "learning_rate": 7.988782106563089e-06, "loss": 0.0005, "step": 29170 }, { "epoch": 0.20131496340041533, "grad_norm": 0.0, "learning_rate": 7.988092199217645e-06, "loss": 0.0, "step": 29180 }, { "epoch": 0.2013839541349597, "grad_norm": 0.0, "learning_rate": 7.987402291872203e-06, "loss": 0.0, "step": 29190 }, { "epoch": 0.20145294486950402, "grad_norm": 0.0, "learning_rate": 7.98671238452676e-06, "loss": 0.0, "step": 29200 }, { "epoch": 0.20152193560404838, "grad_norm": 0.0, "learning_rate": 7.986022477181316e-06, "loss": 0.0, "step": 29210 }, { "epoch": 0.20159092633859274, "grad_norm": 0.0, "learning_rate": 7.985332569835871e-06, "loss": 0.012, "step": 29220 }, { "epoch": 0.20165991707313707, "grad_norm": 0.0, "learning_rate": 7.984642662490428e-06, "loss": 0.0, "step": 29230 }, { "epoch": 0.20172890780768143, "grad_norm": 2.1180779796736715e-08, "learning_rate": 7.983952755144984e-06, "loss": 0.0, "step": 29240 }, { "epoch": 0.20179789854222577, "grad_norm": 0.0, "learning_rate": 7.98326284779954e-06, "loss": 0.0, "step": 29250 }, { "epoch": 0.20186688927677013, "grad_norm": 0.0, "learning_rate": 7.982572940454097e-06, "loss": 0.0, "step": 29260 }, { "epoch": 0.2019358800113145, "grad_norm": 0.0, "learning_rate": 7.981883033108654e-06, "loss": 0.0, "step": 29270 }, { "epoch": 0.20200487074585882, "grad_norm": 0.0, "learning_rate": 7.98119312576321e-06, "loss": 0.0, "step": 29280 }, { "epoch": 0.20207386148040318, "grad_norm": 0.0, "learning_rate": 7.980503218417766e-06, "loss": 0.0, "step": 29290 }, { "epoch": 0.20214285221494754, "grad_norm": 0.0, "learning_rate": 7.979813311072325e-06, "loss": 0.0, "step": 29300 }, { "epoch": 0.20221184294949188, "grad_norm": 0.0, "learning_rate": 7.979123403726881e-06, "loss": 0.0, "step": 29310 }, { "epoch": 0.20228083368403624, "grad_norm": 0.0, "learning_rate": 7.978433496381438e-06, "loss": 0.0, "step": 29320 }, { "epoch": 0.2023498244185806, "grad_norm": 0.16354624927043915, "learning_rate": 7.977743589035994e-06, "loss": 0.0, "step": 29330 }, { "epoch": 0.20241881515312493, "grad_norm": 0.0, "learning_rate": 7.977053681690549e-06, "loss": 0.0, "step": 29340 }, { "epoch": 0.2024878058876693, "grad_norm": 6.709145054628607e-06, "learning_rate": 7.976363774345105e-06, "loss": 0.0, "step": 29350 }, { "epoch": 0.20255679662221363, "grad_norm": 8.35243336041458e-05, "learning_rate": 7.975673866999662e-06, "loss": 0.0, "step": 29360 }, { "epoch": 0.20262578735675799, "grad_norm": 0.005395881831645966, "learning_rate": 7.974983959654218e-06, "loss": 0.0058, "step": 29370 }, { "epoch": 0.20269477809130235, "grad_norm": 0.0, "learning_rate": 7.974294052308775e-06, "loss": 0.0002, "step": 29380 }, { "epoch": 0.20276376882584668, "grad_norm": 3.590628017136055e-09, "learning_rate": 7.973604144963331e-06, "loss": 0.0, "step": 29390 }, { "epoch": 0.20283275956039104, "grad_norm": 0.0029196913819760084, "learning_rate": 7.972914237617888e-06, "loss": 0.0, "step": 29400 }, { "epoch": 0.2029017502949354, "grad_norm": 0.0, "learning_rate": 7.972224330272446e-06, "loss": 0.0, "step": 29410 }, { "epoch": 0.20297074102947973, "grad_norm": 2.9109278329997323e-05, "learning_rate": 7.971534422927003e-06, "loss": 0.0346, "step": 29420 }, { "epoch": 0.2030397317640241, "grad_norm": 0.0, "learning_rate": 7.970844515581559e-06, "loss": 0.0, "step": 29430 }, { "epoch": 0.20310872249856846, "grad_norm": 0.0, "learning_rate": 7.970154608236116e-06, "loss": 0.031, "step": 29440 }, { "epoch": 0.2031777132331128, "grad_norm": 0.0015341612743213773, "learning_rate": 7.96946470089067e-06, "loss": 0.0, "step": 29450 }, { "epoch": 0.20324670396765715, "grad_norm": 0.0, "learning_rate": 7.968774793545227e-06, "loss": 0.0001, "step": 29460 }, { "epoch": 0.20331569470220148, "grad_norm": 3.71329201698245e-06, "learning_rate": 7.968084886199783e-06, "loss": 0.0, "step": 29470 }, { "epoch": 0.20338468543674584, "grad_norm": 0.0, "learning_rate": 7.96739497885434e-06, "loss": 0.0, "step": 29480 }, { "epoch": 0.2034536761712902, "grad_norm": 0.0, "learning_rate": 7.966705071508896e-06, "loss": 0.0, "step": 29490 }, { "epoch": 0.20352266690583454, "grad_norm": 0.0, "learning_rate": 7.966015164163453e-06, "loss": 0.0, "step": 29500 }, { "epoch": 0.2035916576403789, "grad_norm": 0.009508379735052586, "learning_rate": 7.96532525681801e-06, "loss": 0.0, "step": 29510 }, { "epoch": 0.20366064837492326, "grad_norm": 0.0, "learning_rate": 7.964635349472567e-06, "loss": 0.0, "step": 29520 }, { "epoch": 0.2037296391094676, "grad_norm": 0.00326364953070879, "learning_rate": 7.963945442127124e-06, "loss": 0.0003, "step": 29530 }, { "epoch": 0.20379862984401195, "grad_norm": 0.0, "learning_rate": 7.96325553478168e-06, "loss": 0.0, "step": 29540 }, { "epoch": 0.2038676205785563, "grad_norm": 0.0, "learning_rate": 7.962565627436237e-06, "loss": 0.0, "step": 29550 }, { "epoch": 0.20393661131310065, "grad_norm": 0.0, "learning_rate": 7.961875720090792e-06, "loss": 0.0, "step": 29560 }, { "epoch": 0.204005602047645, "grad_norm": 5.616179166167967e-08, "learning_rate": 7.961185812745348e-06, "loss": 0.0, "step": 29570 }, { "epoch": 0.20407459278218934, "grad_norm": 0.0, "learning_rate": 7.960495905399905e-06, "loss": 0.0, "step": 29580 }, { "epoch": 0.2041435835167337, "grad_norm": 1.4619402463722508e-05, "learning_rate": 7.959805998054461e-06, "loss": 0.0, "step": 29590 }, { "epoch": 0.20421257425127806, "grad_norm": 0.0, "learning_rate": 7.959116090709018e-06, "loss": 0.0065, "step": 29600 }, { "epoch": 0.2042815649858224, "grad_norm": 0.0, "learning_rate": 7.958426183363574e-06, "loss": 0.0005, "step": 29610 }, { "epoch": 0.20435055572036676, "grad_norm": 0.0, "learning_rate": 7.95773627601813e-06, "loss": 0.0002, "step": 29620 }, { "epoch": 0.20441954645491112, "grad_norm": 0.0, "learning_rate": 7.957046368672689e-06, "loss": 0.0, "step": 29630 }, { "epoch": 0.20448853718945545, "grad_norm": 0.0, "learning_rate": 7.956356461327245e-06, "loss": 0.0002, "step": 29640 }, { "epoch": 0.2045575279239998, "grad_norm": 1.8629722209340116e-09, "learning_rate": 7.955666553981802e-06, "loss": 0.0, "step": 29650 }, { "epoch": 0.20462651865854417, "grad_norm": 0.0, "learning_rate": 7.954976646636358e-06, "loss": 0.0, "step": 29660 }, { "epoch": 0.2046955093930885, "grad_norm": 0.0, "learning_rate": 7.954286739290915e-06, "loss": 0.0, "step": 29670 }, { "epoch": 0.20476450012763286, "grad_norm": 0.0, "learning_rate": 7.95359683194547e-06, "loss": 0.0001, "step": 29680 }, { "epoch": 0.2048334908621772, "grad_norm": 0.0, "learning_rate": 7.952906924600026e-06, "loss": 0.0, "step": 29690 }, { "epoch": 0.20490248159672156, "grad_norm": 0.0, "learning_rate": 7.952217017254583e-06, "loss": 0.0, "step": 29700 }, { "epoch": 0.20497147233126592, "grad_norm": 0.0, "learning_rate": 7.951527109909139e-06, "loss": 0.0, "step": 29710 }, { "epoch": 0.20504046306581025, "grad_norm": 0.008679023943841457, "learning_rate": 7.950837202563696e-06, "loss": 0.0, "step": 29720 }, { "epoch": 0.2051094538003546, "grad_norm": 0.11770489811897278, "learning_rate": 7.950147295218252e-06, "loss": 0.0, "step": 29730 }, { "epoch": 0.20517844453489897, "grad_norm": 0.17468474805355072, "learning_rate": 7.94945738787281e-06, "loss": 0.0, "step": 29740 }, { "epoch": 0.2052474352694433, "grad_norm": 0.0, "learning_rate": 7.948767480527367e-06, "loss": 0.0, "step": 29750 }, { "epoch": 0.20531642600398767, "grad_norm": 533.59765625, "learning_rate": 7.948077573181923e-06, "loss": 0.602, "step": 29760 }, { "epoch": 0.20538541673853203, "grad_norm": 0.0, "learning_rate": 7.94738766583648e-06, "loss": 0.0, "step": 29770 }, { "epoch": 0.20545440747307636, "grad_norm": 0.0, "learning_rate": 7.946697758491036e-06, "loss": 0.004, "step": 29780 }, { "epoch": 0.20552339820762072, "grad_norm": 0.0, "learning_rate": 7.946007851145591e-06, "loss": 0.0, "step": 29790 }, { "epoch": 0.20559238894216506, "grad_norm": 0.0, "learning_rate": 7.945317943800148e-06, "loss": 0.0003, "step": 29800 }, { "epoch": 0.20566137967670942, "grad_norm": 0.0, "learning_rate": 7.944628036454704e-06, "loss": 0.0001, "step": 29810 }, { "epoch": 0.20573037041125378, "grad_norm": 1.8241812504982136e-09, "learning_rate": 7.94393812910926e-06, "loss": 0.0591, "step": 29820 }, { "epoch": 0.2057993611457981, "grad_norm": 4.153274858254008e-06, "learning_rate": 7.943248221763817e-06, "loss": 0.048, "step": 29830 }, { "epoch": 0.20586835188034247, "grad_norm": 0.0, "learning_rate": 7.942558314418373e-06, "loss": 0.0, "step": 29840 }, { "epoch": 0.20593734261488683, "grad_norm": 0.0, "learning_rate": 7.941868407072932e-06, "loss": 0.0, "step": 29850 }, { "epoch": 0.20600633334943116, "grad_norm": 0.0, "learning_rate": 7.941178499727488e-06, "loss": 0.0039, "step": 29860 }, { "epoch": 0.20607532408397553, "grad_norm": 0.0, "learning_rate": 7.940488592382045e-06, "loss": 0.0, "step": 29870 }, { "epoch": 0.20614431481851989, "grad_norm": 0.0, "learning_rate": 7.939798685036601e-06, "loss": 0.0, "step": 29880 }, { "epoch": 0.20621330555306422, "grad_norm": 0.0, "learning_rate": 7.939108777691158e-06, "loss": 0.0, "step": 29890 }, { "epoch": 0.20628229628760858, "grad_norm": 0.0, "learning_rate": 7.938418870345712e-06, "loss": 0.0, "step": 29900 }, { "epoch": 0.2063512870221529, "grad_norm": 4.2027363811314444e-09, "learning_rate": 7.937728963000269e-06, "loss": 0.0, "step": 29910 }, { "epoch": 0.20642027775669727, "grad_norm": 0.0, "learning_rate": 7.937039055654825e-06, "loss": 0.0, "step": 29920 }, { "epoch": 0.20648926849124163, "grad_norm": 0.0, "learning_rate": 7.936349148309382e-06, "loss": 0.9637, "step": 29930 }, { "epoch": 0.20655825922578597, "grad_norm": 0.0, "learning_rate": 7.935659240963938e-06, "loss": 0.0, "step": 29940 }, { "epoch": 0.20662724996033033, "grad_norm": 2.95427685159666e-06, "learning_rate": 7.934969333618495e-06, "loss": 0.0, "step": 29950 }, { "epoch": 0.2066962406948747, "grad_norm": 0.0, "learning_rate": 7.934279426273053e-06, "loss": 0.0, "step": 29960 }, { "epoch": 0.20676523142941902, "grad_norm": 0.0, "learning_rate": 7.93358951892761e-06, "loss": 0.0, "step": 29970 }, { "epoch": 0.20683422216396338, "grad_norm": 0.0, "learning_rate": 7.932899611582166e-06, "loss": 0.0, "step": 29980 }, { "epoch": 0.20690321289850774, "grad_norm": 0.0, "learning_rate": 7.932209704236723e-06, "loss": 0.0082, "step": 29990 }, { "epoch": 0.20697220363305208, "grad_norm": 0.0, "learning_rate": 7.931519796891279e-06, "loss": 0.0, "step": 30000 }, { "epoch": 0.20704119436759644, "grad_norm": 2.69940727548601e-08, "learning_rate": 7.930829889545835e-06, "loss": 0.0, "step": 30010 }, { "epoch": 0.20711018510214077, "grad_norm": 0.0, "learning_rate": 7.93013998220039e-06, "loss": 0.0, "step": 30020 }, { "epoch": 0.20717917583668513, "grad_norm": 0.0, "learning_rate": 7.929450074854947e-06, "loss": 0.0, "step": 30030 }, { "epoch": 0.2072481665712295, "grad_norm": 0.005969700403511524, "learning_rate": 7.928760167509503e-06, "loss": 0.0, "step": 30040 }, { "epoch": 0.20731715730577382, "grad_norm": 0.02649962157011032, "learning_rate": 7.92807026016406e-06, "loss": 0.0, "step": 30050 }, { "epoch": 0.20738614804031819, "grad_norm": 0.0, "learning_rate": 7.927380352818616e-06, "loss": 0.0, "step": 30060 }, { "epoch": 0.20745513877486255, "grad_norm": 0.0, "learning_rate": 7.926690445473174e-06, "loss": 0.0, "step": 30070 }, { "epoch": 0.20752412950940688, "grad_norm": 0.0, "learning_rate": 7.926000538127731e-06, "loss": 0.0, "step": 30080 }, { "epoch": 0.20759312024395124, "grad_norm": 0.0, "learning_rate": 7.925310630782287e-06, "loss": 0.0, "step": 30090 }, { "epoch": 0.2076621109784956, "grad_norm": 587.1788330078125, "learning_rate": 7.924620723436844e-06, "loss": 0.4207, "step": 30100 }, { "epoch": 0.20773110171303993, "grad_norm": 3.318666008667037e-09, "learning_rate": 7.9239308160914e-06, "loss": 0.0001, "step": 30110 }, { "epoch": 0.2078000924475843, "grad_norm": 0.0, "learning_rate": 7.923240908745957e-06, "loss": 0.0907, "step": 30120 }, { "epoch": 0.20786908318212863, "grad_norm": 0.0, "learning_rate": 7.922551001400512e-06, "loss": 0.0, "step": 30130 }, { "epoch": 0.207938073916673, "grad_norm": 0.0, "learning_rate": 7.921861094055068e-06, "loss": 0.0003, "step": 30140 }, { "epoch": 0.20800706465121735, "grad_norm": 0.0, "learning_rate": 7.921171186709625e-06, "loss": 0.0001, "step": 30150 }, { "epoch": 0.20807605538576168, "grad_norm": 0.012880227528512478, "learning_rate": 7.920481279364181e-06, "loss": 0.0, "step": 30160 }, { "epoch": 0.20814504612030604, "grad_norm": 0.0, "learning_rate": 7.919791372018738e-06, "loss": 0.0, "step": 30170 }, { "epoch": 0.2082140368548504, "grad_norm": 0.0, "learning_rate": 7.919101464673296e-06, "loss": 0.0002, "step": 30180 }, { "epoch": 0.20828302758939474, "grad_norm": 0.0, "learning_rate": 7.918411557327852e-06, "loss": 0.0323, "step": 30190 }, { "epoch": 0.2083520183239391, "grad_norm": 0.0, "learning_rate": 7.917721649982409e-06, "loss": 0.0, "step": 30200 }, { "epoch": 0.20842100905848346, "grad_norm": 0.0, "learning_rate": 7.917031742636965e-06, "loss": 0.0042, "step": 30210 }, { "epoch": 0.2084899997930278, "grad_norm": 0.5870230197906494, "learning_rate": 7.916341835291522e-06, "loss": 0.0001, "step": 30220 }, { "epoch": 0.20855899052757215, "grad_norm": 7.398648449452594e-05, "learning_rate": 7.915651927946078e-06, "loss": 0.0, "step": 30230 }, { "epoch": 0.20862798126211649, "grad_norm": 6.213717824721243e-06, "learning_rate": 7.914962020600633e-06, "loss": 0.0, "step": 30240 }, { "epoch": 0.20869697199666085, "grad_norm": 0.0, "learning_rate": 7.91427211325519e-06, "loss": 0.0, "step": 30250 }, { "epoch": 0.2087659627312052, "grad_norm": 0.0, "learning_rate": 7.913582205909746e-06, "loss": 0.0001, "step": 30260 }, { "epoch": 0.20883495346574954, "grad_norm": 1.6718652018354874e-09, "learning_rate": 7.912892298564303e-06, "loss": 0.0, "step": 30270 }, { "epoch": 0.2089039442002939, "grad_norm": 0.0, "learning_rate": 7.912202391218859e-06, "loss": 0.0, "step": 30280 }, { "epoch": 0.20897293493483826, "grad_norm": 0.0, "learning_rate": 7.911512483873417e-06, "loss": 0.0, "step": 30290 }, { "epoch": 0.2090419256693826, "grad_norm": 0.0, "learning_rate": 7.910822576527974e-06, "loss": 0.0, "step": 30300 }, { "epoch": 0.20911091640392696, "grad_norm": 0.0, "learning_rate": 7.91013266918253e-06, "loss": 0.0, "step": 30310 }, { "epoch": 0.20917990713847132, "grad_norm": 0.01005993876606226, "learning_rate": 7.909442761837087e-06, "loss": 0.0, "step": 30320 }, { "epoch": 0.20924889787301565, "grad_norm": 0.00029130533221177757, "learning_rate": 7.908752854491643e-06, "loss": 0.0004, "step": 30330 }, { "epoch": 0.20931788860756, "grad_norm": 0.0, "learning_rate": 7.9080629471462e-06, "loss": 0.0, "step": 30340 }, { "epoch": 0.20938687934210434, "grad_norm": 154.89215087890625, "learning_rate": 7.907373039800756e-06, "loss": 0.0574, "step": 30350 }, { "epoch": 0.2094558700766487, "grad_norm": 0.0031173587776720524, "learning_rate": 7.906683132455311e-06, "loss": 0.2479, "step": 30360 }, { "epoch": 0.20952486081119306, "grad_norm": 0.0, "learning_rate": 7.905993225109867e-06, "loss": 0.0196, "step": 30370 }, { "epoch": 0.2095938515457374, "grad_norm": 1.9878325474564917e-05, "learning_rate": 7.905303317764424e-06, "loss": 0.0, "step": 30380 }, { "epoch": 0.20966284228028176, "grad_norm": 0.00028508290415629745, "learning_rate": 7.90461341041898e-06, "loss": 0.0, "step": 30390 }, { "epoch": 0.20973183301482612, "grad_norm": 3.909709000993189e-09, "learning_rate": 7.903923503073539e-06, "loss": 0.0, "step": 30400 }, { "epoch": 0.20980082374937045, "grad_norm": 0.0, "learning_rate": 7.903233595728095e-06, "loss": 0.0, "step": 30410 }, { "epoch": 0.2098698144839148, "grad_norm": 1.0118680338067065e-09, "learning_rate": 7.902543688382652e-06, "loss": 0.0, "step": 30420 }, { "epoch": 0.20993880521845917, "grad_norm": 0.022737300023436546, "learning_rate": 7.901853781037208e-06, "loss": 0.0, "step": 30430 }, { "epoch": 0.2100077959530035, "grad_norm": 0.002234002575278282, "learning_rate": 7.901163873691765e-06, "loss": 0.0055, "step": 30440 }, { "epoch": 0.21007678668754787, "grad_norm": 0.0017814544262364507, "learning_rate": 7.900473966346321e-06, "loss": 0.0052, "step": 30450 }, { "epoch": 0.2101457774220922, "grad_norm": 0.0, "learning_rate": 7.899784059000878e-06, "loss": 0.0, "step": 30460 }, { "epoch": 0.21021476815663656, "grad_norm": 1.030958890914917, "learning_rate": 7.899094151655432e-06, "loss": 0.0003, "step": 30470 }, { "epoch": 0.21028375889118092, "grad_norm": 8.652215610638336e-10, "learning_rate": 7.898404244309989e-06, "loss": 0.0, "step": 30480 }, { "epoch": 0.21035274962572525, "grad_norm": 0.0, "learning_rate": 7.897714336964545e-06, "loss": 0.0017, "step": 30490 }, { "epoch": 0.21042174036026962, "grad_norm": 0.1631506383419037, "learning_rate": 7.897024429619102e-06, "loss": 0.0, "step": 30500 }, { "epoch": 0.21049073109481398, "grad_norm": 0.009636785835027695, "learning_rate": 7.89633452227366e-06, "loss": 0.0, "step": 30510 }, { "epoch": 0.2105597218293583, "grad_norm": 1.7102615990083336e-09, "learning_rate": 7.895644614928217e-06, "loss": 0.0, "step": 30520 }, { "epoch": 0.21062871256390267, "grad_norm": 0.0, "learning_rate": 7.894954707582773e-06, "loss": 0.0, "step": 30530 }, { "epoch": 0.21069770329844703, "grad_norm": 0.0049896929413080215, "learning_rate": 7.89426480023733e-06, "loss": 0.0, "step": 30540 }, { "epoch": 0.21076669403299136, "grad_norm": 0.0, "learning_rate": 7.893574892891886e-06, "loss": 0.0, "step": 30550 }, { "epoch": 0.21083568476753572, "grad_norm": 0.0, "learning_rate": 7.892884985546442e-06, "loss": 0.0, "step": 30560 }, { "epoch": 0.21090467550208006, "grad_norm": 0.0, "learning_rate": 7.892195078200999e-06, "loss": 0.3744, "step": 30570 }, { "epoch": 0.21097366623662442, "grad_norm": 1.631396884249625e-08, "learning_rate": 7.891505170855554e-06, "loss": 0.0, "step": 30580 }, { "epoch": 0.21104265697116878, "grad_norm": 0.0, "learning_rate": 7.89081526351011e-06, "loss": 0.0, "step": 30590 }, { "epoch": 0.2111116477057131, "grad_norm": 0.0, "learning_rate": 7.890125356164667e-06, "loss": 0.0, "step": 30600 }, { "epoch": 0.21118063844025747, "grad_norm": 0.0, "learning_rate": 7.889435448819223e-06, "loss": 0.0, "step": 30610 }, { "epoch": 0.21124962917480183, "grad_norm": 0.0, "learning_rate": 7.888745541473781e-06, "loss": 0.0, "step": 30620 }, { "epoch": 0.21131861990934617, "grad_norm": 0.0, "learning_rate": 7.888055634128338e-06, "loss": 0.0, "step": 30630 }, { "epoch": 0.21138761064389053, "grad_norm": 0.0, "learning_rate": 7.887365726782894e-06, "loss": 0.0002, "step": 30640 }, { "epoch": 0.2114566013784349, "grad_norm": 4.134491064178292e-06, "learning_rate": 7.886675819437451e-06, "loss": 0.0002, "step": 30650 }, { "epoch": 0.21152559211297922, "grad_norm": 0.0, "learning_rate": 7.885985912092007e-06, "loss": 0.0, "step": 30660 }, { "epoch": 0.21159458284752358, "grad_norm": 0.0, "learning_rate": 7.885296004746564e-06, "loss": 0.0, "step": 30670 }, { "epoch": 0.21166357358206792, "grad_norm": 0.004317484796047211, "learning_rate": 7.88460609740112e-06, "loss": 0.0, "step": 30680 }, { "epoch": 0.21173256431661228, "grad_norm": 0.0, "learning_rate": 7.883916190055675e-06, "loss": 0.0001, "step": 30690 }, { "epoch": 0.21180155505115664, "grad_norm": 0.0, "learning_rate": 7.883226282710232e-06, "loss": 0.0, "step": 30700 }, { "epoch": 0.21187054578570097, "grad_norm": 3.0680889722134452e-06, "learning_rate": 7.882536375364788e-06, "loss": 0.0, "step": 30710 }, { "epoch": 0.21193953652024533, "grad_norm": 3.896599487518415e-09, "learning_rate": 7.881846468019345e-06, "loss": 0.0, "step": 30720 }, { "epoch": 0.2120085272547897, "grad_norm": 0.035693928599357605, "learning_rate": 7.881156560673903e-06, "loss": 0.0, "step": 30730 }, { "epoch": 0.21207751798933402, "grad_norm": 4.6698460209881887e-07, "learning_rate": 7.88046665332846e-06, "loss": 0.0008, "step": 30740 }, { "epoch": 0.21214650872387839, "grad_norm": 0.0, "learning_rate": 7.879776745983016e-06, "loss": 0.0, "step": 30750 }, { "epoch": 0.21221549945842275, "grad_norm": 0.0, "learning_rate": 7.879086838637572e-06, "loss": 0.0, "step": 30760 }, { "epoch": 0.21228449019296708, "grad_norm": 0.0, "learning_rate": 7.878396931292129e-06, "loss": 0.0, "step": 30770 }, { "epoch": 0.21235348092751144, "grad_norm": 0.0, "learning_rate": 7.877707023946685e-06, "loss": 0.0003, "step": 30780 }, { "epoch": 0.21242247166205577, "grad_norm": 0.0, "learning_rate": 7.877017116601242e-06, "loss": 0.0006, "step": 30790 }, { "epoch": 0.21249146239660013, "grad_norm": 3.0766545933147427e-06, "learning_rate": 7.876327209255798e-06, "loss": 0.0, "step": 30800 }, { "epoch": 0.2125604531311445, "grad_norm": 0.0, "learning_rate": 7.875637301910353e-06, "loss": 0.0001, "step": 30810 }, { "epoch": 0.21262944386568883, "grad_norm": 3.128637672489276e-06, "learning_rate": 7.87494739456491e-06, "loss": 0.0, "step": 30820 }, { "epoch": 0.2126984346002332, "grad_norm": 0.5101665258407593, "learning_rate": 7.874257487219466e-06, "loss": 0.0001, "step": 30830 }, { "epoch": 0.21276742533477755, "grad_norm": 0.0, "learning_rate": 7.873567579874024e-06, "loss": 0.0188, "step": 30840 }, { "epoch": 0.21283641606932188, "grad_norm": 0.0, "learning_rate": 7.87287767252858e-06, "loss": 0.0007, "step": 30850 }, { "epoch": 0.21290540680386624, "grad_norm": 0.0, "learning_rate": 7.872187765183137e-06, "loss": 0.0, "step": 30860 }, { "epoch": 0.2129743975384106, "grad_norm": 0.00019063099171034992, "learning_rate": 7.871497857837694e-06, "loss": 0.0, "step": 30870 }, { "epoch": 0.21304338827295494, "grad_norm": 0.0, "learning_rate": 7.87080795049225e-06, "loss": 0.0, "step": 30880 }, { "epoch": 0.2131123790074993, "grad_norm": 0.0, "learning_rate": 7.870118043146807e-06, "loss": 0.0005, "step": 30890 }, { "epoch": 0.21318136974204363, "grad_norm": 0.0, "learning_rate": 7.869428135801363e-06, "loss": 0.0, "step": 30900 }, { "epoch": 0.213250360476588, "grad_norm": 0.0, "learning_rate": 7.86873822845592e-06, "loss": 0.0, "step": 30910 }, { "epoch": 0.21331935121113235, "grad_norm": 0.0, "learning_rate": 7.868048321110474e-06, "loss": 0.0, "step": 30920 }, { "epoch": 0.21338834194567669, "grad_norm": 0.0, "learning_rate": 7.867358413765031e-06, "loss": 0.0, "step": 30930 }, { "epoch": 0.21345733268022105, "grad_norm": 0.0, "learning_rate": 7.866668506419587e-06, "loss": 0.1472, "step": 30940 }, { "epoch": 0.2135263234147654, "grad_norm": 0.0, "learning_rate": 7.865978599074146e-06, "loss": 0.0027, "step": 30950 }, { "epoch": 0.21359531414930974, "grad_norm": 0.0, "learning_rate": 7.865288691728702e-06, "loss": 0.0, "step": 30960 }, { "epoch": 0.2136643048838541, "grad_norm": 0.0, "learning_rate": 7.864598784383259e-06, "loss": 0.0, "step": 30970 }, { "epoch": 0.21373329561839846, "grad_norm": 0.0, "learning_rate": 7.863908877037815e-06, "loss": 0.0, "step": 30980 }, { "epoch": 0.2138022863529428, "grad_norm": 0.0, "learning_rate": 7.863218969692372e-06, "loss": 0.0, "step": 30990 }, { "epoch": 0.21387127708748715, "grad_norm": 0.0, "learning_rate": 7.862529062346928e-06, "loss": 0.0633, "step": 31000 }, { "epoch": 0.2139402678220315, "grad_norm": 0.0, "learning_rate": 7.861839155001485e-06, "loss": 0.0, "step": 31010 }, { "epoch": 0.21400925855657585, "grad_norm": 0.0, "learning_rate": 7.861149247656041e-06, "loss": 0.0, "step": 31020 }, { "epoch": 0.2140782492911202, "grad_norm": 0.0, "learning_rate": 7.860459340310596e-06, "loss": 0.0003, "step": 31030 }, { "epoch": 0.21414724002566454, "grad_norm": 0.0, "learning_rate": 7.859769432965152e-06, "loss": 0.0, "step": 31040 }, { "epoch": 0.2142162307602089, "grad_norm": 4.306132177589461e-05, "learning_rate": 7.859079525619709e-06, "loss": 0.0, "step": 31050 }, { "epoch": 0.21428522149475326, "grad_norm": 0.0, "learning_rate": 7.858389618274267e-06, "loss": 0.0005, "step": 31060 }, { "epoch": 0.2143542122292976, "grad_norm": 0.0, "learning_rate": 7.857699710928823e-06, "loss": 0.0, "step": 31070 }, { "epoch": 0.21442320296384196, "grad_norm": 1.4645641385868657e-05, "learning_rate": 7.85700980358338e-06, "loss": 2.0531, "step": 31080 }, { "epoch": 0.21449219369838632, "grad_norm": 0.0, "learning_rate": 7.856319896237936e-06, "loss": 0.0, "step": 31090 }, { "epoch": 0.21456118443293065, "grad_norm": 0.00010842143092304468, "learning_rate": 7.855629988892493e-06, "loss": 0.0, "step": 31100 }, { "epoch": 0.214630175167475, "grad_norm": 0.0, "learning_rate": 7.85494008154705e-06, "loss": 0.0, "step": 31110 }, { "epoch": 0.21469916590201935, "grad_norm": 0.0, "learning_rate": 7.854250174201606e-06, "loss": 0.0001, "step": 31120 }, { "epoch": 0.2147681566365637, "grad_norm": 0.00031385431066155434, "learning_rate": 7.853560266856162e-06, "loss": 0.0, "step": 31130 }, { "epoch": 0.21483714737110807, "grad_norm": 0.0, "learning_rate": 7.852870359510719e-06, "loss": 0.0, "step": 31140 }, { "epoch": 0.2149061381056524, "grad_norm": 0.0, "learning_rate": 7.852180452165274e-06, "loss": 0.0, "step": 31150 }, { "epoch": 0.21497512884019676, "grad_norm": 0.0, "learning_rate": 7.85149054481983e-06, "loss": 0.0, "step": 31160 }, { "epoch": 0.21504411957474112, "grad_norm": 1.1168711353093386e-05, "learning_rate": 7.850800637474388e-06, "loss": 0.0, "step": 31170 }, { "epoch": 0.21511311030928545, "grad_norm": 0.0, "learning_rate": 7.850110730128945e-06, "loss": 0.0, "step": 31180 }, { "epoch": 0.21518210104382982, "grad_norm": 0.0, "learning_rate": 7.849420822783501e-06, "loss": 0.0, "step": 31190 }, { "epoch": 0.21525109177837418, "grad_norm": 0.0, "learning_rate": 7.848730915438058e-06, "loss": 0.0, "step": 31200 }, { "epoch": 0.2153200825129185, "grad_norm": 0.0, "learning_rate": 7.848041008092614e-06, "loss": 0.0, "step": 31210 }, { "epoch": 0.21538907324746287, "grad_norm": 0.0, "learning_rate": 7.84735110074717e-06, "loss": 0.0, "step": 31220 }, { "epoch": 0.2154580639820072, "grad_norm": 4.262914021779807e-09, "learning_rate": 7.846661193401727e-06, "loss": 0.2951, "step": 31230 }, { "epoch": 0.21552705471655156, "grad_norm": 0.0, "learning_rate": 7.845971286056284e-06, "loss": 0.0648, "step": 31240 }, { "epoch": 0.21559604545109592, "grad_norm": 0.0, "learning_rate": 7.84528137871084e-06, "loss": 0.0, "step": 31250 }, { "epoch": 0.21566503618564026, "grad_norm": 0.0, "learning_rate": 7.844591471365395e-06, "loss": 0.0, "step": 31260 }, { "epoch": 0.21573402692018462, "grad_norm": 353.7784423828125, "learning_rate": 7.843901564019952e-06, "loss": 0.0671, "step": 31270 }, { "epoch": 0.21580301765472898, "grad_norm": 2.469707993668635e-08, "learning_rate": 7.84321165667451e-06, "loss": 0.0, "step": 31280 }, { "epoch": 0.2158720083892733, "grad_norm": 0.0, "learning_rate": 7.842521749329066e-06, "loss": 0.0, "step": 31290 }, { "epoch": 0.21594099912381767, "grad_norm": 113.75282287597656, "learning_rate": 7.841831841983623e-06, "loss": 0.0248, "step": 31300 }, { "epoch": 0.21600998985836203, "grad_norm": 1.564249032526277e-05, "learning_rate": 7.84114193463818e-06, "loss": 0.0, "step": 31310 }, { "epoch": 0.21607898059290637, "grad_norm": 2.0569549974425172e-07, "learning_rate": 7.840452027292736e-06, "loss": 0.0007, "step": 31320 }, { "epoch": 0.21614797132745073, "grad_norm": 0.0845765545964241, "learning_rate": 7.839762119947292e-06, "loss": 0.0, "step": 31330 }, { "epoch": 0.21621696206199506, "grad_norm": 0.00021235295571386814, "learning_rate": 7.839072212601849e-06, "loss": 0.0, "step": 31340 }, { "epoch": 0.21628595279653942, "grad_norm": 0.0, "learning_rate": 7.838382305256405e-06, "loss": 0.0, "step": 31350 }, { "epoch": 0.21635494353108378, "grad_norm": 0.0, "learning_rate": 7.837692397910962e-06, "loss": 0.0, "step": 31360 }, { "epoch": 0.21642393426562812, "grad_norm": 0.0, "learning_rate": 7.837002490565516e-06, "loss": 0.0002, "step": 31370 }, { "epoch": 0.21649292500017248, "grad_norm": 0.0, "learning_rate": 7.836312583220073e-06, "loss": 0.0, "step": 31380 }, { "epoch": 0.21656191573471684, "grad_norm": 1.364437821393949e-06, "learning_rate": 7.835622675874631e-06, "loss": 0.0001, "step": 31390 }, { "epoch": 0.21663090646926117, "grad_norm": 0.0, "learning_rate": 7.834932768529188e-06, "loss": 0.0, "step": 31400 }, { "epoch": 0.21669989720380553, "grad_norm": 2.233626219094731e-06, "learning_rate": 7.834242861183744e-06, "loss": 0.0, "step": 31410 }, { "epoch": 0.2167688879383499, "grad_norm": 0.0, "learning_rate": 7.8335529538383e-06, "loss": 0.0, "step": 31420 }, { "epoch": 0.21683787867289422, "grad_norm": 0.3814917206764221, "learning_rate": 7.832863046492857e-06, "loss": 0.0002, "step": 31430 }, { "epoch": 0.21690686940743859, "grad_norm": 0.0, "learning_rate": 7.832173139147414e-06, "loss": 0.0, "step": 31440 }, { "epoch": 0.21697586014198292, "grad_norm": 0.0, "learning_rate": 7.83148323180197e-06, "loss": 0.0, "step": 31450 }, { "epoch": 0.21704485087652728, "grad_norm": 0.0, "learning_rate": 7.830793324456527e-06, "loss": 0.0, "step": 31460 }, { "epoch": 0.21711384161107164, "grad_norm": 0.0, "learning_rate": 7.830103417111083e-06, "loss": 0.0, "step": 31470 }, { "epoch": 0.21718283234561597, "grad_norm": 3.546709592683328e-09, "learning_rate": 7.82941350976564e-06, "loss": 0.0, "step": 31480 }, { "epoch": 0.21725182308016033, "grad_norm": 0.0007365705096162856, "learning_rate": 7.828723602420194e-06, "loss": 0.012, "step": 31490 }, { "epoch": 0.2173208138147047, "grad_norm": 8.523781125369112e-10, "learning_rate": 7.828033695074753e-06, "loss": 0.0, "step": 31500 }, { "epoch": 0.21738980454924903, "grad_norm": 0.0, "learning_rate": 7.827343787729309e-06, "loss": 0.293, "step": 31510 }, { "epoch": 0.2174587952837934, "grad_norm": 2.8272589247535507e-07, "learning_rate": 7.826653880383866e-06, "loss": 0.0001, "step": 31520 }, { "epoch": 0.21752778601833775, "grad_norm": 0.0, "learning_rate": 7.825963973038422e-06, "loss": 0.0007, "step": 31530 }, { "epoch": 0.21759677675288208, "grad_norm": 0.0, "learning_rate": 7.825274065692979e-06, "loss": 0.0, "step": 31540 }, { "epoch": 0.21766576748742644, "grad_norm": 3.286144760750176e-07, "learning_rate": 7.824584158347535e-06, "loss": 0.0025, "step": 31550 }, { "epoch": 0.21773475822197078, "grad_norm": 1.3250197525849217e-06, "learning_rate": 7.823894251002092e-06, "loss": 0.0001, "step": 31560 }, { "epoch": 0.21780374895651514, "grad_norm": 0.0, "learning_rate": 7.823204343656648e-06, "loss": 0.0113, "step": 31570 }, { "epoch": 0.2178727396910595, "grad_norm": 0.0, "learning_rate": 7.822514436311204e-06, "loss": 0.0, "step": 31580 }, { "epoch": 0.21794173042560383, "grad_norm": 0.0, "learning_rate": 7.821824528965761e-06, "loss": 0.0, "step": 31590 }, { "epoch": 0.2180107211601482, "grad_norm": 0.0, "learning_rate": 7.821134621620316e-06, "loss": 0.0, "step": 31600 }, { "epoch": 0.21807971189469255, "grad_norm": 0.0, "learning_rate": 7.820444714274874e-06, "loss": 0.0693, "step": 31610 }, { "epoch": 0.21814870262923688, "grad_norm": 3.377792381797917e-05, "learning_rate": 7.81975480692943e-06, "loss": 0.0, "step": 31620 }, { "epoch": 0.21821769336378125, "grad_norm": 0.0, "learning_rate": 7.819064899583987e-06, "loss": 0.0, "step": 31630 }, { "epoch": 0.2182866840983256, "grad_norm": 0.00022295626695267856, "learning_rate": 7.818374992238543e-06, "loss": 0.0, "step": 31640 }, { "epoch": 0.21835567483286994, "grad_norm": 0.12362496554851532, "learning_rate": 7.8176850848931e-06, "loss": 0.0, "step": 31650 }, { "epoch": 0.2184246655674143, "grad_norm": 0.0, "learning_rate": 7.816995177547656e-06, "loss": 0.0, "step": 31660 }, { "epoch": 0.21849365630195863, "grad_norm": 0.0, "learning_rate": 7.816305270202213e-06, "loss": 0.0, "step": 31670 }, { "epoch": 0.218562647036503, "grad_norm": 0.0, "learning_rate": 7.81561536285677e-06, "loss": 0.0, "step": 31680 }, { "epoch": 0.21863163777104735, "grad_norm": 0.00035919054062105715, "learning_rate": 7.814925455511326e-06, "loss": 0.07, "step": 31690 }, { "epoch": 0.2187006285055917, "grad_norm": 0.0, "learning_rate": 7.814235548165882e-06, "loss": 0.0, "step": 31700 }, { "epoch": 0.21876961924013605, "grad_norm": 8.322115172632039e-05, "learning_rate": 7.813545640820437e-06, "loss": 0.0, "step": 31710 }, { "epoch": 0.2188386099746804, "grad_norm": 0.0, "learning_rate": 7.812855733474995e-06, "loss": 0.0, "step": 31720 }, { "epoch": 0.21890760070922474, "grad_norm": 0.0, "learning_rate": 7.812165826129552e-06, "loss": 0.0, "step": 31730 }, { "epoch": 0.2189765914437691, "grad_norm": 0.0, "learning_rate": 7.811475918784108e-06, "loss": 0.0, "step": 31740 }, { "epoch": 0.21904558217831346, "grad_norm": 5.785098892374663e-06, "learning_rate": 7.810786011438665e-06, "loss": 0.0028, "step": 31750 }, { "epoch": 0.2191145729128578, "grad_norm": 0.0, "learning_rate": 7.810096104093221e-06, "loss": 0.0, "step": 31760 }, { "epoch": 0.21918356364740216, "grad_norm": 0.0, "learning_rate": 7.809406196747778e-06, "loss": 0.0, "step": 31770 }, { "epoch": 0.21925255438194652, "grad_norm": 0.0, "learning_rate": 7.808716289402334e-06, "loss": 0.0, "step": 31780 }, { "epoch": 0.21932154511649085, "grad_norm": 0.0, "learning_rate": 7.80802638205689e-06, "loss": 0.0, "step": 31790 }, { "epoch": 0.2193905358510352, "grad_norm": 0.0017589419148862362, "learning_rate": 7.807336474711447e-06, "loss": 0.0, "step": 31800 }, { "epoch": 0.21945952658557955, "grad_norm": 0.0, "learning_rate": 7.806646567366004e-06, "loss": 0.0, "step": 31810 }, { "epoch": 0.2195285173201239, "grad_norm": 8.556105268731073e-10, "learning_rate": 7.80595666002056e-06, "loss": 0.0001, "step": 31820 }, { "epoch": 0.21959750805466827, "grad_norm": 0.0034977784380316734, "learning_rate": 7.805266752675117e-06, "loss": 0.0, "step": 31830 }, { "epoch": 0.2196664987892126, "grad_norm": 0.0, "learning_rate": 7.804576845329673e-06, "loss": 0.0, "step": 31840 }, { "epoch": 0.21973548952375696, "grad_norm": 0.0001161557956947945, "learning_rate": 7.80388693798423e-06, "loss": 0.0, "step": 31850 }, { "epoch": 0.21980448025830132, "grad_norm": 0.0, "learning_rate": 7.803197030638786e-06, "loss": 0.0003, "step": 31860 }, { "epoch": 0.21987347099284565, "grad_norm": 0.0, "learning_rate": 7.802507123293343e-06, "loss": 0.0, "step": 31870 }, { "epoch": 0.21994246172739002, "grad_norm": 0.0, "learning_rate": 7.8018172159479e-06, "loss": 0.0001, "step": 31880 }, { "epoch": 0.22001145246193438, "grad_norm": 0.07350252568721771, "learning_rate": 7.801127308602456e-06, "loss": 0.0161, "step": 31890 }, { "epoch": 0.2200804431964787, "grad_norm": 9.503233755481233e-10, "learning_rate": 7.800437401257012e-06, "loss": 0.0028, "step": 31900 }, { "epoch": 0.22014943393102307, "grad_norm": 4.8008246267272625e-06, "learning_rate": 7.799747493911569e-06, "loss": 0.0008, "step": 31910 }, { "epoch": 0.2202184246655674, "grad_norm": 3.972463247237101e-09, "learning_rate": 7.799057586566125e-06, "loss": 0.0, "step": 31920 }, { "epoch": 0.22028741540011176, "grad_norm": 0.0, "learning_rate": 7.798367679220682e-06, "loss": 0.0, "step": 31930 }, { "epoch": 0.22035640613465612, "grad_norm": 0.0, "learning_rate": 7.797677771875238e-06, "loss": 0.0, "step": 31940 }, { "epoch": 0.22042539686920046, "grad_norm": 0.0, "learning_rate": 7.796987864529795e-06, "loss": 0.0, "step": 31950 }, { "epoch": 0.22049438760374482, "grad_norm": 0.0, "learning_rate": 7.796297957184351e-06, "loss": 0.0, "step": 31960 }, { "epoch": 0.22056337833828918, "grad_norm": 0.00018339813686907291, "learning_rate": 7.795608049838908e-06, "loss": 0.0, "step": 31970 }, { "epoch": 0.2206323690728335, "grad_norm": 1.0926111571052388e-07, "learning_rate": 7.794918142493464e-06, "loss": 0.0, "step": 31980 }, { "epoch": 0.22070135980737787, "grad_norm": 0.0, "learning_rate": 7.79422823514802e-06, "loss": 0.0, "step": 31990 }, { "epoch": 0.22077035054192223, "grad_norm": 8.809608698356897e-05, "learning_rate": 7.793538327802577e-06, "loss": 0.0, "step": 32000 }, { "epoch": 0.22083934127646657, "grad_norm": 3.545181357367255e-07, "learning_rate": 7.792848420457134e-06, "loss": 0.0, "step": 32010 }, { "epoch": 0.22090833201101093, "grad_norm": 0.0, "learning_rate": 7.79215851311169e-06, "loss": 0.0017, "step": 32020 }, { "epoch": 0.22097732274555526, "grad_norm": 0.09344573318958282, "learning_rate": 7.791468605766247e-06, "loss": 0.0, "step": 32030 }, { "epoch": 0.22104631348009962, "grad_norm": 0.0, "learning_rate": 7.790778698420803e-06, "loss": 0.0, "step": 32040 }, { "epoch": 0.22111530421464398, "grad_norm": 0.0, "learning_rate": 7.79008879107536e-06, "loss": 0.0, "step": 32050 }, { "epoch": 0.22118429494918831, "grad_norm": 0.0, "learning_rate": 7.789398883729916e-06, "loss": 0.0, "step": 32060 }, { "epoch": 0.22125328568373268, "grad_norm": 6.706374645233154, "learning_rate": 7.788708976384473e-06, "loss": 0.0011, "step": 32070 }, { "epoch": 0.22132227641827704, "grad_norm": 0.0, "learning_rate": 7.788019069039029e-06, "loss": 0.0, "step": 32080 }, { "epoch": 0.22139126715282137, "grad_norm": 0.0, "learning_rate": 7.787329161693585e-06, "loss": 0.0, "step": 32090 }, { "epoch": 0.22146025788736573, "grad_norm": 0.0, "learning_rate": 7.786639254348142e-06, "loss": 0.0, "step": 32100 }, { "epoch": 0.2215292486219101, "grad_norm": 0.0, "learning_rate": 7.785949347002698e-06, "loss": 0.0, "step": 32110 }, { "epoch": 0.22159823935645442, "grad_norm": 0.0, "learning_rate": 7.785259439657255e-06, "loss": 0.0001, "step": 32120 }, { "epoch": 0.22166723009099878, "grad_norm": 0.0, "learning_rate": 7.784569532311811e-06, "loss": 0.0013, "step": 32130 }, { "epoch": 0.22173622082554312, "grad_norm": 0.0, "learning_rate": 7.783879624966368e-06, "loss": 0.0, "step": 32140 }, { "epoch": 0.22180521156008748, "grad_norm": 0.0, "learning_rate": 7.783189717620924e-06, "loss": 0.0, "step": 32150 }, { "epoch": 0.22187420229463184, "grad_norm": 0.0, "learning_rate": 7.782499810275481e-06, "loss": 0.0389, "step": 32160 }, { "epoch": 0.22194319302917617, "grad_norm": 0.0, "learning_rate": 7.781809902930037e-06, "loss": 0.0, "step": 32170 }, { "epoch": 0.22201218376372053, "grad_norm": 0.0, "learning_rate": 7.781119995584594e-06, "loss": 0.0, "step": 32180 }, { "epoch": 0.2220811744982649, "grad_norm": 0.0, "learning_rate": 7.78043008823915e-06, "loss": 0.0, "step": 32190 }, { "epoch": 0.22215016523280923, "grad_norm": 0.0, "learning_rate": 7.779740180893707e-06, "loss": 0.0, "step": 32200 }, { "epoch": 0.2222191559673536, "grad_norm": 0.0, "learning_rate": 7.779050273548263e-06, "loss": 0.0008, "step": 32210 }, { "epoch": 0.22228814670189795, "grad_norm": 0.0, "learning_rate": 7.77836036620282e-06, "loss": 0.0, "step": 32220 }, { "epoch": 0.22235713743644228, "grad_norm": 0.0, "learning_rate": 7.777670458857376e-06, "loss": 0.0002, "step": 32230 }, { "epoch": 0.22242612817098664, "grad_norm": 0.0, "learning_rate": 7.776980551511933e-06, "loss": 0.0, "step": 32240 }, { "epoch": 0.22249511890553098, "grad_norm": 0.0, "learning_rate": 7.77629064416649e-06, "loss": 0.0, "step": 32250 }, { "epoch": 0.22256410964007534, "grad_norm": 0.0, "learning_rate": 7.775600736821046e-06, "loss": 0.0003, "step": 32260 }, { "epoch": 0.2226331003746197, "grad_norm": 0.00026333125424571335, "learning_rate": 7.774910829475602e-06, "loss": 0.0, "step": 32270 }, { "epoch": 0.22270209110916403, "grad_norm": 0.0, "learning_rate": 7.774220922130159e-06, "loss": 0.0, "step": 32280 }, { "epoch": 0.2227710818437084, "grad_norm": 0.0, "learning_rate": 7.773531014784715e-06, "loss": 0.0123, "step": 32290 }, { "epoch": 0.22284007257825275, "grad_norm": 1.7667003703536466e-05, "learning_rate": 7.772841107439272e-06, "loss": 0.0, "step": 32300 }, { "epoch": 0.22290906331279708, "grad_norm": 591.3378295898438, "learning_rate": 7.772151200093828e-06, "loss": 0.1389, "step": 32310 }, { "epoch": 0.22297805404734145, "grad_norm": 0.0, "learning_rate": 7.771461292748385e-06, "loss": 0.0746, "step": 32320 }, { "epoch": 0.2230470447818858, "grad_norm": 0.0, "learning_rate": 7.770771385402941e-06, "loss": 0.0317, "step": 32330 }, { "epoch": 0.22311603551643014, "grad_norm": 0.0005945048178546131, "learning_rate": 7.770081478057498e-06, "loss": 0.0, "step": 32340 }, { "epoch": 0.2231850262509745, "grad_norm": 0.0, "learning_rate": 7.769391570712054e-06, "loss": 0.0, "step": 32350 }, { "epoch": 0.22325401698551883, "grad_norm": 0.02066963165998459, "learning_rate": 7.76870166336661e-06, "loss": 0.0, "step": 32360 }, { "epoch": 0.2233230077200632, "grad_norm": 0.0, "learning_rate": 7.768011756021167e-06, "loss": 0.0006, "step": 32370 }, { "epoch": 0.22339199845460755, "grad_norm": 0.0, "learning_rate": 7.767321848675724e-06, "loss": 0.0001, "step": 32380 }, { "epoch": 0.2234609891891519, "grad_norm": 0.0, "learning_rate": 7.76663194133028e-06, "loss": 0.001, "step": 32390 }, { "epoch": 0.22352997992369625, "grad_norm": 0.7026162147521973, "learning_rate": 7.765942033984837e-06, "loss": 0.0011, "step": 32400 }, { "epoch": 0.2235989706582406, "grad_norm": 0.0, "learning_rate": 7.765252126639393e-06, "loss": 0.0, "step": 32410 }, { "epoch": 0.22366796139278494, "grad_norm": 0.0, "learning_rate": 7.76456221929395e-06, "loss": 0.0, "step": 32420 }, { "epoch": 0.2237369521273293, "grad_norm": 1.3782530913886148e-05, "learning_rate": 7.763872311948506e-06, "loss": 0.0, "step": 32430 }, { "epoch": 0.22380594286187366, "grad_norm": 0.0, "learning_rate": 7.763182404603063e-06, "loss": 0.0001, "step": 32440 }, { "epoch": 0.223874933596418, "grad_norm": 0.0, "learning_rate": 7.762492497257619e-06, "loss": 0.004, "step": 32450 }, { "epoch": 0.22394392433096236, "grad_norm": 7.390298378595617e-06, "learning_rate": 7.761802589912176e-06, "loss": 0.0, "step": 32460 }, { "epoch": 0.2240129150655067, "grad_norm": 2.8584359768046852e-08, "learning_rate": 7.761112682566732e-06, "loss": 0.0, "step": 32470 }, { "epoch": 0.22408190580005105, "grad_norm": 0.0, "learning_rate": 7.760422775221289e-06, "loss": 0.0, "step": 32480 }, { "epoch": 0.2241508965345954, "grad_norm": 0.0, "learning_rate": 7.759732867875845e-06, "loss": 0.0, "step": 32490 }, { "epoch": 0.22421988726913974, "grad_norm": 0.0, "learning_rate": 7.759042960530402e-06, "loss": 0.0001, "step": 32500 }, { "epoch": 0.2242888780036841, "grad_norm": 0.0, "learning_rate": 7.758353053184958e-06, "loss": 0.0, "step": 32510 }, { "epoch": 0.22435786873822847, "grad_norm": 0.0, "learning_rate": 7.757663145839515e-06, "loss": 0.0, "step": 32520 }, { "epoch": 0.2244268594727728, "grad_norm": 0.0, "learning_rate": 7.756973238494071e-06, "loss": 0.0, "step": 32530 }, { "epoch": 0.22449585020731716, "grad_norm": 0.0, "learning_rate": 7.756283331148628e-06, "loss": 0.0, "step": 32540 }, { "epoch": 0.22456484094186152, "grad_norm": 0.0, "learning_rate": 7.755593423803184e-06, "loss": 0.0, "step": 32550 }, { "epoch": 0.22463383167640585, "grad_norm": 0.0, "learning_rate": 7.75490351645774e-06, "loss": 0.0003, "step": 32560 }, { "epoch": 0.22470282241095021, "grad_norm": 0.0, "learning_rate": 7.754213609112297e-06, "loss": 0.0, "step": 32570 }, { "epoch": 0.22477181314549455, "grad_norm": 0.0, "learning_rate": 7.753523701766854e-06, "loss": 0.0, "step": 32580 }, { "epoch": 0.2248408038800389, "grad_norm": 0.0, "learning_rate": 7.75283379442141e-06, "loss": 0.0, "step": 32590 }, { "epoch": 0.22490979461458327, "grad_norm": 0.0, "learning_rate": 7.752143887075966e-06, "loss": 0.0, "step": 32600 }, { "epoch": 0.2249787853491276, "grad_norm": 0.0009406035533174872, "learning_rate": 7.751453979730523e-06, "loss": 0.1137, "step": 32610 }, { "epoch": 0.22504777608367196, "grad_norm": 0.0, "learning_rate": 7.75076407238508e-06, "loss": 0.0, "step": 32620 }, { "epoch": 0.22511676681821632, "grad_norm": 0.0, "learning_rate": 7.750074165039636e-06, "loss": 0.0, "step": 32630 }, { "epoch": 0.22518575755276066, "grad_norm": 0.4979203939437866, "learning_rate": 7.749384257694192e-06, "loss": 0.0001, "step": 32640 }, { "epoch": 0.22525474828730502, "grad_norm": 0.0, "learning_rate": 7.748694350348749e-06, "loss": 0.0, "step": 32650 }, { "epoch": 0.22532373902184938, "grad_norm": 0.0, "learning_rate": 7.748004443003305e-06, "loss": 0.0, "step": 32660 }, { "epoch": 0.2253927297563937, "grad_norm": 0.00028845269116573036, "learning_rate": 7.747314535657862e-06, "loss": 0.0, "step": 32670 }, { "epoch": 0.22546172049093807, "grad_norm": 0.0, "learning_rate": 7.746624628312418e-06, "loss": 0.0001, "step": 32680 }, { "epoch": 0.2255307112254824, "grad_norm": 0.0, "learning_rate": 7.745934720966975e-06, "loss": 0.0, "step": 32690 }, { "epoch": 0.22559970196002677, "grad_norm": 0.0, "learning_rate": 7.745244813621531e-06, "loss": 0.0, "step": 32700 }, { "epoch": 0.22566869269457113, "grad_norm": 2.1430544450140587e-07, "learning_rate": 7.744554906276088e-06, "loss": 0.0, "step": 32710 }, { "epoch": 0.22573768342911546, "grad_norm": 0.0, "learning_rate": 7.743864998930644e-06, "loss": 0.0, "step": 32720 }, { "epoch": 0.22580667416365982, "grad_norm": 0.0, "learning_rate": 7.743175091585201e-06, "loss": 0.0861, "step": 32730 }, { "epoch": 0.22587566489820418, "grad_norm": 0.0, "learning_rate": 7.742485184239757e-06, "loss": 0.0, "step": 32740 }, { "epoch": 0.22594465563274851, "grad_norm": 0.0, "learning_rate": 7.741795276894314e-06, "loss": 0.0, "step": 32750 }, { "epoch": 0.22601364636729288, "grad_norm": 1.7973080801425567e-08, "learning_rate": 7.74110536954887e-06, "loss": 0.088, "step": 32760 }, { "epoch": 0.22608263710183724, "grad_norm": 0.0, "learning_rate": 7.740415462203427e-06, "loss": 0.0337, "step": 32770 }, { "epoch": 0.22615162783638157, "grad_norm": 6.393340612476095e-08, "learning_rate": 7.739725554857983e-06, "loss": 0.0, "step": 32780 }, { "epoch": 0.22622061857092593, "grad_norm": 0.0, "learning_rate": 7.73903564751254e-06, "loss": 0.0, "step": 32790 }, { "epoch": 0.22628960930547026, "grad_norm": 0.0, "learning_rate": 7.738345740167096e-06, "loss": 0.0136, "step": 32800 }, { "epoch": 0.22635860004001462, "grad_norm": 0.014421519823372364, "learning_rate": 7.737655832821653e-06, "loss": 0.0, "step": 32810 }, { "epoch": 0.22642759077455898, "grad_norm": 0.0, "learning_rate": 7.73696592547621e-06, "loss": 0.0, "step": 32820 }, { "epoch": 0.22649658150910332, "grad_norm": 0.0, "learning_rate": 7.736276018130766e-06, "loss": 0.0, "step": 32830 }, { "epoch": 0.22656557224364768, "grad_norm": 0.0037522935308516026, "learning_rate": 7.735586110785322e-06, "loss": 0.001, "step": 32840 }, { "epoch": 0.22663456297819204, "grad_norm": 0.0, "learning_rate": 7.734896203439879e-06, "loss": 0.0, "step": 32850 }, { "epoch": 0.22670355371273637, "grad_norm": 0.0, "learning_rate": 7.734206296094435e-06, "loss": 0.0, "step": 32860 }, { "epoch": 0.22677254444728073, "grad_norm": 0.00032297128927893937, "learning_rate": 7.733516388748992e-06, "loss": 0.0, "step": 32870 }, { "epoch": 0.2268415351818251, "grad_norm": 6.696091531921411e-06, "learning_rate": 7.732826481403548e-06, "loss": 0.0, "step": 32880 }, { "epoch": 0.22691052591636943, "grad_norm": 0.0, "learning_rate": 7.732136574058105e-06, "loss": 0.0, "step": 32890 }, { "epoch": 0.2269795166509138, "grad_norm": 0.0, "learning_rate": 7.731446666712661e-06, "loss": 0.0, "step": 32900 }, { "epoch": 0.22704850738545812, "grad_norm": 0.0030558868311345577, "learning_rate": 7.730756759367218e-06, "loss": 0.0118, "step": 32910 }, { "epoch": 0.22711749812000248, "grad_norm": 0.0, "learning_rate": 7.730066852021774e-06, "loss": 0.0, "step": 32920 }, { "epoch": 0.22718648885454684, "grad_norm": 0.0, "learning_rate": 7.72937694467633e-06, "loss": 0.0, "step": 32930 }, { "epoch": 0.22725547958909117, "grad_norm": 8.771455213718582e-06, "learning_rate": 7.728687037330887e-06, "loss": 0.0, "step": 32940 }, { "epoch": 0.22732447032363554, "grad_norm": 0.0024318480864167213, "learning_rate": 7.727997129985444e-06, "loss": 0.0, "step": 32950 }, { "epoch": 0.2273934610581799, "grad_norm": 0.0, "learning_rate": 7.72730722264e-06, "loss": 0.0, "step": 32960 }, { "epoch": 0.22746245179272423, "grad_norm": 0.0, "learning_rate": 7.726617315294557e-06, "loss": 0.0, "step": 32970 }, { "epoch": 0.2275314425272686, "grad_norm": 0.0, "learning_rate": 7.725927407949113e-06, "loss": 0.4219, "step": 32980 }, { "epoch": 0.22760043326181295, "grad_norm": 0.0, "learning_rate": 7.72523750060367e-06, "loss": 0.0001, "step": 32990 }, { "epoch": 0.22766942399635728, "grad_norm": 0.0, "learning_rate": 7.724547593258226e-06, "loss": 0.0002, "step": 33000 }, { "epoch": 0.22773841473090164, "grad_norm": 0.00014790760178584605, "learning_rate": 7.723857685912783e-06, "loss": 0.0001, "step": 33010 }, { "epoch": 0.22780740546544598, "grad_norm": 0.0, "learning_rate": 7.723167778567339e-06, "loss": 0.0, "step": 33020 }, { "epoch": 0.22787639619999034, "grad_norm": 0.0, "learning_rate": 7.722477871221896e-06, "loss": 0.0002, "step": 33030 }, { "epoch": 0.2279453869345347, "grad_norm": 475.7007751464844, "learning_rate": 7.721787963876452e-06, "loss": 0.3242, "step": 33040 }, { "epoch": 0.22801437766907903, "grad_norm": 0.0, "learning_rate": 7.721098056531009e-06, "loss": 0.0, "step": 33050 }, { "epoch": 0.2280833684036234, "grad_norm": 0.0, "learning_rate": 7.720408149185565e-06, "loss": 0.0, "step": 33060 }, { "epoch": 0.22815235913816775, "grad_norm": 0.0, "learning_rate": 7.719718241840122e-06, "loss": 0.0, "step": 33070 }, { "epoch": 0.2282213498727121, "grad_norm": 0.0, "learning_rate": 7.719028334494678e-06, "loss": 0.0, "step": 33080 }, { "epoch": 0.22829034060725645, "grad_norm": 0.0, "learning_rate": 7.718338427149235e-06, "loss": 0.3926, "step": 33090 }, { "epoch": 0.2283593313418008, "grad_norm": 0.0, "learning_rate": 7.717648519803791e-06, "loss": 0.0, "step": 33100 }, { "epoch": 0.22842832207634514, "grad_norm": 0.0, "learning_rate": 7.716958612458348e-06, "loss": 0.0028, "step": 33110 }, { "epoch": 0.2284973128108895, "grad_norm": 9.887799468089042e-10, "learning_rate": 7.716268705112904e-06, "loss": 0.0, "step": 33120 }, { "epoch": 0.22856630354543384, "grad_norm": 0.0, "learning_rate": 7.71557879776746e-06, "loss": 0.0, "step": 33130 }, { "epoch": 0.2286352942799782, "grad_norm": 0.0, "learning_rate": 7.714888890422017e-06, "loss": 0.0001, "step": 33140 }, { "epoch": 0.22870428501452256, "grad_norm": 2.5723660979082297e-09, "learning_rate": 7.714198983076573e-06, "loss": 0.0003, "step": 33150 }, { "epoch": 0.2287732757490669, "grad_norm": 0.0, "learning_rate": 7.71350907573113e-06, "loss": 0.0, "step": 33160 }, { "epoch": 0.22884226648361125, "grad_norm": 2.1958415530320963e-08, "learning_rate": 7.712819168385686e-06, "loss": 0.0, "step": 33170 }, { "epoch": 0.2289112572181556, "grad_norm": 310.5282897949219, "learning_rate": 7.712129261040243e-06, "loss": 0.0534, "step": 33180 }, { "epoch": 0.22898024795269994, "grad_norm": 0.0, "learning_rate": 7.7114393536948e-06, "loss": 0.0, "step": 33190 }, { "epoch": 0.2290492386872443, "grad_norm": 0.0, "learning_rate": 7.710749446349356e-06, "loss": 0.0, "step": 33200 }, { "epoch": 0.22911822942178867, "grad_norm": 0.0, "learning_rate": 7.710059539003912e-06, "loss": 0.0, "step": 33210 }, { "epoch": 0.229187220156333, "grad_norm": 0.0, "learning_rate": 7.709369631658469e-06, "loss": 0.0, "step": 33220 }, { "epoch": 0.22925621089087736, "grad_norm": 0.0, "learning_rate": 7.708679724313025e-06, "loss": 0.0, "step": 33230 }, { "epoch": 0.2293252016254217, "grad_norm": 0.0, "learning_rate": 7.707989816967582e-06, "loss": 0.0, "step": 33240 }, { "epoch": 0.22939419235996605, "grad_norm": 0.0, "learning_rate": 7.707299909622138e-06, "loss": 0.0002, "step": 33250 }, { "epoch": 0.22946318309451041, "grad_norm": 0.0, "learning_rate": 7.706610002276695e-06, "loss": 0.0, "step": 33260 }, { "epoch": 0.22953217382905475, "grad_norm": 8.9177415452113e-10, "learning_rate": 7.705920094931251e-06, "loss": 0.0038, "step": 33270 }, { "epoch": 0.2296011645635991, "grad_norm": 3.8129115864649066e-08, "learning_rate": 7.705230187585808e-06, "loss": 0.0, "step": 33280 }, { "epoch": 0.22967015529814347, "grad_norm": 0.000523974362295121, "learning_rate": 7.704540280240364e-06, "loss": 0.048, "step": 33290 }, { "epoch": 0.2297391460326878, "grad_norm": 0.0, "learning_rate": 7.70385037289492e-06, "loss": 0.0022, "step": 33300 }, { "epoch": 0.22980813676723216, "grad_norm": 0.0, "learning_rate": 7.703160465549477e-06, "loss": 0.0, "step": 33310 }, { "epoch": 0.22987712750177652, "grad_norm": 3.273544564308395e-07, "learning_rate": 7.702470558204034e-06, "loss": 0.0001, "step": 33320 }, { "epoch": 0.22994611823632086, "grad_norm": 0.0, "learning_rate": 7.70178065085859e-06, "loss": 0.0, "step": 33330 }, { "epoch": 0.23001510897086522, "grad_norm": 3.958035904361168e-06, "learning_rate": 7.701090743513147e-06, "loss": 0.0, "step": 33340 }, { "epoch": 0.23008409970540955, "grad_norm": 0.0, "learning_rate": 7.700400836167703e-06, "loss": 0.0001, "step": 33350 }, { "epoch": 0.2301530904399539, "grad_norm": 0.0, "learning_rate": 7.69971092882226e-06, "loss": 0.0, "step": 33360 }, { "epoch": 0.23022208117449827, "grad_norm": 0.0, "learning_rate": 7.699021021476816e-06, "loss": 0.124, "step": 33370 }, { "epoch": 0.2302910719090426, "grad_norm": 0.0009574118885211647, "learning_rate": 7.698400104865917e-06, "loss": 0.1053, "step": 33380 }, { "epoch": 0.23036006264358697, "grad_norm": 3.641016732558455e-08, "learning_rate": 7.697710197520473e-06, "loss": 0.0, "step": 33390 }, { "epoch": 0.23042905337813133, "grad_norm": 2.59740762231786e-09, "learning_rate": 7.69702029017503e-06, "loss": 0.0, "step": 33400 }, { "epoch": 0.23049804411267566, "grad_norm": 0.0, "learning_rate": 7.696330382829586e-06, "loss": 0.0, "step": 33410 }, { "epoch": 0.23056703484722002, "grad_norm": 1.5054170944495127e-05, "learning_rate": 7.695640475484143e-06, "loss": 0.0, "step": 33420 }, { "epoch": 0.23063602558176438, "grad_norm": 0.0, "learning_rate": 7.6949505681387e-06, "loss": 0.0001, "step": 33430 }, { "epoch": 0.23070501631630871, "grad_norm": 0.0, "learning_rate": 7.694260660793256e-06, "loss": 0.0, "step": 33440 }, { "epoch": 0.23077400705085307, "grad_norm": 0.0, "learning_rate": 7.693570753447812e-06, "loss": 0.0, "step": 33450 }, { "epoch": 0.2308429977853974, "grad_norm": 0.0, "learning_rate": 7.692880846102369e-06, "loss": 0.0, "step": 33460 }, { "epoch": 0.23091198851994177, "grad_norm": 0.00023554077779408544, "learning_rate": 7.692190938756925e-06, "loss": 0.0, "step": 33470 }, { "epoch": 0.23098097925448613, "grad_norm": 0.0, "learning_rate": 7.691501031411482e-06, "loss": 0.0, "step": 33480 }, { "epoch": 0.23104996998903046, "grad_norm": 0.0, "learning_rate": 7.690811124066038e-06, "loss": 0.0, "step": 33490 }, { "epoch": 0.23111896072357482, "grad_norm": 0.0, "learning_rate": 7.690121216720595e-06, "loss": 0.0, "step": 33500 }, { "epoch": 0.23118795145811918, "grad_norm": 1.8539085431257263e-05, "learning_rate": 7.689431309375151e-06, "loss": 0.0, "step": 33510 }, { "epoch": 0.23125694219266352, "grad_norm": 0.0, "learning_rate": 7.688741402029708e-06, "loss": 0.0331, "step": 33520 }, { "epoch": 0.23132593292720788, "grad_norm": 0.0, "learning_rate": 7.688051494684264e-06, "loss": 0.0, "step": 33530 }, { "epoch": 0.23139492366175224, "grad_norm": 0.0, "learning_rate": 7.68736158733882e-06, "loss": 0.0061, "step": 33540 }, { "epoch": 0.23146391439629657, "grad_norm": 0.0, "learning_rate": 7.686671679993377e-06, "loss": 0.0213, "step": 33550 }, { "epoch": 0.23153290513084093, "grad_norm": 9.179851878649004e-10, "learning_rate": 7.685981772647934e-06, "loss": 0.0, "step": 33560 }, { "epoch": 0.23160189586538527, "grad_norm": 3.3695576462378085e-07, "learning_rate": 7.68529186530249e-06, "loss": 0.0, "step": 33570 }, { "epoch": 0.23167088659992963, "grad_norm": 0.0, "learning_rate": 7.684601957957047e-06, "loss": 0.0, "step": 33580 }, { "epoch": 0.231739877334474, "grad_norm": 0.0, "learning_rate": 7.683912050611603e-06, "loss": 0.0001, "step": 33590 }, { "epoch": 0.23180886806901832, "grad_norm": 0.0, "learning_rate": 7.68322214326616e-06, "loss": 0.0, "step": 33600 }, { "epoch": 0.23187785880356268, "grad_norm": 0.0, "learning_rate": 7.682532235920716e-06, "loss": 0.0, "step": 33610 }, { "epoch": 0.23194684953810704, "grad_norm": 0.013300956226885319, "learning_rate": 7.681842328575273e-06, "loss": 0.0, "step": 33620 }, { "epoch": 0.23201584027265137, "grad_norm": 0.0, "learning_rate": 7.68115242122983e-06, "loss": 0.0, "step": 33630 }, { "epoch": 0.23208483100719574, "grad_norm": 0.0, "learning_rate": 7.680462513884386e-06, "loss": 0.0311, "step": 33640 }, { "epoch": 0.2321538217417401, "grad_norm": 0.0, "learning_rate": 7.679772606538942e-06, "loss": 0.0, "step": 33650 }, { "epoch": 0.23222281247628443, "grad_norm": 0.0, "learning_rate": 7.679082699193499e-06, "loss": 0.0, "step": 33660 }, { "epoch": 0.2322918032108288, "grad_norm": 0.0, "learning_rate": 7.678392791848055e-06, "loss": 0.0012, "step": 33670 }, { "epoch": 0.23236079394537312, "grad_norm": 0.0, "learning_rate": 7.677702884502612e-06, "loss": 0.0, "step": 33680 }, { "epoch": 0.23242978467991748, "grad_norm": 2.088952344436734e-09, "learning_rate": 7.677012977157168e-06, "loss": 0.0, "step": 33690 }, { "epoch": 0.23249877541446184, "grad_norm": 0.0, "learning_rate": 7.676323069811725e-06, "loss": 0.0, "step": 33700 }, { "epoch": 0.23256776614900618, "grad_norm": 0.0, "learning_rate": 7.675633162466281e-06, "loss": 0.0, "step": 33710 }, { "epoch": 0.23263675688355054, "grad_norm": 8.314611221749146e-10, "learning_rate": 7.674943255120838e-06, "loss": 0.0, "step": 33720 }, { "epoch": 0.2327057476180949, "grad_norm": 0.0, "learning_rate": 7.674253347775394e-06, "loss": 0.0002, "step": 33730 }, { "epoch": 0.23277473835263923, "grad_norm": 0.0, "learning_rate": 7.67356344042995e-06, "loss": 0.0, "step": 33740 }, { "epoch": 0.2328437290871836, "grad_norm": 8.990783673112901e-10, "learning_rate": 7.672873533084507e-06, "loss": 0.0, "step": 33750 }, { "epoch": 0.23291271982172795, "grad_norm": 0.00036688672844320536, "learning_rate": 7.672183625739064e-06, "loss": 0.3103, "step": 33760 }, { "epoch": 0.2329817105562723, "grad_norm": 1.7620725989431207e-09, "learning_rate": 7.67149371839362e-06, "loss": 0.0, "step": 33770 }, { "epoch": 0.23305070129081665, "grad_norm": 0.0, "learning_rate": 7.670803811048177e-06, "loss": 0.0, "step": 33780 }, { "epoch": 0.23311969202536098, "grad_norm": 6.612060019506316e-07, "learning_rate": 7.670113903702735e-06, "loss": 0.0, "step": 33790 }, { "epoch": 0.23318868275990534, "grad_norm": 0.0, "learning_rate": 7.66942399635729e-06, "loss": 0.0172, "step": 33800 }, { "epoch": 0.2332576734944497, "grad_norm": 0.0, "learning_rate": 7.668734089011846e-06, "loss": 0.0, "step": 33810 }, { "epoch": 0.23332666422899404, "grad_norm": 0.0, "learning_rate": 7.668044181666403e-06, "loss": 0.2999, "step": 33820 }, { "epoch": 0.2333956549635384, "grad_norm": 0.0, "learning_rate": 7.667354274320959e-06, "loss": 0.0, "step": 33830 }, { "epoch": 0.23346464569808276, "grad_norm": 0.0, "learning_rate": 7.666664366975515e-06, "loss": 0.0, "step": 33840 }, { "epoch": 0.2335336364326271, "grad_norm": 34.06905746459961, "learning_rate": 7.665974459630072e-06, "loss": 0.0141, "step": 33850 }, { "epoch": 0.23360262716717145, "grad_norm": 0.00011056315997848287, "learning_rate": 7.665284552284628e-06, "loss": 0.0514, "step": 33860 }, { "epoch": 0.2336716179017158, "grad_norm": 0.0, "learning_rate": 7.664594644939185e-06, "loss": 0.0, "step": 33870 }, { "epoch": 0.23374060863626014, "grad_norm": 0.0, "learning_rate": 7.663904737593741e-06, "loss": 0.012, "step": 33880 }, { "epoch": 0.2338095993708045, "grad_norm": 5.0882640323379746e-08, "learning_rate": 7.663214830248298e-06, "loss": 0.0, "step": 33890 }, { "epoch": 0.23387859010534884, "grad_norm": 0.0, "learning_rate": 7.662524922902856e-06, "loss": 0.0008, "step": 33900 }, { "epoch": 0.2339475808398932, "grad_norm": 0.3880530893802643, "learning_rate": 7.661835015557411e-06, "loss": 0.0001, "step": 33910 }, { "epoch": 0.23401657157443756, "grad_norm": 8.318660547956824e-05, "learning_rate": 7.661145108211967e-06, "loss": 0.0018, "step": 33920 }, { "epoch": 0.2340855623089819, "grad_norm": 4.030636773677543e-06, "learning_rate": 7.660455200866524e-06, "loss": 0.0, "step": 33930 }, { "epoch": 0.23415455304352625, "grad_norm": 0.011503039859235287, "learning_rate": 7.65976529352108e-06, "loss": 0.0, "step": 33940 }, { "epoch": 0.23422354377807061, "grad_norm": 0.0, "learning_rate": 7.659075386175637e-06, "loss": 0.0, "step": 33950 }, { "epoch": 0.23429253451261495, "grad_norm": 0.0, "learning_rate": 7.658385478830193e-06, "loss": 0.0001, "step": 33960 }, { "epoch": 0.2343615252471593, "grad_norm": 0.0002538344415370375, "learning_rate": 7.65769557148475e-06, "loss": 0.0, "step": 33970 }, { "epoch": 0.23443051598170367, "grad_norm": 0.0, "learning_rate": 7.657005664139306e-06, "loss": 0.0, "step": 33980 }, { "epoch": 0.234499506716248, "grad_norm": 2.225191354751587, "learning_rate": 7.656315756793863e-06, "loss": 0.0004, "step": 33990 }, { "epoch": 0.23456849745079236, "grad_norm": 0.0, "learning_rate": 7.65562584944842e-06, "loss": 0.0005, "step": 34000 }, { "epoch": 0.2346374881853367, "grad_norm": 0.0, "learning_rate": 7.654935942102978e-06, "loss": 0.0, "step": 34010 }, { "epoch": 0.23470647891988106, "grad_norm": 0.0009202035143971443, "learning_rate": 7.654246034757532e-06, "loss": 0.0, "step": 34020 }, { "epoch": 0.23477546965442542, "grad_norm": 0.0, "learning_rate": 7.653556127412089e-06, "loss": 0.0, "step": 34030 }, { "epoch": 0.23484446038896975, "grad_norm": 0.0, "learning_rate": 7.652866220066645e-06, "loss": 0.0, "step": 34040 }, { "epoch": 0.2349134511235141, "grad_norm": 9.138249046358737e-10, "learning_rate": 7.652176312721202e-06, "loss": 0.0, "step": 34050 }, { "epoch": 0.23498244185805847, "grad_norm": 0.0, "learning_rate": 7.651486405375758e-06, "loss": 0.0, "step": 34060 }, { "epoch": 0.2350514325926028, "grad_norm": 0.0, "learning_rate": 7.650796498030315e-06, "loss": 0.0, "step": 34070 }, { "epoch": 0.23512042332714717, "grad_norm": 0.0, "learning_rate": 7.650106590684871e-06, "loss": 0.0, "step": 34080 }, { "epoch": 0.23518941406169153, "grad_norm": 0.26287323236465454, "learning_rate": 7.649416683339428e-06, "loss": 0.0163, "step": 34090 }, { "epoch": 0.23525840479623586, "grad_norm": 0.0, "learning_rate": 7.648726775993984e-06, "loss": 0.0, "step": 34100 }, { "epoch": 0.23532739553078022, "grad_norm": 0.0005133957020007074, "learning_rate": 7.64803686864854e-06, "loss": 0.0, "step": 34110 }, { "epoch": 0.23539638626532455, "grad_norm": 0.0, "learning_rate": 7.647346961303099e-06, "loss": 0.0, "step": 34120 }, { "epoch": 0.23546537699986891, "grad_norm": 1.8666461301108939e-06, "learning_rate": 7.646657053957655e-06, "loss": 0.0, "step": 34130 }, { "epoch": 0.23553436773441327, "grad_norm": 0.0, "learning_rate": 7.64596714661221e-06, "loss": 0.0, "step": 34140 }, { "epoch": 0.2356033584689576, "grad_norm": 0.012658294290304184, "learning_rate": 7.645277239266767e-06, "loss": 0.0005, "step": 34150 }, { "epoch": 0.23567234920350197, "grad_norm": 0.0, "learning_rate": 7.644587331921323e-06, "loss": 0.0002, "step": 34160 }, { "epoch": 0.23574133993804633, "grad_norm": 0.0, "learning_rate": 7.64389742457588e-06, "loss": 0.0, "step": 34170 }, { "epoch": 0.23581033067259066, "grad_norm": 0.0, "learning_rate": 7.643207517230436e-06, "loss": 0.0, "step": 34180 }, { "epoch": 0.23587932140713502, "grad_norm": 0.0, "learning_rate": 7.642517609884993e-06, "loss": 0.001, "step": 34190 }, { "epoch": 0.23594831214167938, "grad_norm": 0.0, "learning_rate": 7.641827702539549e-06, "loss": 0.0, "step": 34200 }, { "epoch": 0.23601730287622372, "grad_norm": 2.0044219493865967, "learning_rate": 7.641137795194106e-06, "loss": 0.0002, "step": 34210 }, { "epoch": 0.23608629361076808, "grad_norm": 0.0, "learning_rate": 7.640447887848662e-06, "loss": 0.0, "step": 34220 }, { "epoch": 0.23615528434531244, "grad_norm": 0.0, "learning_rate": 7.63975798050322e-06, "loss": 0.0, "step": 34230 }, { "epoch": 0.23622427507985677, "grad_norm": 0.0, "learning_rate": 7.639068073157777e-06, "loss": 0.0, "step": 34240 }, { "epoch": 0.23629326581440113, "grad_norm": 0.0, "learning_rate": 7.638378165812332e-06, "loss": 0.0, "step": 34250 }, { "epoch": 0.23636225654894547, "grad_norm": 0.0, "learning_rate": 7.637688258466888e-06, "loss": 0.0016, "step": 34260 }, { "epoch": 0.23643124728348983, "grad_norm": 0.0, "learning_rate": 7.636998351121445e-06, "loss": 0.0, "step": 34270 }, { "epoch": 0.2365002380180342, "grad_norm": 9.605948259050479e-10, "learning_rate": 7.636308443776001e-06, "loss": 0.0, "step": 34280 }, { "epoch": 0.23656922875257852, "grad_norm": 0.0, "learning_rate": 7.635618536430558e-06, "loss": 0.0, "step": 34290 }, { "epoch": 0.23663821948712288, "grad_norm": 2.862710957174386e-08, "learning_rate": 7.634928629085114e-06, "loss": 0.0, "step": 34300 }, { "epoch": 0.23670721022166724, "grad_norm": 0.0, "learning_rate": 7.63423872173967e-06, "loss": 0.0, "step": 34310 }, { "epoch": 0.23677620095621157, "grad_norm": 0.0, "learning_rate": 7.633548814394227e-06, "loss": 0.1395, "step": 34320 }, { "epoch": 0.23684519169075594, "grad_norm": 0.0, "learning_rate": 7.632858907048784e-06, "loss": 0.0, "step": 34330 }, { "epoch": 0.2369141824253003, "grad_norm": 0.0004049877170473337, "learning_rate": 7.632168999703342e-06, "loss": 0.0, "step": 34340 }, { "epoch": 0.23698317315984463, "grad_norm": 1.4445790839090478e-06, "learning_rate": 7.631479092357898e-06, "loss": 0.0, "step": 34350 }, { "epoch": 0.237052163894389, "grad_norm": 0.0, "learning_rate": 7.630789185012453e-06, "loss": 0.0, "step": 34360 }, { "epoch": 0.23712115462893332, "grad_norm": 0.00010177328658755869, "learning_rate": 7.63009927766701e-06, "loss": 0.0, "step": 34370 }, { "epoch": 0.23719014536347768, "grad_norm": 0.0, "learning_rate": 7.629409370321566e-06, "loss": 0.0, "step": 34380 }, { "epoch": 0.23725913609802204, "grad_norm": 0.0, "learning_rate": 7.628719462976123e-06, "loss": 0.0, "step": 34390 }, { "epoch": 0.23732812683256638, "grad_norm": 0.0, "learning_rate": 7.62802955563068e-06, "loss": 0.0, "step": 34400 }, { "epoch": 0.23739711756711074, "grad_norm": 0.0, "learning_rate": 7.6273396482852354e-06, "loss": 0.0, "step": 34410 }, { "epoch": 0.2374661083016551, "grad_norm": 0.0, "learning_rate": 7.626649740939792e-06, "loss": 0.0, "step": 34420 }, { "epoch": 0.23753509903619943, "grad_norm": 3.396797183086164e-05, "learning_rate": 7.625959833594348e-06, "loss": 0.0, "step": 34430 }, { "epoch": 0.2376040897707438, "grad_norm": 0.0, "learning_rate": 7.625269926248905e-06, "loss": 0.0, "step": 34440 }, { "epoch": 0.23767308050528815, "grad_norm": 0.0, "learning_rate": 7.624580018903462e-06, "loss": 0.0003, "step": 34450 }, { "epoch": 0.2377420712398325, "grad_norm": 0.0, "learning_rate": 7.623890111558019e-06, "loss": 0.0, "step": 34460 }, { "epoch": 0.23781106197437685, "grad_norm": 0.0, "learning_rate": 7.623200204212575e-06, "loss": 0.0, "step": 34470 }, { "epoch": 0.23788005270892118, "grad_norm": 0.0, "learning_rate": 7.622510296867132e-06, "loss": 0.0, "step": 34480 }, { "epoch": 0.23794904344346554, "grad_norm": 7.37014289597937e-08, "learning_rate": 7.621820389521688e-06, "loss": 0.0, "step": 34490 }, { "epoch": 0.2380180341780099, "grad_norm": 0.0, "learning_rate": 7.621130482176245e-06, "loss": 0.0, "step": 34500 }, { "epoch": 0.23808702491255423, "grad_norm": 0.0, "learning_rate": 7.620440574830801e-06, "loss": 0.0, "step": 34510 }, { "epoch": 0.2381560156470986, "grad_norm": 2.867292881011963, "learning_rate": 7.619750667485357e-06, "loss": 0.0006, "step": 34520 }, { "epoch": 0.23822500638164296, "grad_norm": 0.0, "learning_rate": 7.619060760139913e-06, "loss": 0.0, "step": 34530 }, { "epoch": 0.2382939971161873, "grad_norm": 0.0, "learning_rate": 7.61837085279447e-06, "loss": 0.0, "step": 34540 }, { "epoch": 0.23836298785073165, "grad_norm": 0.0, "learning_rate": 7.617680945449026e-06, "loss": 0.0, "step": 34550 }, { "epoch": 0.238431978585276, "grad_norm": 0.0, "learning_rate": 7.616991038103583e-06, "loss": 0.0003, "step": 34560 }, { "epoch": 0.23850096931982034, "grad_norm": 0.0, "learning_rate": 7.61630113075814e-06, "loss": 0.0, "step": 34570 }, { "epoch": 0.2385699600543647, "grad_norm": 0.0, "learning_rate": 7.615611223412697e-06, "loss": 0.0, "step": 34580 }, { "epoch": 0.23863895078890904, "grad_norm": 0.0, "learning_rate": 7.614921316067253e-06, "loss": 0.3754, "step": 34590 }, { "epoch": 0.2387079415234534, "grad_norm": 0.0060450066812336445, "learning_rate": 7.61423140872181e-06, "loss": 0.0, "step": 34600 }, { "epoch": 0.23877693225799776, "grad_norm": 0.0, "learning_rate": 7.613541501376366e-06, "loss": 0.0, "step": 34610 }, { "epoch": 0.2388459229925421, "grad_norm": 0.0, "learning_rate": 7.612851594030923e-06, "loss": 0.0, "step": 34620 }, { "epoch": 0.23891491372708645, "grad_norm": 0.0, "learning_rate": 7.612161686685478e-06, "loss": 0.0001, "step": 34630 }, { "epoch": 0.23898390446163081, "grad_norm": 0.0, "learning_rate": 7.611471779340035e-06, "loss": 1.5211, "step": 34640 }, { "epoch": 0.23905289519617515, "grad_norm": 0.0, "learning_rate": 7.610781871994591e-06, "loss": 0.0, "step": 34650 }, { "epoch": 0.2391218859307195, "grad_norm": 5.226797384239035e-06, "learning_rate": 7.610091964649148e-06, "loss": 0.0001, "step": 34660 }, { "epoch": 0.23919087666526387, "grad_norm": 0.0, "learning_rate": 7.609402057303704e-06, "loss": 0.0, "step": 34670 }, { "epoch": 0.2392598673998082, "grad_norm": 0.0, "learning_rate": 7.6087121499582615e-06, "loss": 0.0, "step": 34680 }, { "epoch": 0.23932885813435256, "grad_norm": 0.0, "learning_rate": 7.608022242612818e-06, "loss": 0.0, "step": 34690 }, { "epoch": 0.2393978488688969, "grad_norm": 2.01933944481425e-05, "learning_rate": 7.6073323352673745e-06, "loss": 0.0001, "step": 34700 }, { "epoch": 0.23946683960344126, "grad_norm": 0.0, "learning_rate": 7.606642427921931e-06, "loss": 0.0001, "step": 34710 }, { "epoch": 0.23953583033798562, "grad_norm": 0.0, "learning_rate": 7.6059525205764875e-06, "loss": 0.0, "step": 34720 }, { "epoch": 0.23960482107252995, "grad_norm": 0.0, "learning_rate": 7.605262613231044e-06, "loss": 0.0001, "step": 34730 }, { "epoch": 0.2396738118070743, "grad_norm": 0.0, "learning_rate": 7.6045727058856e-06, "loss": 0.0, "step": 34740 }, { "epoch": 0.23974280254161867, "grad_norm": 0.0, "learning_rate": 7.603882798540156e-06, "loss": 0.0, "step": 34750 }, { "epoch": 0.239811793276163, "grad_norm": 0.0, "learning_rate": 7.603192891194713e-06, "loss": 0.0893, "step": 34760 }, { "epoch": 0.23988078401070737, "grad_norm": 0.0, "learning_rate": 7.602502983849269e-06, "loss": 0.0013, "step": 34770 }, { "epoch": 0.23994977474525173, "grad_norm": 0.0, "learning_rate": 7.6018130765038256e-06, "loss": 0.0129, "step": 34780 }, { "epoch": 0.24001876547979606, "grad_norm": 0.0, "learning_rate": 7.601123169158383e-06, "loss": 0.0, "step": 34790 }, { "epoch": 0.24008775621434042, "grad_norm": 0.0, "learning_rate": 7.600433261812939e-06, "loss": 0.0, "step": 34800 }, { "epoch": 0.24015674694888475, "grad_norm": 0.0, "learning_rate": 7.599743354467496e-06, "loss": 0.0, "step": 34810 }, { "epoch": 0.2402257376834291, "grad_norm": 7.96481799625326e-06, "learning_rate": 7.599053447122052e-06, "loss": 0.0, "step": 34820 }, { "epoch": 0.24029472841797347, "grad_norm": 27.987932205200195, "learning_rate": 7.598363539776609e-06, "loss": 0.0076, "step": 34830 }, { "epoch": 0.2403637191525178, "grad_norm": 0.01797497272491455, "learning_rate": 7.597673632431165e-06, "loss": 0.0, "step": 34840 }, { "epoch": 0.24043270988706217, "grad_norm": 0.0, "learning_rate": 7.596983725085722e-06, "loss": 0.0, "step": 34850 }, { "epoch": 0.24050170062160653, "grad_norm": 0.0, "learning_rate": 7.5962938177402775e-06, "loss": 0.0, "step": 34860 }, { "epoch": 0.24057069135615086, "grad_norm": 0.0, "learning_rate": 7.595603910394834e-06, "loss": 0.0, "step": 34870 }, { "epoch": 0.24063968209069522, "grad_norm": 0.0, "learning_rate": 7.5949140030493905e-06, "loss": 0.7873, "step": 34880 }, { "epoch": 0.24070867282523958, "grad_norm": 0.0, "learning_rate": 7.594224095703947e-06, "loss": 0.0, "step": 34890 }, { "epoch": 0.24077766355978392, "grad_norm": 0.0, "learning_rate": 7.593534188358504e-06, "loss": 0.0, "step": 34900 }, { "epoch": 0.24084665429432828, "grad_norm": 0.0, "learning_rate": 7.592844281013061e-06, "loss": 0.0, "step": 34910 }, { "epoch": 0.2409156450288726, "grad_norm": 0.021901410073041916, "learning_rate": 7.592154373667617e-06, "loss": 0.0, "step": 34920 }, { "epoch": 0.24098463576341697, "grad_norm": 0.0, "learning_rate": 7.591464466322174e-06, "loss": 0.2842, "step": 34930 }, { "epoch": 0.24105362649796133, "grad_norm": 7.792269752826542e-05, "learning_rate": 7.59077455897673e-06, "loss": 0.0, "step": 34940 }, { "epoch": 0.24112261723250566, "grad_norm": 0.006543013267219067, "learning_rate": 7.590084651631287e-06, "loss": 0.0009, "step": 34950 }, { "epoch": 0.24119160796705003, "grad_norm": 0.0, "learning_rate": 7.589394744285843e-06, "loss": 0.0, "step": 34960 }, { "epoch": 0.2412605987015944, "grad_norm": 0.0, "learning_rate": 7.588704836940399e-06, "loss": 0.0, "step": 34970 }, { "epoch": 0.24132958943613872, "grad_norm": 0.04873797297477722, "learning_rate": 7.588014929594955e-06, "loss": 0.0, "step": 34980 }, { "epoch": 0.24139858017068308, "grad_norm": 8.364700598839647e-10, "learning_rate": 7.587325022249512e-06, "loss": 0.0031, "step": 34990 }, { "epoch": 0.24146757090522744, "grad_norm": 0.0, "learning_rate": 7.586635114904068e-06, "loss": 0.0, "step": 35000 }, { "epoch": 0.24153656163977177, "grad_norm": 0.0, "learning_rate": 7.585945207558626e-06, "loss": 0.0, "step": 35010 }, { "epoch": 0.24160555237431613, "grad_norm": 0.0013921007048338652, "learning_rate": 7.585255300213182e-06, "loss": 0.0, "step": 35020 }, { "epoch": 0.24167454310886047, "grad_norm": 7.703089011101838e-08, "learning_rate": 7.584565392867739e-06, "loss": 0.0, "step": 35030 }, { "epoch": 0.24174353384340483, "grad_norm": 0.0001143598128692247, "learning_rate": 7.583875485522295e-06, "loss": 0.0, "step": 35040 }, { "epoch": 0.2418125245779492, "grad_norm": 0.0, "learning_rate": 7.583185578176852e-06, "loss": 0.0, "step": 35050 }, { "epoch": 0.24188151531249352, "grad_norm": 0.0037796262186020613, "learning_rate": 7.582495670831408e-06, "loss": 0.1351, "step": 35060 }, { "epoch": 0.24195050604703788, "grad_norm": 0.0, "learning_rate": 7.581805763485965e-06, "loss": 0.0, "step": 35070 }, { "epoch": 0.24201949678158224, "grad_norm": 0.00025030432152561843, "learning_rate": 7.58111585614052e-06, "loss": 0.0, "step": 35080 }, { "epoch": 0.24208848751612658, "grad_norm": 0.0, "learning_rate": 7.580425948795077e-06, "loss": 0.0001, "step": 35090 }, { "epoch": 0.24215747825067094, "grad_norm": 0.0, "learning_rate": 7.579736041449633e-06, "loss": 0.0, "step": 35100 }, { "epoch": 0.2422264689852153, "grad_norm": 0.003756160382181406, "learning_rate": 7.57904613410419e-06, "loss": 0.0, "step": 35110 }, { "epoch": 0.24229545971975963, "grad_norm": 2.1044198206254805e-07, "learning_rate": 7.578356226758747e-06, "loss": 0.2871, "step": 35120 }, { "epoch": 0.242364450454304, "grad_norm": 0.0, "learning_rate": 7.577666319413304e-06, "loss": 0.0001, "step": 35130 }, { "epoch": 0.24243344118884833, "grad_norm": 0.0, "learning_rate": 7.57697641206786e-06, "loss": 0.0389, "step": 35140 }, { "epoch": 0.2425024319233927, "grad_norm": 0.0, "learning_rate": 7.5762865047224166e-06, "loss": 0.0, "step": 35150 }, { "epoch": 0.24257142265793705, "grad_norm": 0.0, "learning_rate": 7.575596597376973e-06, "loss": 0.0, "step": 35160 }, { "epoch": 0.24264041339248138, "grad_norm": 3.31876170989176e-09, "learning_rate": 7.5749066900315295e-06, "loss": 0.0, "step": 35170 }, { "epoch": 0.24270940412702574, "grad_norm": 0.0, "learning_rate": 7.574216782686086e-06, "loss": 0.0, "step": 35180 }, { "epoch": 0.2427783948615701, "grad_norm": 0.0, "learning_rate": 7.5735268753406425e-06, "loss": 0.0, "step": 35190 }, { "epoch": 0.24284738559611443, "grad_norm": 0.0, "learning_rate": 7.572836967995198e-06, "loss": 0.0, "step": 35200 }, { "epoch": 0.2429163763306588, "grad_norm": 1.3164053598302417e-05, "learning_rate": 7.572147060649755e-06, "loss": 0.0, "step": 35210 }, { "epoch": 0.24298536706520316, "grad_norm": 0.0, "learning_rate": 7.571457153304311e-06, "loss": 0.0, "step": 35220 }, { "epoch": 0.2430543577997475, "grad_norm": 0.0, "learning_rate": 7.5707672459588685e-06, "loss": 0.0, "step": 35230 }, { "epoch": 0.24312334853429185, "grad_norm": 0.0, "learning_rate": 7.570077338613425e-06, "loss": 0.0, "step": 35240 }, { "epoch": 0.24319233926883618, "grad_norm": 5.7123459555441514e-05, "learning_rate": 7.5693874312679815e-06, "loss": 0.0001, "step": 35250 }, { "epoch": 0.24326133000338054, "grad_norm": 0.0, "learning_rate": 7.568697523922538e-06, "loss": 0.0, "step": 35260 }, { "epoch": 0.2433303207379249, "grad_norm": 7.3790483474731445, "learning_rate": 7.5680076165770944e-06, "loss": 0.0013, "step": 35270 }, { "epoch": 0.24339931147246924, "grad_norm": 0.011852694675326347, "learning_rate": 7.567317709231651e-06, "loss": 0.0001, "step": 35280 }, { "epoch": 0.2434683022070136, "grad_norm": 0.0, "learning_rate": 7.5666278018862074e-06, "loss": 0.0, "step": 35290 }, { "epoch": 0.24353729294155796, "grad_norm": 0.0, "learning_rate": 7.565937894540764e-06, "loss": 0.0, "step": 35300 }, { "epoch": 0.2436062836761023, "grad_norm": 0.0, "learning_rate": 7.5652479871953196e-06, "loss": 0.1551, "step": 35310 }, { "epoch": 0.24367527441064665, "grad_norm": 1.3651856534124818e-07, "learning_rate": 7.564558079849876e-06, "loss": 0.0, "step": 35320 }, { "epoch": 0.243744265145191, "grad_norm": 0.0, "learning_rate": 7.5638681725044325e-06, "loss": 0.0, "step": 35330 }, { "epoch": 0.24381325587973535, "grad_norm": 0.0, "learning_rate": 7.56317826515899e-06, "loss": 0.0045, "step": 35340 }, { "epoch": 0.2438822466142797, "grad_norm": 0.0, "learning_rate": 7.562488357813546e-06, "loss": 0.0, "step": 35350 }, { "epoch": 0.24395123734882404, "grad_norm": 0.0, "learning_rate": 7.561798450468103e-06, "loss": 0.0, "step": 35360 }, { "epoch": 0.2440202280833684, "grad_norm": 0.0, "learning_rate": 7.561108543122659e-06, "loss": 0.0, "step": 35370 }, { "epoch": 0.24408921881791276, "grad_norm": 0.0, "learning_rate": 7.560418635777216e-06, "loss": 0.0, "step": 35380 }, { "epoch": 0.2441582095524571, "grad_norm": 0.0, "learning_rate": 7.559728728431772e-06, "loss": 0.0, "step": 35390 }, { "epoch": 0.24422720028700146, "grad_norm": 0.0, "learning_rate": 7.559038821086329e-06, "loss": 0.0, "step": 35400 }, { "epoch": 0.24429619102154582, "grad_norm": 0.0, "learning_rate": 7.558348913740885e-06, "loss": 0.0001, "step": 35410 }, { "epoch": 0.24436518175609015, "grad_norm": 0.0, "learning_rate": 7.557659006395441e-06, "loss": 0.0, "step": 35420 }, { "epoch": 0.2444341724906345, "grad_norm": 1.8869333267211914, "learning_rate": 7.5569690990499974e-06, "loss": 0.0005, "step": 35430 }, { "epoch": 0.24450316322517887, "grad_norm": 0.0, "learning_rate": 7.556279191704554e-06, "loss": 0.0, "step": 35440 }, { "epoch": 0.2445721539597232, "grad_norm": 0.0, "learning_rate": 7.555589284359111e-06, "loss": 0.0, "step": 35450 }, { "epoch": 0.24464114469426756, "grad_norm": 0.0, "learning_rate": 7.554899377013668e-06, "loss": 0.0, "step": 35460 }, { "epoch": 0.2447101354288119, "grad_norm": 0.0, "learning_rate": 7.554209469668224e-06, "loss": 0.0, "step": 35470 }, { "epoch": 0.24477912616335626, "grad_norm": 0.0, "learning_rate": 7.553519562322781e-06, "loss": 0.0, "step": 35480 }, { "epoch": 0.24484811689790062, "grad_norm": 3.905999183654785, "learning_rate": 7.552829654977337e-06, "loss": 0.0009, "step": 35490 }, { "epoch": 0.24491710763244495, "grad_norm": 0.0, "learning_rate": 7.552139747631894e-06, "loss": 0.0, "step": 35500 }, { "epoch": 0.2449860983669893, "grad_norm": 0.0, "learning_rate": 7.55144984028645e-06, "loss": 0.0044, "step": 35510 }, { "epoch": 0.24505508910153367, "grad_norm": 0.0, "learning_rate": 7.550759932941007e-06, "loss": 0.0, "step": 35520 }, { "epoch": 0.245124079836078, "grad_norm": 0.0, "learning_rate": 7.550070025595563e-06, "loss": 0.0054, "step": 35530 }, { "epoch": 0.24519307057062237, "grad_norm": 0.0, "learning_rate": 7.549380118250119e-06, "loss": 0.0, "step": 35540 }, { "epoch": 0.24526206130516673, "grad_norm": 0.0, "learning_rate": 7.548690210904675e-06, "loss": 0.0068, "step": 35550 }, { "epoch": 0.24533105203971106, "grad_norm": 0.0, "learning_rate": 7.548000303559233e-06, "loss": 0.0, "step": 35560 }, { "epoch": 0.24540004277425542, "grad_norm": 0.0, "learning_rate": 7.547310396213789e-06, "loss": 0.0, "step": 35570 }, { "epoch": 0.24546903350879976, "grad_norm": 9.96428184407705e-08, "learning_rate": 7.546620488868346e-06, "loss": 0.0, "step": 35580 }, { "epoch": 0.24553802424334412, "grad_norm": 0.0, "learning_rate": 7.545930581522902e-06, "loss": 0.0, "step": 35590 }, { "epoch": 0.24560701497788848, "grad_norm": 0.0, "learning_rate": 7.545240674177459e-06, "loss": 0.0, "step": 35600 }, { "epoch": 0.2456760057124328, "grad_norm": 4.5747855503819324e-10, "learning_rate": 7.544550766832015e-06, "loss": 0.0, "step": 35610 }, { "epoch": 0.24574499644697717, "grad_norm": 0.0, "learning_rate": 7.543860859486572e-06, "loss": 0.0033, "step": 35620 }, { "epoch": 0.24581398718152153, "grad_norm": 2.5132450787168636e-08, "learning_rate": 7.543170952141128e-06, "loss": 0.0, "step": 35630 }, { "epoch": 0.24588297791606586, "grad_norm": 0.0, "learning_rate": 7.542481044795685e-06, "loss": 0.0, "step": 35640 }, { "epoch": 0.24595196865061023, "grad_norm": 0.0, "learning_rate": 7.54179113745024e-06, "loss": 0.0, "step": 35650 }, { "epoch": 0.2460209593851546, "grad_norm": 0.0, "learning_rate": 7.541101230104797e-06, "loss": 0.0012, "step": 35660 }, { "epoch": 0.24608995011969892, "grad_norm": 0.0, "learning_rate": 7.540411322759355e-06, "loss": 0.0, "step": 35670 }, { "epoch": 0.24615894085424328, "grad_norm": 0.02591627463698387, "learning_rate": 7.5397214154139106e-06, "loss": 0.0, "step": 35680 }, { "epoch": 0.2462279315887876, "grad_norm": 0.0, "learning_rate": 7.539031508068467e-06, "loss": 0.0, "step": 35690 }, { "epoch": 0.24629692232333197, "grad_norm": 0.0, "learning_rate": 7.5383416007230235e-06, "loss": 0.0, "step": 35700 }, { "epoch": 0.24636591305787633, "grad_norm": 0.0, "learning_rate": 7.53765169337758e-06, "loss": 0.0, "step": 35710 }, { "epoch": 0.24643490379242067, "grad_norm": 0.022486288100481033, "learning_rate": 7.5369617860321365e-06, "loss": 0.0013, "step": 35720 }, { "epoch": 0.24650389452696503, "grad_norm": 0.0, "learning_rate": 7.536271878686693e-06, "loss": 0.0, "step": 35730 }, { "epoch": 0.2465728852615094, "grad_norm": 4.3531397930784976e-10, "learning_rate": 7.5355819713412495e-06, "loss": 0.0, "step": 35740 }, { "epoch": 0.24664187599605372, "grad_norm": 0.0, "learning_rate": 7.534892063995806e-06, "loss": 0.0018, "step": 35750 }, { "epoch": 0.24671086673059808, "grad_norm": 0.0, "learning_rate": 7.534202156650362e-06, "loss": 0.0, "step": 35760 }, { "epoch": 0.24677985746514244, "grad_norm": 0.0, "learning_rate": 7.533512249304918e-06, "loss": 0.0017, "step": 35770 }, { "epoch": 0.24684884819968678, "grad_norm": 0.0, "learning_rate": 7.532822341959476e-06, "loss": 0.0089, "step": 35780 }, { "epoch": 0.24691783893423114, "grad_norm": 0.0, "learning_rate": 7.532132434614032e-06, "loss": 0.0001, "step": 35790 }, { "epoch": 0.24698682966877547, "grad_norm": 0.0, "learning_rate": 7.5314425272685884e-06, "loss": 0.0, "step": 35800 }, { "epoch": 0.24705582040331983, "grad_norm": 0.0, "learning_rate": 7.530752619923145e-06, "loss": 0.0, "step": 35810 }, { "epoch": 0.2471248111378642, "grad_norm": 0.0, "learning_rate": 7.530062712577701e-06, "loss": 0.1126, "step": 35820 }, { "epoch": 0.24719380187240853, "grad_norm": 0.0, "learning_rate": 7.529372805232258e-06, "loss": 0.0002, "step": 35830 }, { "epoch": 0.24726279260695289, "grad_norm": 0.0, "learning_rate": 7.528682897886814e-06, "loss": 0.0, "step": 35840 }, { "epoch": 0.24733178334149725, "grad_norm": 0.0, "learning_rate": 7.527992990541371e-06, "loss": 0.0002, "step": 35850 }, { "epoch": 0.24740077407604158, "grad_norm": 0.0, "learning_rate": 7.527303083195927e-06, "loss": 0.0235, "step": 35860 }, { "epoch": 0.24746976481058594, "grad_norm": 9.46808941080235e-05, "learning_rate": 7.526613175850484e-06, "loss": 0.0, "step": 35870 }, { "epoch": 0.2475387555451303, "grad_norm": 0.0, "learning_rate": 7.5259232685050395e-06, "loss": 0.0001, "step": 35880 }, { "epoch": 0.24760774627967463, "grad_norm": 1.8822514903149568e-05, "learning_rate": 7.525233361159598e-06, "loss": 0.0036, "step": 35890 }, { "epoch": 0.247676737014219, "grad_norm": 0.0, "learning_rate": 7.524543453814153e-06, "loss": 0.0, "step": 35900 }, { "epoch": 0.24774572774876333, "grad_norm": 0.0, "learning_rate": 7.52385354646871e-06, "loss": 0.0, "step": 35910 }, { "epoch": 0.2478147184833077, "grad_norm": 0.0, "learning_rate": 7.523163639123266e-06, "loss": 0.0, "step": 35920 }, { "epoch": 0.24788370921785205, "grad_norm": 0.0, "learning_rate": 7.522473731777823e-06, "loss": 0.0, "step": 35930 }, { "epoch": 0.24795269995239638, "grad_norm": 0.0, "learning_rate": 7.521783824432379e-06, "loss": 0.0, "step": 35940 }, { "epoch": 0.24802169068694074, "grad_norm": 16.917089462280273, "learning_rate": 7.521093917086936e-06, "loss": 0.0035, "step": 35950 }, { "epoch": 0.2480906814214851, "grad_norm": 0.0, "learning_rate": 7.520404009741492e-06, "loss": 0.0, "step": 35960 }, { "epoch": 0.24815967215602944, "grad_norm": 0.0, "learning_rate": 7.519714102396049e-06, "loss": 0.0003, "step": 35970 }, { "epoch": 0.2482286628905738, "grad_norm": 0.0, "learning_rate": 7.519024195050605e-06, "loss": 0.0, "step": 35980 }, { "epoch": 0.24829765362511816, "grad_norm": 0.0, "learning_rate": 7.518334287705161e-06, "loss": 0.0, "step": 35990 }, { "epoch": 0.2483666443596625, "grad_norm": 0.0015563899651169777, "learning_rate": 7.517644380359719e-06, "loss": 0.0, "step": 36000 }, { "epoch": 0.24843563509420685, "grad_norm": 6.156587460282026e-06, "learning_rate": 7.516954473014276e-06, "loss": 0.0, "step": 36010 }, { "epoch": 0.24850462582875119, "grad_norm": 0.0, "learning_rate": 7.516264565668831e-06, "loss": 0.0, "step": 36020 }, { "epoch": 0.24857361656329555, "grad_norm": 0.0, "learning_rate": 7.515574658323388e-06, "loss": 0.0, "step": 36030 }, { "epoch": 0.2486426072978399, "grad_norm": 7.864497456466779e-05, "learning_rate": 7.514884750977944e-06, "loss": 0.0, "step": 36040 }, { "epoch": 0.24871159803238424, "grad_norm": 0.0, "learning_rate": 7.514194843632501e-06, "loss": 0.0, "step": 36050 }, { "epoch": 0.2487805887669286, "grad_norm": 0.0, "learning_rate": 7.513504936287057e-06, "loss": 0.0001, "step": 36060 }, { "epoch": 0.24884957950147296, "grad_norm": 4.391550179061454e-10, "learning_rate": 7.512815028941614e-06, "loss": 0.0, "step": 36070 }, { "epoch": 0.2489185702360173, "grad_norm": 0.0, "learning_rate": 7.51212512159617e-06, "loss": 0.0, "step": 36080 }, { "epoch": 0.24898756097056166, "grad_norm": 0.0, "learning_rate": 7.511435214250727e-06, "loss": 0.0, "step": 36090 }, { "epoch": 0.24905655170510602, "grad_norm": 0.0, "learning_rate": 7.510745306905282e-06, "loss": 0.0, "step": 36100 }, { "epoch": 0.24912554243965035, "grad_norm": 0.0, "learning_rate": 7.5100553995598405e-06, "loss": 0.0002, "step": 36110 }, { "epoch": 0.2491945331741947, "grad_norm": 2.0434224268228718e-07, "learning_rate": 7.509365492214397e-06, "loss": 0.0, "step": 36120 }, { "epoch": 0.24926352390873904, "grad_norm": 9.190723358187824e-05, "learning_rate": 7.508675584868953e-06, "loss": 0.0, "step": 36130 }, { "epoch": 0.2493325146432834, "grad_norm": 0.0, "learning_rate": 7.507985677523509e-06, "loss": 0.0, "step": 36140 }, { "epoch": 0.24940150537782776, "grad_norm": 0.13615694642066956, "learning_rate": 7.507295770178066e-06, "loss": 0.007, "step": 36150 }, { "epoch": 0.2494704961123721, "grad_norm": 0.0, "learning_rate": 7.506605862832622e-06, "loss": 0.0, "step": 36160 }, { "epoch": 0.24953948684691646, "grad_norm": 0.0, "learning_rate": 7.505915955487179e-06, "loss": 0.0, "step": 36170 }, { "epoch": 0.24960847758146082, "grad_norm": 0.0, "learning_rate": 7.505226048141735e-06, "loss": 0.0, "step": 36180 }, { "epoch": 0.24967746831600515, "grad_norm": 0.0, "learning_rate": 7.5045361407962916e-06, "loss": 0.0, "step": 36190 }, { "epoch": 0.2497464590505495, "grad_norm": 0.02043294906616211, "learning_rate": 7.503846233450848e-06, "loss": 0.0177, "step": 36200 }, { "epoch": 0.24981544978509387, "grad_norm": 0.0, "learning_rate": 7.5031563261054045e-06, "loss": 0.0, "step": 36210 }, { "epoch": 0.2498844405196382, "grad_norm": 0.0, "learning_rate": 7.502466418759962e-06, "loss": 0.0, "step": 36220 }, { "epoch": 0.24995343125418257, "grad_norm": 0.2231520265340805, "learning_rate": 7.501776511414518e-06, "loss": 0.0002, "step": 36230 }, { "epoch": 0.25002242198872693, "grad_norm": 0.0, "learning_rate": 7.501086604069074e-06, "loss": 0.0, "step": 36240 }, { "epoch": 0.2500914127232713, "grad_norm": 1.3483792145052576e-06, "learning_rate": 7.5003966967236305e-06, "loss": 0.0088, "step": 36250 }, { "epoch": 0.2501604034578156, "grad_norm": 0.0, "learning_rate": 7.499706789378187e-06, "loss": 0.0006, "step": 36260 }, { "epoch": 0.25022939419235996, "grad_norm": 4.065394065833061e-08, "learning_rate": 7.4990168820327435e-06, "loss": 0.0, "step": 36270 }, { "epoch": 0.2502983849269043, "grad_norm": 0.0, "learning_rate": 7.4983269746873e-06, "loss": 0.0, "step": 36280 }, { "epoch": 0.2503673756614487, "grad_norm": 0.0, "learning_rate": 7.4976370673418565e-06, "loss": 0.0, "step": 36290 }, { "epoch": 0.25043636639599304, "grad_norm": 0.0, "learning_rate": 7.496947159996413e-06, "loss": 0.0, "step": 36300 }, { "epoch": 0.25050535713053734, "grad_norm": 0.0, "learning_rate": 7.4962572526509694e-06, "loss": 0.001, "step": 36310 }, { "epoch": 0.2505743478650817, "grad_norm": 0.0, "learning_rate": 7.495567345305526e-06, "loss": 0.1296, "step": 36320 }, { "epoch": 0.25064333859962606, "grad_norm": 0.0, "learning_rate": 7.494877437960083e-06, "loss": 0.0, "step": 36330 }, { "epoch": 0.2507123293341704, "grad_norm": 0.0, "learning_rate": 7.49418753061464e-06, "loss": 0.0, "step": 36340 }, { "epoch": 0.2507813200687148, "grad_norm": 0.0, "learning_rate": 7.493497623269195e-06, "loss": 0.0, "step": 36350 }, { "epoch": 0.25085031080325915, "grad_norm": 0.0, "learning_rate": 7.492807715923752e-06, "loss": 0.0001, "step": 36360 }, { "epoch": 0.25091930153780345, "grad_norm": 0.0, "learning_rate": 7.492117808578308e-06, "loss": 0.0003, "step": 36370 }, { "epoch": 0.2509882922723478, "grad_norm": 0.0, "learning_rate": 7.491427901232865e-06, "loss": 0.0, "step": 36380 }, { "epoch": 0.2510572830068922, "grad_norm": 0.0, "learning_rate": 7.490737993887421e-06, "loss": 0.0, "step": 36390 }, { "epoch": 0.25112627374143653, "grad_norm": 0.0, "learning_rate": 7.490048086541978e-06, "loss": 0.0, "step": 36400 }, { "epoch": 0.2511952644759809, "grad_norm": 0.0, "learning_rate": 7.489358179196534e-06, "loss": 0.0, "step": 36410 }, { "epoch": 0.2512642552105252, "grad_norm": 56.675270080566406, "learning_rate": 7.488668271851091e-06, "loss": 0.0097, "step": 36420 }, { "epoch": 0.25133324594506956, "grad_norm": 0.0, "learning_rate": 7.487978364505647e-06, "loss": 0.0, "step": 36430 }, { "epoch": 0.2514022366796139, "grad_norm": 0.0, "learning_rate": 7.487288457160205e-06, "loss": 0.0017, "step": 36440 }, { "epoch": 0.2514712274141583, "grad_norm": 0.0, "learning_rate": 7.486598549814761e-06, "loss": 0.0007, "step": 36450 }, { "epoch": 0.25154021814870264, "grad_norm": 0.03382086753845215, "learning_rate": 7.485908642469318e-06, "loss": 0.1864, "step": 36460 }, { "epoch": 0.251609208883247, "grad_norm": 0.0015447885962203145, "learning_rate": 7.485218735123873e-06, "loss": 0.0, "step": 36470 }, { "epoch": 0.2516781996177913, "grad_norm": 0.0, "learning_rate": 7.48452882777843e-06, "loss": 0.0, "step": 36480 }, { "epoch": 0.25174719035233567, "grad_norm": 0.0, "learning_rate": 7.483838920432986e-06, "loss": 0.0, "step": 36490 }, { "epoch": 0.25181618108688003, "grad_norm": 0.0, "learning_rate": 7.483149013087543e-06, "loss": 0.0002, "step": 36500 }, { "epoch": 0.2518851718214244, "grad_norm": 0.0, "learning_rate": 7.482459105742099e-06, "loss": 0.0006, "step": 36510 }, { "epoch": 0.25195416255596875, "grad_norm": 0.0, "learning_rate": 7.481769198396656e-06, "loss": 0.0, "step": 36520 }, { "epoch": 0.25202315329051306, "grad_norm": 0.0, "learning_rate": 7.481079291051212e-06, "loss": 0.0, "step": 36530 }, { "epoch": 0.2520921440250574, "grad_norm": 0.0, "learning_rate": 7.480389383705769e-06, "loss": 0.0, "step": 36540 }, { "epoch": 0.2521611347596018, "grad_norm": 0.0004022032371722162, "learning_rate": 7.479699476360326e-06, "loss": 0.0333, "step": 36550 }, { "epoch": 0.25223012549414614, "grad_norm": 0.0, "learning_rate": 7.4790095690148826e-06, "loss": 0.0, "step": 36560 }, { "epoch": 0.2522991162286905, "grad_norm": 0.0, "learning_rate": 7.478319661669439e-06, "loss": 0.0, "step": 36570 }, { "epoch": 0.25236810696323486, "grad_norm": 5.063108332592492e-08, "learning_rate": 7.477629754323995e-06, "loss": 0.0001, "step": 36580 }, { "epoch": 0.25243709769777917, "grad_norm": 0.0, "learning_rate": 7.476939846978551e-06, "loss": 0.0, "step": 36590 }, { "epoch": 0.2525060884323235, "grad_norm": 0.00015263649402186275, "learning_rate": 7.476249939633108e-06, "loss": 0.0, "step": 36600 }, { "epoch": 0.2525750791668679, "grad_norm": 0.0, "learning_rate": 7.475560032287664e-06, "loss": 0.0109, "step": 36610 }, { "epoch": 0.25264406990141225, "grad_norm": 0.0, "learning_rate": 7.474870124942221e-06, "loss": 0.0, "step": 36620 }, { "epoch": 0.2527130606359566, "grad_norm": 0.0, "learning_rate": 7.474180217596777e-06, "loss": 0.0, "step": 36630 }, { "epoch": 0.2527820513705009, "grad_norm": 0.0, "learning_rate": 7.473490310251334e-06, "loss": 0.0, "step": 36640 }, { "epoch": 0.2528510421050453, "grad_norm": 0.0, "learning_rate": 7.47280040290589e-06, "loss": 0.001, "step": 36650 }, { "epoch": 0.25292003283958964, "grad_norm": 5.780433980362432e-07, "learning_rate": 7.4721104955604475e-06, "loss": 0.0, "step": 36660 }, { "epoch": 0.252989023574134, "grad_norm": 4.935407083017651e-10, "learning_rate": 7.471420588215004e-06, "loss": 0.0, "step": 36670 }, { "epoch": 0.25305801430867836, "grad_norm": 0.0, "learning_rate": 7.4707306808695604e-06, "loss": 0.0, "step": 36680 }, { "epoch": 0.2531270050432227, "grad_norm": 0.0, "learning_rate": 7.470040773524116e-06, "loss": 0.0, "step": 36690 }, { "epoch": 0.253195995777767, "grad_norm": 0.0, "learning_rate": 7.4693508661786726e-06, "loss": 0.0, "step": 36700 }, { "epoch": 0.2532649865123114, "grad_norm": 0.0, "learning_rate": 7.468660958833229e-06, "loss": 0.0, "step": 36710 }, { "epoch": 0.25333397724685575, "grad_norm": 0.08687032014131546, "learning_rate": 7.4679710514877856e-06, "loss": 0.0038, "step": 36720 }, { "epoch": 0.2534029679814001, "grad_norm": 9.497213113718317e-07, "learning_rate": 7.467281144142342e-06, "loss": 0.0, "step": 36730 }, { "epoch": 0.25347195871594447, "grad_norm": 11.538050651550293, "learning_rate": 7.4665912367968985e-06, "loss": 0.0027, "step": 36740 }, { "epoch": 0.2535409494504888, "grad_norm": 0.0, "learning_rate": 7.465901329451455e-06, "loss": 0.0, "step": 36750 }, { "epoch": 0.25360994018503313, "grad_norm": 1.7542208796683667e-09, "learning_rate": 7.4652114221060115e-06, "loss": 0.0, "step": 36760 }, { "epoch": 0.2536789309195775, "grad_norm": 0.0, "learning_rate": 7.464521514760569e-06, "loss": 0.0, "step": 36770 }, { "epoch": 0.25374792165412186, "grad_norm": 0.0061941323801875114, "learning_rate": 7.463831607415125e-06, "loss": 0.0, "step": 36780 }, { "epoch": 0.2538169123886662, "grad_norm": 0.0005129583296366036, "learning_rate": 7.463141700069682e-06, "loss": 0.0, "step": 36790 }, { "epoch": 0.2538859031232106, "grad_norm": 0.0, "learning_rate": 7.462451792724238e-06, "loss": 0.0, "step": 36800 }, { "epoch": 0.2539548938577549, "grad_norm": 0.0, "learning_rate": 7.461761885378794e-06, "loss": 0.0, "step": 36810 }, { "epoch": 0.25402388459229924, "grad_norm": 0.0, "learning_rate": 7.4610719780333505e-06, "loss": 0.0015, "step": 36820 }, { "epoch": 0.2540928753268436, "grad_norm": 4.33061864146822e-10, "learning_rate": 7.460382070687907e-06, "loss": 0.0, "step": 36830 }, { "epoch": 0.25416186606138796, "grad_norm": 0.0, "learning_rate": 7.4596921633424634e-06, "loss": 0.0, "step": 36840 }, { "epoch": 0.2542308567959323, "grad_norm": 0.0, "learning_rate": 7.45900225599702e-06, "loss": 0.0, "step": 36850 }, { "epoch": 0.25429984753047663, "grad_norm": 0.0, "learning_rate": 7.458312348651576e-06, "loss": 0.0, "step": 36860 }, { "epoch": 0.254368838265021, "grad_norm": 0.0, "learning_rate": 7.457622441306133e-06, "loss": 0.2332, "step": 36870 }, { "epoch": 0.25443782899956535, "grad_norm": 0.0, "learning_rate": 7.45693253396069e-06, "loss": 0.0002, "step": 36880 }, { "epoch": 0.2545068197341097, "grad_norm": 0.006251172628253698, "learning_rate": 7.456242626615247e-06, "loss": 0.191, "step": 36890 }, { "epoch": 0.2545758104686541, "grad_norm": 4.6053352797947866e-10, "learning_rate": 7.455552719269803e-06, "loss": 0.0, "step": 36900 }, { "epoch": 0.25464480120319843, "grad_norm": 0.010192696005105972, "learning_rate": 7.45486281192436e-06, "loss": 0.0, "step": 36910 }, { "epoch": 0.25471379193774274, "grad_norm": 4.805706943500354e-10, "learning_rate": 7.454172904578915e-06, "loss": 0.0, "step": 36920 }, { "epoch": 0.2547827826722871, "grad_norm": 1.4466934317169944e-06, "learning_rate": 7.453482997233472e-06, "loss": 0.0, "step": 36930 }, { "epoch": 0.25485177340683146, "grad_norm": 1.3884893235172058e-08, "learning_rate": 7.452793089888028e-06, "loss": 0.0, "step": 36940 }, { "epoch": 0.2549207641413758, "grad_norm": 0.0, "learning_rate": 7.452103182542585e-06, "loss": 0.0, "step": 36950 }, { "epoch": 0.2549897548759202, "grad_norm": 5.604390906910339e-08, "learning_rate": 7.451413275197141e-06, "loss": 0.0, "step": 36960 }, { "epoch": 0.2550587456104645, "grad_norm": 0.0015580153558403254, "learning_rate": 7.450723367851698e-06, "loss": 0.0, "step": 36970 }, { "epoch": 0.25512773634500885, "grad_norm": 0.0, "learning_rate": 7.450033460506254e-06, "loss": 0.0, "step": 36980 }, { "epoch": 0.2551967270795532, "grad_norm": 0.0, "learning_rate": 7.449343553160812e-06, "loss": 0.0, "step": 36990 }, { "epoch": 0.25526571781409757, "grad_norm": 0.0, "learning_rate": 7.448653645815368e-06, "loss": 0.7023, "step": 37000 }, { "epoch": 0.25533470854864193, "grad_norm": 0.0, "learning_rate": 7.447963738469925e-06, "loss": 0.0, "step": 37010 }, { "epoch": 0.2554036992831863, "grad_norm": 0.0, "learning_rate": 7.447273831124481e-06, "loss": 0.0, "step": 37020 }, { "epoch": 0.2554726900177306, "grad_norm": 0.0, "learning_rate": 7.446583923779037e-06, "loss": 0.0, "step": 37030 }, { "epoch": 0.25554168075227496, "grad_norm": 4.5429510153738306e-10, "learning_rate": 7.445894016433593e-06, "loss": 0.0, "step": 37040 }, { "epoch": 0.2556106714868193, "grad_norm": 0.0, "learning_rate": 7.44520410908815e-06, "loss": 0.0, "step": 37050 }, { "epoch": 0.2556796622213637, "grad_norm": 0.0, "learning_rate": 7.444514201742706e-06, "loss": 0.0, "step": 37060 }, { "epoch": 0.25574865295590804, "grad_norm": 3.456605099927401e-06, "learning_rate": 7.443824294397263e-06, "loss": 0.0, "step": 37070 }, { "epoch": 0.25581764369045235, "grad_norm": 5.273923761706101e-06, "learning_rate": 7.443134387051819e-06, "loss": 0.0, "step": 37080 }, { "epoch": 0.2558866344249967, "grad_norm": 0.0, "learning_rate": 7.442444479706376e-06, "loss": 0.0, "step": 37090 }, { "epoch": 0.25595562515954107, "grad_norm": 0.0, "learning_rate": 7.441754572360933e-06, "loss": 0.0, "step": 37100 }, { "epoch": 0.2560246158940854, "grad_norm": 0.0, "learning_rate": 7.4410646650154895e-06, "loss": 0.0, "step": 37110 }, { "epoch": 0.2560936066286298, "grad_norm": 1.1108824082839419e-07, "learning_rate": 7.440374757670046e-06, "loss": 0.0, "step": 37120 }, { "epoch": 0.25616259736317415, "grad_norm": 0.0, "learning_rate": 7.4396848503246025e-06, "loss": 0.0, "step": 37130 }, { "epoch": 0.25623158809771845, "grad_norm": 4.6528797481570905e-10, "learning_rate": 7.438994942979159e-06, "loss": 0.0, "step": 37140 }, { "epoch": 0.2563005788322628, "grad_norm": 5.158657945258938e-09, "learning_rate": 7.438305035633715e-06, "loss": 0.2709, "step": 37150 }, { "epoch": 0.2563695695668072, "grad_norm": 0.0, "learning_rate": 7.437615128288271e-06, "loss": 0.0, "step": 37160 }, { "epoch": 0.25643856030135154, "grad_norm": 0.016699325293302536, "learning_rate": 7.436925220942828e-06, "loss": 0.0, "step": 37170 }, { "epoch": 0.2565075510358959, "grad_norm": 0.0, "learning_rate": 7.436235313597384e-06, "loss": 0.0, "step": 37180 }, { "epoch": 0.2565765417704402, "grad_norm": 0.0, "learning_rate": 7.435545406251941e-06, "loss": 0.0, "step": 37190 }, { "epoch": 0.25664553250498456, "grad_norm": 0.0, "learning_rate": 7.434855498906497e-06, "loss": 0.0, "step": 37200 }, { "epoch": 0.2567145232395289, "grad_norm": 0.809956431388855, "learning_rate": 7.4341655915610544e-06, "loss": 0.0001, "step": 37210 }, { "epoch": 0.2567835139740733, "grad_norm": 0.0, "learning_rate": 7.433475684215611e-06, "loss": 0.0031, "step": 37220 }, { "epoch": 0.25685250470861765, "grad_norm": 0.00017121054406743497, "learning_rate": 7.432785776870167e-06, "loss": 0.0, "step": 37230 }, { "epoch": 0.256921495443162, "grad_norm": 0.0, "learning_rate": 7.432095869524724e-06, "loss": 0.0, "step": 37240 }, { "epoch": 0.2569904861777063, "grad_norm": 0.0, "learning_rate": 7.43140596217928e-06, "loss": 0.0001, "step": 37250 }, { "epoch": 0.2570594769122507, "grad_norm": 0.0, "learning_rate": 7.430716054833836e-06, "loss": 0.0, "step": 37260 }, { "epoch": 0.25712846764679503, "grad_norm": 0.0, "learning_rate": 7.4300261474883925e-06, "loss": 0.0, "step": 37270 }, { "epoch": 0.2571974583813394, "grad_norm": 0.0, "learning_rate": 7.429336240142949e-06, "loss": 0.0, "step": 37280 }, { "epoch": 0.25726644911588376, "grad_norm": 0.0, "learning_rate": 7.4286463327975055e-06, "loss": 0.0007, "step": 37290 }, { "epoch": 0.25733543985042806, "grad_norm": 4.518082019622227e-10, "learning_rate": 7.427956425452062e-06, "loss": 0.0, "step": 37300 }, { "epoch": 0.2574044305849724, "grad_norm": 1.021700143814087, "learning_rate": 7.4272665181066185e-06, "loss": 0.0002, "step": 37310 }, { "epoch": 0.2574734213195168, "grad_norm": 0.0, "learning_rate": 7.426576610761176e-06, "loss": 0.001, "step": 37320 }, { "epoch": 0.25754241205406114, "grad_norm": 0.0, "learning_rate": 7.425886703415732e-06, "loss": 0.0, "step": 37330 }, { "epoch": 0.2576114027886055, "grad_norm": 0.0, "learning_rate": 7.425196796070289e-06, "loss": 0.1441, "step": 37340 }, { "epoch": 0.25768039352314986, "grad_norm": 9.680126822786406e-05, "learning_rate": 7.424506888724845e-06, "loss": 0.0, "step": 37350 }, { "epoch": 0.25774938425769417, "grad_norm": 0.0, "learning_rate": 7.423816981379402e-06, "loss": 0.0, "step": 37360 }, { "epoch": 0.25781837499223853, "grad_norm": 0.0, "learning_rate": 7.4231270740339574e-06, "loss": 0.0, "step": 37370 }, { "epoch": 0.2578873657267829, "grad_norm": 0.0, "learning_rate": 7.422437166688514e-06, "loss": 0.003, "step": 37380 }, { "epoch": 0.25795635646132725, "grad_norm": 2.1739184064006878e-10, "learning_rate": 7.42174725934307e-06, "loss": 0.0036, "step": 37390 }, { "epoch": 0.2580253471958716, "grad_norm": 0.0, "learning_rate": 7.421057351997627e-06, "loss": 0.0, "step": 37400 }, { "epoch": 0.2580943379304159, "grad_norm": 0.0003934430715162307, "learning_rate": 7.420367444652183e-06, "loss": 0.0104, "step": 37410 }, { "epoch": 0.2581633286649603, "grad_norm": 0.0, "learning_rate": 7.41967753730674e-06, "loss": 0.0, "step": 37420 }, { "epoch": 0.25823231939950464, "grad_norm": 0.0, "learning_rate": 7.419056620695841e-06, "loss": 0.1169, "step": 37430 }, { "epoch": 0.258301310134049, "grad_norm": 0.0, "learning_rate": 7.418366713350398e-06, "loss": 0.0178, "step": 37440 }, { "epoch": 0.25837030086859336, "grad_norm": 0.0, "learning_rate": 7.417676806004954e-06, "loss": 0.0009, "step": 37450 }, { "epoch": 0.2584392916031377, "grad_norm": 0.0, "learning_rate": 7.416986898659511e-06, "loss": 0.0, "step": 37460 }, { "epoch": 0.258508282337682, "grad_norm": 915.4453735351562, "learning_rate": 7.416296991314067e-06, "loss": 0.267, "step": 37470 }, { "epoch": 0.2585772730722264, "grad_norm": 1.6182926856345148e-06, "learning_rate": 7.415607083968623e-06, "loss": 0.0, "step": 37480 }, { "epoch": 0.25864626380677075, "grad_norm": 0.0, "learning_rate": 7.4149171766231795e-06, "loss": 0.0002, "step": 37490 }, { "epoch": 0.2587152545413151, "grad_norm": 0.0, "learning_rate": 7.414227269277736e-06, "loss": 0.0, "step": 37500 }, { "epoch": 0.25878424527585947, "grad_norm": 0.0, "learning_rate": 7.4135373619322925e-06, "loss": 0.0, "step": 37510 }, { "epoch": 0.2588532360104038, "grad_norm": 1062.3094482421875, "learning_rate": 7.41284745458685e-06, "loss": 0.7446, "step": 37520 }, { "epoch": 0.25892222674494814, "grad_norm": 0.0, "learning_rate": 7.412157547241406e-06, "loss": 0.0, "step": 37530 }, { "epoch": 0.2589912174794925, "grad_norm": 0.0003129865217488259, "learning_rate": 7.411467639895963e-06, "loss": 0.0093, "step": 37540 }, { "epoch": 0.25906020821403686, "grad_norm": 5.7864535847329535e-06, "learning_rate": 7.410777732550519e-06, "loss": 0.0001, "step": 37550 }, { "epoch": 0.2591291989485812, "grad_norm": 0.0, "learning_rate": 7.410087825205076e-06, "loss": 0.0, "step": 37560 }, { "epoch": 0.2591981896831256, "grad_norm": 9.169200954062262e-10, "learning_rate": 7.409397917859632e-06, "loss": 0.0, "step": 37570 }, { "epoch": 0.2592671804176699, "grad_norm": 4.127901411266066e-05, "learning_rate": 7.408708010514189e-06, "loss": 0.0, "step": 37580 }, { "epoch": 0.25933617115221425, "grad_norm": 1.1614266633987427, "learning_rate": 7.408018103168744e-06, "loss": 0.0001, "step": 37590 }, { "epoch": 0.2594051618867586, "grad_norm": 7.775157428113744e-05, "learning_rate": 7.407328195823301e-06, "loss": 0.0671, "step": 37600 }, { "epoch": 0.25947415262130297, "grad_norm": 0.0, "learning_rate": 7.406638288477857e-06, "loss": 0.0, "step": 37610 }, { "epoch": 0.25954314335584733, "grad_norm": 0.0, "learning_rate": 7.405948381132414e-06, "loss": 0.0, "step": 37620 }, { "epoch": 0.25961213409039163, "grad_norm": 0.0, "learning_rate": 7.405258473786971e-06, "loss": 0.0, "step": 37630 }, { "epoch": 0.259681124824936, "grad_norm": 0.003942232578992844, "learning_rate": 7.404568566441528e-06, "loss": 0.0, "step": 37640 }, { "epoch": 0.25975011555948035, "grad_norm": 0.0, "learning_rate": 7.403878659096084e-06, "loss": 0.0, "step": 37650 }, { "epoch": 0.2598191062940247, "grad_norm": 0.0, "learning_rate": 7.403188751750641e-06, "loss": 0.0002, "step": 37660 }, { "epoch": 0.2598880970285691, "grad_norm": 4.735766778729555e-10, "learning_rate": 7.402498844405197e-06, "loss": 0.0, "step": 37670 }, { "epoch": 0.25995708776311344, "grad_norm": 5.6813117765841525e-08, "learning_rate": 7.401808937059754e-06, "loss": 0.0, "step": 37680 }, { "epoch": 0.26002607849765774, "grad_norm": 0.0026334025897085667, "learning_rate": 7.40111902971431e-06, "loss": 0.0, "step": 37690 }, { "epoch": 0.2600950692322021, "grad_norm": 0.0, "learning_rate": 7.400429122368867e-06, "loss": 0.0, "step": 37700 }, { "epoch": 0.26016405996674646, "grad_norm": 0.0027832696214318275, "learning_rate": 7.399739215023422e-06, "loss": 0.0, "step": 37710 }, { "epoch": 0.2602330507012908, "grad_norm": 0.0, "learning_rate": 7.399049307677979e-06, "loss": 0.0001, "step": 37720 }, { "epoch": 0.2603020414358352, "grad_norm": 0.0, "learning_rate": 7.398359400332535e-06, "loss": 0.0, "step": 37730 }, { "epoch": 0.2603710321703795, "grad_norm": 0.0, "learning_rate": 7.397669492987093e-06, "loss": 0.0, "step": 37740 }, { "epoch": 0.26044002290492385, "grad_norm": 1.7929034584085457e-05, "learning_rate": 7.396979585641649e-06, "loss": 0.0, "step": 37750 }, { "epoch": 0.2605090136394682, "grad_norm": 4.2693945601079974e-10, "learning_rate": 7.3962896782962056e-06, "loss": 0.0, "step": 37760 }, { "epoch": 0.2605780043740126, "grad_norm": 0.0006262511597014964, "learning_rate": 7.395599770950762e-06, "loss": 0.0013, "step": 37770 }, { "epoch": 0.26064699510855693, "grad_norm": 0.0, "learning_rate": 7.3949098636053186e-06, "loss": 0.0, "step": 37780 }, { "epoch": 0.2607159858431013, "grad_norm": 0.0, "learning_rate": 7.394219956259875e-06, "loss": 0.0, "step": 37790 }, { "epoch": 0.2607849765776456, "grad_norm": 9.403579026567854e-10, "learning_rate": 7.3935300489144315e-06, "loss": 0.0003, "step": 37800 }, { "epoch": 0.26085396731218996, "grad_norm": 0.0, "learning_rate": 7.392840141568988e-06, "loss": 0.0, "step": 37810 }, { "epoch": 0.2609229580467343, "grad_norm": 0.0, "learning_rate": 7.392150234223544e-06, "loss": 0.0037, "step": 37820 }, { "epoch": 0.2609919487812787, "grad_norm": 0.0, "learning_rate": 7.3914603268781e-06, "loss": 0.0004, "step": 37830 }, { "epoch": 0.26106093951582304, "grad_norm": 0.049956776201725006, "learning_rate": 7.390770419532657e-06, "loss": 0.0, "step": 37840 }, { "epoch": 0.26112993025036735, "grad_norm": 2.764890538742293e-09, "learning_rate": 7.390080512187214e-06, "loss": 0.0001, "step": 37850 }, { "epoch": 0.2611989209849117, "grad_norm": 0.0, "learning_rate": 7.3893906048417705e-06, "loss": 0.0, "step": 37860 }, { "epoch": 0.26126791171945607, "grad_norm": 4.611935764842201e-06, "learning_rate": 7.388700697496327e-06, "loss": 0.0, "step": 37870 }, { "epoch": 0.26133690245400043, "grad_norm": 0.0, "learning_rate": 7.3880107901508835e-06, "loss": 0.0, "step": 37880 }, { "epoch": 0.2614058931885448, "grad_norm": 0.0, "learning_rate": 7.38732088280544e-06, "loss": 0.0, "step": 37890 }, { "epoch": 0.26147488392308915, "grad_norm": 0.0, "learning_rate": 7.3866309754599964e-06, "loss": 0.0009, "step": 37900 }, { "epoch": 0.26154387465763346, "grad_norm": 0.0, "learning_rate": 7.385941068114553e-06, "loss": 0.616, "step": 37910 }, { "epoch": 0.2616128653921778, "grad_norm": 0.0, "learning_rate": 7.385251160769109e-06, "loss": 0.0, "step": 37920 }, { "epoch": 0.2616818561267222, "grad_norm": 0.0, "learning_rate": 7.384561253423665e-06, "loss": 0.0, "step": 37930 }, { "epoch": 0.26175084686126654, "grad_norm": 0.0, "learning_rate": 7.3838713460782216e-06, "loss": 0.0, "step": 37940 }, { "epoch": 0.2618198375958109, "grad_norm": 1.8060086759419391e-09, "learning_rate": 7.383250429467324e-06, "loss": 0.9157, "step": 37950 }, { "epoch": 0.2618888283303552, "grad_norm": 0.0, "learning_rate": 7.3825605221218796e-06, "loss": 0.0, "step": 37960 }, { "epoch": 0.26195781906489957, "grad_norm": 0.00021310083684511483, "learning_rate": 7.381870614776436e-06, "loss": 0.0, "step": 37970 }, { "epoch": 0.2620268097994439, "grad_norm": 0.0029894940089434385, "learning_rate": 7.3811807074309925e-06, "loss": 0.0, "step": 37980 }, { "epoch": 0.2620958005339883, "grad_norm": 0.0, "learning_rate": 7.380490800085549e-06, "loss": 0.0358, "step": 37990 }, { "epoch": 0.26216479126853265, "grad_norm": 0.0005743624060414732, "learning_rate": 7.3798008927401055e-06, "loss": 0.0, "step": 38000 }, { "epoch": 0.262233782003077, "grad_norm": 1.6207940234380658e-07, "learning_rate": 7.379110985394662e-06, "loss": 0.0, "step": 38010 }, { "epoch": 0.2623027727376213, "grad_norm": 0.0, "learning_rate": 7.3784210780492185e-06, "loss": 0.0, "step": 38020 }, { "epoch": 0.2623717634721657, "grad_norm": 8.575650215148926, "learning_rate": 7.377731170703775e-06, "loss": 0.0015, "step": 38030 }, { "epoch": 0.26244075420671004, "grad_norm": 0.0, "learning_rate": 7.377041263358331e-06, "loss": 0.0, "step": 38040 }, { "epoch": 0.2625097449412544, "grad_norm": 0.0, "learning_rate": 7.376351356012889e-06, "loss": 0.0, "step": 38050 }, { "epoch": 0.26257873567579876, "grad_norm": 0.0, "learning_rate": 7.375661448667445e-06, "loss": 0.0, "step": 38060 }, { "epoch": 0.2626477264103431, "grad_norm": 0.0, "learning_rate": 7.374971541322001e-06, "loss": 0.0001, "step": 38070 }, { "epoch": 0.2627167171448874, "grad_norm": 8.552069630241022e-08, "learning_rate": 7.3742816339765574e-06, "loss": 0.0001, "step": 38080 }, { "epoch": 0.2627857078794318, "grad_norm": 0.0, "learning_rate": 7.373591726631114e-06, "loss": 0.0, "step": 38090 }, { "epoch": 0.26285469861397615, "grad_norm": 0.0, "learning_rate": 7.3729018192856704e-06, "loss": 0.0, "step": 38100 }, { "epoch": 0.2629236893485205, "grad_norm": 0.12948361039161682, "learning_rate": 7.372211911940227e-06, "loss": 0.0001, "step": 38110 }, { "epoch": 0.26299268008306487, "grad_norm": 0.01798807829618454, "learning_rate": 7.371522004594783e-06, "loss": 0.0, "step": 38120 }, { "epoch": 0.2630616708176092, "grad_norm": 0.0, "learning_rate": 7.37083209724934e-06, "loss": 0.0, "step": 38130 }, { "epoch": 0.26313066155215353, "grad_norm": 0.0, "learning_rate": 7.370142189903896e-06, "loss": 0.0, "step": 38140 }, { "epoch": 0.2631996522866979, "grad_norm": 0.0, "learning_rate": 7.369452282558453e-06, "loss": 0.0, "step": 38150 }, { "epoch": 0.26326864302124225, "grad_norm": 0.0, "learning_rate": 7.36876237521301e-06, "loss": 0.0001, "step": 38160 }, { "epoch": 0.2633376337557866, "grad_norm": 0.0, "learning_rate": 7.368072467867567e-06, "loss": 0.0, "step": 38170 }, { "epoch": 0.263406624490331, "grad_norm": 0.0, "learning_rate": 7.367382560522122e-06, "loss": 0.0002, "step": 38180 }, { "epoch": 0.2634756152248753, "grad_norm": 0.0, "learning_rate": 7.366692653176679e-06, "loss": 0.0, "step": 38190 }, { "epoch": 0.26354460595941964, "grad_norm": 0.0, "learning_rate": 7.366002745831235e-06, "loss": 0.0, "step": 38200 }, { "epoch": 0.263613596693964, "grad_norm": 0.0, "learning_rate": 7.365312838485792e-06, "loss": 0.0, "step": 38210 }, { "epoch": 0.26368258742850836, "grad_norm": 0.0, "learning_rate": 7.364622931140348e-06, "loss": 0.0, "step": 38220 }, { "epoch": 0.2637515781630527, "grad_norm": 0.0, "learning_rate": 7.363933023794905e-06, "loss": 0.0006, "step": 38230 }, { "epoch": 0.26382056889759703, "grad_norm": 0.0, "learning_rate": 7.363243116449461e-06, "loss": 0.0, "step": 38240 }, { "epoch": 0.2638895596321414, "grad_norm": 0.0, "learning_rate": 7.362553209104018e-06, "loss": 0.0, "step": 38250 }, { "epoch": 0.26395855036668575, "grad_norm": 0.0, "learning_rate": 7.361863301758574e-06, "loss": 0.0001, "step": 38260 }, { "epoch": 0.2640275411012301, "grad_norm": 0.0, "learning_rate": 7.361173394413132e-06, "loss": 0.0563, "step": 38270 }, { "epoch": 0.2640965318357745, "grad_norm": 0.0, "learning_rate": 7.360483487067688e-06, "loss": 0.0002, "step": 38280 }, { "epoch": 0.26416552257031883, "grad_norm": 0.0, "learning_rate": 7.359793579722245e-06, "loss": 0.0, "step": 38290 }, { "epoch": 0.26423451330486314, "grad_norm": 0.0, "learning_rate": 7.3591036723768e-06, "loss": 0.0, "step": 38300 }, { "epoch": 0.2643035040394075, "grad_norm": 0.0, "learning_rate": 7.358413765031357e-06, "loss": 0.0, "step": 38310 }, { "epoch": 0.26437249477395186, "grad_norm": 0.0, "learning_rate": 7.357723857685913e-06, "loss": 0.0, "step": 38320 }, { "epoch": 0.2644414855084962, "grad_norm": 0.0, "learning_rate": 7.35703395034047e-06, "loss": 0.0, "step": 38330 }, { "epoch": 0.2645104762430406, "grad_norm": 0.0, "learning_rate": 7.356344042995026e-06, "loss": 0.0, "step": 38340 }, { "epoch": 0.2645794669775849, "grad_norm": 0.0, "learning_rate": 7.355654135649583e-06, "loss": 0.0, "step": 38350 }, { "epoch": 0.26464845771212925, "grad_norm": 1.5695299282469932e-07, "learning_rate": 7.354964228304139e-06, "loss": 0.0, "step": 38360 }, { "epoch": 0.2647174484466736, "grad_norm": 0.0, "learning_rate": 7.354274320958696e-06, "loss": 0.0, "step": 38370 }, { "epoch": 0.26478643918121797, "grad_norm": 0.0, "learning_rate": 7.353584413613253e-06, "loss": 0.0239, "step": 38380 }, { "epoch": 0.26485542991576233, "grad_norm": 0.0, "learning_rate": 7.3528945062678095e-06, "loss": 0.0, "step": 38390 }, { "epoch": 0.2649244206503067, "grad_norm": 2.2398413420887664e-05, "learning_rate": 7.352204598922366e-06, "loss": 0.2436, "step": 38400 }, { "epoch": 0.264993411384851, "grad_norm": 0.0, "learning_rate": 7.351514691576922e-06, "loss": 0.0, "step": 38410 }, { "epoch": 0.26506240211939536, "grad_norm": 0.0, "learning_rate": 7.350824784231478e-06, "loss": 0.0004, "step": 38420 }, { "epoch": 0.2651313928539397, "grad_norm": 0.0, "learning_rate": 7.350134876886035e-06, "loss": 0.0, "step": 38430 }, { "epoch": 0.2652003835884841, "grad_norm": 0.0, "learning_rate": 7.349444969540591e-06, "loss": 0.0, "step": 38440 }, { "epoch": 0.26526937432302844, "grad_norm": 0.0, "learning_rate": 7.348755062195148e-06, "loss": 0.0, "step": 38450 }, { "epoch": 0.26533836505757274, "grad_norm": 0.001017861533910036, "learning_rate": 7.348065154849704e-06, "loss": 0.0, "step": 38460 }, { "epoch": 0.2654073557921171, "grad_norm": 0.6606370210647583, "learning_rate": 7.3473752475042606e-06, "loss": 0.0001, "step": 38470 }, { "epoch": 0.26547634652666147, "grad_norm": 0.0009400597773492336, "learning_rate": 7.346685340158817e-06, "loss": 0.0, "step": 38480 }, { "epoch": 0.2655453372612058, "grad_norm": 0.0, "learning_rate": 7.345995432813374e-06, "loss": 0.0, "step": 38490 }, { "epoch": 0.2656143279957502, "grad_norm": 1.123922954704426e-09, "learning_rate": 7.345305525467931e-06, "loss": 0.0, "step": 38500 }, { "epoch": 0.26568331873029455, "grad_norm": 0.0, "learning_rate": 7.344615618122487e-06, "loss": 0.0, "step": 38510 }, { "epoch": 0.26575230946483885, "grad_norm": 0.0, "learning_rate": 7.343925710777043e-06, "loss": 0.0, "step": 38520 }, { "epoch": 0.2658213001993832, "grad_norm": 0.07188908755779266, "learning_rate": 7.3432358034315995e-06, "loss": 0.0028, "step": 38530 }, { "epoch": 0.2658902909339276, "grad_norm": 0.0, "learning_rate": 7.342545896086156e-06, "loss": 0.0014, "step": 38540 }, { "epoch": 0.26595928166847194, "grad_norm": 0.0, "learning_rate": 7.3418559887407125e-06, "loss": 0.0, "step": 38550 }, { "epoch": 0.2660282724030163, "grad_norm": 0.0, "learning_rate": 7.341166081395269e-06, "loss": 0.0, "step": 38560 }, { "epoch": 0.2660972631375606, "grad_norm": 0.0, "learning_rate": 7.3404761740498255e-06, "loss": 0.0, "step": 38570 }, { "epoch": 0.26616625387210496, "grad_norm": 0.0, "learning_rate": 7.339786266704382e-06, "loss": 0.0, "step": 38580 }, { "epoch": 0.2662352446066493, "grad_norm": 0.0, "learning_rate": 7.3390963593589385e-06, "loss": 0.1409, "step": 38590 }, { "epoch": 0.2663042353411937, "grad_norm": 0.0, "learning_rate": 7.338406452013496e-06, "loss": 0.0, "step": 38600 }, { "epoch": 0.26637322607573805, "grad_norm": 1.7232873688044492e-06, "learning_rate": 7.337716544668052e-06, "loss": 0.1604, "step": 38610 }, { "epoch": 0.2664422168102824, "grad_norm": 1.96116634469945e-05, "learning_rate": 7.337026637322609e-06, "loss": 0.0, "step": 38620 }, { "epoch": 0.2665112075448267, "grad_norm": 0.0, "learning_rate": 7.336336729977164e-06, "loss": 0.0, "step": 38630 }, { "epoch": 0.2665801982793711, "grad_norm": 0.0, "learning_rate": 7.335646822631721e-06, "loss": 0.0053, "step": 38640 }, { "epoch": 0.26664918901391543, "grad_norm": 0.0, "learning_rate": 7.334956915286277e-06, "loss": 0.0, "step": 38650 }, { "epoch": 0.2667181797484598, "grad_norm": 7.976258542541359e-10, "learning_rate": 7.334267007940834e-06, "loss": 0.0, "step": 38660 }, { "epoch": 0.26678717048300415, "grad_norm": 0.0, "learning_rate": 7.33357710059539e-06, "loss": 0.0, "step": 38670 }, { "epoch": 0.26685616121754846, "grad_norm": 0.0, "learning_rate": 7.332887193249947e-06, "loss": 0.0, "step": 38680 }, { "epoch": 0.2669251519520928, "grad_norm": 3.788072522326047e-09, "learning_rate": 7.332197285904503e-06, "loss": 0.0009, "step": 38690 }, { "epoch": 0.2669941426866372, "grad_norm": 0.0, "learning_rate": 7.33150737855906e-06, "loss": 0.0, "step": 38700 }, { "epoch": 0.26706313342118154, "grad_norm": 0.0, "learning_rate": 7.330817471213617e-06, "loss": 0.0, "step": 38710 }, { "epoch": 0.2671321241557259, "grad_norm": 8.979275101239637e-10, "learning_rate": 7.330127563868174e-06, "loss": 0.0, "step": 38720 }, { "epoch": 0.26720111489027026, "grad_norm": 0.0, "learning_rate": 7.32943765652273e-06, "loss": 0.0001, "step": 38730 }, { "epoch": 0.26727010562481457, "grad_norm": 2.423665250717022e-07, "learning_rate": 7.328747749177287e-06, "loss": 0.0, "step": 38740 }, { "epoch": 0.26733909635935893, "grad_norm": 0.0, "learning_rate": 7.328057841831842e-06, "loss": 0.0, "step": 38750 }, { "epoch": 0.2674080870939033, "grad_norm": 0.02464974857866764, "learning_rate": 7.327367934486399e-06, "loss": 0.0, "step": 38760 }, { "epoch": 0.26747707782844765, "grad_norm": 0.0, "learning_rate": 7.326678027140955e-06, "loss": 0.0, "step": 38770 }, { "epoch": 0.267546068562992, "grad_norm": 0.0, "learning_rate": 7.325988119795512e-06, "loss": 0.0, "step": 38780 }, { "epoch": 0.2676150592975363, "grad_norm": 0.0, "learning_rate": 7.325298212450068e-06, "loss": 0.0, "step": 38790 }, { "epoch": 0.2676840500320807, "grad_norm": 0.0, "learning_rate": 7.324608305104625e-06, "loss": 0.0006, "step": 38800 }, { "epoch": 0.26775304076662504, "grad_norm": 0.0, "learning_rate": 7.323918397759181e-06, "loss": 0.0, "step": 38810 }, { "epoch": 0.2678220315011694, "grad_norm": 0.0, "learning_rate": 7.323228490413739e-06, "loss": 0.0, "step": 38820 }, { "epoch": 0.26789102223571376, "grad_norm": 0.001431636861525476, "learning_rate": 7.322538583068295e-06, "loss": 0.0, "step": 38830 }, { "epoch": 0.2679600129702581, "grad_norm": 0.0, "learning_rate": 7.3218486757228516e-06, "loss": 0.0026, "step": 38840 }, { "epoch": 0.2680290037048024, "grad_norm": 0.9360167384147644, "learning_rate": 7.321158768377408e-06, "loss": 0.0001, "step": 38850 }, { "epoch": 0.2680979944393468, "grad_norm": 0.0, "learning_rate": 7.320468861031964e-06, "loss": 0.0, "step": 38860 }, { "epoch": 0.26816698517389115, "grad_norm": 0.004220404662191868, "learning_rate": 7.31977895368652e-06, "loss": 0.0, "step": 38870 }, { "epoch": 0.2682359759084355, "grad_norm": 0.0, "learning_rate": 7.319089046341077e-06, "loss": 0.0, "step": 38880 }, { "epoch": 0.26830496664297987, "grad_norm": 0.0, "learning_rate": 7.318399138995633e-06, "loss": 0.0, "step": 38890 }, { "epoch": 0.2683739573775242, "grad_norm": 0.027038592845201492, "learning_rate": 7.31770923165019e-06, "loss": 0.0149, "step": 38900 }, { "epoch": 0.26844294811206854, "grad_norm": 1.9568692266602739e-07, "learning_rate": 7.317019324304746e-06, "loss": 0.0001, "step": 38910 }, { "epoch": 0.2685119388466129, "grad_norm": 0.12096309661865234, "learning_rate": 7.316329416959303e-06, "loss": 0.0045, "step": 38920 }, { "epoch": 0.26858092958115726, "grad_norm": 0.0, "learning_rate": 7.31563950961386e-06, "loss": 0.0, "step": 38930 }, { "epoch": 0.2686499203157016, "grad_norm": 6.261028318022e-08, "learning_rate": 7.3149496022684165e-06, "loss": 0.0005, "step": 38940 }, { "epoch": 0.268718911050246, "grad_norm": 2.6456669729668647e-05, "learning_rate": 7.314259694922973e-06, "loss": 0.0121, "step": 38950 }, { "epoch": 0.2687879017847903, "grad_norm": 2.862464931752129e-09, "learning_rate": 7.3135697875775294e-06, "loss": 0.0534, "step": 38960 }, { "epoch": 0.26885689251933464, "grad_norm": 0.0, "learning_rate": 7.312879880232085e-06, "loss": 0.0, "step": 38970 }, { "epoch": 0.268925883253879, "grad_norm": 0.0, "learning_rate": 7.312189972886642e-06, "loss": 0.0, "step": 38980 }, { "epoch": 0.26899487398842337, "grad_norm": 0.0, "learning_rate": 7.311500065541198e-06, "loss": 0.0, "step": 38990 }, { "epoch": 0.2690638647229677, "grad_norm": 0.0, "learning_rate": 7.3108101581957546e-06, "loss": 0.0, "step": 39000 }, { "epoch": 0.26913285545751203, "grad_norm": 0.0, "learning_rate": 7.310120250850311e-06, "loss": 0.0001, "step": 39010 }, { "epoch": 0.2692018461920564, "grad_norm": 6.099893198552309e-06, "learning_rate": 7.3094303435048675e-06, "loss": 0.0, "step": 39020 }, { "epoch": 0.26927083692660075, "grad_norm": 1.1155313117683363e-08, "learning_rate": 7.308740436159424e-06, "loss": 0.0, "step": 39030 }, { "epoch": 0.2693398276611451, "grad_norm": 0.0, "learning_rate": 7.308050528813981e-06, "loss": 0.0, "step": 39040 }, { "epoch": 0.2694088183956895, "grad_norm": 0.0, "learning_rate": 7.307360621468538e-06, "loss": 0.0, "step": 39050 }, { "epoch": 0.26947780913023384, "grad_norm": 0.0, "learning_rate": 7.306670714123094e-06, "loss": 0.0391, "step": 39060 }, { "epoch": 0.26954679986477814, "grad_norm": 1.2339822319518134e-07, "learning_rate": 7.305980806777651e-06, "loss": 0.2168, "step": 39070 }, { "epoch": 0.2696157905993225, "grad_norm": 0.0, "learning_rate": 7.305290899432207e-06, "loss": 0.0, "step": 39080 }, { "epoch": 0.26968478133386686, "grad_norm": 0.0, "learning_rate": 7.304600992086763e-06, "loss": 0.0, "step": 39090 }, { "epoch": 0.2697537720684112, "grad_norm": 251.88563537597656, "learning_rate": 7.3039110847413195e-06, "loss": 0.0616, "step": 39100 }, { "epoch": 0.2698227628029556, "grad_norm": 0.0, "learning_rate": 7.303221177395876e-06, "loss": 0.0, "step": 39110 }, { "epoch": 0.2698917535374999, "grad_norm": 0.0, "learning_rate": 7.3025312700504324e-06, "loss": 0.0, "step": 39120 }, { "epoch": 0.26996074427204425, "grad_norm": 0.0, "learning_rate": 7.301841362704989e-06, "loss": 0.0, "step": 39130 }, { "epoch": 0.2700297350065886, "grad_norm": 0.0, "learning_rate": 7.3011514553595454e-06, "loss": 0.0, "step": 39140 }, { "epoch": 0.270098725741133, "grad_norm": 0.0, "learning_rate": 7.300461548014103e-06, "loss": 0.0028, "step": 39150 }, { "epoch": 0.27016771647567733, "grad_norm": 0.0, "learning_rate": 7.299771640668659e-06, "loss": 0.0, "step": 39160 }, { "epoch": 0.2702367072102217, "grad_norm": 0.0022596721537411213, "learning_rate": 7.299081733323216e-06, "loss": 0.0, "step": 39170 }, { "epoch": 0.270305697944766, "grad_norm": 0.0035123704001307487, "learning_rate": 7.298391825977772e-06, "loss": 0.2731, "step": 39180 }, { "epoch": 0.27037468867931036, "grad_norm": 0.0, "learning_rate": 7.297701918632329e-06, "loss": 0.0, "step": 39190 }, { "epoch": 0.2704436794138547, "grad_norm": 0.0, "learning_rate": 7.297012011286884e-06, "loss": 0.0, "step": 39200 }, { "epoch": 0.2705126701483991, "grad_norm": 6.752162153134122e-05, "learning_rate": 7.296322103941441e-06, "loss": 0.0014, "step": 39210 }, { "epoch": 0.27058166088294344, "grad_norm": 0.0, "learning_rate": 7.295632196595997e-06, "loss": 0.0, "step": 39220 }, { "epoch": 0.27065065161748775, "grad_norm": 0.0, "learning_rate": 7.294942289250554e-06, "loss": 0.0, "step": 39230 }, { "epoch": 0.2707196423520321, "grad_norm": 0.0, "learning_rate": 7.29425238190511e-06, "loss": 0.0, "step": 39240 }, { "epoch": 0.27078863308657647, "grad_norm": 0.0, "learning_rate": 7.293562474559667e-06, "loss": 0.0, "step": 39250 }, { "epoch": 0.27085762382112083, "grad_norm": 0.0, "learning_rate": 7.292872567214224e-06, "loss": 1.2336, "step": 39260 }, { "epoch": 0.2709266145556652, "grad_norm": 0.0, "learning_rate": 7.292182659868781e-06, "loss": 0.0006, "step": 39270 }, { "epoch": 0.27099560529020955, "grad_norm": 0.0, "learning_rate": 7.291492752523337e-06, "loss": 0.0001, "step": 39280 }, { "epoch": 0.27106459602475386, "grad_norm": 0.0, "learning_rate": 7.290802845177894e-06, "loss": 0.0001, "step": 39290 }, { "epoch": 0.2711335867592982, "grad_norm": 0.0, "learning_rate": 7.29011293783245e-06, "loss": 0.0, "step": 39300 }, { "epoch": 0.2712025774938426, "grad_norm": 0.0, "learning_rate": 7.289423030487006e-06, "loss": 0.0, "step": 39310 }, { "epoch": 0.27127156822838694, "grad_norm": 0.0, "learning_rate": 7.288733123141562e-06, "loss": 0.0, "step": 39320 }, { "epoch": 0.2713405589629313, "grad_norm": 2.115774577760021e-06, "learning_rate": 7.288043215796119e-06, "loss": 0.0001, "step": 39330 }, { "epoch": 0.2714095496974756, "grad_norm": 0.0, "learning_rate": 7.287353308450675e-06, "loss": 0.187, "step": 39340 }, { "epoch": 0.27147854043201997, "grad_norm": 0.0, "learning_rate": 7.286663401105232e-06, "loss": 0.0011, "step": 39350 }, { "epoch": 0.2715475311665643, "grad_norm": 0.0, "learning_rate": 7.285973493759788e-06, "loss": 0.0, "step": 39360 }, { "epoch": 0.2716165219011087, "grad_norm": 0.0, "learning_rate": 7.2852835864143456e-06, "loss": 0.0, "step": 39370 }, { "epoch": 0.27168551263565305, "grad_norm": 4.052931323883513e-09, "learning_rate": 7.284593679068902e-06, "loss": 0.0, "step": 39380 }, { "epoch": 0.2717545033701974, "grad_norm": 0.0, "learning_rate": 7.2839037717234585e-06, "loss": 0.0045, "step": 39390 }, { "epoch": 0.2718234941047417, "grad_norm": 2.0672324296810984e-08, "learning_rate": 7.283213864378015e-06, "loss": 0.0, "step": 39400 }, { "epoch": 0.2718924848392861, "grad_norm": 0.0, "learning_rate": 7.2825239570325715e-06, "loss": 0.0038, "step": 39410 }, { "epoch": 0.27196147557383044, "grad_norm": 0.0, "learning_rate": 7.281834049687128e-06, "loss": 0.0105, "step": 39420 }, { "epoch": 0.2720304663083748, "grad_norm": 0.0, "learning_rate": 7.281144142341684e-06, "loss": 0.0, "step": 39430 }, { "epoch": 0.27209945704291916, "grad_norm": 0.0, "learning_rate": 7.28045423499624e-06, "loss": 0.0522, "step": 39440 }, { "epoch": 0.27216844777746346, "grad_norm": 0.04534636810421944, "learning_rate": 7.279764327650797e-06, "loss": 0.0, "step": 39450 }, { "epoch": 0.2722374385120078, "grad_norm": 3.3380654258508002e-06, "learning_rate": 7.279074420305353e-06, "loss": 0.0, "step": 39460 }, { "epoch": 0.2723064292465522, "grad_norm": 0.0, "learning_rate": 7.27838451295991e-06, "loss": 0.0, "step": 39470 }, { "epoch": 0.27237541998109654, "grad_norm": 1.5880146975177922e-06, "learning_rate": 7.277694605614467e-06, "loss": 0.0, "step": 39480 }, { "epoch": 0.2724444107156409, "grad_norm": 0.0, "learning_rate": 7.2770046982690234e-06, "loss": 0.4078, "step": 39490 }, { "epoch": 0.27251340145018527, "grad_norm": 0.0, "learning_rate": 7.27631479092358e-06, "loss": 0.0, "step": 39500 }, { "epoch": 0.27258239218472957, "grad_norm": 0.0, "learning_rate": 7.275624883578136e-06, "loss": 0.0, "step": 39510 }, { "epoch": 0.27265138291927393, "grad_norm": 0.0, "learning_rate": 7.274934976232693e-06, "loss": 0.0, "step": 39520 }, { "epoch": 0.2727203736538183, "grad_norm": 1.6298672278480808e-07, "learning_rate": 7.274245068887249e-06, "loss": 0.0002, "step": 39530 }, { "epoch": 0.27278936438836265, "grad_norm": 0.0, "learning_rate": 7.273555161541805e-06, "loss": 0.0001, "step": 39540 }, { "epoch": 0.272858355122907, "grad_norm": 0.0, "learning_rate": 7.2728652541963615e-06, "loss": 0.0, "step": 39550 }, { "epoch": 0.2729273458574513, "grad_norm": 0.0, "learning_rate": 7.272175346850918e-06, "loss": 0.0, "step": 39560 }, { "epoch": 0.2729963365919957, "grad_norm": 0.0, "learning_rate": 7.2714854395054745e-06, "loss": 0.0, "step": 39570 }, { "epoch": 0.27306532732654004, "grad_norm": 1.044731902766216e-06, "learning_rate": 7.270795532160031e-06, "loss": 0.0, "step": 39580 }, { "epoch": 0.2731343180610844, "grad_norm": 0.0, "learning_rate": 7.270105624814588e-06, "loss": 0.0, "step": 39590 }, { "epoch": 0.27320330879562876, "grad_norm": 0.0, "learning_rate": 7.269415717469145e-06, "loss": 0.0, "step": 39600 }, { "epoch": 0.2732722995301731, "grad_norm": 0.0, "learning_rate": 7.268725810123701e-06, "loss": 0.0, "step": 39610 }, { "epoch": 0.27334129026471743, "grad_norm": 0.04081987589597702, "learning_rate": 7.268035902778258e-06, "loss": 0.0543, "step": 39620 }, { "epoch": 0.2734102809992618, "grad_norm": 0.0, "learning_rate": 7.267345995432814e-06, "loss": 0.0, "step": 39630 }, { "epoch": 0.27347927173380615, "grad_norm": 2.2865371704101562, "learning_rate": 7.266656088087371e-06, "loss": 0.0002, "step": 39640 }, { "epoch": 0.2735482624683505, "grad_norm": 0.0, "learning_rate": 7.2659661807419264e-06, "loss": 0.0, "step": 39650 }, { "epoch": 0.2736172532028949, "grad_norm": 0.0, "learning_rate": 7.265276273396483e-06, "loss": 0.0, "step": 39660 }, { "epoch": 0.2736862439374392, "grad_norm": 0.0, "learning_rate": 7.264586366051039e-06, "loss": 0.0, "step": 39670 }, { "epoch": 0.27375523467198354, "grad_norm": 0.0, "learning_rate": 7.263896458705596e-06, "loss": 0.0, "step": 39680 }, { "epoch": 0.2738242254065279, "grad_norm": 0.0, "learning_rate": 7.263206551360152e-06, "loss": 0.0, "step": 39690 }, { "epoch": 0.27389321614107226, "grad_norm": 0.0, "learning_rate": 7.26251664401471e-06, "loss": 0.0, "step": 39700 }, { "epoch": 0.2739622068756166, "grad_norm": 3.687272510433104e-06, "learning_rate": 7.261826736669266e-06, "loss": 0.0, "step": 39710 }, { "epoch": 0.274031197610161, "grad_norm": 0.0, "learning_rate": 7.261136829323823e-06, "loss": 0.0, "step": 39720 }, { "epoch": 0.2741001883447053, "grad_norm": 0.0, "learning_rate": 7.260446921978379e-06, "loss": 0.0, "step": 39730 }, { "epoch": 0.27416917907924965, "grad_norm": 0.0, "learning_rate": 7.259757014632936e-06, "loss": 0.0, "step": 39740 }, { "epoch": 0.274238169813794, "grad_norm": 0.0, "learning_rate": 7.259067107287492e-06, "loss": 0.0, "step": 39750 }, { "epoch": 0.27430716054833837, "grad_norm": 0.0046210926957428455, "learning_rate": 7.258377199942049e-06, "loss": 0.0, "step": 39760 }, { "epoch": 0.27437615128288273, "grad_norm": 0.0, "learning_rate": 7.257687292596604e-06, "loss": 0.0, "step": 39770 }, { "epoch": 0.27444514201742704, "grad_norm": 0.0, "learning_rate": 7.256997385251161e-06, "loss": 0.0, "step": 39780 }, { "epoch": 0.2745141327519714, "grad_norm": 0.0, "learning_rate": 7.256307477905717e-06, "loss": 0.0001, "step": 39790 }, { "epoch": 0.27458312348651576, "grad_norm": 0.0, "learning_rate": 7.255617570560274e-06, "loss": 0.0, "step": 39800 }, { "epoch": 0.2746521142210601, "grad_norm": 0.0, "learning_rate": 7.254927663214831e-06, "loss": 0.0, "step": 39810 }, { "epoch": 0.2747211049556045, "grad_norm": 5.98169071963639e-06, "learning_rate": 7.254237755869388e-06, "loss": 0.0001, "step": 39820 }, { "epoch": 0.27479009569014884, "grad_norm": 0.0, "learning_rate": 7.253547848523944e-06, "loss": 0.0, "step": 39830 }, { "epoch": 0.27485908642469314, "grad_norm": 0.0, "learning_rate": 7.252857941178501e-06, "loss": 0.0, "step": 39840 }, { "epoch": 0.2749280771592375, "grad_norm": 0.0, "learning_rate": 7.252168033833057e-06, "loss": 0.0, "step": 39850 }, { "epoch": 0.27499706789378187, "grad_norm": 0.0, "learning_rate": 7.251478126487614e-06, "loss": 0.0, "step": 39860 }, { "epoch": 0.2750660586283262, "grad_norm": 0.0, "learning_rate": 7.25078821914217e-06, "loss": 0.0, "step": 39870 }, { "epoch": 0.2751350493628706, "grad_norm": 0.38289105892181396, "learning_rate": 7.250098311796726e-06, "loss": 0.0001, "step": 39880 }, { "epoch": 0.2752040400974149, "grad_norm": 0.0, "learning_rate": 7.249408404451282e-06, "loss": 0.0, "step": 39890 }, { "epoch": 0.27527303083195925, "grad_norm": 0.0, "learning_rate": 7.248718497105839e-06, "loss": 0.0, "step": 39900 }, { "epoch": 0.2753420215665036, "grad_norm": 0.031553372740745544, "learning_rate": 7.248028589760395e-06, "loss": 0.0, "step": 39910 }, { "epoch": 0.275411012301048, "grad_norm": 0.0, "learning_rate": 7.2473386824149525e-06, "loss": 0.0, "step": 39920 }, { "epoch": 0.27548000303559234, "grad_norm": 0.0, "learning_rate": 7.246648775069509e-06, "loss": 0.0, "step": 39930 }, { "epoch": 0.2755489937701367, "grad_norm": 0.0, "learning_rate": 7.2459588677240655e-06, "loss": 0.0, "step": 39940 }, { "epoch": 0.275617984504681, "grad_norm": 0.0, "learning_rate": 7.245268960378622e-06, "loss": 0.0, "step": 39950 }, { "epoch": 0.27568697523922536, "grad_norm": 0.0, "learning_rate": 7.2445790530331785e-06, "loss": 0.0001, "step": 39960 }, { "epoch": 0.2757559659737697, "grad_norm": 0.0, "learning_rate": 7.243889145687735e-06, "loss": 0.0, "step": 39970 }, { "epoch": 0.2758249567083141, "grad_norm": 2.154805770260282e-06, "learning_rate": 7.2431992383422915e-06, "loss": 0.0, "step": 39980 }, { "epoch": 0.27589394744285844, "grad_norm": 2.7835710625367938e-06, "learning_rate": 7.242509330996847e-06, "loss": 0.0022, "step": 39990 }, { "epoch": 0.27596293817740275, "grad_norm": 0.0, "learning_rate": 7.241819423651404e-06, "loss": 0.0361, "step": 40000 }, { "epoch": 0.2760319289119471, "grad_norm": 9.956645286024468e-09, "learning_rate": 7.24112951630596e-06, "loss": 0.0733, "step": 40010 }, { "epoch": 0.27610091964649147, "grad_norm": 0.0, "learning_rate": 7.240439608960517e-06, "loss": 0.0031, "step": 40020 }, { "epoch": 0.27616991038103583, "grad_norm": 0.0, "learning_rate": 7.239749701615074e-06, "loss": 0.0, "step": 40030 }, { "epoch": 0.2762389011155802, "grad_norm": 0.0, "learning_rate": 7.23905979426963e-06, "loss": 0.0, "step": 40040 }, { "epoch": 0.27630789185012455, "grad_norm": 0.0, "learning_rate": 7.238369886924187e-06, "loss": 0.0, "step": 40050 }, { "epoch": 0.27637688258466886, "grad_norm": 5.2426734065136316e-09, "learning_rate": 7.237679979578743e-06, "loss": 0.0, "step": 40060 }, { "epoch": 0.2764458733192132, "grad_norm": 0.0, "learning_rate": 7.2369900722333e-06, "loss": 0.0001, "step": 40070 }, { "epoch": 0.2765148640537576, "grad_norm": 0.0, "learning_rate": 7.236300164887856e-06, "loss": 0.0001, "step": 40080 }, { "epoch": 0.27658385478830194, "grad_norm": 0.0, "learning_rate": 7.235610257542413e-06, "loss": 0.0, "step": 40090 }, { "epoch": 0.2766528455228463, "grad_norm": 0.00014590761566068977, "learning_rate": 7.2349203501969685e-06, "loss": 0.0, "step": 40100 }, { "epoch": 0.2767218362573906, "grad_norm": 0.0, "learning_rate": 7.234230442851525e-06, "loss": 0.0, "step": 40110 }, { "epoch": 0.27679082699193497, "grad_norm": 0.0, "learning_rate": 7.2335405355060815e-06, "loss": 0.0, "step": 40120 }, { "epoch": 0.27685981772647933, "grad_norm": 0.0, "learning_rate": 7.232850628160638e-06, "loss": 0.0, "step": 40130 }, { "epoch": 0.2769288084610237, "grad_norm": 0.0, "learning_rate": 7.232160720815195e-06, "loss": 0.0001, "step": 40140 }, { "epoch": 0.27699779919556805, "grad_norm": 0.0, "learning_rate": 7.231470813469752e-06, "loss": 0.0, "step": 40150 }, { "epoch": 0.2770667899301124, "grad_norm": 4.691009358737119e-09, "learning_rate": 7.230780906124308e-06, "loss": 0.0, "step": 40160 }, { "epoch": 0.2771357806646567, "grad_norm": 0.0, "learning_rate": 7.230090998778865e-06, "loss": 0.0, "step": 40170 }, { "epoch": 0.2772047713992011, "grad_norm": 0.0, "learning_rate": 7.229401091433421e-06, "loss": 0.0, "step": 40180 }, { "epoch": 0.27727376213374544, "grad_norm": 0.0, "learning_rate": 7.228711184087978e-06, "loss": 0.0, "step": 40190 }, { "epoch": 0.2773427528682898, "grad_norm": 0.0, "learning_rate": 7.228021276742534e-06, "loss": 0.0088, "step": 40200 }, { "epoch": 0.27741174360283416, "grad_norm": 0.0, "learning_rate": 7.227331369397091e-06, "loss": 0.0009, "step": 40210 }, { "epoch": 0.27748073433737847, "grad_norm": 0.0, "learning_rate": 7.226641462051646e-06, "loss": 0.0002, "step": 40220 }, { "epoch": 0.2775497250719228, "grad_norm": 0.0, "learning_rate": 7.225951554706203e-06, "loss": 0.0, "step": 40230 }, { "epoch": 0.2776187158064672, "grad_norm": 0.0, "learning_rate": 7.225261647360759e-06, "loss": 0.0, "step": 40240 }, { "epoch": 0.27768770654101155, "grad_norm": 1.2461619007808622e-06, "learning_rate": 7.224571740015317e-06, "loss": 0.0038, "step": 40250 }, { "epoch": 0.2777566972755559, "grad_norm": 0.0, "learning_rate": 7.223881832669873e-06, "loss": 0.0, "step": 40260 }, { "epoch": 0.27782568801010027, "grad_norm": 0.0, "learning_rate": 7.22319192532443e-06, "loss": 0.0, "step": 40270 }, { "epoch": 0.2778946787446446, "grad_norm": 0.17264869809150696, "learning_rate": 7.222502017978986e-06, "loss": 0.0, "step": 40280 }, { "epoch": 0.27796366947918894, "grad_norm": 0.0, "learning_rate": 7.221812110633543e-06, "loss": 0.0, "step": 40290 }, { "epoch": 0.2780326602137333, "grad_norm": 0.0, "learning_rate": 7.221122203288099e-06, "loss": 0.0, "step": 40300 }, { "epoch": 0.27810165094827766, "grad_norm": 0.0, "learning_rate": 7.220432295942656e-06, "loss": 0.0001, "step": 40310 }, { "epoch": 0.278170641682822, "grad_norm": 0.0, "learning_rate": 7.219742388597212e-06, "loss": 0.0, "step": 40320 }, { "epoch": 0.2782396324173663, "grad_norm": 0.0, "learning_rate": 7.219052481251768e-06, "loss": 0.0, "step": 40330 }, { "epoch": 0.2783086231519107, "grad_norm": 694.5888671875, "learning_rate": 7.218362573906324e-06, "loss": 0.4496, "step": 40340 }, { "epoch": 0.27837761388645504, "grad_norm": 0.0, "learning_rate": 7.217672666560881e-06, "loss": 0.0023, "step": 40350 }, { "epoch": 0.2784466046209994, "grad_norm": 0.0, "learning_rate": 7.216982759215438e-06, "loss": 0.0, "step": 40360 }, { "epoch": 0.27851559535554377, "grad_norm": 0.0, "learning_rate": 7.216292851869995e-06, "loss": 0.0, "step": 40370 }, { "epoch": 0.2785845860900881, "grad_norm": 0.0, "learning_rate": 7.215602944524551e-06, "loss": 0.0, "step": 40380 }, { "epoch": 0.27865357682463243, "grad_norm": 1.381496758767753e-06, "learning_rate": 7.2149130371791076e-06, "loss": 0.0, "step": 40390 }, { "epoch": 0.2787225675591768, "grad_norm": 2.1915736851951806e-06, "learning_rate": 7.214223129833664e-06, "loss": 0.001, "step": 40400 }, { "epoch": 0.27879155829372115, "grad_norm": 23.778778076171875, "learning_rate": 7.2135332224882205e-06, "loss": 0.0053, "step": 40410 }, { "epoch": 0.2788605490282655, "grad_norm": 0.0, "learning_rate": 7.212912305877321e-06, "loss": 0.316, "step": 40420 }, { "epoch": 0.2789295397628099, "grad_norm": 0.0, "learning_rate": 7.212222398531878e-06, "loss": 0.0, "step": 40430 }, { "epoch": 0.2789985304973542, "grad_norm": 0.0, "learning_rate": 7.211532491186433e-06, "loss": 0.2004, "step": 40440 }, { "epoch": 0.27906752123189854, "grad_norm": 0.5479373335838318, "learning_rate": 7.2108425838409915e-06, "loss": 0.0008, "step": 40450 }, { "epoch": 0.2791365119664429, "grad_norm": 9.191258865115515e-10, "learning_rate": 7.210152676495548e-06, "loss": 0.0, "step": 40460 }, { "epoch": 0.27920550270098726, "grad_norm": 0.0, "learning_rate": 7.209462769150104e-06, "loss": 0.0, "step": 40470 }, { "epoch": 0.2792744934355316, "grad_norm": 0.0, "learning_rate": 7.20877286180466e-06, "loss": 0.0, "step": 40480 }, { "epoch": 0.279343484170076, "grad_norm": 0.5799373388290405, "learning_rate": 7.208082954459217e-06, "loss": 0.0002, "step": 40490 }, { "epoch": 0.2794124749046203, "grad_norm": 0.0, "learning_rate": 7.207393047113773e-06, "loss": 0.0, "step": 40500 }, { "epoch": 0.27948146563916465, "grad_norm": 0.0, "learning_rate": 7.20670313976833e-06, "loss": 0.008, "step": 40510 }, { "epoch": 0.279550456373709, "grad_norm": 0.0, "learning_rate": 7.206013232422886e-06, "loss": 0.0, "step": 40520 }, { "epoch": 0.27961944710825337, "grad_norm": 0.0, "learning_rate": 7.205323325077443e-06, "loss": 0.0, "step": 40530 }, { "epoch": 0.27968843784279773, "grad_norm": 0.0, "learning_rate": 7.204633417731999e-06, "loss": 0.003, "step": 40540 }, { "epoch": 0.27975742857734204, "grad_norm": 0.0, "learning_rate": 7.203943510386555e-06, "loss": 0.0, "step": 40550 }, { "epoch": 0.2798264193118864, "grad_norm": 0.0, "learning_rate": 7.203253603041113e-06, "loss": 0.0001, "step": 40560 }, { "epoch": 0.27989541004643076, "grad_norm": 0.0, "learning_rate": 7.202563695695669e-06, "loss": 0.0001, "step": 40570 }, { "epoch": 0.2799644007809751, "grad_norm": 0.0, "learning_rate": 7.201873788350225e-06, "loss": 0.0, "step": 40580 }, { "epoch": 0.2800333915155195, "grad_norm": 0.0, "learning_rate": 7.2011838810047816e-06, "loss": 0.0079, "step": 40590 }, { "epoch": 0.28010238225006384, "grad_norm": 0.0, "learning_rate": 7.200493973659338e-06, "loss": 0.0305, "step": 40600 }, { "epoch": 0.28017137298460815, "grad_norm": 0.0, "learning_rate": 7.1998040663138945e-06, "loss": 0.0, "step": 40610 }, { "epoch": 0.2802403637191525, "grad_norm": 0.004756584297865629, "learning_rate": 7.199114158968451e-06, "loss": 0.0407, "step": 40620 }, { "epoch": 0.28030935445369687, "grad_norm": 0.0, "learning_rate": 7.1984242516230075e-06, "loss": 0.0, "step": 40630 }, { "epoch": 0.28037834518824123, "grad_norm": 0.00023076798242982477, "learning_rate": 7.197734344277564e-06, "loss": 0.0, "step": 40640 }, { "epoch": 0.2804473359227856, "grad_norm": 0.0, "learning_rate": 7.1970444369321205e-06, "loss": 0.0, "step": 40650 }, { "epoch": 0.2805163266573299, "grad_norm": 0.0, "learning_rate": 7.196354529586677e-06, "loss": 0.0, "step": 40660 }, { "epoch": 0.28058531739187426, "grad_norm": 0.0, "learning_rate": 7.195664622241234e-06, "loss": 0.0, "step": 40670 }, { "epoch": 0.2806543081264186, "grad_norm": 0.0, "learning_rate": 7.194974714895791e-06, "loss": 0.0, "step": 40680 }, { "epoch": 0.280723298860963, "grad_norm": 0.0, "learning_rate": 7.1942848075503465e-06, "loss": 0.0, "step": 40690 }, { "epoch": 0.28079228959550734, "grad_norm": 0.0, "learning_rate": 7.193594900204903e-06, "loss": 0.5321, "step": 40700 }, { "epoch": 0.2808612803300517, "grad_norm": 0.0, "learning_rate": 7.1929049928594594e-06, "loss": 0.0012, "step": 40710 }, { "epoch": 0.280930271064596, "grad_norm": 0.0, "learning_rate": 7.192215085514016e-06, "loss": 0.0, "step": 40720 }, { "epoch": 0.28099926179914037, "grad_norm": 0.0, "learning_rate": 7.191525178168572e-06, "loss": 0.0, "step": 40730 }, { "epoch": 0.2810682525336847, "grad_norm": 0.0, "learning_rate": 7.190835270823129e-06, "loss": 0.0001, "step": 40740 }, { "epoch": 0.2811372432682291, "grad_norm": 0.0, "learning_rate": 7.190145363477685e-06, "loss": 0.0007, "step": 40750 }, { "epoch": 0.28120623400277345, "grad_norm": 8.962988129468386e-10, "learning_rate": 7.189455456132242e-06, "loss": 0.0237, "step": 40760 }, { "epoch": 0.28127522473731775, "grad_norm": 0.00010635334183461964, "learning_rate": 7.188765548786798e-06, "loss": 0.0001, "step": 40770 }, { "epoch": 0.2813442154718621, "grad_norm": 0.0, "learning_rate": 7.188075641441356e-06, "loss": 0.0001, "step": 40780 }, { "epoch": 0.2814132062064065, "grad_norm": 0.0, "learning_rate": 7.187385734095912e-06, "loss": 0.001, "step": 40790 }, { "epoch": 0.28148219694095084, "grad_norm": 0.0, "learning_rate": 7.186695826750469e-06, "loss": 0.0, "step": 40800 }, { "epoch": 0.2815511876754952, "grad_norm": 1.777445080008988e-09, "learning_rate": 7.186005919405024e-06, "loss": 0.0001, "step": 40810 }, { "epoch": 0.28162017841003956, "grad_norm": 0.0, "learning_rate": 7.185316012059581e-06, "loss": 0.0007, "step": 40820 }, { "epoch": 0.28168916914458386, "grad_norm": 0.0, "learning_rate": 7.184626104714137e-06, "loss": 0.0, "step": 40830 }, { "epoch": 0.2817581598791282, "grad_norm": 0.0, "learning_rate": 7.183936197368694e-06, "loss": 0.0, "step": 40840 }, { "epoch": 0.2818271506136726, "grad_norm": 1.822227924108688e-09, "learning_rate": 7.18324629002325e-06, "loss": 0.0, "step": 40850 }, { "epoch": 0.28189614134821694, "grad_norm": 0.0, "learning_rate": 7.182556382677807e-06, "loss": 0.0901, "step": 40860 }, { "epoch": 0.2819651320827613, "grad_norm": 0.0, "learning_rate": 7.181866475332363e-06, "loss": 0.0, "step": 40870 }, { "epoch": 0.2820341228173056, "grad_norm": 0.0, "learning_rate": 7.18117656798692e-06, "loss": 0.0, "step": 40880 }, { "epoch": 0.28210311355184997, "grad_norm": 0.0, "learning_rate": 7.180486660641477e-06, "loss": 0.0, "step": 40890 }, { "epoch": 0.28217210428639433, "grad_norm": 0.0, "learning_rate": 7.179796753296034e-06, "loss": 0.0001, "step": 40900 }, { "epoch": 0.2822410950209387, "grad_norm": 0.0, "learning_rate": 7.17910684595059e-06, "loss": 0.0, "step": 40910 }, { "epoch": 0.28231008575548305, "grad_norm": 0.0, "learning_rate": 7.178416938605146e-06, "loss": 0.0001, "step": 40920 }, { "epoch": 0.2823790764900274, "grad_norm": 0.0, "learning_rate": 7.177727031259702e-06, "loss": 0.0, "step": 40930 }, { "epoch": 0.2824480672245717, "grad_norm": 5.843711505804094e-07, "learning_rate": 7.177037123914259e-06, "loss": 0.0, "step": 40940 }, { "epoch": 0.2825170579591161, "grad_norm": 0.00039787410059943795, "learning_rate": 7.176347216568815e-06, "loss": 0.0281, "step": 40950 }, { "epoch": 0.28258604869366044, "grad_norm": 0.0, "learning_rate": 7.175657309223372e-06, "loss": 0.0, "step": 40960 }, { "epoch": 0.2826550394282048, "grad_norm": 0.0, "learning_rate": 7.174967401877928e-06, "loss": 0.0, "step": 40970 }, { "epoch": 0.28272403016274916, "grad_norm": 0.0, "learning_rate": 7.174277494532485e-06, "loss": 0.0, "step": 40980 }, { "epoch": 0.28279302089729347, "grad_norm": 0.0, "learning_rate": 7.173587587187041e-06, "loss": 0.0, "step": 40990 }, { "epoch": 0.28286201163183783, "grad_norm": 0.0, "learning_rate": 7.1728976798415985e-06, "loss": 0.0, "step": 41000 }, { "epoch": 0.2829310023663822, "grad_norm": 1.244142936229764e-06, "learning_rate": 7.172207772496155e-06, "loss": 0.0, "step": 41010 }, { "epoch": 0.28299999310092655, "grad_norm": 0.0, "learning_rate": 7.1715178651507115e-06, "loss": 0.1564, "step": 41020 }, { "epoch": 0.2830689838354709, "grad_norm": 0.0, "learning_rate": 7.170827957805267e-06, "loss": 0.0, "step": 41030 }, { "epoch": 0.28313797457001527, "grad_norm": 0.0, "learning_rate": 7.170138050459824e-06, "loss": 0.0, "step": 41040 }, { "epoch": 0.2832069653045596, "grad_norm": 0.0, "learning_rate": 7.16944814311438e-06, "loss": 0.0008, "step": 41050 }, { "epoch": 0.28327595603910394, "grad_norm": 0.0, "learning_rate": 7.168758235768937e-06, "loss": 0.239, "step": 41060 }, { "epoch": 0.2833449467736483, "grad_norm": 0.0, "learning_rate": 7.168068328423493e-06, "loss": 0.0, "step": 41070 }, { "epoch": 0.28341393750819266, "grad_norm": 0.0, "learning_rate": 7.16737842107805e-06, "loss": 0.2239, "step": 41080 }, { "epoch": 0.283482928242737, "grad_norm": 0.02193543314933777, "learning_rate": 7.166688513732606e-06, "loss": 0.0, "step": 41090 }, { "epoch": 0.2835519189772813, "grad_norm": 0.0, "learning_rate": 7.1659986063871626e-06, "loss": 0.0, "step": 41100 }, { "epoch": 0.2836209097118257, "grad_norm": 0.0, "learning_rate": 7.16530869904172e-06, "loss": 0.0, "step": 41110 }, { "epoch": 0.28368990044637005, "grad_norm": 0.0, "learning_rate": 7.164618791696276e-06, "loss": 0.025, "step": 41120 }, { "epoch": 0.2837588911809144, "grad_norm": 0.16067703068256378, "learning_rate": 7.163928884350833e-06, "loss": 0.1651, "step": 41130 }, { "epoch": 0.28382788191545877, "grad_norm": 0.0, "learning_rate": 7.163238977005389e-06, "loss": 0.019, "step": 41140 }, { "epoch": 0.28389687265000313, "grad_norm": 0.0, "learning_rate": 7.162549069659945e-06, "loss": 0.0, "step": 41150 }, { "epoch": 0.28396586338454743, "grad_norm": 0.0, "learning_rate": 7.1618591623145015e-06, "loss": 0.0, "step": 41160 }, { "epoch": 0.2840348541190918, "grad_norm": 1.7417595472579706e-06, "learning_rate": 7.161169254969058e-06, "loss": 0.0, "step": 41170 }, { "epoch": 0.28410384485363616, "grad_norm": 0.0, "learning_rate": 7.1604793476236145e-06, "loss": 0.0, "step": 41180 }, { "epoch": 0.2841728355881805, "grad_norm": 0.0, "learning_rate": 7.159789440278171e-06, "loss": 0.0, "step": 41190 }, { "epoch": 0.2842418263227249, "grad_norm": 0.0, "learning_rate": 7.1590995329327275e-06, "loss": 0.0, "step": 41200 }, { "epoch": 0.2843108170572692, "grad_norm": 1.790538476598158e-06, "learning_rate": 7.158409625587284e-06, "loss": 0.0, "step": 41210 }, { "epoch": 0.28437980779181354, "grad_norm": 0.0, "learning_rate": 7.157719718241841e-06, "loss": 0.0, "step": 41220 }, { "epoch": 0.2844487985263579, "grad_norm": 0.0, "learning_rate": 7.157029810896398e-06, "loss": 0.0, "step": 41230 }, { "epoch": 0.28451778926090227, "grad_norm": 5.097764699257823e-08, "learning_rate": 7.156339903550954e-06, "loss": 0.0, "step": 41240 }, { "epoch": 0.2845867799954466, "grad_norm": 0.0, "learning_rate": 7.155649996205511e-06, "loss": 0.0, "step": 41250 }, { "epoch": 0.284655770729991, "grad_norm": 0.0, "learning_rate": 7.154960088860066e-06, "loss": 0.0, "step": 41260 }, { "epoch": 0.2847247614645353, "grad_norm": 0.0, "learning_rate": 7.154270181514623e-06, "loss": 0.0, "step": 41270 }, { "epoch": 0.28479375219907965, "grad_norm": 0.002611007774248719, "learning_rate": 7.153580274169179e-06, "loss": 0.0, "step": 41280 }, { "epoch": 0.284862742933624, "grad_norm": 0.01389256864786148, "learning_rate": 7.152890366823736e-06, "loss": 0.0, "step": 41290 }, { "epoch": 0.2849317336681684, "grad_norm": 0.0, "learning_rate": 7.152200459478292e-06, "loss": 0.0, "step": 41300 }, { "epoch": 0.28500072440271274, "grad_norm": 0.0, "learning_rate": 7.151510552132849e-06, "loss": 0.0, "step": 41310 }, { "epoch": 0.28506971513725704, "grad_norm": 0.6857854723930359, "learning_rate": 7.150820644787405e-06, "loss": 0.0001, "step": 41320 }, { "epoch": 0.2851387058718014, "grad_norm": 4.0679592530068476e-06, "learning_rate": 7.150130737441963e-06, "loss": 0.0, "step": 41330 }, { "epoch": 0.28520769660634576, "grad_norm": 0.0, "learning_rate": 7.149440830096519e-06, "loss": 0.0, "step": 41340 }, { "epoch": 0.2852766873408901, "grad_norm": 0.0, "learning_rate": 7.148750922751076e-06, "loss": 0.0, "step": 41350 }, { "epoch": 0.2853456780754345, "grad_norm": 0.0, "learning_rate": 7.148061015405632e-06, "loss": 0.0, "step": 41360 }, { "epoch": 0.28541466880997884, "grad_norm": 0.0, "learning_rate": 7.147371108060188e-06, "loss": 0.0012, "step": 41370 }, { "epoch": 0.28548365954452315, "grad_norm": 0.0, "learning_rate": 7.146681200714744e-06, "loss": 0.0, "step": 41380 }, { "epoch": 0.2855526502790675, "grad_norm": 0.0, "learning_rate": 7.145991293369301e-06, "loss": 0.0, "step": 41390 }, { "epoch": 0.28562164101361187, "grad_norm": 0.0, "learning_rate": 7.145301386023857e-06, "loss": 0.0, "step": 41400 }, { "epoch": 0.28569063174815623, "grad_norm": 0.0, "learning_rate": 7.144611478678414e-06, "loss": 0.0, "step": 41410 }, { "epoch": 0.2857596224827006, "grad_norm": 0.0, "learning_rate": 7.14392157133297e-06, "loss": 0.0, "step": 41420 }, { "epoch": 0.2858286132172449, "grad_norm": 8.529092432318919e-10, "learning_rate": 7.143231663987527e-06, "loss": 0.1011, "step": 41430 }, { "epoch": 0.28589760395178926, "grad_norm": 0.0, "learning_rate": 7.142541756642084e-06, "loss": 0.0001, "step": 41440 }, { "epoch": 0.2859665946863336, "grad_norm": 0.0, "learning_rate": 7.1418518492966406e-06, "loss": 0.0, "step": 41450 }, { "epoch": 0.286035585420878, "grad_norm": 0.0, "learning_rate": 7.141161941951197e-06, "loss": 0.0, "step": 41460 }, { "epoch": 0.28610457615542234, "grad_norm": 0.0, "learning_rate": 7.1404720346057536e-06, "loss": 0.0013, "step": 41470 }, { "epoch": 0.2861735668899667, "grad_norm": 0.0, "learning_rate": 7.139782127260309e-06, "loss": 0.0, "step": 41480 }, { "epoch": 0.286242557624511, "grad_norm": 4.25914326029897e-09, "learning_rate": 7.139092219914866e-06, "loss": 1.3399, "step": 41490 }, { "epoch": 0.28631154835905537, "grad_norm": 0.0, "learning_rate": 7.138402312569422e-06, "loss": 0.0, "step": 41500 }, { "epoch": 0.28638053909359973, "grad_norm": 0.0, "learning_rate": 7.137712405223979e-06, "loss": 0.0, "step": 41510 }, { "epoch": 0.2864495298281441, "grad_norm": 0.0, "learning_rate": 7.137022497878535e-06, "loss": 0.0, "step": 41520 }, { "epoch": 0.28651852056268845, "grad_norm": 0.0, "learning_rate": 7.136332590533092e-06, "loss": 0.0, "step": 41530 }, { "epoch": 0.28658751129723276, "grad_norm": 0.0, "learning_rate": 7.135642683187648e-06, "loss": 0.0, "step": 41540 }, { "epoch": 0.2866565020317771, "grad_norm": 6.631841529269877e-07, "learning_rate": 7.1349527758422055e-06, "loss": 0.0, "step": 41550 }, { "epoch": 0.2867254927663215, "grad_norm": 0.0, "learning_rate": 7.134262868496762e-06, "loss": 0.0, "step": 41560 }, { "epoch": 0.28679448350086584, "grad_norm": 0.0, "learning_rate": 7.1335729611513185e-06, "loss": 0.0011, "step": 41570 }, { "epoch": 0.2868634742354102, "grad_norm": 0.0, "learning_rate": 7.132883053805875e-06, "loss": 0.0, "step": 41580 }, { "epoch": 0.28693246496995456, "grad_norm": 0.0, "learning_rate": 7.1321931464604314e-06, "loss": 0.0, "step": 41590 }, { "epoch": 0.28700145570449886, "grad_norm": 0.07003507018089294, "learning_rate": 7.131503239114987e-06, "loss": 0.0, "step": 41600 }, { "epoch": 0.2870704464390432, "grad_norm": 0.0, "learning_rate": 7.1308133317695436e-06, "loss": 0.0, "step": 41610 }, { "epoch": 0.2871394371735876, "grad_norm": 2.5664125132607296e-05, "learning_rate": 7.1301234244241e-06, "loss": 0.0, "step": 41620 }, { "epoch": 0.28720842790813195, "grad_norm": 0.0, "learning_rate": 7.1294335170786566e-06, "loss": 0.0, "step": 41630 }, { "epoch": 0.2872774186426763, "grad_norm": 0.0, "learning_rate": 7.128743609733213e-06, "loss": 0.0, "step": 41640 }, { "epoch": 0.2873464093772206, "grad_norm": 0.0, "learning_rate": 7.1280537023877695e-06, "loss": 0.1031, "step": 41650 }, { "epoch": 0.287415400111765, "grad_norm": 0.0, "learning_rate": 7.127363795042327e-06, "loss": 0.0, "step": 41660 }, { "epoch": 0.28748439084630933, "grad_norm": 0.0, "learning_rate": 7.126673887696883e-06, "loss": 0.0, "step": 41670 }, { "epoch": 0.2875533815808537, "grad_norm": 0.0, "learning_rate": 7.12598398035144e-06, "loss": 0.0, "step": 41680 }, { "epoch": 0.28762237231539806, "grad_norm": 0.0, "learning_rate": 7.125294073005996e-06, "loss": 0.0, "step": 41690 }, { "epoch": 0.2876913630499424, "grad_norm": 0.0, "learning_rate": 7.124604165660553e-06, "loss": 0.0, "step": 41700 }, { "epoch": 0.2877603537844867, "grad_norm": 0.007582913618534803, "learning_rate": 7.1239142583151085e-06, "loss": 0.0, "step": 41710 }, { "epoch": 0.2878293445190311, "grad_norm": 0.0, "learning_rate": 7.123224350969665e-06, "loss": 0.0, "step": 41720 }, { "epoch": 0.28789833525357544, "grad_norm": 0.0, "learning_rate": 7.1225344436242215e-06, "loss": 0.0, "step": 41730 }, { "epoch": 0.2879673259881198, "grad_norm": 2.593980207166169e-05, "learning_rate": 7.121844536278778e-06, "loss": 0.0, "step": 41740 }, { "epoch": 0.28803631672266417, "grad_norm": 0.0, "learning_rate": 7.1211546289333344e-06, "loss": 0.0, "step": 41750 }, { "epoch": 0.28810530745720847, "grad_norm": 1.7740355730056763, "learning_rate": 7.120464721587891e-06, "loss": 0.0003, "step": 41760 }, { "epoch": 0.28817429819175283, "grad_norm": 0.0, "learning_rate": 7.119774814242448e-06, "loss": 0.0429, "step": 41770 }, { "epoch": 0.2882432889262972, "grad_norm": 0.788506805896759, "learning_rate": 7.119084906897005e-06, "loss": 0.0097, "step": 41780 }, { "epoch": 0.28831227966084155, "grad_norm": 0.0, "learning_rate": 7.118394999551561e-06, "loss": 0.0, "step": 41790 }, { "epoch": 0.2883812703953859, "grad_norm": 0.0, "learning_rate": 7.117705092206118e-06, "loss": 0.0, "step": 41800 }, { "epoch": 0.2884502611299303, "grad_norm": 0.0, "learning_rate": 7.117015184860674e-06, "loss": 0.0, "step": 41810 }, { "epoch": 0.2885192518644746, "grad_norm": 0.0, "learning_rate": 7.11632527751523e-06, "loss": 0.0597, "step": 41820 }, { "epoch": 0.28858824259901894, "grad_norm": 0.0, "learning_rate": 7.115635370169786e-06, "loss": 0.0, "step": 41830 }, { "epoch": 0.2886572333335633, "grad_norm": 0.0, "learning_rate": 7.114945462824343e-06, "loss": 0.0, "step": 41840 }, { "epoch": 0.28872622406810766, "grad_norm": 0.0, "learning_rate": 7.114255555478899e-06, "loss": 0.0001, "step": 41850 }, { "epoch": 0.288795214802652, "grad_norm": 0.00859209056943655, "learning_rate": 7.113565648133456e-06, "loss": 0.0, "step": 41860 }, { "epoch": 0.28886420553719633, "grad_norm": 0.0, "learning_rate": 7.112875740788012e-06, "loss": 0.6328, "step": 41870 }, { "epoch": 0.2889331962717407, "grad_norm": 0.0, "learning_rate": 7.11218583344257e-06, "loss": 0.0, "step": 41880 }, { "epoch": 0.28900218700628505, "grad_norm": 0.0, "learning_rate": 7.111495926097126e-06, "loss": 0.0, "step": 41890 }, { "epoch": 0.2890711777408294, "grad_norm": 3.907179291218199e-07, "learning_rate": 7.110806018751683e-06, "loss": 0.0, "step": 41900 }, { "epoch": 0.28914016847537377, "grad_norm": 0.0, "learning_rate": 7.110116111406239e-06, "loss": 0.1059, "step": 41910 }, { "epoch": 0.28920915920991813, "grad_norm": 0.0, "learning_rate": 7.109426204060796e-06, "loss": 0.0, "step": 41920 }, { "epoch": 0.28927814994446244, "grad_norm": 0.0, "learning_rate": 7.108736296715352e-06, "loss": 0.0, "step": 41930 }, { "epoch": 0.2893471406790068, "grad_norm": 0.0, "learning_rate": 7.108046389369908e-06, "loss": 0.0, "step": 41940 }, { "epoch": 0.28941613141355116, "grad_norm": 4.362367462817929e-07, "learning_rate": 7.107356482024464e-06, "loss": 0.0, "step": 41950 }, { "epoch": 0.2894851221480955, "grad_norm": 1.1224799436604371e-06, "learning_rate": 7.106666574679021e-06, "loss": 0.0006, "step": 41960 }, { "epoch": 0.2895541128826399, "grad_norm": 0.0, "learning_rate": 7.105976667333577e-06, "loss": 0.0, "step": 41970 }, { "epoch": 0.2896231036171842, "grad_norm": 0.0, "learning_rate": 7.105286759988134e-06, "loss": 0.0, "step": 41980 }, { "epoch": 0.28969209435172855, "grad_norm": 3.174858420607052e-07, "learning_rate": 7.104596852642691e-06, "loss": 0.0, "step": 41990 }, { "epoch": 0.2897610850862729, "grad_norm": 0.0, "learning_rate": 7.1039069452972475e-06, "loss": 0.0, "step": 42000 }, { "epoch": 0.28983007582081727, "grad_norm": 0.0, "learning_rate": 7.103217037951804e-06, "loss": 0.0, "step": 42010 }, { "epoch": 0.28989906655536163, "grad_norm": 0.0, "learning_rate": 7.1025271306063605e-06, "loss": 0.174, "step": 42020 }, { "epoch": 0.289968057289906, "grad_norm": 0.0, "learning_rate": 7.101837223260917e-06, "loss": 0.0, "step": 42030 }, { "epoch": 0.2900370480244503, "grad_norm": 0.0, "learning_rate": 7.1011473159154735e-06, "loss": 0.0, "step": 42040 }, { "epoch": 0.29010603875899466, "grad_norm": 0.0, "learning_rate": 7.100457408570029e-06, "loss": 0.0, "step": 42050 }, { "epoch": 0.290175029493539, "grad_norm": 1.8941272994510427e-09, "learning_rate": 7.099767501224586e-06, "loss": 0.0, "step": 42060 }, { "epoch": 0.2902440202280834, "grad_norm": 1.6150670489878394e-05, "learning_rate": 7.099077593879142e-06, "loss": 0.0, "step": 42070 }, { "epoch": 0.29031301096262774, "grad_norm": 2.188157850468997e-05, "learning_rate": 7.098387686533699e-06, "loss": 0.0, "step": 42080 }, { "epoch": 0.29038200169717204, "grad_norm": 0.0, "learning_rate": 7.097697779188255e-06, "loss": 0.0, "step": 42090 }, { "epoch": 0.2904509924317164, "grad_norm": 0.0, "learning_rate": 7.0970078718428124e-06, "loss": 0.0, "step": 42100 }, { "epoch": 0.29051998316626076, "grad_norm": 1.5726680430816486e-05, "learning_rate": 7.096317964497369e-06, "loss": 0.0, "step": 42110 }, { "epoch": 0.2905889739008051, "grad_norm": 0.002558254636824131, "learning_rate": 7.0956280571519254e-06, "loss": 0.0, "step": 42120 }, { "epoch": 0.2906579646353495, "grad_norm": 0.0, "learning_rate": 7.094938149806482e-06, "loss": 0.0001, "step": 42130 }, { "epoch": 0.29072695536989385, "grad_norm": 0.0, "learning_rate": 7.094248242461038e-06, "loss": 0.001, "step": 42140 }, { "epoch": 0.29079594610443815, "grad_norm": 0.14574797451496124, "learning_rate": 7.093558335115595e-06, "loss": 0.2092, "step": 42150 }, { "epoch": 0.2908649368389825, "grad_norm": 7.462674034286465e-07, "learning_rate": 7.0928684277701505e-06, "loss": 0.0, "step": 42160 }, { "epoch": 0.2909339275735269, "grad_norm": 0.0, "learning_rate": 7.092178520424707e-06, "loss": 0.0011, "step": 42170 }, { "epoch": 0.29100291830807123, "grad_norm": 0.0, "learning_rate": 7.0914886130792635e-06, "loss": 0.0, "step": 42180 }, { "epoch": 0.2910719090426156, "grad_norm": 0.0, "learning_rate": 7.09079870573382e-06, "loss": 0.0, "step": 42190 }, { "epoch": 0.2911408997771599, "grad_norm": 0.0, "learning_rate": 7.0901087983883765e-06, "loss": 0.0, "step": 42200 }, { "epoch": 0.29120989051170426, "grad_norm": 1.726275877445005e-05, "learning_rate": 7.089418891042934e-06, "loss": 0.3881, "step": 42210 }, { "epoch": 0.2912788812462486, "grad_norm": 0.0, "learning_rate": 7.08872898369749e-06, "loss": 0.0, "step": 42220 }, { "epoch": 0.291347871980793, "grad_norm": 0.0, "learning_rate": 7.088039076352047e-06, "loss": 0.0, "step": 42230 }, { "epoch": 0.29141686271533734, "grad_norm": 0.0, "learning_rate": 7.087349169006603e-06, "loss": 0.0, "step": 42240 }, { "epoch": 0.2914858534498817, "grad_norm": 7.967135502440215e-07, "learning_rate": 7.08665926166116e-06, "loss": 0.0, "step": 42250 }, { "epoch": 0.291554844184426, "grad_norm": 0.0, "learning_rate": 7.085969354315716e-06, "loss": 0.0002, "step": 42260 }, { "epoch": 0.29162383491897037, "grad_norm": 1.3892648098590143e-07, "learning_rate": 7.085279446970273e-06, "loss": 0.0, "step": 42270 }, { "epoch": 0.29169282565351473, "grad_norm": 0.0, "learning_rate": 7.0845895396248284e-06, "loss": 0.0, "step": 42280 }, { "epoch": 0.2917618163880591, "grad_norm": 0.0, "learning_rate": 7.083899632279385e-06, "loss": 0.0, "step": 42290 }, { "epoch": 0.29183080712260345, "grad_norm": 0.0, "learning_rate": 7.083209724933941e-06, "loss": 0.0001, "step": 42300 }, { "epoch": 0.29189979785714776, "grad_norm": 0.0, "learning_rate": 7.082519817588498e-06, "loss": 0.0, "step": 42310 }, { "epoch": 0.2919687885916921, "grad_norm": 0.0, "learning_rate": 7.081829910243055e-06, "loss": 0.0, "step": 42320 }, { "epoch": 0.2920377793262365, "grad_norm": 0.0, "learning_rate": 7.081140002897612e-06, "loss": 0.0, "step": 42330 }, { "epoch": 0.29210677006078084, "grad_norm": 0.0, "learning_rate": 7.080450095552168e-06, "loss": 0.6539, "step": 42340 }, { "epoch": 0.2921757607953252, "grad_norm": 0.0, "learning_rate": 7.079760188206725e-06, "loss": 0.0, "step": 42350 }, { "epoch": 0.29224475152986956, "grad_norm": 0.0, "learning_rate": 7.079070280861281e-06, "loss": 0.0, "step": 42360 }, { "epoch": 0.29231374226441387, "grad_norm": 0.0, "learning_rate": 7.078380373515838e-06, "loss": 0.0045, "step": 42370 }, { "epoch": 0.29238273299895823, "grad_norm": 0.0, "learning_rate": 7.077690466170394e-06, "loss": 0.0, "step": 42380 }, { "epoch": 0.2924517237335026, "grad_norm": 2.979498503918876e-06, "learning_rate": 7.07700055882495e-06, "loss": 0.0, "step": 42390 }, { "epoch": 0.29252071446804695, "grad_norm": 0.0, "learning_rate": 7.076310651479506e-06, "loss": 0.0, "step": 42400 }, { "epoch": 0.2925897052025913, "grad_norm": 0.0, "learning_rate": 7.075620744134063e-06, "loss": 0.0001, "step": 42410 }, { "epoch": 0.2926586959371356, "grad_norm": 0.0, "learning_rate": 7.074930836788619e-06, "loss": 0.0, "step": 42420 }, { "epoch": 0.29272768667168, "grad_norm": 0.0, "learning_rate": 7.074240929443177e-06, "loss": 0.0, "step": 42430 }, { "epoch": 0.29279667740622434, "grad_norm": 0.0, "learning_rate": 7.073551022097733e-06, "loss": 0.0, "step": 42440 }, { "epoch": 0.2928656681407687, "grad_norm": 0.00047385727521032095, "learning_rate": 7.07286111475229e-06, "loss": 0.878, "step": 42450 }, { "epoch": 0.29293465887531306, "grad_norm": 0.0, "learning_rate": 7.072171207406846e-06, "loss": 0.0013, "step": 42460 }, { "epoch": 0.2930036496098574, "grad_norm": 0.0, "learning_rate": 7.071481300061403e-06, "loss": 0.0001, "step": 42470 }, { "epoch": 0.2930726403444017, "grad_norm": 8.396614074707031, "learning_rate": 7.070791392715959e-06, "loss": 1.1338, "step": 42480 }, { "epoch": 0.2931416310789461, "grad_norm": 0.0, "learning_rate": 7.0701014853705156e-06, "loss": 0.0, "step": 42490 }, { "epoch": 0.29321062181349045, "grad_norm": 0.0, "learning_rate": 7.069411578025071e-06, "loss": 0.0, "step": 42500 }, { "epoch": 0.2932796125480348, "grad_norm": 0.0, "learning_rate": 7.068721670679628e-06, "loss": 0.2036, "step": 42510 }, { "epoch": 0.29334860328257917, "grad_norm": 0.0, "learning_rate": 7.068031763334184e-06, "loss": 0.0, "step": 42520 }, { "epoch": 0.2934175940171235, "grad_norm": 0.0, "learning_rate": 7.067341855988741e-06, "loss": 0.0693, "step": 42530 }, { "epoch": 0.29348658475166783, "grad_norm": 3.40055771630432e-06, "learning_rate": 7.066651948643298e-06, "loss": 0.0, "step": 42540 }, { "epoch": 0.2935555754862122, "grad_norm": 0.0, "learning_rate": 7.0659620412978545e-06, "loss": 0.0, "step": 42550 }, { "epoch": 0.29362456622075656, "grad_norm": 0.0, "learning_rate": 7.065272133952411e-06, "loss": 0.0, "step": 42560 }, { "epoch": 0.2936935569553009, "grad_norm": 4.543967702108631e-10, "learning_rate": 7.0645822266069675e-06, "loss": 0.0, "step": 42570 }, { "epoch": 0.2937625476898453, "grad_norm": 9.44025635440937e-10, "learning_rate": 7.063892319261524e-06, "loss": 0.0, "step": 42580 }, { "epoch": 0.2938315384243896, "grad_norm": 0.0013908512191846967, "learning_rate": 7.0632024119160805e-06, "loss": 0.191, "step": 42590 }, { "epoch": 0.29390052915893394, "grad_norm": 0.0, "learning_rate": 7.062512504570637e-06, "loss": 0.0, "step": 42600 }, { "epoch": 0.2939695198934783, "grad_norm": 0.003961845766752958, "learning_rate": 7.0618225972251935e-06, "loss": 0.0003, "step": 42610 }, { "epoch": 0.29403851062802266, "grad_norm": 0.0, "learning_rate": 7.061132689879749e-06, "loss": 0.0, "step": 42620 }, { "epoch": 0.294107501362567, "grad_norm": 0.0, "learning_rate": 7.060442782534306e-06, "loss": 0.0036, "step": 42630 }, { "epoch": 0.29417649209711133, "grad_norm": 0.0, "learning_rate": 7.059752875188862e-06, "loss": 0.0, "step": 42640 }, { "epoch": 0.2942454828316557, "grad_norm": 0.0, "learning_rate": 7.059062967843419e-06, "loss": 0.0019, "step": 42650 }, { "epoch": 0.29431447356620005, "grad_norm": 0.0, "learning_rate": 7.058373060497976e-06, "loss": 0.0, "step": 42660 }, { "epoch": 0.2943834643007444, "grad_norm": 9.014798365569732e-07, "learning_rate": 7.057683153152532e-06, "loss": 0.0, "step": 42670 }, { "epoch": 0.2944524550352888, "grad_norm": 1.2542999982833862, "learning_rate": 7.056993245807089e-06, "loss": 0.0005, "step": 42680 }, { "epoch": 0.29452144576983313, "grad_norm": 0.0, "learning_rate": 7.056303338461645e-06, "loss": 0.1514, "step": 42690 }, { "epoch": 0.29459043650437744, "grad_norm": 0.004014682490378618, "learning_rate": 7.055613431116202e-06, "loss": 0.0, "step": 42700 }, { "epoch": 0.2946594272389218, "grad_norm": 0.0, "learning_rate": 7.054923523770758e-06, "loss": 0.0, "step": 42710 }, { "epoch": 0.29472841797346616, "grad_norm": 0.0, "learning_rate": 7.054233616425315e-06, "loss": 0.0, "step": 42720 }, { "epoch": 0.2947974087080105, "grad_norm": 0.0, "learning_rate": 7.0535437090798705e-06, "loss": 0.0, "step": 42730 }, { "epoch": 0.2948663994425549, "grad_norm": 0.0, "learning_rate": 7.052853801734427e-06, "loss": 0.0, "step": 42740 }, { "epoch": 0.2949353901770992, "grad_norm": 7.616115181008354e-05, "learning_rate": 7.0521638943889835e-06, "loss": 0.0, "step": 42750 }, { "epoch": 0.29500438091164355, "grad_norm": 0.00018584623467177153, "learning_rate": 7.051473987043541e-06, "loss": 0.0, "step": 42760 }, { "epoch": 0.2950733716461879, "grad_norm": 2.0667194178258796e-08, "learning_rate": 7.050784079698097e-06, "loss": 0.0, "step": 42770 }, { "epoch": 0.29514236238073227, "grad_norm": 0.0, "learning_rate": 7.050094172352654e-06, "loss": 0.0, "step": 42780 }, { "epoch": 0.29521135311527663, "grad_norm": 0.0001325804478256032, "learning_rate": 7.04940426500721e-06, "loss": 0.0, "step": 42790 }, { "epoch": 0.295280343849821, "grad_norm": 0.0, "learning_rate": 7.048714357661767e-06, "loss": 0.0, "step": 42800 }, { "epoch": 0.2953493345843653, "grad_norm": 0.0, "learning_rate": 7.048024450316323e-06, "loss": 0.0, "step": 42810 }, { "epoch": 0.29541832531890966, "grad_norm": 0.00033801706740632653, "learning_rate": 7.04733454297088e-06, "loss": 0.6336, "step": 42820 }, { "epoch": 0.295487316053454, "grad_norm": 0.0, "learning_rate": 7.046644635625436e-06, "loss": 0.0, "step": 42830 }, { "epoch": 0.2955563067879984, "grad_norm": 0.0, "learning_rate": 7.045954728279992e-06, "loss": 0.0, "step": 42840 }, { "epoch": 0.29562529752254274, "grad_norm": 0.0, "learning_rate": 7.045264820934548e-06, "loss": 0.0, "step": 42850 }, { "epoch": 0.29569428825708705, "grad_norm": 0.0, "learning_rate": 7.044574913589105e-06, "loss": 0.0002, "step": 42860 }, { "epoch": 0.2957632789916314, "grad_norm": 0.00027899316046386957, "learning_rate": 7.043885006243662e-06, "loss": 0.0, "step": 42870 }, { "epoch": 0.29583226972617577, "grad_norm": 4.4702400114893237e-10, "learning_rate": 7.043195098898219e-06, "loss": 0.0, "step": 42880 }, { "epoch": 0.29590126046072013, "grad_norm": 0.0, "learning_rate": 7.042505191552775e-06, "loss": 0.0, "step": 42890 }, { "epoch": 0.2959702511952645, "grad_norm": 0.0, "learning_rate": 7.041815284207332e-06, "loss": 0.0, "step": 42900 }, { "epoch": 0.29603924192980885, "grad_norm": 0.0, "learning_rate": 7.041125376861888e-06, "loss": 0.0, "step": 42910 }, { "epoch": 0.29610823266435315, "grad_norm": 2.475000870916233e-09, "learning_rate": 7.040435469516445e-06, "loss": 0.0, "step": 42920 }, { "epoch": 0.2961772233988975, "grad_norm": 0.08551587164402008, "learning_rate": 7.039745562171001e-06, "loss": 0.0, "step": 42930 }, { "epoch": 0.2962462141334419, "grad_norm": 0.0, "learning_rate": 7.039055654825558e-06, "loss": 0.0001, "step": 42940 }, { "epoch": 0.29631520486798624, "grad_norm": 4.688519017470583e-10, "learning_rate": 7.038365747480113e-06, "loss": 0.0, "step": 42950 }, { "epoch": 0.2963841956025306, "grad_norm": 2.0624385797418654e-05, "learning_rate": 7.03767584013467e-06, "loss": 0.0, "step": 42960 }, { "epoch": 0.2964531863370749, "grad_norm": 0.0, "learning_rate": 7.036985932789226e-06, "loss": 0.0, "step": 42970 }, { "epoch": 0.29652217707161926, "grad_norm": 0.0, "learning_rate": 7.036296025443784e-06, "loss": 0.0, "step": 42980 }, { "epoch": 0.2965911678061636, "grad_norm": 0.0, "learning_rate": 7.03560611809834e-06, "loss": 0.0, "step": 42990 }, { "epoch": 0.296660158540708, "grad_norm": 1.1841663763334509e-05, "learning_rate": 7.034916210752897e-06, "loss": 0.0, "step": 43000 }, { "epoch": 0.29672914927525235, "grad_norm": 0.05389532074332237, "learning_rate": 7.034226303407453e-06, "loss": 0.5758, "step": 43010 }, { "epoch": 0.2967981400097967, "grad_norm": 0.0, "learning_rate": 7.0335363960620096e-06, "loss": 0.0003, "step": 43020 }, { "epoch": 0.296867130744341, "grad_norm": 0.0, "learning_rate": 7.032846488716566e-06, "loss": 0.0041, "step": 43030 }, { "epoch": 0.2969361214788854, "grad_norm": 1.9829495556678012e-07, "learning_rate": 7.0321565813711225e-06, "loss": 0.0, "step": 43040 }, { "epoch": 0.29700511221342973, "grad_norm": 0.0, "learning_rate": 7.031466674025679e-06, "loss": 0.0, "step": 43050 }, { "epoch": 0.2970741029479741, "grad_norm": 0.0, "learning_rate": 7.0307767666802355e-06, "loss": 0.0, "step": 43060 }, { "epoch": 0.29714309368251846, "grad_norm": 0.00023221809533424675, "learning_rate": 7.030086859334791e-06, "loss": 0.5473, "step": 43070 }, { "epoch": 0.2972120844170628, "grad_norm": 0.274690181016922, "learning_rate": 7.029396951989348e-06, "loss": 0.0001, "step": 43080 }, { "epoch": 0.2972810751516071, "grad_norm": 0.0, "learning_rate": 7.028707044643905e-06, "loss": 0.0, "step": 43090 }, { "epoch": 0.2973500658861515, "grad_norm": 0.0, "learning_rate": 7.0280171372984615e-06, "loss": 0.0, "step": 43100 }, { "epoch": 0.29741905662069584, "grad_norm": 0.0, "learning_rate": 7.027327229953018e-06, "loss": 0.0, "step": 43110 }, { "epoch": 0.2974880473552402, "grad_norm": 0.0, "learning_rate": 7.0266373226075745e-06, "loss": 0.0, "step": 43120 }, { "epoch": 0.29755703808978456, "grad_norm": 0.0, "learning_rate": 7.025947415262131e-06, "loss": 0.0, "step": 43130 }, { "epoch": 0.29762602882432887, "grad_norm": 0.0, "learning_rate": 7.0252575079166874e-06, "loss": 0.0, "step": 43140 }, { "epoch": 0.29769501955887323, "grad_norm": 0.0, "learning_rate": 7.024567600571244e-06, "loss": 0.0, "step": 43150 }, { "epoch": 0.2977640102934176, "grad_norm": 0.0, "learning_rate": 7.0238776932258004e-06, "loss": 0.0, "step": 43160 }, { "epoch": 0.29783300102796195, "grad_norm": 9.832288014877122e-06, "learning_rate": 7.023187785880357e-06, "loss": 0.0974, "step": 43170 }, { "epoch": 0.2979019917625063, "grad_norm": 0.0, "learning_rate": 7.0224978785349126e-06, "loss": 0.0, "step": 43180 }, { "epoch": 0.2979709824970507, "grad_norm": 1.982948916179339e-09, "learning_rate": 7.021807971189469e-06, "loss": 0.0094, "step": 43190 }, { "epoch": 0.298039973231595, "grad_norm": 0.0, "learning_rate": 7.021118063844027e-06, "loss": 0.0, "step": 43200 }, { "epoch": 0.29810896396613934, "grad_norm": 0.0, "learning_rate": 7.020428156498583e-06, "loss": 0.0, "step": 43210 }, { "epoch": 0.2981779547006837, "grad_norm": 0.0, "learning_rate": 7.019738249153139e-06, "loss": 0.0, "step": 43220 }, { "epoch": 0.29824694543522806, "grad_norm": 0.018246019259095192, "learning_rate": 7.019048341807696e-06, "loss": 0.0, "step": 43230 }, { "epoch": 0.2983159361697724, "grad_norm": 0.0, "learning_rate": 7.018358434462252e-06, "loss": 0.0, "step": 43240 }, { "epoch": 0.2983849269043167, "grad_norm": 0.0, "learning_rate": 7.017668527116809e-06, "loss": 0.0003, "step": 43250 }, { "epoch": 0.2984539176388611, "grad_norm": 0.0, "learning_rate": 7.016978619771365e-06, "loss": 0.0, "step": 43260 }, { "epoch": 0.29852290837340545, "grad_norm": 0.0, "learning_rate": 7.016288712425922e-06, "loss": 0.0, "step": 43270 }, { "epoch": 0.2985918991079498, "grad_norm": 0.02237069420516491, "learning_rate": 7.015598805080478e-06, "loss": 0.0, "step": 43280 }, { "epoch": 0.29866088984249417, "grad_norm": 0.0, "learning_rate": 7.014908897735034e-06, "loss": 0.0002, "step": 43290 }, { "epoch": 0.29872988057703853, "grad_norm": 0.0, "learning_rate": 7.0142189903895904e-06, "loss": 0.0, "step": 43300 }, { "epoch": 0.29879887131158284, "grad_norm": 0.0421195812523365, "learning_rate": 7.013529083044149e-06, "loss": 0.0, "step": 43310 }, { "epoch": 0.2988678620461272, "grad_norm": 8.994599509648538e-10, "learning_rate": 7.012839175698704e-06, "loss": 0.0, "step": 43320 }, { "epoch": 0.29893685278067156, "grad_norm": 0.0, "learning_rate": 7.012149268353261e-06, "loss": 0.0, "step": 43330 }, { "epoch": 0.2990058435152159, "grad_norm": 0.0, "learning_rate": 7.011459361007817e-06, "loss": 0.0, "step": 43340 }, { "epoch": 0.2990748342497603, "grad_norm": 0.0, "learning_rate": 7.010769453662374e-06, "loss": 0.0, "step": 43350 }, { "epoch": 0.2991438249843046, "grad_norm": 0.0, "learning_rate": 7.01007954631693e-06, "loss": 0.0, "step": 43360 }, { "epoch": 0.29921281571884895, "grad_norm": 0.001235152711160481, "learning_rate": 7.009389638971487e-06, "loss": 0.0, "step": 43370 }, { "epoch": 0.2992818064533933, "grad_norm": 4.251310969927147e-10, "learning_rate": 7.008699731626043e-06, "loss": 0.0, "step": 43380 }, { "epoch": 0.29935079718793767, "grad_norm": 0.0, "learning_rate": 7.0080098242806e-06, "loss": 0.0, "step": 43390 }, { "epoch": 0.29941978792248203, "grad_norm": 0.0, "learning_rate": 7.007319916935156e-06, "loss": 0.0, "step": 43400 }, { "epoch": 0.2994887786570264, "grad_norm": 0.5563706755638123, "learning_rate": 7.006630009589712e-06, "loss": 0.0002, "step": 43410 }, { "epoch": 0.2995577693915707, "grad_norm": 3.5235859741078457e-07, "learning_rate": 7.00594010224427e-06, "loss": 0.0001, "step": 43420 }, { "epoch": 0.29962676012611505, "grad_norm": 0.0, "learning_rate": 7.005250194898826e-06, "loss": 0.0001, "step": 43430 }, { "epoch": 0.2996957508606594, "grad_norm": 0.0, "learning_rate": 7.004560287553382e-06, "loss": 0.0, "step": 43440 }, { "epoch": 0.2997647415952038, "grad_norm": 0.0, "learning_rate": 7.003870380207939e-06, "loss": 0.0, "step": 43450 }, { "epoch": 0.29983373232974814, "grad_norm": 0.0, "learning_rate": 7.003180472862495e-06, "loss": 0.0, "step": 43460 }, { "epoch": 0.29990272306429244, "grad_norm": 0.0, "learning_rate": 7.002490565517052e-06, "loss": 0.0, "step": 43470 }, { "epoch": 0.2999717137988368, "grad_norm": 0.0, "learning_rate": 7.001800658171608e-06, "loss": 0.0, "step": 43480 }, { "epoch": 0.30004070453338116, "grad_norm": 0.0, "learning_rate": 7.001110750826165e-06, "loss": 0.0, "step": 43490 }, { "epoch": 0.3001096952679255, "grad_norm": 6.271287045223062e-09, "learning_rate": 7.000420843480721e-06, "loss": 0.0, "step": 43500 }, { "epoch": 0.3001786860024699, "grad_norm": 0.0, "learning_rate": 6.999730936135278e-06, "loss": 0.0098, "step": 43510 }, { "epoch": 0.30024767673701425, "grad_norm": 0.0, "learning_rate": 6.999041028789833e-06, "loss": 0.0, "step": 43520 }, { "epoch": 0.30031666747155855, "grad_norm": 0.0, "learning_rate": 6.998351121444391e-06, "loss": 0.0, "step": 43530 }, { "epoch": 0.3003856582061029, "grad_norm": 0.00022184659610502422, "learning_rate": 6.997661214098948e-06, "loss": 0.0, "step": 43540 }, { "epoch": 0.3004546489406473, "grad_norm": 4.742188308703987e-10, "learning_rate": 6.9969713067535035e-06, "loss": 0.0, "step": 43550 }, { "epoch": 0.30052363967519163, "grad_norm": 0.0, "learning_rate": 6.99628139940806e-06, "loss": 0.0, "step": 43560 }, { "epoch": 0.300592630409736, "grad_norm": 0.0, "learning_rate": 6.9955914920626165e-06, "loss": 0.0, "step": 43570 }, { "epoch": 0.3006616211442803, "grad_norm": 0.0, "learning_rate": 6.994901584717173e-06, "loss": 0.0001, "step": 43580 }, { "epoch": 0.30073061187882466, "grad_norm": 0.0, "learning_rate": 6.9942116773717295e-06, "loss": 0.0003, "step": 43590 }, { "epoch": 0.300799602613369, "grad_norm": 0.0, "learning_rate": 6.993521770026286e-06, "loss": 0.0001, "step": 43600 }, { "epoch": 0.3008685933479134, "grad_norm": 0.0, "learning_rate": 6.9928318626808425e-06, "loss": 0.0001, "step": 43610 }, { "epoch": 0.30093758408245774, "grad_norm": 69.81422424316406, "learning_rate": 6.992141955335399e-06, "loss": 0.0079, "step": 43620 }, { "epoch": 0.3010065748170021, "grad_norm": 0.0, "learning_rate": 6.991452047989955e-06, "loss": 0.0, "step": 43630 }, { "epoch": 0.3010755655515464, "grad_norm": 0.09207481145858765, "learning_rate": 6.990762140644513e-06, "loss": 0.0001, "step": 43640 }, { "epoch": 0.30114455628609077, "grad_norm": 8.705681102583185e-05, "learning_rate": 6.990072233299069e-06, "loss": 0.0, "step": 43650 }, { "epoch": 0.30121354702063513, "grad_norm": 0.0, "learning_rate": 6.989382325953625e-06, "loss": 0.0004, "step": 43660 }, { "epoch": 0.3012825377551795, "grad_norm": 6.277215902628086e-08, "learning_rate": 6.9886924186081814e-06, "loss": 0.0, "step": 43670 }, { "epoch": 0.30135152848972385, "grad_norm": 0.47472986578941345, "learning_rate": 6.988002511262738e-06, "loss": 0.2183, "step": 43680 }, { "epoch": 0.30142051922426816, "grad_norm": 0.0, "learning_rate": 6.987312603917294e-06, "loss": 0.0, "step": 43690 }, { "epoch": 0.3014895099588125, "grad_norm": 0.0, "learning_rate": 6.986622696571851e-06, "loss": 0.0, "step": 43700 }, { "epoch": 0.3015585006933569, "grad_norm": 0.255240797996521, "learning_rate": 6.985932789226407e-06, "loss": 0.0, "step": 43710 }, { "epoch": 0.30162749142790124, "grad_norm": 0.0, "learning_rate": 6.985242881880964e-06, "loss": 0.0, "step": 43720 }, { "epoch": 0.3016964821624456, "grad_norm": 0.0, "learning_rate": 6.98455297453552e-06, "loss": 0.0008, "step": 43730 }, { "epoch": 0.30176547289698996, "grad_norm": 3.417747575440444e-05, "learning_rate": 6.983863067190077e-06, "loss": 0.0001, "step": 43740 }, { "epoch": 0.30183446363153427, "grad_norm": 0.0, "learning_rate": 6.983173159844634e-06, "loss": 0.0013, "step": 43750 }, { "epoch": 0.3019034543660786, "grad_norm": 0.0, "learning_rate": 6.982483252499191e-06, "loss": 0.0, "step": 43760 }, { "epoch": 0.301972445100623, "grad_norm": 102.90641021728516, "learning_rate": 6.981793345153746e-06, "loss": 0.0115, "step": 43770 }, { "epoch": 0.30204143583516735, "grad_norm": 0.0, "learning_rate": 6.981103437808303e-06, "loss": 0.0, "step": 43780 }, { "epoch": 0.3021104265697117, "grad_norm": 0.0, "learning_rate": 6.980413530462859e-06, "loss": 0.0, "step": 43790 }, { "epoch": 0.302179417304256, "grad_norm": 0.0, "learning_rate": 6.979723623117416e-06, "loss": 0.0, "step": 43800 }, { "epoch": 0.3022484080388004, "grad_norm": 0.0, "learning_rate": 6.979033715771972e-06, "loss": 0.0, "step": 43810 }, { "epoch": 0.30231739877334474, "grad_norm": 0.0, "learning_rate": 6.978343808426529e-06, "loss": 0.0, "step": 43820 }, { "epoch": 0.3023863895078891, "grad_norm": 4.636920292178104e-10, "learning_rate": 6.977653901081085e-06, "loss": 0.0, "step": 43830 }, { "epoch": 0.30245538024243346, "grad_norm": 0.0, "learning_rate": 6.976963993735642e-06, "loss": 0.0, "step": 43840 }, { "epoch": 0.3025243709769778, "grad_norm": 0.0, "learning_rate": 6.976274086390198e-06, "loss": 0.0, "step": 43850 }, { "epoch": 0.3025933617115221, "grad_norm": 1.4008020343680982e-06, "learning_rate": 6.975584179044756e-06, "loss": 0.0007, "step": 43860 }, { "epoch": 0.3026623524460665, "grad_norm": 0.0, "learning_rate": 6.974894271699312e-06, "loss": 0.0, "step": 43870 }, { "epoch": 0.30273134318061085, "grad_norm": 0.0, "learning_rate": 6.974204364353869e-06, "loss": 0.0419, "step": 43880 }, { "epoch": 0.3028003339151552, "grad_norm": 2.9344546419451945e-05, "learning_rate": 6.973514457008424e-06, "loss": 0.0, "step": 43890 }, { "epoch": 0.30286932464969957, "grad_norm": 0.0, "learning_rate": 6.972824549662981e-06, "loss": 0.0, "step": 43900 }, { "epoch": 0.3029383153842439, "grad_norm": 0.0, "learning_rate": 6.972134642317537e-06, "loss": 0.0, "step": 43910 }, { "epoch": 0.30300730611878823, "grad_norm": 0.0, "learning_rate": 6.971444734972094e-06, "loss": 0.0, "step": 43920 }, { "epoch": 0.3030762968533326, "grad_norm": 0.0, "learning_rate": 6.97075482762665e-06, "loss": 0.0001, "step": 43930 }, { "epoch": 0.30314528758787695, "grad_norm": 0.0, "learning_rate": 6.970064920281207e-06, "loss": 0.0, "step": 43940 }, { "epoch": 0.3032142783224213, "grad_norm": 0.02213207818567753, "learning_rate": 6.969375012935763e-06, "loss": 0.4531, "step": 43950 }, { "epoch": 0.3032832690569657, "grad_norm": 0.0, "learning_rate": 6.96868510559032e-06, "loss": 0.0, "step": 43960 }, { "epoch": 0.30335225979151, "grad_norm": 0.0002772458828985691, "learning_rate": 6.967995198244877e-06, "loss": 0.0113, "step": 43970 }, { "epoch": 0.30342125052605434, "grad_norm": 0.0, "learning_rate": 6.9673052908994335e-06, "loss": 0.0011, "step": 43980 }, { "epoch": 0.3034902412605987, "grad_norm": 0.0, "learning_rate": 6.96661538355399e-06, "loss": 0.0, "step": 43990 }, { "epoch": 0.30355923199514306, "grad_norm": 0.0, "learning_rate": 6.965925476208546e-06, "loss": 0.0, "step": 44000 }, { "epoch": 0.3036282227296874, "grad_norm": 1.1430282592773438, "learning_rate": 6.965235568863102e-06, "loss": 0.0011, "step": 44010 }, { "epoch": 0.30369721346423173, "grad_norm": 0.0, "learning_rate": 6.964545661517659e-06, "loss": 0.0, "step": 44020 }, { "epoch": 0.3037662041987761, "grad_norm": 0.0, "learning_rate": 6.963855754172215e-06, "loss": 0.0, "step": 44030 }, { "epoch": 0.30383519493332045, "grad_norm": 0.0, "learning_rate": 6.9631658468267716e-06, "loss": 0.0001, "step": 44040 }, { "epoch": 0.3039041856678648, "grad_norm": 0.0, "learning_rate": 6.962475939481328e-06, "loss": 0.0, "step": 44050 }, { "epoch": 0.3039731764024092, "grad_norm": 0.00100987171754241, "learning_rate": 6.9617860321358846e-06, "loss": 0.0, "step": 44060 }, { "epoch": 0.30404216713695353, "grad_norm": 0.0, "learning_rate": 6.961096124790441e-06, "loss": 0.0, "step": 44070 }, { "epoch": 0.30411115787149784, "grad_norm": 0.0, "learning_rate": 6.960406217444998e-06, "loss": 0.0, "step": 44080 }, { "epoch": 0.3041801486060422, "grad_norm": 0.0, "learning_rate": 6.959716310099555e-06, "loss": 0.0, "step": 44090 }, { "epoch": 0.30424913934058656, "grad_norm": 0.245906800031662, "learning_rate": 6.959026402754111e-06, "loss": 0.0, "step": 44100 }, { "epoch": 0.3043181300751309, "grad_norm": 2.147875832747559e-08, "learning_rate": 6.958336495408667e-06, "loss": 0.0, "step": 44110 }, { "epoch": 0.3043871208096753, "grad_norm": 0.0, "learning_rate": 6.9576465880632235e-06, "loss": 0.0, "step": 44120 }, { "epoch": 0.3044561115442196, "grad_norm": 1.2877981703240948e-07, "learning_rate": 6.95695668071778e-06, "loss": 0.0003, "step": 44130 }, { "epoch": 0.30452510227876395, "grad_norm": 0.0, "learning_rate": 6.9562667733723365e-06, "loss": 0.001, "step": 44140 }, { "epoch": 0.3045940930133083, "grad_norm": 0.0, "learning_rate": 6.955576866026893e-06, "loss": 0.0, "step": 44150 }, { "epoch": 0.30466308374785267, "grad_norm": 0.0, "learning_rate": 6.9548869586814495e-06, "loss": 0.0, "step": 44160 }, { "epoch": 0.30473207448239703, "grad_norm": 0.0, "learning_rate": 6.954197051336006e-06, "loss": 0.0, "step": 44170 }, { "epoch": 0.3048010652169414, "grad_norm": 0.0006027265335433185, "learning_rate": 6.9535071439905624e-06, "loss": 0.0, "step": 44180 }, { "epoch": 0.3048700559514857, "grad_norm": 7.980028021847829e-05, "learning_rate": 6.95281723664512e-06, "loss": 0.1831, "step": 44190 }, { "epoch": 0.30493904668603006, "grad_norm": 0.0, "learning_rate": 6.952127329299676e-06, "loss": 0.1854, "step": 44200 }, { "epoch": 0.3050080374205744, "grad_norm": 0.0, "learning_rate": 6.951437421954233e-06, "loss": 0.0005, "step": 44210 }, { "epoch": 0.3050770281551188, "grad_norm": 0.0, "learning_rate": 6.950747514608789e-06, "loss": 0.0, "step": 44220 }, { "epoch": 0.30514601888966314, "grad_norm": 0.0, "learning_rate": 6.950057607263345e-06, "loss": 0.0, "step": 44230 }, { "epoch": 0.30521500962420745, "grad_norm": 4.230862032272853e-06, "learning_rate": 6.949367699917901e-06, "loss": 0.0, "step": 44240 }, { "epoch": 0.3052840003587518, "grad_norm": 4.910173379002458e-10, "learning_rate": 6.948677792572458e-06, "loss": 0.0046, "step": 44250 }, { "epoch": 0.30535299109329617, "grad_norm": 0.0001485193643020466, "learning_rate": 6.947987885227014e-06, "loss": 0.0, "step": 44260 }, { "epoch": 0.3054219818278405, "grad_norm": 0.0, "learning_rate": 6.947297977881571e-06, "loss": 0.0, "step": 44270 }, { "epoch": 0.3054909725623849, "grad_norm": 0.0, "learning_rate": 6.946677061270672e-06, "loss": 1.3853, "step": 44280 }, { "epoch": 0.30555996329692925, "grad_norm": 0.0, "learning_rate": 6.945987153925229e-06, "loss": 0.0, "step": 44290 }, { "epoch": 0.30562895403147355, "grad_norm": 0.0, "learning_rate": 6.945297246579785e-06, "loss": 0.0, "step": 44300 }, { "epoch": 0.3056979447660179, "grad_norm": 0.0, "learning_rate": 6.944607339234342e-06, "loss": 0.0001, "step": 44310 }, { "epoch": 0.3057669355005623, "grad_norm": 0.0, "learning_rate": 6.943917431888898e-06, "loss": 0.0, "step": 44320 }, { "epoch": 0.30583592623510664, "grad_norm": 0.0, "learning_rate": 6.943227524543454e-06, "loss": 0.0, "step": 44330 }, { "epoch": 0.305904916969651, "grad_norm": 0.00016434176359325647, "learning_rate": 6.9425376171980105e-06, "loss": 0.0, "step": 44340 }, { "epoch": 0.3059739077041953, "grad_norm": 0.0, "learning_rate": 6.941847709852567e-06, "loss": 0.0, "step": 44350 }, { "epoch": 0.30604289843873966, "grad_norm": 0.0, "learning_rate": 6.9411578025071234e-06, "loss": 0.0, "step": 44360 }, { "epoch": 0.306111889173284, "grad_norm": 0.0, "learning_rate": 6.94046789516168e-06, "loss": 0.0, "step": 44370 }, { "epoch": 0.3061808799078284, "grad_norm": 0.0, "learning_rate": 6.9397779878162364e-06, "loss": 0.0, "step": 44380 }, { "epoch": 0.30624987064237275, "grad_norm": 0.0, "learning_rate": 6.939088080470794e-06, "loss": 0.0259, "step": 44390 }, { "epoch": 0.3063188613769171, "grad_norm": 0.0, "learning_rate": 6.93839817312535e-06, "loss": 0.3742, "step": 44400 }, { "epoch": 0.3063878521114614, "grad_norm": 0.06201404333114624, "learning_rate": 6.937708265779907e-06, "loss": 0.0, "step": 44410 }, { "epoch": 0.3064568428460058, "grad_norm": 6.698346169287106e-06, "learning_rate": 6.937018358434463e-06, "loss": 0.005, "step": 44420 }, { "epoch": 0.30652583358055013, "grad_norm": 0.0, "learning_rate": 6.93632845108902e-06, "loss": 0.0, "step": 44430 }, { "epoch": 0.3065948243150945, "grad_norm": 0.0, "learning_rate": 6.935638543743576e-06, "loss": 0.0, "step": 44440 }, { "epoch": 0.30666381504963885, "grad_norm": 659.7449340820312, "learning_rate": 6.934948636398132e-06, "loss": 0.5816, "step": 44450 }, { "epoch": 0.30673280578418316, "grad_norm": 0.0, "learning_rate": 6.934258729052688e-06, "loss": 0.0, "step": 44460 }, { "epoch": 0.3068017965187275, "grad_norm": 0.0, "learning_rate": 6.933568821707245e-06, "loss": 0.0, "step": 44470 }, { "epoch": 0.3068707872532719, "grad_norm": 0.0, "learning_rate": 6.932878914361801e-06, "loss": 0.0001, "step": 44480 }, { "epoch": 0.30693977798781624, "grad_norm": 0.0, "learning_rate": 6.932189007016358e-06, "loss": 0.0, "step": 44490 }, { "epoch": 0.3070087687223606, "grad_norm": 1.300905267953567e-07, "learning_rate": 6.931499099670915e-06, "loss": 0.0, "step": 44500 }, { "epoch": 0.30707775945690496, "grad_norm": 0.0, "learning_rate": 6.930809192325472e-06, "loss": 0.0, "step": 44510 }, { "epoch": 0.30714675019144927, "grad_norm": 0.0, "learning_rate": 6.930119284980028e-06, "loss": 0.0, "step": 44520 }, { "epoch": 0.30721574092599363, "grad_norm": 0.0, "learning_rate": 6.929429377634585e-06, "loss": 0.0, "step": 44530 }, { "epoch": 0.307284731660538, "grad_norm": 0.0, "learning_rate": 6.928739470289141e-06, "loss": 0.0, "step": 44540 }, { "epoch": 0.30735372239508235, "grad_norm": 0.0, "learning_rate": 6.928049562943698e-06, "loss": 0.0, "step": 44550 }, { "epoch": 0.3074227131296267, "grad_norm": 0.0, "learning_rate": 6.927359655598253e-06, "loss": 0.0001, "step": 44560 }, { "epoch": 0.307491703864171, "grad_norm": 0.00045866615255363286, "learning_rate": 6.92666974825281e-06, "loss": 0.0, "step": 44570 }, { "epoch": 0.3075606945987154, "grad_norm": 0.0, "learning_rate": 6.925979840907366e-06, "loss": 0.0, "step": 44580 }, { "epoch": 0.30762968533325974, "grad_norm": 0.0, "learning_rate": 6.925289933561923e-06, "loss": 0.0, "step": 44590 }, { "epoch": 0.3076986760678041, "grad_norm": 0.0, "learning_rate": 6.924600026216479e-06, "loss": 0.0, "step": 44600 }, { "epoch": 0.30776766680234846, "grad_norm": 8.274710125988349e-05, "learning_rate": 6.9239101188710366e-06, "loss": 0.0, "step": 44610 }, { "epoch": 0.3078366575368928, "grad_norm": 2.4043238511239906e-08, "learning_rate": 6.923220211525593e-06, "loss": 0.0, "step": 44620 }, { "epoch": 0.3079056482714371, "grad_norm": 0.0, "learning_rate": 6.9225303041801495e-06, "loss": 0.0, "step": 44630 }, { "epoch": 0.3079746390059815, "grad_norm": 0.0, "learning_rate": 6.921840396834706e-06, "loss": 0.0, "step": 44640 }, { "epoch": 0.30804362974052585, "grad_norm": 0.0, "learning_rate": 6.9211504894892625e-06, "loss": 0.0008, "step": 44650 }, { "epoch": 0.3081126204750702, "grad_norm": 0.0, "learning_rate": 6.920460582143819e-06, "loss": 0.0, "step": 44660 }, { "epoch": 0.30818161120961457, "grad_norm": 1.9058694622486883e-09, "learning_rate": 6.919770674798375e-06, "loss": 0.0, "step": 44670 }, { "epoch": 0.3082506019441589, "grad_norm": 0.021299777552485466, "learning_rate": 6.919080767452931e-06, "loss": 0.0, "step": 44680 }, { "epoch": 0.30831959267870324, "grad_norm": 0.0, "learning_rate": 6.918390860107488e-06, "loss": 0.0204, "step": 44690 }, { "epoch": 0.3083885834132476, "grad_norm": 4.867079042014666e-07, "learning_rate": 6.917700952762044e-06, "loss": 0.5073, "step": 44700 }, { "epoch": 0.30845757414779196, "grad_norm": 0.0, "learning_rate": 6.917011045416601e-06, "loss": 0.0, "step": 44710 }, { "epoch": 0.3085265648823363, "grad_norm": 0.0, "learning_rate": 6.916321138071158e-06, "loss": 0.0, "step": 44720 }, { "epoch": 0.3085955556168807, "grad_norm": 0.0, "learning_rate": 6.9156312307257144e-06, "loss": 0.0034, "step": 44730 }, { "epoch": 0.308664546351425, "grad_norm": 0.0, "learning_rate": 6.914941323380271e-06, "loss": 0.0199, "step": 44740 }, { "epoch": 0.30873353708596935, "grad_norm": 0.041417695581912994, "learning_rate": 6.914251416034827e-06, "loss": 0.0849, "step": 44750 }, { "epoch": 0.3088025278205137, "grad_norm": 0.0, "learning_rate": 6.913561508689384e-06, "loss": 0.0, "step": 44760 }, { "epoch": 0.30887151855505807, "grad_norm": 5.211788811720908e-05, "learning_rate": 6.91287160134394e-06, "loss": 0.0, "step": 44770 }, { "epoch": 0.3089405092896024, "grad_norm": 7.899868488311768, "learning_rate": 6.912181693998497e-06, "loss": 0.0009, "step": 44780 }, { "epoch": 0.30900950002414673, "grad_norm": 1.8002893620305827e-09, "learning_rate": 6.9114917866530525e-06, "loss": 0.0, "step": 44790 }, { "epoch": 0.3090784907586911, "grad_norm": 0.0, "learning_rate": 6.910801879307609e-06, "loss": 0.0, "step": 44800 }, { "epoch": 0.30914748149323545, "grad_norm": 0.0, "learning_rate": 6.9101119719621655e-06, "loss": 0.0, "step": 44810 }, { "epoch": 0.3092164722277798, "grad_norm": 0.0, "learning_rate": 6.909422064616722e-06, "loss": 0.0, "step": 44820 }, { "epoch": 0.3092854629623242, "grad_norm": 0.0, "learning_rate": 6.908732157271279e-06, "loss": 0.0, "step": 44830 }, { "epoch": 0.30935445369686854, "grad_norm": 0.0, "learning_rate": 6.908042249925836e-06, "loss": 0.0, "step": 44840 }, { "epoch": 0.30942344443141284, "grad_norm": 8.181193500433892e-10, "learning_rate": 6.907352342580392e-06, "loss": 0.0, "step": 44850 }, { "epoch": 0.3094924351659572, "grad_norm": 1.98007299445635e-09, "learning_rate": 6.906662435234949e-06, "loss": 0.0994, "step": 44860 }, { "epoch": 0.30956142590050156, "grad_norm": 9.021978719658819e-10, "learning_rate": 6.905972527889505e-06, "loss": 0.0, "step": 44870 }, { "epoch": 0.3096304166350459, "grad_norm": 0.0, "learning_rate": 6.905282620544062e-06, "loss": 0.0, "step": 44880 }, { "epoch": 0.3096994073695903, "grad_norm": 15.982380867004395, "learning_rate": 6.904592713198618e-06, "loss": 0.0038, "step": 44890 }, { "epoch": 0.3097683981041346, "grad_norm": 0.0, "learning_rate": 6.903902805853174e-06, "loss": 0.0, "step": 44900 }, { "epoch": 0.30983738883867895, "grad_norm": 0.00024265515094157308, "learning_rate": 6.90321289850773e-06, "loss": 0.0, "step": 44910 }, { "epoch": 0.3099063795732233, "grad_norm": 0.0, "learning_rate": 6.902522991162287e-06, "loss": 0.0, "step": 44920 }, { "epoch": 0.3099753703077677, "grad_norm": 0.0, "learning_rate": 6.901833083816843e-06, "loss": 0.0, "step": 44930 }, { "epoch": 0.31004436104231203, "grad_norm": 0.0, "learning_rate": 6.901143176471401e-06, "loss": 0.0, "step": 44940 }, { "epoch": 0.3101133517768564, "grad_norm": 0.0, "learning_rate": 6.900453269125957e-06, "loss": 0.0, "step": 44950 }, { "epoch": 0.3101823425114007, "grad_norm": 0.0, "learning_rate": 6.899763361780514e-06, "loss": 0.0, "step": 44960 }, { "epoch": 0.31025133324594506, "grad_norm": 0.0, "learning_rate": 6.89907345443507e-06, "loss": 0.0, "step": 44970 }, { "epoch": 0.3103203239804894, "grad_norm": 0.0, "learning_rate": 6.898383547089627e-06, "loss": 0.0, "step": 44980 }, { "epoch": 0.3103893147150338, "grad_norm": 0.0, "learning_rate": 6.897693639744183e-06, "loss": 0.0001, "step": 44990 }, { "epoch": 0.31045830544957814, "grad_norm": 3.703691390910535e-06, "learning_rate": 6.89700373239874e-06, "loss": 0.1872, "step": 45000 }, { "epoch": 0.31052729618412245, "grad_norm": 0.0036246581003069878, "learning_rate": 6.896313825053295e-06, "loss": 0.0, "step": 45010 }, { "epoch": 0.3105962869186668, "grad_norm": 6.165812374092638e-05, "learning_rate": 6.895623917707852e-06, "loss": 0.0, "step": 45020 }, { "epoch": 0.31066527765321117, "grad_norm": 0.0, "learning_rate": 6.894934010362408e-06, "loss": 0.0004, "step": 45030 }, { "epoch": 0.31073426838775553, "grad_norm": 0.0, "learning_rate": 6.894244103016965e-06, "loss": 0.0, "step": 45040 }, { "epoch": 0.3108032591222999, "grad_norm": 5.5043828979250975e-06, "learning_rate": 6.893554195671522e-06, "loss": 0.0, "step": 45050 }, { "epoch": 0.31087224985684425, "grad_norm": 0.0, "learning_rate": 6.892864288326079e-06, "loss": 0.0, "step": 45060 }, { "epoch": 0.31094124059138856, "grad_norm": 0.0, "learning_rate": 6.892174380980635e-06, "loss": 0.0, "step": 45070 }, { "epoch": 0.3110102313259329, "grad_norm": 2.5875824576360174e-06, "learning_rate": 6.891484473635192e-06, "loss": 0.0003, "step": 45080 }, { "epoch": 0.3110792220604773, "grad_norm": 0.0, "learning_rate": 6.890794566289748e-06, "loss": 0.0, "step": 45090 }, { "epoch": 0.31114821279502164, "grad_norm": 0.0, "learning_rate": 6.890104658944305e-06, "loss": 0.0013, "step": 45100 }, { "epoch": 0.311217203529566, "grad_norm": 0.0, "learning_rate": 6.889414751598861e-06, "loss": 0.0, "step": 45110 }, { "epoch": 0.3112861942641103, "grad_norm": 0.0, "learning_rate": 6.8887248442534176e-06, "loss": 0.0, "step": 45120 }, { "epoch": 0.31135518499865467, "grad_norm": 0.0, "learning_rate": 6.888034936907973e-06, "loss": 0.0026, "step": 45130 }, { "epoch": 0.311424175733199, "grad_norm": 0.00012032729864586145, "learning_rate": 6.88734502956253e-06, "loss": 0.0004, "step": 45140 }, { "epoch": 0.3114931664677434, "grad_norm": 2.408187583569088e-06, "learning_rate": 6.886655122217086e-06, "loss": 0.0014, "step": 45150 }, { "epoch": 0.31156215720228775, "grad_norm": 5.01133046171276e-09, "learning_rate": 6.8859652148716435e-06, "loss": 0.0, "step": 45160 }, { "epoch": 0.3116311479368321, "grad_norm": 0.0, "learning_rate": 6.8852753075262e-06, "loss": 0.0, "step": 45170 }, { "epoch": 0.3117001386713764, "grad_norm": 0.0, "learning_rate": 6.8845854001807565e-06, "loss": 0.0078, "step": 45180 }, { "epoch": 0.3117691294059208, "grad_norm": 0.008029049262404442, "learning_rate": 6.883895492835313e-06, "loss": 0.0001, "step": 45190 }, { "epoch": 0.31183812014046514, "grad_norm": 4.350970357336337e-06, "learning_rate": 6.8832055854898695e-06, "loss": 0.0004, "step": 45200 }, { "epoch": 0.3119071108750095, "grad_norm": 0.0, "learning_rate": 6.882515678144426e-06, "loss": 0.2322, "step": 45210 }, { "epoch": 0.31197610160955386, "grad_norm": 8.942540596912352e-10, "learning_rate": 6.8818257707989825e-06, "loss": 0.0, "step": 45220 }, { "epoch": 0.31204509234409816, "grad_norm": 2.541708226999617e-06, "learning_rate": 6.881135863453539e-06, "loss": 0.0069, "step": 45230 }, { "epoch": 0.3121140830786425, "grad_norm": 4.609160900115967, "learning_rate": 6.880445956108095e-06, "loss": 0.0008, "step": 45240 }, { "epoch": 0.3121830738131869, "grad_norm": 0.022396419197320938, "learning_rate": 6.879756048762651e-06, "loss": 0.0, "step": 45250 }, { "epoch": 0.31225206454773125, "grad_norm": 0.0, "learning_rate": 6.879066141417208e-06, "loss": 0.0, "step": 45260 }, { "epoch": 0.3123210552822756, "grad_norm": 0.645513653755188, "learning_rate": 6.878376234071765e-06, "loss": 0.0001, "step": 45270 }, { "epoch": 0.31239004601681997, "grad_norm": 0.05424048751592636, "learning_rate": 6.877686326726321e-06, "loss": 0.0, "step": 45280 }, { "epoch": 0.31245903675136427, "grad_norm": 0.0, "learning_rate": 6.876996419380878e-06, "loss": 0.0023, "step": 45290 }, { "epoch": 0.31252802748590863, "grad_norm": 0.23057273030281067, "learning_rate": 6.876306512035434e-06, "loss": 0.0001, "step": 45300 }, { "epoch": 0.312597018220453, "grad_norm": 0.0, "learning_rate": 6.875616604689991e-06, "loss": 0.118, "step": 45310 }, { "epoch": 0.31266600895499735, "grad_norm": 0.0, "learning_rate": 6.874926697344547e-06, "loss": 0.0523, "step": 45320 }, { "epoch": 0.3127349996895417, "grad_norm": 0.0, "learning_rate": 6.874236789999104e-06, "loss": 0.0, "step": 45330 }, { "epoch": 0.312803990424086, "grad_norm": 0.0, "learning_rate": 6.87354688265366e-06, "loss": 0.0, "step": 45340 }, { "epoch": 0.3128729811586304, "grad_norm": 0.0, "learning_rate": 6.872856975308216e-06, "loss": 0.0, "step": 45350 }, { "epoch": 0.31294197189317474, "grad_norm": 0.0010468738619238138, "learning_rate": 6.8721670679627725e-06, "loss": 0.0113, "step": 45360 }, { "epoch": 0.3130109626277191, "grad_norm": 0.014644600450992584, "learning_rate": 6.871477160617329e-06, "loss": 0.0051, "step": 45370 }, { "epoch": 0.31307995336226346, "grad_norm": 0.0, "learning_rate": 6.870787253271886e-06, "loss": 0.0, "step": 45380 }, { "epoch": 0.3131489440968078, "grad_norm": 0.0, "learning_rate": 6.870097345926443e-06, "loss": 0.0, "step": 45390 }, { "epoch": 0.31321793483135213, "grad_norm": 0.0, "learning_rate": 6.869407438580999e-06, "loss": 0.0, "step": 45400 }, { "epoch": 0.3132869255658965, "grad_norm": 0.0, "learning_rate": 6.868717531235556e-06, "loss": 0.0, "step": 45410 }, { "epoch": 0.31335591630044085, "grad_norm": 0.0, "learning_rate": 6.868027623890112e-06, "loss": 0.0, "step": 45420 }, { "epoch": 0.3134249070349852, "grad_norm": 0.0, "learning_rate": 6.867337716544669e-06, "loss": 0.0, "step": 45430 }, { "epoch": 0.3134938977695296, "grad_norm": 272.8149719238281, "learning_rate": 6.866647809199225e-06, "loss": 0.1084, "step": 45440 }, { "epoch": 0.3135628885040739, "grad_norm": 0.0, "learning_rate": 6.865957901853782e-06, "loss": 0.0001, "step": 45450 }, { "epoch": 0.31363187923861824, "grad_norm": 1.0238304648524377e-09, "learning_rate": 6.865267994508337e-06, "loss": 0.0, "step": 45460 }, { "epoch": 0.3137008699731626, "grad_norm": 0.0, "learning_rate": 6.864578087162894e-06, "loss": 0.0, "step": 45470 }, { "epoch": 0.31376986070770696, "grad_norm": 0.0, "learning_rate": 6.86388817981745e-06, "loss": 0.0001, "step": 45480 }, { "epoch": 0.3138388514422513, "grad_norm": 0.0, "learning_rate": 6.863198272472008e-06, "loss": 0.0, "step": 45490 }, { "epoch": 0.3139078421767957, "grad_norm": 0.0, "learning_rate": 6.862508365126564e-06, "loss": 0.0, "step": 45500 }, { "epoch": 0.31397683291134, "grad_norm": 0.0, "learning_rate": 6.861818457781121e-06, "loss": 0.0, "step": 45510 }, { "epoch": 0.31404582364588435, "grad_norm": 0.0, "learning_rate": 6.861128550435677e-06, "loss": 0.0018, "step": 45520 }, { "epoch": 0.3141148143804287, "grad_norm": 0.0, "learning_rate": 6.860438643090234e-06, "loss": 0.0, "step": 45530 }, { "epoch": 0.31418380511497307, "grad_norm": 0.0, "learning_rate": 6.85974873574479e-06, "loss": 0.0, "step": 45540 }, { "epoch": 0.31425279584951743, "grad_norm": 0.0005821464583277702, "learning_rate": 6.859058828399347e-06, "loss": 0.0038, "step": 45550 }, { "epoch": 0.31432178658406174, "grad_norm": 0.8175028562545776, "learning_rate": 6.858368921053903e-06, "loss": 0.0002, "step": 45560 }, { "epoch": 0.3143907773186061, "grad_norm": 0.0, "learning_rate": 6.85767901370846e-06, "loss": 0.0003, "step": 45570 }, { "epoch": 0.31445976805315046, "grad_norm": 0.0, "learning_rate": 6.856989106363015e-06, "loss": 0.0, "step": 45580 }, { "epoch": 0.3145287587876948, "grad_norm": 0.0, "learning_rate": 6.856299199017572e-06, "loss": 0.0, "step": 45590 }, { "epoch": 0.3145977495222392, "grad_norm": 0.0, "learning_rate": 6.855609291672129e-06, "loss": 0.0, "step": 45600 }, { "epoch": 0.31466674025678354, "grad_norm": 0.0, "learning_rate": 6.854919384326686e-06, "loss": 0.0, "step": 45610 }, { "epoch": 0.31473573099132784, "grad_norm": 1.785837810963642e-09, "learning_rate": 6.854229476981242e-06, "loss": 0.0006, "step": 45620 }, { "epoch": 0.3148047217258722, "grad_norm": 0.0, "learning_rate": 6.8535395696357986e-06, "loss": 0.0, "step": 45630 }, { "epoch": 0.31487371246041657, "grad_norm": 0.0, "learning_rate": 6.852849662290355e-06, "loss": 0.0, "step": 45640 }, { "epoch": 0.3149427031949609, "grad_norm": 0.002674132352694869, "learning_rate": 6.8521597549449116e-06, "loss": 0.0003, "step": 45650 }, { "epoch": 0.3150116939295053, "grad_norm": 0.0, "learning_rate": 6.851469847599468e-06, "loss": 0.0001, "step": 45660 }, { "epoch": 0.3150806846640496, "grad_norm": 0.0, "learning_rate": 6.8507799402540245e-06, "loss": 0.0001, "step": 45670 }, { "epoch": 0.31514967539859395, "grad_norm": 0.0, "learning_rate": 6.850090032908581e-06, "loss": 0.0671, "step": 45680 }, { "epoch": 0.3152186661331383, "grad_norm": 0.00021236490283627063, "learning_rate": 6.849400125563137e-06, "loss": 0.0, "step": 45690 }, { "epoch": 0.3152876568676827, "grad_norm": 0.0, "learning_rate": 6.848710218217693e-06, "loss": 0.0, "step": 45700 }, { "epoch": 0.31535664760222704, "grad_norm": 0.0, "learning_rate": 6.848020310872251e-06, "loss": 0.0, "step": 45710 }, { "epoch": 0.3154256383367714, "grad_norm": 0.022920195013284683, "learning_rate": 6.847330403526807e-06, "loss": 0.0, "step": 45720 }, { "epoch": 0.3154946290713157, "grad_norm": 3.2675626277923584, "learning_rate": 6.8466404961813635e-06, "loss": 0.0362, "step": 45730 }, { "epoch": 0.31556361980586006, "grad_norm": 0.0, "learning_rate": 6.84595058883592e-06, "loss": 0.0003, "step": 45740 }, { "epoch": 0.3156326105404044, "grad_norm": 12.057875633239746, "learning_rate": 6.8452606814904765e-06, "loss": 0.004, "step": 45750 }, { "epoch": 0.3157016012749488, "grad_norm": 0.0, "learning_rate": 6.844570774145033e-06, "loss": 0.0, "step": 45760 }, { "epoch": 0.31577059200949315, "grad_norm": 2.0129700715187937e-06, "learning_rate": 6.8438808667995894e-06, "loss": 0.0, "step": 45770 }, { "epoch": 0.31583958274403745, "grad_norm": 0.0, "learning_rate": 6.843190959454146e-06, "loss": 0.013, "step": 45780 }, { "epoch": 0.3159085734785818, "grad_norm": 8.719306788407266e-05, "learning_rate": 6.842501052108702e-06, "loss": 0.0, "step": 45790 }, { "epoch": 0.31597756421312617, "grad_norm": 0.021643323823809624, "learning_rate": 6.841811144763258e-06, "loss": 0.0001, "step": 45800 }, { "epoch": 0.31604655494767053, "grad_norm": 6.259943620534614e-05, "learning_rate": 6.8411212374178145e-06, "loss": 0.6844, "step": 45810 }, { "epoch": 0.3161155456822149, "grad_norm": 7.235644261527341e-08, "learning_rate": 6.840431330072373e-06, "loss": 0.0005, "step": 45820 }, { "epoch": 0.31618453641675925, "grad_norm": 0.0, "learning_rate": 6.839741422726928e-06, "loss": 0.0, "step": 45830 }, { "epoch": 0.31625352715130356, "grad_norm": 8.794885375529304e-10, "learning_rate": 6.839051515381485e-06, "loss": 0.0, "step": 45840 }, { "epoch": 0.3163225178858479, "grad_norm": 0.0, "learning_rate": 6.838361608036041e-06, "loss": 0.0387, "step": 45850 }, { "epoch": 0.3163915086203923, "grad_norm": 0.0, "learning_rate": 6.837671700690598e-06, "loss": 0.0005, "step": 45860 }, { "epoch": 0.31646049935493664, "grad_norm": 0.0, "learning_rate": 6.836981793345154e-06, "loss": 0.0, "step": 45870 }, { "epoch": 0.316529490089481, "grad_norm": 0.0, "learning_rate": 6.836291885999711e-06, "loss": 0.0, "step": 45880 }, { "epoch": 0.3165984808240253, "grad_norm": 0.0, "learning_rate": 6.835601978654267e-06, "loss": 0.0, "step": 45890 }, { "epoch": 0.31666747155856967, "grad_norm": 0.0, "learning_rate": 6.834912071308824e-06, "loss": 0.0001, "step": 45900 }, { "epoch": 0.31673646229311403, "grad_norm": 9.855404270453505e-10, "learning_rate": 6.83422216396338e-06, "loss": 0.0, "step": 45910 }, { "epoch": 0.3168054530276584, "grad_norm": 0.0, "learning_rate": 6.833532256617936e-06, "loss": 0.0001, "step": 45920 }, { "epoch": 0.31687444376220275, "grad_norm": 0.0, "learning_rate": 6.832842349272494e-06, "loss": 0.0, "step": 45930 }, { "epoch": 0.3169434344967471, "grad_norm": 0.0, "learning_rate": 6.83215244192705e-06, "loss": 0.0001, "step": 45940 }, { "epoch": 0.3170124252312914, "grad_norm": 0.0, "learning_rate": 6.831462534581606e-06, "loss": 0.0, "step": 45950 }, { "epoch": 0.3170814159658358, "grad_norm": 0.0, "learning_rate": 6.830772627236163e-06, "loss": 0.0, "step": 45960 }, { "epoch": 0.31715040670038014, "grad_norm": 0.0, "learning_rate": 6.830082719890719e-06, "loss": 0.0, "step": 45970 }, { "epoch": 0.3172193974349245, "grad_norm": 0.0, "learning_rate": 6.829392812545276e-06, "loss": 0.0, "step": 45980 }, { "epoch": 0.31728838816946886, "grad_norm": 0.0, "learning_rate": 6.828702905199832e-06, "loss": 0.0048, "step": 45990 }, { "epoch": 0.31735737890401317, "grad_norm": 0.0, "learning_rate": 6.828012997854389e-06, "loss": 0.1422, "step": 46000 }, { "epoch": 0.3174263696385575, "grad_norm": 8.094303560746141e-10, "learning_rate": 6.827323090508945e-06, "loss": 0.0, "step": 46010 }, { "epoch": 0.3174953603731019, "grad_norm": 0.0, "learning_rate": 6.826633183163502e-06, "loss": 0.0, "step": 46020 }, { "epoch": 0.31756435110764625, "grad_norm": 0.0, "learning_rate": 6.825943275818057e-06, "loss": 0.0189, "step": 46030 }, { "epoch": 0.3176333418421906, "grad_norm": 4.439801273292687e-07, "learning_rate": 6.8252533684726155e-06, "loss": 0.0, "step": 46040 }, { "epoch": 0.31770233257673497, "grad_norm": 0.0668044239282608, "learning_rate": 6.824563461127172e-06, "loss": 0.0, "step": 46050 }, { "epoch": 0.3177713233112793, "grad_norm": 2.518414248697809e-06, "learning_rate": 6.823873553781728e-06, "loss": 0.0, "step": 46060 }, { "epoch": 0.31784031404582364, "grad_norm": 0.0, "learning_rate": 6.823183646436284e-06, "loss": 0.0172, "step": 46070 }, { "epoch": 0.317909304780368, "grad_norm": 0.0, "learning_rate": 6.822493739090841e-06, "loss": 0.0, "step": 46080 }, { "epoch": 0.31797829551491236, "grad_norm": 0.0, "learning_rate": 6.821803831745397e-06, "loss": 0.0017, "step": 46090 }, { "epoch": 0.3180472862494567, "grad_norm": 0.0, "learning_rate": 6.821113924399954e-06, "loss": 0.0, "step": 46100 }, { "epoch": 0.318116276984001, "grad_norm": 0.0, "learning_rate": 6.82042401705451e-06, "loss": 0.0, "step": 46110 }, { "epoch": 0.3181852677185454, "grad_norm": 5.813530515297316e-05, "learning_rate": 6.819734109709067e-06, "loss": 0.0, "step": 46120 }, { "epoch": 0.31825425845308974, "grad_norm": 0.0, "learning_rate": 6.819044202363623e-06, "loss": 0.0, "step": 46130 }, { "epoch": 0.3183232491876341, "grad_norm": 89.85318756103516, "learning_rate": 6.818354295018179e-06, "loss": 0.0151, "step": 46140 }, { "epoch": 0.31839223992217847, "grad_norm": 0.0, "learning_rate": 6.817664387672737e-06, "loss": 0.0, "step": 46150 }, { "epoch": 0.3184612306567228, "grad_norm": 8.862480194160582e-10, "learning_rate": 6.816974480327293e-06, "loss": 0.0024, "step": 46160 }, { "epoch": 0.31853022139126713, "grad_norm": 5.272399334899092e-07, "learning_rate": 6.816284572981849e-06, "loss": 0.0, "step": 46170 }, { "epoch": 0.3185992121258115, "grad_norm": 0.0, "learning_rate": 6.8155946656364055e-06, "loss": 0.0, "step": 46180 }, { "epoch": 0.31866820286035585, "grad_norm": 0.0, "learning_rate": 6.814904758290962e-06, "loss": 0.5758, "step": 46190 }, { "epoch": 0.3187371935949002, "grad_norm": 0.0, "learning_rate": 6.8142148509455185e-06, "loss": 0.0, "step": 46200 }, { "epoch": 0.3188061843294446, "grad_norm": 0.0, "learning_rate": 6.813524943600075e-06, "loss": 0.3789, "step": 46210 }, { "epoch": 0.3188751750639889, "grad_norm": 0.0, "learning_rate": 6.8128350362546315e-06, "loss": 0.0, "step": 46220 }, { "epoch": 0.31894416579853324, "grad_norm": 0.0, "learning_rate": 6.812145128909188e-06, "loss": 0.2004, "step": 46230 }, { "epoch": 0.3190131565330776, "grad_norm": 0.0, "learning_rate": 6.8114552215637445e-06, "loss": 0.0, "step": 46240 }, { "epoch": 0.31908214726762196, "grad_norm": 0.0, "learning_rate": 6.810765314218301e-06, "loss": 0.0, "step": 46250 }, { "epoch": 0.3191511380021663, "grad_norm": 1.7817486375193425e-09, "learning_rate": 6.810075406872858e-06, "loss": 0.0, "step": 46260 }, { "epoch": 0.3192201287367107, "grad_norm": 0.0, "learning_rate": 6.809385499527415e-06, "loss": 0.0, "step": 46270 }, { "epoch": 0.319289119471255, "grad_norm": 4.777670481459495e-10, "learning_rate": 6.8086955921819704e-06, "loss": 0.0019, "step": 46280 }, { "epoch": 0.31935811020579935, "grad_norm": 0.0, "learning_rate": 6.808005684836527e-06, "loss": 0.0, "step": 46290 }, { "epoch": 0.3194271009403437, "grad_norm": 0.0, "learning_rate": 6.8073157774910834e-06, "loss": 0.0001, "step": 46300 }, { "epoch": 0.31949609167488807, "grad_norm": 0.0, "learning_rate": 6.80662587014564e-06, "loss": 0.0, "step": 46310 }, { "epoch": 0.31956508240943243, "grad_norm": 2.0159306757250306e-07, "learning_rate": 6.805935962800196e-06, "loss": 0.0116, "step": 46320 }, { "epoch": 0.31963407314397674, "grad_norm": 0.0, "learning_rate": 6.805246055454753e-06, "loss": 0.0, "step": 46330 }, { "epoch": 0.3197030638785211, "grad_norm": 0.0, "learning_rate": 6.804556148109309e-06, "loss": 0.0033, "step": 46340 }, { "epoch": 0.31977205461306546, "grad_norm": 3.4986820220947266, "learning_rate": 6.803866240763866e-06, "loss": 0.0007, "step": 46350 }, { "epoch": 0.3198410453476098, "grad_norm": 0.0, "learning_rate": 6.803176333418422e-06, "loss": 0.0, "step": 46360 }, { "epoch": 0.3199100360821542, "grad_norm": 0.0, "learning_rate": 6.80248642607298e-06, "loss": 0.0001, "step": 46370 }, { "epoch": 0.31997902681669854, "grad_norm": 0.03837305307388306, "learning_rate": 6.801796518727536e-06, "loss": 0.0, "step": 46380 }, { "epoch": 0.32004801755124285, "grad_norm": 0.0, "learning_rate": 6.801106611382093e-06, "loss": 0.0003, "step": 46390 }, { "epoch": 0.3201170082857872, "grad_norm": 0.0, "learning_rate": 6.800416704036648e-06, "loss": 0.0, "step": 46400 }, { "epoch": 0.32018599902033157, "grad_norm": 0.0, "learning_rate": 6.799726796691205e-06, "loss": 0.0, "step": 46410 }, { "epoch": 0.32025498975487593, "grad_norm": 0.0, "learning_rate": 6.799036889345761e-06, "loss": 0.0, "step": 46420 }, { "epoch": 0.3203239804894203, "grad_norm": 0.0, "learning_rate": 6.798346982000318e-06, "loss": 0.0, "step": 46430 }, { "epoch": 0.3203929712239646, "grad_norm": 0.0028375277761369944, "learning_rate": 6.797657074654874e-06, "loss": 0.0, "step": 46440 }, { "epoch": 0.32046196195850896, "grad_norm": 0.0, "learning_rate": 6.796967167309431e-06, "loss": 0.0, "step": 46450 }, { "epoch": 0.3205309526930533, "grad_norm": 0.0, "learning_rate": 6.796277259963987e-06, "loss": 0.0, "step": 46460 }, { "epoch": 0.3205999434275977, "grad_norm": 0.0, "learning_rate": 6.795587352618544e-06, "loss": 0.0, "step": 46470 }, { "epoch": 0.32066893416214204, "grad_norm": 9.338423367921678e-10, "learning_rate": 6.794897445273101e-06, "loss": 0.0, "step": 46480 }, { "epoch": 0.3207379248966864, "grad_norm": 0.0, "learning_rate": 6.794207537927658e-06, "loss": 0.0, "step": 46490 }, { "epoch": 0.3208069156312307, "grad_norm": 0.0, "learning_rate": 6.793517630582214e-06, "loss": 0.0, "step": 46500 }, { "epoch": 0.32087590636577507, "grad_norm": 0.0, "learning_rate": 6.79282772323677e-06, "loss": 0.0, "step": 46510 }, { "epoch": 0.3209448971003194, "grad_norm": 0.0, "learning_rate": 6.792137815891326e-06, "loss": 0.0, "step": 46520 }, { "epoch": 0.3210138878348638, "grad_norm": 7.458052095898893e-06, "learning_rate": 6.791447908545883e-06, "loss": 0.0, "step": 46530 }, { "epoch": 0.32108287856940815, "grad_norm": 0.0, "learning_rate": 6.790758001200439e-06, "loss": 0.0, "step": 46540 }, { "epoch": 0.32115186930395245, "grad_norm": 2.636823737134364e-08, "learning_rate": 6.790068093854996e-06, "loss": 0.0, "step": 46550 }, { "epoch": 0.3212208600384968, "grad_norm": 0.0, "learning_rate": 6.789378186509552e-06, "loss": 0.0, "step": 46560 }, { "epoch": 0.3212898507730412, "grad_norm": 0.0, "learning_rate": 6.788688279164109e-06, "loss": 0.0, "step": 46570 }, { "epoch": 0.32135884150758554, "grad_norm": 0.0, "learning_rate": 6.787998371818665e-06, "loss": 0.0814, "step": 46580 }, { "epoch": 0.3214278322421299, "grad_norm": 9.081887464290617e-10, "learning_rate": 6.7873084644732225e-06, "loss": 0.0, "step": 46590 }, { "epoch": 0.32149682297667426, "grad_norm": 0.0, "learning_rate": 6.786618557127779e-06, "loss": 0.0, "step": 46600 }, { "epoch": 0.32156581371121856, "grad_norm": 0.0, "learning_rate": 6.7859286497823355e-06, "loss": 0.0001, "step": 46610 }, { "epoch": 0.3216348044457629, "grad_norm": 1.1349088424594811e-07, "learning_rate": 6.785238742436891e-06, "loss": 0.0, "step": 46620 }, { "epoch": 0.3217037951803073, "grad_norm": 0.001222744700498879, "learning_rate": 6.784548835091448e-06, "loss": 0.0, "step": 46630 }, { "epoch": 0.32177278591485164, "grad_norm": 0.0, "learning_rate": 6.783858927746004e-06, "loss": 0.0, "step": 46640 }, { "epoch": 0.321841776649396, "grad_norm": 0.03581872209906578, "learning_rate": 6.783169020400561e-06, "loss": 0.0, "step": 46650 }, { "epoch": 0.3219107673839403, "grad_norm": 0.0, "learning_rate": 6.782479113055117e-06, "loss": 0.0009, "step": 46660 }, { "epoch": 0.32197975811848467, "grad_norm": 0.0, "learning_rate": 6.7817892057096736e-06, "loss": 0.0004, "step": 46670 }, { "epoch": 0.32204874885302903, "grad_norm": 0.0, "learning_rate": 6.78109929836423e-06, "loss": 0.0, "step": 46680 }, { "epoch": 0.3221177395875734, "grad_norm": 0.0, "learning_rate": 6.7804093910187865e-06, "loss": 0.0, "step": 46690 }, { "epoch": 0.32218673032211775, "grad_norm": 0.0, "learning_rate": 6.779719483673344e-06, "loss": 0.0, "step": 46700 }, { "epoch": 0.3222557210566621, "grad_norm": 0.0, "learning_rate": 6.7790295763279e-06, "loss": 0.0, "step": 46710 }, { "epoch": 0.3223247117912064, "grad_norm": 0.0, "learning_rate": 6.778339668982457e-06, "loss": 0.0002, "step": 46720 }, { "epoch": 0.3223937025257508, "grad_norm": 0.0, "learning_rate": 6.777649761637013e-06, "loss": 0.0, "step": 46730 }, { "epoch": 0.32246269326029514, "grad_norm": 0.0, "learning_rate": 6.776959854291569e-06, "loss": 0.0, "step": 46740 }, { "epoch": 0.3225316839948395, "grad_norm": 0.0, "learning_rate": 6.7762699469461255e-06, "loss": 0.0, "step": 46750 }, { "epoch": 0.32260067472938386, "grad_norm": 35.30790710449219, "learning_rate": 6.775580039600682e-06, "loss": 0.01, "step": 46760 }, { "epoch": 0.32266966546392817, "grad_norm": 0.0, "learning_rate": 6.7748901322552385e-06, "loss": 0.0, "step": 46770 }, { "epoch": 0.32273865619847253, "grad_norm": 0.0, "learning_rate": 6.774200224909795e-06, "loss": 0.0, "step": 46780 }, { "epoch": 0.3228076469330169, "grad_norm": 0.0, "learning_rate": 6.7735103175643515e-06, "loss": 0.0, "step": 46790 }, { "epoch": 0.32287663766756125, "grad_norm": 0.0, "learning_rate": 6.772820410218908e-06, "loss": 0.0, "step": 46800 }, { "epoch": 0.3229456284021056, "grad_norm": 0.0, "learning_rate": 6.772130502873465e-06, "loss": 0.0, "step": 46810 }, { "epoch": 0.32301461913664997, "grad_norm": 0.0, "learning_rate": 6.771440595528022e-06, "loss": 0.0, "step": 46820 }, { "epoch": 0.3230836098711943, "grad_norm": 0.0, "learning_rate": 6.770750688182578e-06, "loss": 0.0004, "step": 46830 }, { "epoch": 0.32315260060573864, "grad_norm": 0.0, "learning_rate": 6.770060780837135e-06, "loss": 0.0, "step": 46840 }, { "epoch": 0.323221591340283, "grad_norm": 0.0, "learning_rate": 6.76937087349169e-06, "loss": 0.0003, "step": 46850 }, { "epoch": 0.32329058207482736, "grad_norm": 0.0, "learning_rate": 6.768680966146247e-06, "loss": 0.0, "step": 46860 }, { "epoch": 0.3233595728093717, "grad_norm": 0.0, "learning_rate": 6.767991058800803e-06, "loss": 0.0, "step": 46870 }, { "epoch": 0.323428563543916, "grad_norm": 0.0, "learning_rate": 6.76730115145536e-06, "loss": 0.0, "step": 46880 }, { "epoch": 0.3234975542784604, "grad_norm": 0.0, "learning_rate": 6.766611244109916e-06, "loss": 0.0, "step": 46890 }, { "epoch": 0.32356654501300475, "grad_norm": 0.0, "learning_rate": 6.765921336764473e-06, "loss": 0.0001, "step": 46900 }, { "epoch": 0.3236355357475491, "grad_norm": 0.0, "learning_rate": 6.765231429419029e-06, "loss": 0.0006, "step": 46910 }, { "epoch": 0.32370452648209347, "grad_norm": 0.0, "learning_rate": 6.764541522073587e-06, "loss": 0.0, "step": 46920 }, { "epoch": 0.32377351721663783, "grad_norm": 0.0, "learning_rate": 6.763851614728143e-06, "loss": 0.0, "step": 46930 }, { "epoch": 0.32384250795118213, "grad_norm": 0.0, "learning_rate": 6.7631617073827e-06, "loss": 0.0, "step": 46940 }, { "epoch": 0.3239114986857265, "grad_norm": 4.902747097190741e-10, "learning_rate": 6.762471800037256e-06, "loss": 0.0, "step": 46950 }, { "epoch": 0.32398048942027086, "grad_norm": 1.2466879297790001e-06, "learning_rate": 6.761781892691812e-06, "loss": 0.0, "step": 46960 }, { "epoch": 0.3240494801548152, "grad_norm": 7.000086732489308e-09, "learning_rate": 6.761091985346368e-06, "loss": 0.0, "step": 46970 }, { "epoch": 0.3241184708893596, "grad_norm": 0.026929859071969986, "learning_rate": 6.760402078000925e-06, "loss": 0.0, "step": 46980 }, { "epoch": 0.3241874616239039, "grad_norm": 0.0, "learning_rate": 6.759712170655481e-06, "loss": 0.0, "step": 46990 }, { "epoch": 0.32425645235844824, "grad_norm": 0.0, "learning_rate": 6.759022263310038e-06, "loss": 0.0, "step": 47000 }, { "epoch": 0.3243254430929926, "grad_norm": 0.0, "learning_rate": 6.758332355964594e-06, "loss": 0.0, "step": 47010 }, { "epoch": 0.32439443382753697, "grad_norm": 0.0, "learning_rate": 6.757642448619151e-06, "loss": 0.0, "step": 47020 }, { "epoch": 0.3244634245620813, "grad_norm": 0.0, "learning_rate": 6.756952541273708e-06, "loss": 0.0015, "step": 47030 }, { "epoch": 0.3245324152966257, "grad_norm": 0.0, "learning_rate": 6.7562626339282646e-06, "loss": 0.0, "step": 47040 }, { "epoch": 0.32460140603117, "grad_norm": 0.16211307048797607, "learning_rate": 6.755572726582821e-06, "loss": 0.0002, "step": 47050 }, { "epoch": 0.32467039676571435, "grad_norm": 0.0, "learning_rate": 6.7548828192373775e-06, "loss": 0.0013, "step": 47060 }, { "epoch": 0.3247393875002587, "grad_norm": 8.754976079217158e-06, "learning_rate": 6.754192911891933e-06, "loss": 0.0, "step": 47070 }, { "epoch": 0.3248083782348031, "grad_norm": 0.0, "learning_rate": 6.75350300454649e-06, "loss": 0.0, "step": 47080 }, { "epoch": 0.32487736896934744, "grad_norm": 0.0, "learning_rate": 6.752813097201046e-06, "loss": 0.0569, "step": 47090 }, { "epoch": 0.32494635970389174, "grad_norm": 0.0, "learning_rate": 6.752123189855603e-06, "loss": 0.0, "step": 47100 }, { "epoch": 0.3250153504384361, "grad_norm": 0.0001604845019755885, "learning_rate": 6.751433282510159e-06, "loss": 0.0, "step": 47110 }, { "epoch": 0.32508434117298046, "grad_norm": 0.0, "learning_rate": 6.750743375164716e-06, "loss": 0.0, "step": 47120 }, { "epoch": 0.3251533319075248, "grad_norm": 0.0, "learning_rate": 6.750053467819272e-06, "loss": 0.0055, "step": 47130 }, { "epoch": 0.3252223226420692, "grad_norm": 0.0, "learning_rate": 6.7493635604738295e-06, "loss": 0.0, "step": 47140 }, { "epoch": 0.32529131337661354, "grad_norm": 0.0, "learning_rate": 6.748673653128386e-06, "loss": 0.048, "step": 47150 }, { "epoch": 0.32536030411115785, "grad_norm": 0.0, "learning_rate": 6.7479837457829424e-06, "loss": 0.0, "step": 47160 }, { "epoch": 0.3254292948457022, "grad_norm": 0.0, "learning_rate": 6.747293838437499e-06, "loss": 0.0001, "step": 47170 }, { "epoch": 0.32549828558024657, "grad_norm": 0.0, "learning_rate": 6.746672921826599e-06, "loss": 0.6029, "step": 47180 }, { "epoch": 0.32556727631479093, "grad_norm": 0.0, "learning_rate": 6.745983014481155e-06, "loss": 0.1036, "step": 47190 }, { "epoch": 0.3256362670493353, "grad_norm": 1.7777416706085205, "learning_rate": 6.745293107135712e-06, "loss": 0.0005, "step": 47200 }, { "epoch": 0.3257052577838796, "grad_norm": 0.0, "learning_rate": 6.744603199790268e-06, "loss": 0.0, "step": 47210 }, { "epoch": 0.32577424851842396, "grad_norm": 0.0, "learning_rate": 6.743913292444825e-06, "loss": 0.0, "step": 47220 }, { "epoch": 0.3258432392529683, "grad_norm": 0.0, "learning_rate": 6.743223385099382e-06, "loss": 0.0064, "step": 47230 }, { "epoch": 0.3259122299875127, "grad_norm": 0.0006976527511142194, "learning_rate": 6.7425334777539385e-06, "loss": 0.0, "step": 47240 }, { "epoch": 0.32598122072205704, "grad_norm": 0.00018832985369954258, "learning_rate": 6.741843570408495e-06, "loss": 0.0001, "step": 47250 }, { "epoch": 0.3260502114566014, "grad_norm": 0.0, "learning_rate": 6.7411536630630515e-06, "loss": 0.0003, "step": 47260 }, { "epoch": 0.3261192021911457, "grad_norm": 0.0, "learning_rate": 6.740463755717608e-06, "loss": 0.0, "step": 47270 }, { "epoch": 0.32618819292569007, "grad_norm": 8.414518561039586e-06, "learning_rate": 6.7397738483721645e-06, "loss": 0.0001, "step": 47280 }, { "epoch": 0.32625718366023443, "grad_norm": 0.012728461995720863, "learning_rate": 6.739083941026721e-06, "loss": 0.0, "step": 47290 }, { "epoch": 0.3263261743947788, "grad_norm": 0.0, "learning_rate": 6.738394033681277e-06, "loss": 0.0, "step": 47300 }, { "epoch": 0.32639516512932315, "grad_norm": 0.01205992791801691, "learning_rate": 6.737704126335833e-06, "loss": 0.0, "step": 47310 }, { "epoch": 0.32646415586386746, "grad_norm": 0.0, "learning_rate": 6.73701421899039e-06, "loss": 0.0, "step": 47320 }, { "epoch": 0.3265331465984118, "grad_norm": 0.0, "learning_rate": 6.736324311644946e-06, "loss": 0.0, "step": 47330 }, { "epoch": 0.3266021373329562, "grad_norm": 0.0, "learning_rate": 6.7356344042995034e-06, "loss": 0.0, "step": 47340 }, { "epoch": 0.32667112806750054, "grad_norm": 3.6986063101096534e-09, "learning_rate": 6.73494449695406e-06, "loss": 0.0, "step": 47350 }, { "epoch": 0.3267401188020449, "grad_norm": 0.0, "learning_rate": 6.7342545896086164e-06, "loss": 0.0, "step": 47360 }, { "epoch": 0.32680910953658926, "grad_norm": 0.0, "learning_rate": 6.733564682263173e-06, "loss": 0.0, "step": 47370 }, { "epoch": 0.32687810027113356, "grad_norm": 0.0, "learning_rate": 6.732874774917729e-06, "loss": 0.0009, "step": 47380 }, { "epoch": 0.3269470910056779, "grad_norm": 9.114627630424366e-08, "learning_rate": 6.732184867572286e-06, "loss": 0.0003, "step": 47390 }, { "epoch": 0.3270160817402223, "grad_norm": 0.0, "learning_rate": 6.731494960226842e-06, "loss": 0.0, "step": 47400 }, { "epoch": 0.32708507247476665, "grad_norm": 0.0, "learning_rate": 6.730805052881398e-06, "loss": 0.0007, "step": 47410 }, { "epoch": 0.327154063209311, "grad_norm": 0.0, "learning_rate": 6.7301151455359545e-06, "loss": 0.0291, "step": 47420 }, { "epoch": 0.3272230539438553, "grad_norm": 5.850496563652996e-06, "learning_rate": 6.729425238190511e-06, "loss": 0.0, "step": 47430 }, { "epoch": 0.3272920446783997, "grad_norm": 0.0, "learning_rate": 6.7287353308450675e-06, "loss": 0.0, "step": 47440 }, { "epoch": 0.32736103541294403, "grad_norm": 0.0, "learning_rate": 6.728045423499625e-06, "loss": 0.0, "step": 47450 }, { "epoch": 0.3274300261474884, "grad_norm": 1.930912540970553e-09, "learning_rate": 6.727355516154181e-06, "loss": 0.0, "step": 47460 }, { "epoch": 0.32749901688203276, "grad_norm": 0.0, "learning_rate": 6.726665608808738e-06, "loss": 0.0, "step": 47470 }, { "epoch": 0.3275680076165771, "grad_norm": 0.0059599317610263824, "learning_rate": 6.725975701463294e-06, "loss": 0.0, "step": 47480 }, { "epoch": 0.3276369983511214, "grad_norm": 0.0, "learning_rate": 6.725285794117851e-06, "loss": 0.0, "step": 47490 }, { "epoch": 0.3277059890856658, "grad_norm": 0.0, "learning_rate": 6.724595886772407e-06, "loss": 0.0, "step": 47500 }, { "epoch": 0.32777497982021014, "grad_norm": 0.0, "learning_rate": 6.723905979426964e-06, "loss": 0.0, "step": 47510 }, { "epoch": 0.3278439705547545, "grad_norm": 0.0, "learning_rate": 6.7232160720815194e-06, "loss": 0.0, "step": 47520 }, { "epoch": 0.32791296128929887, "grad_norm": 0.0, "learning_rate": 6.722526164736076e-06, "loss": 0.0, "step": 47530 }, { "epoch": 0.32798195202384317, "grad_norm": 0.0, "learning_rate": 6.721836257390632e-06, "loss": 0.0, "step": 47540 }, { "epoch": 0.32805094275838753, "grad_norm": 0.0, "learning_rate": 6.721146350045189e-06, "loss": 0.0, "step": 47550 }, { "epoch": 0.3281199334929319, "grad_norm": 0.019669951871037483, "learning_rate": 6.720456442699746e-06, "loss": 0.0023, "step": 47560 }, { "epoch": 0.32818892422747625, "grad_norm": 0.0, "learning_rate": 6.719766535354303e-06, "loss": 0.0018, "step": 47570 }, { "epoch": 0.3282579149620206, "grad_norm": 0.0, "learning_rate": 6.719076628008859e-06, "loss": 0.0, "step": 47580 }, { "epoch": 0.328326905696565, "grad_norm": 0.0, "learning_rate": 6.718386720663416e-06, "loss": 0.0, "step": 47590 }, { "epoch": 0.3283958964311093, "grad_norm": 0.0, "learning_rate": 6.717696813317972e-06, "loss": 0.0, "step": 47600 }, { "epoch": 0.32846488716565364, "grad_norm": 0.0, "learning_rate": 6.717006905972529e-06, "loss": 0.0, "step": 47610 }, { "epoch": 0.328533877900198, "grad_norm": 0.0, "learning_rate": 6.716316998627085e-06, "loss": 0.0, "step": 47620 }, { "epoch": 0.32860286863474236, "grad_norm": 0.0, "learning_rate": 6.715627091281642e-06, "loss": 0.0, "step": 47630 }, { "epoch": 0.3286718593692867, "grad_norm": 0.0, "learning_rate": 6.714937183936197e-06, "loss": 0.2436, "step": 47640 }, { "epoch": 0.32874085010383103, "grad_norm": 9.838677406311035, "learning_rate": 6.714247276590754e-06, "loss": 0.0021, "step": 47650 }, { "epoch": 0.3288098408383754, "grad_norm": 2.3980476271390216e-08, "learning_rate": 6.71355736924531e-06, "loss": 0.0, "step": 47660 }, { "epoch": 0.32887883157291975, "grad_norm": 0.000427614722866565, "learning_rate": 6.712867461899868e-06, "loss": 0.0, "step": 47670 }, { "epoch": 0.3289478223074641, "grad_norm": 0.0, "learning_rate": 6.712177554554424e-06, "loss": 0.0, "step": 47680 }, { "epoch": 0.32901681304200847, "grad_norm": 0.0, "learning_rate": 6.711487647208981e-06, "loss": 0.0, "step": 47690 }, { "epoch": 0.32908580377655283, "grad_norm": 0.0, "learning_rate": 6.710797739863537e-06, "loss": 0.0, "step": 47700 }, { "epoch": 0.32915479451109714, "grad_norm": 0.0, "learning_rate": 6.710107832518094e-06, "loss": 0.0, "step": 47710 }, { "epoch": 0.3292237852456415, "grad_norm": 3.1523313737125136e-06, "learning_rate": 6.70941792517265e-06, "loss": 0.0, "step": 47720 }, { "epoch": 0.32929277598018586, "grad_norm": 0.0, "learning_rate": 6.7087280178272066e-06, "loss": 0.0, "step": 47730 }, { "epoch": 0.3293617667147302, "grad_norm": 0.0, "learning_rate": 6.708038110481763e-06, "loss": 0.0, "step": 47740 }, { "epoch": 0.3294307574492746, "grad_norm": 485.5085754394531, "learning_rate": 6.707348203136319e-06, "loss": 0.1348, "step": 47750 }, { "epoch": 0.3294997481838189, "grad_norm": 0.0, "learning_rate": 6.706658295790875e-06, "loss": 0.0, "step": 47760 }, { "epoch": 0.32956873891836325, "grad_norm": 0.0, "learning_rate": 6.705968388445432e-06, "loss": 0.0, "step": 47770 }, { "epoch": 0.3296377296529076, "grad_norm": 0.0, "learning_rate": 6.705278481099989e-06, "loss": 0.0, "step": 47780 }, { "epoch": 0.32970672038745197, "grad_norm": 0.0, "learning_rate": 6.7045885737545455e-06, "loss": 0.0, "step": 47790 }, { "epoch": 0.32977571112199633, "grad_norm": 0.0, "learning_rate": 6.703898666409102e-06, "loss": 0.0092, "step": 47800 }, { "epoch": 0.3298447018565407, "grad_norm": 0.0, "learning_rate": 6.7032087590636585e-06, "loss": 0.0021, "step": 47810 }, { "epoch": 0.329913692591085, "grad_norm": 0.0, "learning_rate": 6.702518851718215e-06, "loss": 0.0001, "step": 47820 }, { "epoch": 0.32998268332562936, "grad_norm": 3.451175345503543e-08, "learning_rate": 6.7018289443727715e-06, "loss": 0.0, "step": 47830 }, { "epoch": 0.3300516740601737, "grad_norm": 0.0, "learning_rate": 6.701139037027328e-06, "loss": 0.0, "step": 47840 }, { "epoch": 0.3301206647947181, "grad_norm": 0.015162219293415546, "learning_rate": 6.7004491296818845e-06, "loss": 0.8594, "step": 47850 }, { "epoch": 0.33018965552926244, "grad_norm": 0.0, "learning_rate": 6.69975922233644e-06, "loss": 0.0, "step": 47860 }, { "epoch": 0.33025864626380674, "grad_norm": 3.937416477128863e-05, "learning_rate": 6.699069314990997e-06, "loss": 0.0, "step": 47870 }, { "epoch": 0.3303276369983511, "grad_norm": 0.0, "learning_rate": 6.698379407645553e-06, "loss": 0.0, "step": 47880 }, { "epoch": 0.33039662773289546, "grad_norm": 0.00018171903502661735, "learning_rate": 6.69768950030011e-06, "loss": 0.0, "step": 47890 }, { "epoch": 0.3304656184674398, "grad_norm": 0.0, "learning_rate": 6.696999592954667e-06, "loss": 0.0007, "step": 47900 }, { "epoch": 0.3305346092019842, "grad_norm": 1.445470047656272e-06, "learning_rate": 6.696309685609223e-06, "loss": 0.0, "step": 47910 }, { "epoch": 0.33060359993652855, "grad_norm": 1.4895603044351446e-06, "learning_rate": 6.69561977826378e-06, "loss": 0.0, "step": 47920 }, { "epoch": 0.33067259067107285, "grad_norm": 0.0, "learning_rate": 6.694929870918336e-06, "loss": 0.0, "step": 47930 }, { "epoch": 0.3307415814056172, "grad_norm": 0.0, "learning_rate": 6.694239963572893e-06, "loss": 0.0, "step": 47940 }, { "epoch": 0.3308105721401616, "grad_norm": 0.0, "learning_rate": 6.693550056227449e-06, "loss": 0.0, "step": 47950 }, { "epoch": 0.33087956287470593, "grad_norm": 0.0, "learning_rate": 6.692860148882006e-06, "loss": 0.0, "step": 47960 }, { "epoch": 0.3309485536092503, "grad_norm": 0.0, "learning_rate": 6.6921702415365615e-06, "loss": 0.0, "step": 47970 }, { "epoch": 0.3310175443437946, "grad_norm": 0.0, "learning_rate": 6.691480334191118e-06, "loss": 0.0001, "step": 47980 }, { "epoch": 0.33108653507833896, "grad_norm": 0.0012809181353077292, "learning_rate": 6.6907904268456745e-06, "loss": 0.0, "step": 47990 }, { "epoch": 0.3311555258128833, "grad_norm": 0.0001356671709800139, "learning_rate": 6.690100519500232e-06, "loss": 0.482, "step": 48000 }, { "epoch": 0.3312245165474277, "grad_norm": 0.0, "learning_rate": 6.689410612154788e-06, "loss": 0.0, "step": 48010 }, { "epoch": 0.33129350728197204, "grad_norm": 0.0, "learning_rate": 6.688720704809345e-06, "loss": 0.2182, "step": 48020 }, { "epoch": 0.3313624980165164, "grad_norm": 0.0, "learning_rate": 6.688030797463901e-06, "loss": 0.0011, "step": 48030 }, { "epoch": 0.3314314887510607, "grad_norm": 0.0, "learning_rate": 6.687340890118458e-06, "loss": 0.0, "step": 48040 }, { "epoch": 0.33150047948560507, "grad_norm": 0.0, "learning_rate": 6.686650982773014e-06, "loss": 0.0, "step": 48050 }, { "epoch": 0.33156947022014943, "grad_norm": 0.0, "learning_rate": 6.685961075427571e-06, "loss": 0.0, "step": 48060 }, { "epoch": 0.3316384609546938, "grad_norm": 0.0, "learning_rate": 6.685271168082127e-06, "loss": 0.0, "step": 48070 }, { "epoch": 0.33170745168923815, "grad_norm": 1.834815634538245e-06, "learning_rate": 6.684581260736684e-06, "loss": 0.0003, "step": 48080 }, { "epoch": 0.33177644242378246, "grad_norm": 5.670102382282494e-06, "learning_rate": 6.683891353391239e-06, "loss": 0.0024, "step": 48090 }, { "epoch": 0.3318454331583268, "grad_norm": 0.0, "learning_rate": 6.683201446045796e-06, "loss": 0.0633, "step": 48100 }, { "epoch": 0.3319144238928712, "grad_norm": 8.239067761373065e-10, "learning_rate": 6.682511538700353e-06, "loss": 0.0002, "step": 48110 }, { "epoch": 0.33198341462741554, "grad_norm": 0.0, "learning_rate": 6.68182163135491e-06, "loss": 0.0, "step": 48120 }, { "epoch": 0.3320524053619599, "grad_norm": 2.2167314650545222e-09, "learning_rate": 6.681131724009466e-06, "loss": 0.0, "step": 48130 }, { "epoch": 0.33212139609650426, "grad_norm": 0.00016086299729067832, "learning_rate": 6.680441816664023e-06, "loss": 0.0, "step": 48140 }, { "epoch": 0.33219038683104857, "grad_norm": 5.424868376735503e-08, "learning_rate": 6.679751909318579e-06, "loss": 0.0, "step": 48150 }, { "epoch": 0.33225937756559293, "grad_norm": 0.0, "learning_rate": 6.679062001973136e-06, "loss": 0.0, "step": 48160 }, { "epoch": 0.3323283683001373, "grad_norm": 0.0, "learning_rate": 6.678372094627692e-06, "loss": 0.0, "step": 48170 }, { "epoch": 0.33239735903468165, "grad_norm": 0.0, "learning_rate": 6.677682187282249e-06, "loss": 0.0, "step": 48180 }, { "epoch": 0.332466349769226, "grad_norm": 0.054791051894426346, "learning_rate": 6.676992279936805e-06, "loss": 0.0, "step": 48190 }, { "epoch": 0.33253534050377037, "grad_norm": 0.0, "learning_rate": 6.676302372591361e-06, "loss": 0.0151, "step": 48200 }, { "epoch": 0.3326043312383147, "grad_norm": 0.0009703313116915524, "learning_rate": 6.675612465245917e-06, "loss": 0.005, "step": 48210 }, { "epoch": 0.33267332197285904, "grad_norm": 0.0, "learning_rate": 6.6749225579004754e-06, "loss": 0.0, "step": 48220 }, { "epoch": 0.3327423127074034, "grad_norm": 0.0, "learning_rate": 6.674232650555031e-06, "loss": 0.0, "step": 48230 }, { "epoch": 0.33281130344194776, "grad_norm": 0.0, "learning_rate": 6.673542743209588e-06, "loss": 1.0576, "step": 48240 }, { "epoch": 0.3328802941764921, "grad_norm": 0.0, "learning_rate": 6.672852835864144e-06, "loss": 0.0, "step": 48250 }, { "epoch": 0.3329492849110364, "grad_norm": 0.0, "learning_rate": 6.6721629285187006e-06, "loss": 0.0, "step": 48260 }, { "epoch": 0.3330182756455808, "grad_norm": 0.0, "learning_rate": 6.671473021173257e-06, "loss": 0.0029, "step": 48270 }, { "epoch": 0.33308726638012515, "grad_norm": 0.0, "learning_rate": 6.6707831138278135e-06, "loss": 0.0, "step": 48280 }, { "epoch": 0.3331562571146695, "grad_norm": 0.0, "learning_rate": 6.67009320648237e-06, "loss": 0.0, "step": 48290 }, { "epoch": 0.33322524784921387, "grad_norm": 0.0, "learning_rate": 6.6694032991369265e-06, "loss": 0.0, "step": 48300 }, { "epoch": 0.33329423858375823, "grad_norm": 0.0, "learning_rate": 6.668713391791482e-06, "loss": 0.0, "step": 48310 }, { "epoch": 0.33336322931830253, "grad_norm": 0.0, "learning_rate": 6.668023484446039e-06, "loss": 0.0, "step": 48320 }, { "epoch": 0.3334322200528469, "grad_norm": 1.0825621510335282e-09, "learning_rate": 6.667333577100597e-06, "loss": 0.0, "step": 48330 }, { "epoch": 0.33350121078739126, "grad_norm": 0.0, "learning_rate": 6.6666436697551525e-06, "loss": 0.0, "step": 48340 }, { "epoch": 0.3335702015219356, "grad_norm": 0.0, "learning_rate": 6.665953762409709e-06, "loss": 0.0011, "step": 48350 }, { "epoch": 0.33363919225648, "grad_norm": 0.0, "learning_rate": 6.6652638550642655e-06, "loss": 0.0, "step": 48360 }, { "epoch": 0.3337081829910243, "grad_norm": 0.002793417079374194, "learning_rate": 6.664573947718822e-06, "loss": 0.0006, "step": 48370 }, { "epoch": 0.33377717372556864, "grad_norm": 0.0016971139702945948, "learning_rate": 6.6638840403733784e-06, "loss": 0.0, "step": 48380 }, { "epoch": 0.333846164460113, "grad_norm": 0.0, "learning_rate": 6.663194133027935e-06, "loss": 0.0, "step": 48390 }, { "epoch": 0.33391515519465736, "grad_norm": 0.038245365023612976, "learning_rate": 6.6625042256824914e-06, "loss": 0.0, "step": 48400 }, { "epoch": 0.3339841459292017, "grad_norm": 0.0, "learning_rate": 6.661814318337048e-06, "loss": 0.0, "step": 48410 }, { "epoch": 0.3340531366637461, "grad_norm": 6.902739713154915e-09, "learning_rate": 6.661124410991604e-06, "loss": 0.0, "step": 48420 }, { "epoch": 0.3341221273982904, "grad_norm": 0.0, "learning_rate": 6.66043450364616e-06, "loss": 0.0, "step": 48430 }, { "epoch": 0.33419111813283475, "grad_norm": 0.0, "learning_rate": 6.659744596300718e-06, "loss": 0.0, "step": 48440 }, { "epoch": 0.3342601088673791, "grad_norm": 0.0, "learning_rate": 6.659054688955274e-06, "loss": 0.0001, "step": 48450 }, { "epoch": 0.3343290996019235, "grad_norm": 0.0, "learning_rate": 6.65836478160983e-06, "loss": 0.0, "step": 48460 }, { "epoch": 0.33439809033646783, "grad_norm": 0.0, "learning_rate": 6.657674874264387e-06, "loss": 0.0, "step": 48470 }, { "epoch": 0.33446708107101214, "grad_norm": 0.0, "learning_rate": 6.656984966918943e-06, "loss": 0.6441, "step": 48480 }, { "epoch": 0.3345360718055565, "grad_norm": 0.0, "learning_rate": 6.6562950595735e-06, "loss": 0.001, "step": 48490 }, { "epoch": 0.33460506254010086, "grad_norm": 0.0, "learning_rate": 6.655605152228056e-06, "loss": 0.0043, "step": 48500 }, { "epoch": 0.3346740532746452, "grad_norm": 0.15096665918827057, "learning_rate": 6.654915244882613e-06, "loss": 0.0, "step": 48510 }, { "epoch": 0.3347430440091896, "grad_norm": 0.0, "learning_rate": 6.654225337537169e-06, "loss": 0.0, "step": 48520 }, { "epoch": 0.33481203474373394, "grad_norm": 0.0, "learning_rate": 6.653535430191726e-06, "loss": 0.0177, "step": 48530 }, { "epoch": 0.33488102547827825, "grad_norm": 0.0, "learning_rate": 6.6528455228462814e-06, "loss": 0.0, "step": 48540 }, { "epoch": 0.3349500162128226, "grad_norm": 0.11346650123596191, "learning_rate": 6.65215561550084e-06, "loss": 0.0, "step": 48550 }, { "epoch": 0.33501900694736697, "grad_norm": 0.0, "learning_rate": 6.651465708155396e-06, "loss": 0.0, "step": 48560 }, { "epoch": 0.33508799768191133, "grad_norm": 0.0, "learning_rate": 6.650775800809952e-06, "loss": 0.0, "step": 48570 }, { "epoch": 0.3351569884164557, "grad_norm": 0.0, "learning_rate": 6.650085893464508e-06, "loss": 0.0, "step": 48580 }, { "epoch": 0.335225979151, "grad_norm": 0.0, "learning_rate": 6.649395986119065e-06, "loss": 0.0, "step": 48590 }, { "epoch": 0.33529496988554436, "grad_norm": 0.0, "learning_rate": 6.648706078773621e-06, "loss": 0.0002, "step": 48600 }, { "epoch": 0.3353639606200887, "grad_norm": 0.0, "learning_rate": 6.648016171428178e-06, "loss": 0.0, "step": 48610 }, { "epoch": 0.3354329513546331, "grad_norm": 0.0, "learning_rate": 6.647326264082734e-06, "loss": 0.0, "step": 48620 }, { "epoch": 0.33550194208917744, "grad_norm": 0.0, "learning_rate": 6.646636356737291e-06, "loss": 0.0, "step": 48630 }, { "epoch": 0.3355709328237218, "grad_norm": 0.0, "learning_rate": 6.645946449391847e-06, "loss": 0.0, "step": 48640 }, { "epoch": 0.3356399235582661, "grad_norm": 5.627112287243108e-08, "learning_rate": 6.645256542046403e-06, "loss": 0.0, "step": 48650 }, { "epoch": 0.33570891429281047, "grad_norm": 0.0, "learning_rate": 6.644566634700961e-06, "loss": 0.0, "step": 48660 }, { "epoch": 0.33577790502735483, "grad_norm": 0.0, "learning_rate": 6.6438767273555175e-06, "loss": 0.0, "step": 48670 }, { "epoch": 0.3358468957618992, "grad_norm": 0.002178758382797241, "learning_rate": 6.643186820010073e-06, "loss": 0.0, "step": 48680 }, { "epoch": 0.33591588649644355, "grad_norm": 0.0, "learning_rate": 6.64249691266463e-06, "loss": 0.0279, "step": 48690 }, { "epoch": 0.33598487723098786, "grad_norm": 0.0, "learning_rate": 6.641807005319186e-06, "loss": 0.0, "step": 48700 }, { "epoch": 0.3360538679655322, "grad_norm": 0.0014523827703669667, "learning_rate": 6.641117097973743e-06, "loss": 0.002, "step": 48710 }, { "epoch": 0.3361228587000766, "grad_norm": 0.0, "learning_rate": 6.640427190628299e-06, "loss": 0.0, "step": 48720 }, { "epoch": 0.33619184943462094, "grad_norm": 0.00023559408145956695, "learning_rate": 6.639737283282856e-06, "loss": 0.0, "step": 48730 }, { "epoch": 0.3362608401691653, "grad_norm": 0.0, "learning_rate": 6.639047375937412e-06, "loss": 0.0026, "step": 48740 }, { "epoch": 0.33632983090370966, "grad_norm": 9.214473073448914e-10, "learning_rate": 6.638357468591969e-06, "loss": 0.0653, "step": 48750 }, { "epoch": 0.33639882163825396, "grad_norm": 0.0, "learning_rate": 6.637667561246525e-06, "loss": 0.0, "step": 48760 }, { "epoch": 0.3364678123727983, "grad_norm": 0.0, "learning_rate": 6.636977653901082e-06, "loss": 0.0, "step": 48770 }, { "epoch": 0.3365368031073427, "grad_norm": 0.0, "learning_rate": 6.636287746555639e-06, "loss": 0.0, "step": 48780 }, { "epoch": 0.33660579384188705, "grad_norm": 0.0012921111192554235, "learning_rate": 6.6355978392101946e-06, "loss": 0.0, "step": 48790 }, { "epoch": 0.3366747845764314, "grad_norm": 0.0, "learning_rate": 6.634907931864751e-06, "loss": 0.0, "step": 48800 }, { "epoch": 0.3367437753109757, "grad_norm": 7.016439429463617e-09, "learning_rate": 6.6342180245193075e-06, "loss": 0.0, "step": 48810 }, { "epoch": 0.3368127660455201, "grad_norm": 322.56787109375, "learning_rate": 6.633528117173864e-06, "loss": 0.129, "step": 48820 }, { "epoch": 0.33688175678006443, "grad_norm": 1.9350633806425321e-07, "learning_rate": 6.6328382098284205e-06, "loss": 0.0042, "step": 48830 }, { "epoch": 0.3369507475146088, "grad_norm": 0.0039228470996022224, "learning_rate": 6.632148302482977e-06, "loss": 0.0, "step": 48840 }, { "epoch": 0.33701973824915316, "grad_norm": 0.0, "learning_rate": 6.6314583951375335e-06, "loss": 0.0, "step": 48850 }, { "epoch": 0.3370887289836975, "grad_norm": 8.374750177608803e-05, "learning_rate": 6.63076848779209e-06, "loss": 0.0, "step": 48860 }, { "epoch": 0.3371577197182418, "grad_norm": 9.157400948645034e-10, "learning_rate": 6.6300785804466465e-06, "loss": 0.0, "step": 48870 }, { "epoch": 0.3372267104527862, "grad_norm": 0.0, "learning_rate": 6.629388673101204e-06, "loss": 0.0, "step": 48880 }, { "epoch": 0.33729570118733054, "grad_norm": 0.04398076981306076, "learning_rate": 6.62869876575576e-06, "loss": 0.5645, "step": 48890 }, { "epoch": 0.3373646919218749, "grad_norm": 0.0, "learning_rate": 6.628008858410317e-06, "loss": 0.0402, "step": 48900 }, { "epoch": 0.33743368265641926, "grad_norm": 1.1633978829195257e-06, "learning_rate": 6.6273189510648724e-06, "loss": 0.0, "step": 48910 }, { "epoch": 0.33750267339096357, "grad_norm": 5.3339068273317025e-08, "learning_rate": 6.626629043719429e-06, "loss": 0.0, "step": 48920 }, { "epoch": 0.33757166412550793, "grad_norm": 0.0, "learning_rate": 6.625939136373985e-06, "loss": 0.0, "step": 48930 }, { "epoch": 0.3376406548600523, "grad_norm": 0.0, "learning_rate": 6.625249229028542e-06, "loss": 0.0007, "step": 48940 }, { "epoch": 0.33770964559459665, "grad_norm": 0.0, "learning_rate": 6.624559321683098e-06, "loss": 0.0, "step": 48950 }, { "epoch": 0.337778636329141, "grad_norm": 0.0, "learning_rate": 6.623869414337655e-06, "loss": 0.0002, "step": 48960 }, { "epoch": 0.3378476270636854, "grad_norm": 0.0, "learning_rate": 6.623179506992211e-06, "loss": 0.0, "step": 48970 }, { "epoch": 0.3379166177982297, "grad_norm": 4.9775690236231185e-09, "learning_rate": 6.622489599646768e-06, "loss": 0.0, "step": 48980 }, { "epoch": 0.33798560853277404, "grad_norm": 0.0041794306598603725, "learning_rate": 6.621799692301325e-06, "loss": 0.187, "step": 48990 }, { "epoch": 0.3380545992673184, "grad_norm": 8.096119330502916e-08, "learning_rate": 6.621109784955882e-06, "loss": 0.0001, "step": 49000 }, { "epoch": 0.33812359000186276, "grad_norm": 0.0027439144905656576, "learning_rate": 6.620419877610438e-06, "loss": 0.0001, "step": 49010 }, { "epoch": 0.3381925807364071, "grad_norm": 0.0, "learning_rate": 6.619729970264994e-06, "loss": 0.0, "step": 49020 }, { "epoch": 0.3382615714709514, "grad_norm": 1.600804171175696e-05, "learning_rate": 6.61904006291955e-06, "loss": 0.0, "step": 49030 }, { "epoch": 0.3383305622054958, "grad_norm": 4.895523488812614e-07, "learning_rate": 6.618350155574107e-06, "loss": 0.0, "step": 49040 }, { "epoch": 0.33839955294004015, "grad_norm": 0.005733453668653965, "learning_rate": 6.617660248228663e-06, "loss": 0.0, "step": 49050 }, { "epoch": 0.3384685436745845, "grad_norm": 1.7886375713871416e-09, "learning_rate": 6.61697034088322e-06, "loss": 0.0, "step": 49060 }, { "epoch": 0.33853753440912887, "grad_norm": 9.078182483790442e-05, "learning_rate": 6.616280433537776e-06, "loss": 0.0, "step": 49070 }, { "epoch": 0.33860652514367323, "grad_norm": 0.0, "learning_rate": 6.615590526192333e-06, "loss": 0.0, "step": 49080 }, { "epoch": 0.33867551587821754, "grad_norm": 0.06932124495506287, "learning_rate": 6.614900618846889e-06, "loss": 0.0, "step": 49090 }, { "epoch": 0.3387445066127619, "grad_norm": 9.345848894781739e-08, "learning_rate": 6.614210711501447e-06, "loss": 0.0055, "step": 49100 }, { "epoch": 0.33881349734730626, "grad_norm": 4.323508262634277, "learning_rate": 6.613520804156003e-06, "loss": 0.0205, "step": 49110 }, { "epoch": 0.3388824880818506, "grad_norm": 0.0, "learning_rate": 6.61283089681056e-06, "loss": 0.0031, "step": 49120 }, { "epoch": 0.338951478816395, "grad_norm": 0.0, "learning_rate": 6.612140989465115e-06, "loss": 0.0001, "step": 49130 }, { "epoch": 0.3390204695509393, "grad_norm": 0.0, "learning_rate": 6.611451082119672e-06, "loss": 0.0, "step": 49140 }, { "epoch": 0.33908946028548365, "grad_norm": 0.0, "learning_rate": 6.610761174774228e-06, "loss": 0.2012, "step": 49150 }, { "epoch": 0.339158451020028, "grad_norm": 0.0, "learning_rate": 6.610071267428785e-06, "loss": 0.0, "step": 49160 }, { "epoch": 0.33922744175457237, "grad_norm": 0.0, "learning_rate": 6.609381360083341e-06, "loss": 0.0, "step": 49170 }, { "epoch": 0.33929643248911673, "grad_norm": 0.0, "learning_rate": 6.608691452737898e-06, "loss": 0.0, "step": 49180 }, { "epoch": 0.3393654232236611, "grad_norm": 0.0, "learning_rate": 6.608070536126999e-06, "loss": 0.5285, "step": 49190 }, { "epoch": 0.3394344139582054, "grad_norm": 0.0, "learning_rate": 6.607380628781556e-06, "loss": 0.0, "step": 49200 }, { "epoch": 0.33950340469274976, "grad_norm": 0.0, "learning_rate": 6.606690721436112e-06, "loss": 0.0, "step": 49210 }, { "epoch": 0.3395723954272941, "grad_norm": 0.00011352659930707887, "learning_rate": 6.606000814090669e-06, "loss": 0.0, "step": 49220 }, { "epoch": 0.3396413861618385, "grad_norm": 0.0, "learning_rate": 6.605310906745225e-06, "loss": 0.0523, "step": 49230 }, { "epoch": 0.33971037689638284, "grad_norm": 0.3528650104999542, "learning_rate": 6.604620999399781e-06, "loss": 0.0, "step": 49240 }, { "epoch": 0.33977936763092714, "grad_norm": 0.0, "learning_rate": 6.603931092054337e-06, "loss": 0.0001, "step": 49250 }, { "epoch": 0.3398483583654715, "grad_norm": 0.0, "learning_rate": 6.603241184708894e-06, "loss": 0.0, "step": 49260 }, { "epoch": 0.33991734910001586, "grad_norm": 0.0, "learning_rate": 6.60255127736345e-06, "loss": 0.0, "step": 49270 }, { "epoch": 0.3399863398345602, "grad_norm": 0.0, "learning_rate": 6.601861370018007e-06, "loss": 0.0, "step": 49280 }, { "epoch": 0.3400553305691046, "grad_norm": 0.0, "learning_rate": 6.601171462672563e-06, "loss": 0.0, "step": 49290 }, { "epoch": 0.34012432130364895, "grad_norm": 0.0, "learning_rate": 6.600481555327121e-06, "loss": 0.0, "step": 49300 }, { "epoch": 0.34019331203819325, "grad_norm": 0.0, "learning_rate": 6.599791647981677e-06, "loss": 0.0003, "step": 49310 }, { "epoch": 0.3402623027727376, "grad_norm": 0.0, "learning_rate": 6.5991017406362336e-06, "loss": 0.0, "step": 49320 }, { "epoch": 0.340331293507282, "grad_norm": 0.0, "learning_rate": 6.59841183329079e-06, "loss": 0.0, "step": 49330 }, { "epoch": 0.34040028424182633, "grad_norm": 0.0023016713093966246, "learning_rate": 6.5977219259453465e-06, "loss": 0.0, "step": 49340 }, { "epoch": 0.3404692749763707, "grad_norm": 0.0, "learning_rate": 6.597032018599902e-06, "loss": 0.0, "step": 49350 }, { "epoch": 0.340538265710915, "grad_norm": 1.1255200149662414e-07, "learning_rate": 6.596342111254459e-06, "loss": 0.0, "step": 49360 }, { "epoch": 0.34060725644545936, "grad_norm": 0.0, "learning_rate": 6.595652203909015e-06, "loss": 0.0, "step": 49370 }, { "epoch": 0.3406762471800037, "grad_norm": 0.0, "learning_rate": 6.594962296563572e-06, "loss": 0.0, "step": 49380 }, { "epoch": 0.3407452379145481, "grad_norm": 0.0, "learning_rate": 6.594272389218128e-06, "loss": 0.0047, "step": 49390 }, { "epoch": 0.34081422864909244, "grad_norm": 0.0, "learning_rate": 6.593582481872685e-06, "loss": 0.0001, "step": 49400 }, { "epoch": 0.3408832193836368, "grad_norm": 0.0, "learning_rate": 6.592892574527242e-06, "loss": 0.0, "step": 49410 }, { "epoch": 0.3409522101181811, "grad_norm": 0.0, "learning_rate": 6.5922026671817985e-06, "loss": 0.0004, "step": 49420 }, { "epoch": 0.34102120085272547, "grad_norm": 0.0, "learning_rate": 6.591512759836355e-06, "loss": 0.0, "step": 49430 }, { "epoch": 0.34109019158726983, "grad_norm": 0.0, "learning_rate": 6.5908228524909115e-06, "loss": 0.1472, "step": 49440 }, { "epoch": 0.3411591823218142, "grad_norm": 0.0, "learning_rate": 6.590132945145468e-06, "loss": 0.0, "step": 49450 }, { "epoch": 0.34122817305635855, "grad_norm": 0.0011365123791620135, "learning_rate": 6.5894430378000244e-06, "loss": 0.0, "step": 49460 }, { "epoch": 0.34129716379090286, "grad_norm": 0.0, "learning_rate": 6.58875313045458e-06, "loss": 0.0, "step": 49470 }, { "epoch": 0.3413661545254472, "grad_norm": 0.00041695666732266545, "learning_rate": 6.5880632231091366e-06, "loss": 0.0, "step": 49480 }, { "epoch": 0.3414351452599916, "grad_norm": 0.0, "learning_rate": 6.587373315763693e-06, "loss": 0.0003, "step": 49490 }, { "epoch": 0.34150413599453594, "grad_norm": 0.0, "learning_rate": 6.5866834084182495e-06, "loss": 0.0, "step": 49500 }, { "epoch": 0.3415731267290803, "grad_norm": 0.0, "learning_rate": 6.585993501072806e-06, "loss": 0.0, "step": 49510 }, { "epoch": 0.34164211746362466, "grad_norm": 0.0, "learning_rate": 6.585303593727363e-06, "loss": 0.0402, "step": 49520 }, { "epoch": 0.34171110819816897, "grad_norm": 0.0, "learning_rate": 6.58461368638192e-06, "loss": 0.0, "step": 49530 }, { "epoch": 0.3417800989327133, "grad_norm": 1.6668776652295492e-06, "learning_rate": 6.583923779036476e-06, "loss": 0.0, "step": 49540 }, { "epoch": 0.3418490896672577, "grad_norm": 0.003756061429157853, "learning_rate": 6.583233871691033e-06, "loss": 0.0065, "step": 49550 }, { "epoch": 0.34191808040180205, "grad_norm": 9.396153854979161e-10, "learning_rate": 6.582543964345589e-06, "loss": 0.0, "step": 49560 }, { "epoch": 0.3419870711363464, "grad_norm": 1.390722843552794e-07, "learning_rate": 6.581854057000146e-06, "loss": 0.0, "step": 49570 }, { "epoch": 0.3420560618708907, "grad_norm": 0.0, "learning_rate": 6.5811641496547015e-06, "loss": 0.0, "step": 49580 }, { "epoch": 0.3421250526054351, "grad_norm": 0.0, "learning_rate": 6.580474242309258e-06, "loss": 0.0, "step": 49590 }, { "epoch": 0.34219404333997944, "grad_norm": 0.0, "learning_rate": 6.5797843349638145e-06, "loss": 0.0, "step": 49600 }, { "epoch": 0.3422630340745238, "grad_norm": 0.0, "learning_rate": 6.579094427618371e-06, "loss": 0.0, "step": 49610 }, { "epoch": 0.34233202480906816, "grad_norm": 0.0, "learning_rate": 6.5784045202729274e-06, "loss": 0.0, "step": 49620 }, { "epoch": 0.3424010155436125, "grad_norm": 0.0, "learning_rate": 6.577714612927485e-06, "loss": 0.0, "step": 49630 }, { "epoch": 0.3424700062781568, "grad_norm": 0.0, "learning_rate": 6.577024705582041e-06, "loss": 0.0, "step": 49640 }, { "epoch": 0.3425389970127012, "grad_norm": 0.0, "learning_rate": 6.576334798236598e-06, "loss": 0.0, "step": 49650 }, { "epoch": 0.34260798774724555, "grad_norm": 0.0022532902657985687, "learning_rate": 6.575644890891154e-06, "loss": 0.0, "step": 49660 }, { "epoch": 0.3426769784817899, "grad_norm": 0.0, "learning_rate": 6.574954983545711e-06, "loss": 0.4844, "step": 49670 }, { "epoch": 0.34274596921633427, "grad_norm": 0.0, "learning_rate": 6.574265076200267e-06, "loss": 0.0, "step": 49680 }, { "epoch": 0.3428149599508786, "grad_norm": 0.0, "learning_rate": 6.573575168854823e-06, "loss": 0.0, "step": 49690 }, { "epoch": 0.34288395068542293, "grad_norm": 1.9226769065738836e-09, "learning_rate": 6.572885261509379e-06, "loss": 0.0, "step": 49700 }, { "epoch": 0.3429529414199673, "grad_norm": 0.0, "learning_rate": 6.572195354163936e-06, "loss": 0.0, "step": 49710 }, { "epoch": 0.34302193215451166, "grad_norm": 0.0, "learning_rate": 6.571505446818492e-06, "loss": 0.0344, "step": 49720 }, { "epoch": 0.343090922889056, "grad_norm": 0.0, "learning_rate": 6.570815539473049e-06, "loss": 0.0, "step": 49730 }, { "epoch": 0.3431599136236004, "grad_norm": 0.0, "learning_rate": 6.570125632127606e-06, "loss": 0.0007, "step": 49740 }, { "epoch": 0.3432289043581447, "grad_norm": 0.0, "learning_rate": 6.569435724782163e-06, "loss": 0.0, "step": 49750 }, { "epoch": 0.34329789509268904, "grad_norm": 0.0030286326073110104, "learning_rate": 6.568745817436719e-06, "loss": 0.0, "step": 49760 }, { "epoch": 0.3433668858272334, "grad_norm": 0.0, "learning_rate": 6.568055910091276e-06, "loss": 0.0, "step": 49770 }, { "epoch": 0.34343587656177776, "grad_norm": 0.0, "learning_rate": 6.567366002745832e-06, "loss": 0.003, "step": 49780 }, { "epoch": 0.3435048672963221, "grad_norm": 0.0, "learning_rate": 6.566676095400389e-06, "loss": 0.0, "step": 49790 }, { "epoch": 0.34357385803086643, "grad_norm": 0.0, "learning_rate": 6.565986188054945e-06, "loss": 0.0001, "step": 49800 }, { "epoch": 0.3436428487654108, "grad_norm": 0.0, "learning_rate": 6.565296280709501e-06, "loss": 0.0, "step": 49810 }, { "epoch": 0.34371183949995515, "grad_norm": 0.0, "learning_rate": 6.564606373364057e-06, "loss": 0.0, "step": 49820 }, { "epoch": 0.3437808302344995, "grad_norm": 0.0, "learning_rate": 6.563916466018614e-06, "loss": 0.0, "step": 49830 }, { "epoch": 0.3438498209690439, "grad_norm": 0.0, "learning_rate": 6.56322655867317e-06, "loss": 0.0, "step": 49840 }, { "epoch": 0.34391881170358823, "grad_norm": 2.9462868042173795e-05, "learning_rate": 6.5625366513277276e-06, "loss": 0.0, "step": 49850 }, { "epoch": 0.34398780243813254, "grad_norm": 0.0, "learning_rate": 6.561846743982284e-06, "loss": 0.0, "step": 49860 }, { "epoch": 0.3440567931726769, "grad_norm": 0.0, "learning_rate": 6.5611568366368405e-06, "loss": 0.0, "step": 49870 }, { "epoch": 0.34412578390722126, "grad_norm": 0.0, "learning_rate": 6.560466929291397e-06, "loss": 0.0, "step": 49880 }, { "epoch": 0.3441947746417656, "grad_norm": 0.0, "learning_rate": 6.5597770219459535e-06, "loss": 0.0, "step": 49890 }, { "epoch": 0.34426376537631, "grad_norm": 0.0, "learning_rate": 6.55908711460051e-06, "loss": 0.0, "step": 49900 }, { "epoch": 0.3443327561108543, "grad_norm": 0.0, "learning_rate": 6.5583972072550665e-06, "loss": 0.0, "step": 49910 }, { "epoch": 0.34440174684539865, "grad_norm": 1.3277878679218702e-05, "learning_rate": 6.557707299909622e-06, "loss": 0.0, "step": 49920 }, { "epoch": 0.344470737579943, "grad_norm": 0.0, "learning_rate": 6.557017392564179e-06, "loss": 0.0, "step": 49930 }, { "epoch": 0.34453972831448737, "grad_norm": 0.0, "learning_rate": 6.556327485218735e-06, "loss": 0.0, "step": 49940 }, { "epoch": 0.34460871904903173, "grad_norm": 0.0, "learning_rate": 6.555637577873292e-06, "loss": 0.0, "step": 49950 }, { "epoch": 0.3446777097835761, "grad_norm": 1.1702499389648438, "learning_rate": 6.554947670527849e-06, "loss": 0.0001, "step": 49960 }, { "epoch": 0.3447467005181204, "grad_norm": 0.0, "learning_rate": 6.5542577631824054e-06, "loss": 0.0, "step": 49970 }, { "epoch": 0.34481569125266476, "grad_norm": 0.0001836222072597593, "learning_rate": 6.553567855836962e-06, "loss": 0.0613, "step": 49980 }, { "epoch": 0.3448846819872091, "grad_norm": 9.513488885559696e-10, "learning_rate": 6.552877948491518e-06, "loss": 0.0, "step": 49990 }, { "epoch": 0.3449536727217535, "grad_norm": 0.0, "learning_rate": 6.552188041146075e-06, "loss": 0.0001, "step": 50000 }, { "epoch": 0.34502266345629784, "grad_norm": 0.0, "learning_rate": 6.551498133800631e-06, "loss": 0.0, "step": 50010 }, { "epoch": 0.34509165419084215, "grad_norm": 0.0, "learning_rate": 6.550808226455188e-06, "loss": 0.0, "step": 50020 }, { "epoch": 0.3451606449253865, "grad_norm": 0.0, "learning_rate": 6.5501183191097435e-06, "loss": 0.0, "step": 50030 }, { "epoch": 0.34522963565993087, "grad_norm": 8.063936718372133e-08, "learning_rate": 6.5494284117643e-06, "loss": 0.0089, "step": 50040 }, { "epoch": 0.3452986263944752, "grad_norm": 0.0, "learning_rate": 6.5487385044188565e-06, "loss": 0.0, "step": 50050 }, { "epoch": 0.3453676171290196, "grad_norm": 1.1236663794989e-06, "learning_rate": 6.548048597073413e-06, "loss": 0.0, "step": 50060 }, { "epoch": 0.34543660786356395, "grad_norm": 9.154676461342603e-10, "learning_rate": 6.54735868972797e-06, "loss": 0.002, "step": 50070 }, { "epoch": 0.34550559859810825, "grad_norm": 0.0, "learning_rate": 6.546668782382527e-06, "loss": 0.0, "step": 50080 }, { "epoch": 0.3455745893326526, "grad_norm": 0.00041500889346934855, "learning_rate": 6.545978875037083e-06, "loss": 0.0, "step": 50090 }, { "epoch": 0.345643580067197, "grad_norm": 0.0, "learning_rate": 6.54528896769164e-06, "loss": 0.0003, "step": 50100 }, { "epoch": 0.34571257080174134, "grad_norm": 0.0, "learning_rate": 6.544599060346196e-06, "loss": 0.0, "step": 50110 }, { "epoch": 0.3457815615362857, "grad_norm": 0.0, "learning_rate": 6.543909153000753e-06, "loss": 0.0, "step": 50120 }, { "epoch": 0.34585055227083, "grad_norm": 0.0009828883921727538, "learning_rate": 6.543219245655309e-06, "loss": 0.0, "step": 50130 }, { "epoch": 0.34591954300537436, "grad_norm": 0.0, "learning_rate": 6.542529338309866e-06, "loss": 0.0001, "step": 50140 }, { "epoch": 0.3459885337399187, "grad_norm": 0.0, "learning_rate": 6.541839430964421e-06, "loss": 0.0014, "step": 50150 }, { "epoch": 0.3460575244744631, "grad_norm": 0.0, "learning_rate": 6.541149523618978e-06, "loss": 0.0, "step": 50160 }, { "epoch": 0.34612651520900745, "grad_norm": 0.0, "learning_rate": 6.540459616273534e-06, "loss": 0.0, "step": 50170 }, { "epoch": 0.3461955059435518, "grad_norm": 0.0, "learning_rate": 6.539769708928092e-06, "loss": 0.0021, "step": 50180 }, { "epoch": 0.3462644966780961, "grad_norm": 8.57240763707523e-07, "learning_rate": 6.539079801582648e-06, "loss": 0.0, "step": 50190 }, { "epoch": 0.3463334874126405, "grad_norm": 0.0, "learning_rate": 6.538389894237205e-06, "loss": 0.0, "step": 50200 }, { "epoch": 0.34640247814718483, "grad_norm": 0.0, "learning_rate": 6.537699986891761e-06, "loss": 0.0, "step": 50210 }, { "epoch": 0.3464714688817292, "grad_norm": 0.0, "learning_rate": 6.537010079546318e-06, "loss": 0.0, "step": 50220 }, { "epoch": 0.34654045961627356, "grad_norm": 0.0, "learning_rate": 6.536320172200874e-06, "loss": 0.0, "step": 50230 }, { "epoch": 0.34660945035081786, "grad_norm": 0.0, "learning_rate": 6.535630264855431e-06, "loss": 0.0, "step": 50240 }, { "epoch": 0.3466784410853622, "grad_norm": 0.0, "learning_rate": 6.534940357509987e-06, "loss": 0.0, "step": 50250 }, { "epoch": 0.3467474318199066, "grad_norm": 0.0, "learning_rate": 6.534250450164543e-06, "loss": 0.0, "step": 50260 }, { "epoch": 0.34681642255445094, "grad_norm": 9.376717567443848, "learning_rate": 6.533560542819099e-06, "loss": 0.0013, "step": 50270 }, { "epoch": 0.3468854132889953, "grad_norm": 0.03148139640688896, "learning_rate": 6.532870635473656e-06, "loss": 0.0, "step": 50280 }, { "epoch": 0.34695440402353966, "grad_norm": 0.0, "learning_rate": 6.532180728128213e-06, "loss": 0.0002, "step": 50290 }, { "epoch": 0.34702339475808397, "grad_norm": 0.612483024597168, "learning_rate": 6.53149082078277e-06, "loss": 0.0001, "step": 50300 }, { "epoch": 0.34709238549262833, "grad_norm": 0.0, "learning_rate": 6.530800913437326e-06, "loss": 0.0, "step": 50310 }, { "epoch": 0.3471613762271727, "grad_norm": 0.0, "learning_rate": 6.530111006091883e-06, "loss": 0.0, "step": 50320 }, { "epoch": 0.34723036696171705, "grad_norm": 0.0, "learning_rate": 6.529421098746439e-06, "loss": 0.0, "step": 50330 }, { "epoch": 0.3472993576962614, "grad_norm": 4.289747721486492e-06, "learning_rate": 6.528731191400996e-06, "loss": 0.0001, "step": 50340 }, { "epoch": 0.3473683484308057, "grad_norm": 0.0, "learning_rate": 6.528041284055552e-06, "loss": 0.0005, "step": 50350 }, { "epoch": 0.3474373391653501, "grad_norm": 0.0, "learning_rate": 6.5273513767101086e-06, "loss": 0.0127, "step": 50360 }, { "epoch": 0.34750632989989444, "grad_norm": 0.008071240969002247, "learning_rate": 6.526661469364664e-06, "loss": 0.0, "step": 50370 }, { "epoch": 0.3475753206344388, "grad_norm": 68.7182846069336, "learning_rate": 6.525971562019221e-06, "loss": 0.0135, "step": 50380 }, { "epoch": 0.34764431136898316, "grad_norm": 0.0, "learning_rate": 6.525281654673777e-06, "loss": 0.0002, "step": 50390 }, { "epoch": 0.3477133021035275, "grad_norm": 0.6752731800079346, "learning_rate": 6.5245917473283345e-06, "loss": 0.0001, "step": 50400 }, { "epoch": 0.3477822928380718, "grad_norm": 0.0, "learning_rate": 6.523901839982891e-06, "loss": 0.0, "step": 50410 }, { "epoch": 0.3478512835726162, "grad_norm": 0.0, "learning_rate": 6.5232119326374475e-06, "loss": 0.0007, "step": 50420 }, { "epoch": 0.34792027430716055, "grad_norm": 0.0, "learning_rate": 6.522522025292004e-06, "loss": 0.0, "step": 50430 }, { "epoch": 0.3479892650417049, "grad_norm": 0.0, "learning_rate": 6.5218321179465605e-06, "loss": 0.0, "step": 50440 }, { "epoch": 0.34805825577624927, "grad_norm": 0.0, "learning_rate": 6.521142210601117e-06, "loss": 0.0, "step": 50450 }, { "epoch": 0.3481272465107936, "grad_norm": 0.015025528147816658, "learning_rate": 6.5204523032556735e-06, "loss": 0.0, "step": 50460 }, { "epoch": 0.34819623724533794, "grad_norm": 0.0, "learning_rate": 6.51976239591023e-06, "loss": 0.0, "step": 50470 }, { "epoch": 0.3482652279798823, "grad_norm": 0.0, "learning_rate": 6.5190724885647864e-06, "loss": 0.0, "step": 50480 }, { "epoch": 0.34833421871442666, "grad_norm": 0.0, "learning_rate": 6.518382581219342e-06, "loss": 0.0, "step": 50490 }, { "epoch": 0.348403209448971, "grad_norm": 0.0, "learning_rate": 6.517692673873899e-06, "loss": 0.0, "step": 50500 }, { "epoch": 0.3484722001835154, "grad_norm": 0.29596391320228577, "learning_rate": 6.517002766528456e-06, "loss": 0.0, "step": 50510 }, { "epoch": 0.3485411909180597, "grad_norm": 0.0, "learning_rate": 6.516312859183012e-06, "loss": 0.0, "step": 50520 }, { "epoch": 0.34861018165260405, "grad_norm": 0.0, "learning_rate": 6.515622951837569e-06, "loss": 0.0, "step": 50530 }, { "epoch": 0.3486791723871484, "grad_norm": 0.0, "learning_rate": 6.514933044492125e-06, "loss": 0.0, "step": 50540 }, { "epoch": 0.34874816312169277, "grad_norm": 0.0016665668226778507, "learning_rate": 6.514243137146682e-06, "loss": 0.0, "step": 50550 }, { "epoch": 0.3488171538562371, "grad_norm": 0.0, "learning_rate": 6.513553229801238e-06, "loss": 0.0043, "step": 50560 }, { "epoch": 0.34888614459078143, "grad_norm": 0.0, "learning_rate": 6.512863322455795e-06, "loss": 0.0103, "step": 50570 }, { "epoch": 0.3489551353253258, "grad_norm": 0.0, "learning_rate": 6.512173415110351e-06, "loss": 0.0, "step": 50580 }, { "epoch": 0.34902412605987015, "grad_norm": 0.0, "learning_rate": 6.511483507764908e-06, "loss": 0.0, "step": 50590 }, { "epoch": 0.3490931167944145, "grad_norm": 0.0, "learning_rate": 6.5107936004194635e-06, "loss": 0.0, "step": 50600 }, { "epoch": 0.3491621075289589, "grad_norm": 0.0, "learning_rate": 6.51010369307402e-06, "loss": 0.0, "step": 50610 }, { "epoch": 0.34923109826350324, "grad_norm": 0.0, "learning_rate": 6.509413785728578e-06, "loss": 0.0, "step": 50620 }, { "epoch": 0.34930008899804754, "grad_norm": 9.754700380781856e-10, "learning_rate": 6.508723878383134e-06, "loss": 0.0, "step": 50630 }, { "epoch": 0.3493690797325919, "grad_norm": 0.0, "learning_rate": 6.50803397103769e-06, "loss": 0.0, "step": 50640 }, { "epoch": 0.34943807046713626, "grad_norm": 9.152853475136169e-10, "learning_rate": 6.507344063692247e-06, "loss": 0.0, "step": 50650 }, { "epoch": 0.3495070612016806, "grad_norm": 0.0, "learning_rate": 6.506654156346803e-06, "loss": 0.0004, "step": 50660 }, { "epoch": 0.349576051936225, "grad_norm": 0.0, "learning_rate": 6.50596424900136e-06, "loss": 0.0, "step": 50670 }, { "epoch": 0.3496450426707693, "grad_norm": 0.0, "learning_rate": 6.505274341655916e-06, "loss": 0.0, "step": 50680 }, { "epoch": 0.34971403340531365, "grad_norm": 0.0, "learning_rate": 6.504584434310473e-06, "loss": 0.3583, "step": 50690 }, { "epoch": 0.349783024139858, "grad_norm": 0.0, "learning_rate": 6.503894526965029e-06, "loss": 0.0, "step": 50700 }, { "epoch": 0.3498520148744024, "grad_norm": 0.0, "learning_rate": 6.503204619619585e-06, "loss": 0.0001, "step": 50710 }, { "epoch": 0.34992100560894673, "grad_norm": 0.0, "learning_rate": 6.502514712274141e-06, "loss": 0.0, "step": 50720 }, { "epoch": 0.3499899963434911, "grad_norm": 0.0, "learning_rate": 6.501824804928698e-06, "loss": 0.0, "step": 50730 }, { "epoch": 0.3500589870780354, "grad_norm": 0.0, "learning_rate": 6.501134897583255e-06, "loss": 0.0, "step": 50740 }, { "epoch": 0.35012797781257976, "grad_norm": 0.0, "learning_rate": 6.500444990237812e-06, "loss": 0.0, "step": 50750 }, { "epoch": 0.3501969685471241, "grad_norm": 0.0, "learning_rate": 6.499755082892368e-06, "loss": 0.0, "step": 50760 }, { "epoch": 0.3502659592816685, "grad_norm": 8.00547277322039e-05, "learning_rate": 6.499065175546925e-06, "loss": 0.0001, "step": 50770 }, { "epoch": 0.35033495001621284, "grad_norm": 0.0025550287682563066, "learning_rate": 6.498375268201481e-06, "loss": 0.0, "step": 50780 }, { "epoch": 0.35040394075075715, "grad_norm": 0.0010697155958041549, "learning_rate": 6.497685360856038e-06, "loss": 0.0, "step": 50790 }, { "epoch": 0.3504729314853015, "grad_norm": 0.0, "learning_rate": 6.496995453510594e-06, "loss": 0.0, "step": 50800 }, { "epoch": 0.35054192221984587, "grad_norm": 0.0, "learning_rate": 6.496305546165151e-06, "loss": 0.0, "step": 50810 }, { "epoch": 0.35061091295439023, "grad_norm": 3.235627900721738e-06, "learning_rate": 6.495615638819706e-06, "loss": 0.0, "step": 50820 }, { "epoch": 0.3506799036889346, "grad_norm": 0.0, "learning_rate": 6.494925731474263e-06, "loss": 0.0, "step": 50830 }, { "epoch": 0.35074889442347895, "grad_norm": 0.0, "learning_rate": 6.494235824128819e-06, "loss": 0.0, "step": 50840 }, { "epoch": 0.35081788515802326, "grad_norm": 0.0, "learning_rate": 6.493545916783377e-06, "loss": 0.0, "step": 50850 }, { "epoch": 0.3508868758925676, "grad_norm": 0.0, "learning_rate": 6.492856009437933e-06, "loss": 0.0044, "step": 50860 }, { "epoch": 0.350955866627112, "grad_norm": 0.0, "learning_rate": 6.4921661020924896e-06, "loss": 0.0, "step": 50870 }, { "epoch": 0.35102485736165634, "grad_norm": 0.0, "learning_rate": 6.491476194747046e-06, "loss": 0.0, "step": 50880 }, { "epoch": 0.3510938480962007, "grad_norm": 0.0, "learning_rate": 6.4907862874016026e-06, "loss": 0.0002, "step": 50890 }, { "epoch": 0.351162838830745, "grad_norm": 0.035069920122623444, "learning_rate": 6.490096380056159e-06, "loss": 0.0, "step": 50900 }, { "epoch": 0.35123182956528937, "grad_norm": 0.0, "learning_rate": 6.4894064727107155e-06, "loss": 0.0, "step": 50910 }, { "epoch": 0.3513008202998337, "grad_norm": 0.0, "learning_rate": 6.488716565365272e-06, "loss": 0.0, "step": 50920 }, { "epoch": 0.3513698110343781, "grad_norm": 0.0, "learning_rate": 6.4880266580198285e-06, "loss": 0.0564, "step": 50930 }, { "epoch": 0.35143880176892245, "grad_norm": 0.0, "learning_rate": 6.487336750674384e-06, "loss": 0.0, "step": 50940 }, { "epoch": 0.3515077925034668, "grad_norm": 0.0, "learning_rate": 6.486646843328941e-06, "loss": 0.0339, "step": 50950 }, { "epoch": 0.3515767832380111, "grad_norm": 1.8043915250842701e-09, "learning_rate": 6.485956935983498e-06, "loss": 0.0, "step": 50960 }, { "epoch": 0.3516457739725555, "grad_norm": 0.0, "learning_rate": 6.4852670286380545e-06, "loss": 0.0, "step": 50970 }, { "epoch": 0.35171476470709984, "grad_norm": 0.5766947269439697, "learning_rate": 6.484577121292611e-06, "loss": 0.0001, "step": 50980 }, { "epoch": 0.3517837554416442, "grad_norm": 0.0, "learning_rate": 6.4838872139471675e-06, "loss": 0.0, "step": 50990 }, { "epoch": 0.35185274617618856, "grad_norm": 1.9761574776566704e-07, "learning_rate": 6.483197306601724e-06, "loss": 0.0, "step": 51000 }, { "epoch": 0.35192173691073286, "grad_norm": 0.0, "learning_rate": 6.4825073992562804e-06, "loss": 0.0, "step": 51010 }, { "epoch": 0.3519907276452772, "grad_norm": 0.0, "learning_rate": 6.481817491910837e-06, "loss": 0.0, "step": 51020 }, { "epoch": 0.3520597183798216, "grad_norm": 0.0, "learning_rate": 6.481127584565393e-06, "loss": 0.0, "step": 51030 }, { "epoch": 0.35212870911436595, "grad_norm": 836.3893432617188, "learning_rate": 6.48043767721995e-06, "loss": 0.4836, "step": 51040 }, { "epoch": 0.3521976998489103, "grad_norm": 0.0, "learning_rate": 6.4797477698745056e-06, "loss": 0.0, "step": 51050 }, { "epoch": 0.35226669058345467, "grad_norm": 0.0009629593114368618, "learning_rate": 6.479057862529062e-06, "loss": 0.0002, "step": 51060 }, { "epoch": 0.35233568131799897, "grad_norm": 0.0, "learning_rate": 6.47836795518362e-06, "loss": 0.0, "step": 51070 }, { "epoch": 0.35240467205254333, "grad_norm": 0.0, "learning_rate": 6.477678047838176e-06, "loss": 0.0, "step": 51080 }, { "epoch": 0.3524736627870877, "grad_norm": 0.0, "learning_rate": 6.476988140492732e-06, "loss": 0.0, "step": 51090 }, { "epoch": 0.35254265352163205, "grad_norm": 0.0, "learning_rate": 6.476298233147289e-06, "loss": 0.7922, "step": 51100 }, { "epoch": 0.3526116442561764, "grad_norm": 6.40731886960566e-05, "learning_rate": 6.475608325801845e-06, "loss": 0.0032, "step": 51110 }, { "epoch": 0.3526806349907207, "grad_norm": 9.686899829830509e-06, "learning_rate": 6.474918418456402e-06, "loss": 0.0, "step": 51120 }, { "epoch": 0.3527496257252651, "grad_norm": 0.0, "learning_rate": 6.474228511110958e-06, "loss": 0.0, "step": 51130 }, { "epoch": 0.35281861645980944, "grad_norm": 0.0, "learning_rate": 6.473538603765515e-06, "loss": 0.0, "step": 51140 }, { "epoch": 0.3528876071943538, "grad_norm": 0.0006227701669558883, "learning_rate": 6.472848696420071e-06, "loss": 0.0, "step": 51150 }, { "epoch": 0.35295659792889816, "grad_norm": 0.0, "learning_rate": 6.472158789074627e-06, "loss": 0.0002, "step": 51160 }, { "epoch": 0.3530255886634425, "grad_norm": 0.0, "learning_rate": 6.4714688817291834e-06, "loss": 0.0003, "step": 51170 }, { "epoch": 0.35309457939798683, "grad_norm": 2.170688458136283e-05, "learning_rate": 6.470778974383742e-06, "loss": 0.0, "step": 51180 }, { "epoch": 0.3531635701325312, "grad_norm": 6.692272958019885e-09, "learning_rate": 6.470089067038297e-06, "loss": 0.0, "step": 51190 }, { "epoch": 0.35323256086707555, "grad_norm": 0.0, "learning_rate": 6.469399159692854e-06, "loss": 0.0, "step": 51200 }, { "epoch": 0.3533015516016199, "grad_norm": 3.0576808285331936e-07, "learning_rate": 6.46870925234741e-06, "loss": 0.0001, "step": 51210 }, { "epoch": 0.3533705423361643, "grad_norm": 0.0, "learning_rate": 6.468019345001967e-06, "loss": 0.0003, "step": 51220 }, { "epoch": 0.3534395330707086, "grad_norm": 4.628592265021325e-08, "learning_rate": 6.467329437656523e-06, "loss": 0.0, "step": 51230 }, { "epoch": 0.35350852380525294, "grad_norm": 0.0, "learning_rate": 6.46663953031108e-06, "loss": 0.0, "step": 51240 }, { "epoch": 0.3535775145397973, "grad_norm": 0.0, "learning_rate": 6.465949622965636e-06, "loss": 0.0647, "step": 51250 }, { "epoch": 0.35364650527434166, "grad_norm": 868.2990112304688, "learning_rate": 6.465259715620193e-06, "loss": 0.2975, "step": 51260 }, { "epoch": 0.353715496008886, "grad_norm": 0.0, "learning_rate": 6.464638799009294e-06, "loss": 0.7109, "step": 51270 }, { "epoch": 0.3537844867434304, "grad_norm": 0.0, "learning_rate": 6.463948891663851e-06, "loss": 0.0, "step": 51280 }, { "epoch": 0.3538534774779747, "grad_norm": 0.0, "learning_rate": 6.463258984318407e-06, "loss": 0.0, "step": 51290 }, { "epoch": 0.35392246821251905, "grad_norm": 0.0, "learning_rate": 6.462569076972963e-06, "loss": 0.0001, "step": 51300 }, { "epoch": 0.3539914589470634, "grad_norm": 5.896670245419955e-07, "learning_rate": 6.461879169627519e-06, "loss": 0.2535, "step": 51310 }, { "epoch": 0.35406044968160777, "grad_norm": 0.0, "learning_rate": 6.461189262282076e-06, "loss": 0.0, "step": 51320 }, { "epoch": 0.35412944041615213, "grad_norm": 0.0, "learning_rate": 6.460499354936632e-06, "loss": 0.0, "step": 51330 }, { "epoch": 0.35419843115069644, "grad_norm": 0.0, "learning_rate": 6.459809447591189e-06, "loss": 0.0009, "step": 51340 }, { "epoch": 0.3542674218852408, "grad_norm": 0.004679547622799873, "learning_rate": 6.459119540245745e-06, "loss": 0.1567, "step": 51350 }, { "epoch": 0.35433641261978516, "grad_norm": 0.0, "learning_rate": 6.458429632900302e-06, "loss": 0.0, "step": 51360 }, { "epoch": 0.3544054033543295, "grad_norm": 0.0, "learning_rate": 6.457739725554859e-06, "loss": 0.0028, "step": 51370 }, { "epoch": 0.3544743940888739, "grad_norm": 0.0, "learning_rate": 6.457049818209416e-06, "loss": 0.0, "step": 51380 }, { "epoch": 0.35454338482341824, "grad_norm": 0.0, "learning_rate": 6.456359910863972e-06, "loss": 0.0, "step": 51390 }, { "epoch": 0.35461237555796254, "grad_norm": 0.0, "learning_rate": 6.455670003518529e-06, "loss": 0.0, "step": 51400 }, { "epoch": 0.3546813662925069, "grad_norm": 0.04218251258134842, "learning_rate": 6.454980096173084e-06, "loss": 0.0, "step": 51410 }, { "epoch": 0.35475035702705127, "grad_norm": 0.004686353728175163, "learning_rate": 6.454290188827641e-06, "loss": 0.0, "step": 51420 }, { "epoch": 0.3548193477615956, "grad_norm": 0.0, "learning_rate": 6.453600281482197e-06, "loss": 0.0011, "step": 51430 }, { "epoch": 0.35488833849614, "grad_norm": 0.0, "learning_rate": 6.452910374136754e-06, "loss": 0.0001, "step": 51440 }, { "epoch": 0.3549573292306843, "grad_norm": 0.0, "learning_rate": 6.45222046679131e-06, "loss": 0.0, "step": 51450 }, { "epoch": 0.35502631996522865, "grad_norm": 0.0, "learning_rate": 6.451530559445867e-06, "loss": 0.0, "step": 51460 }, { "epoch": 0.355095310699773, "grad_norm": 0.0, "learning_rate": 6.450840652100423e-06, "loss": 0.0, "step": 51470 }, { "epoch": 0.3551643014343174, "grad_norm": 0.0002492847852408886, "learning_rate": 6.4501507447549805e-06, "loss": 0.0, "step": 51480 }, { "epoch": 0.35523329216886174, "grad_norm": 0.0, "learning_rate": 6.449460837409537e-06, "loss": 0.0, "step": 51490 }, { "epoch": 0.3553022829034061, "grad_norm": 3.526331738612498e-06, "learning_rate": 6.4487709300640935e-06, "loss": 0.0, "step": 51500 }, { "epoch": 0.3553712736379504, "grad_norm": 0.0, "learning_rate": 6.44808102271865e-06, "loss": 0.0006, "step": 51510 }, { "epoch": 0.35544026437249476, "grad_norm": 0.0, "learning_rate": 6.4473911153732065e-06, "loss": 0.0, "step": 51520 }, { "epoch": 0.3555092551070391, "grad_norm": 0.0, "learning_rate": 6.446701208027762e-06, "loss": 0.0, "step": 51530 }, { "epoch": 0.3555782458415835, "grad_norm": 0.0, "learning_rate": 6.446011300682319e-06, "loss": 0.0, "step": 51540 }, { "epoch": 0.35564723657612785, "grad_norm": 0.0, "learning_rate": 6.445321393336875e-06, "loss": 0.0, "step": 51550 }, { "epoch": 0.35571622731067215, "grad_norm": 1.6291174631533067e-07, "learning_rate": 6.444631485991432e-06, "loss": 0.0, "step": 51560 }, { "epoch": 0.3557852180452165, "grad_norm": 0.0, "learning_rate": 6.443941578645988e-06, "loss": 0.0, "step": 51570 }, { "epoch": 0.35585420877976087, "grad_norm": 0.0, "learning_rate": 6.4432516713005446e-06, "loss": 0.0, "step": 51580 }, { "epoch": 0.35592319951430523, "grad_norm": 0.0, "learning_rate": 6.442561763955102e-06, "loss": 0.0, "step": 51590 }, { "epoch": 0.3559921902488496, "grad_norm": 0.0, "learning_rate": 6.441871856609658e-06, "loss": 0.0111, "step": 51600 }, { "epoch": 0.35606118098339395, "grad_norm": 0.0, "learning_rate": 6.441181949264215e-06, "loss": 0.0, "step": 51610 }, { "epoch": 0.35613017171793826, "grad_norm": 0.0, "learning_rate": 6.440492041918771e-06, "loss": 0.0, "step": 51620 }, { "epoch": 0.3561991624524826, "grad_norm": 0.0, "learning_rate": 6.439802134573328e-06, "loss": 0.0, "step": 51630 }, { "epoch": 0.356268153187027, "grad_norm": 0.007423896808177233, "learning_rate": 6.4391122272278835e-06, "loss": 0.0, "step": 51640 }, { "epoch": 0.35633714392157134, "grad_norm": 4.474450143732156e-09, "learning_rate": 6.43842231988244e-06, "loss": 0.0, "step": 51650 }, { "epoch": 0.3564061346561157, "grad_norm": 0.0, "learning_rate": 6.4377324125369965e-06, "loss": 0.0, "step": 51660 }, { "epoch": 0.35647512539066, "grad_norm": 0.0, "learning_rate": 6.437042505191553e-06, "loss": 0.0, "step": 51670 }, { "epoch": 0.35654411612520437, "grad_norm": 0.28651612997055054, "learning_rate": 6.4363525978461095e-06, "loss": 0.0001, "step": 51680 }, { "epoch": 0.35661310685974873, "grad_norm": 0.0, "learning_rate": 6.435662690500666e-06, "loss": 0.0, "step": 51690 }, { "epoch": 0.3566820975942931, "grad_norm": 0.0, "learning_rate": 6.434972783155223e-06, "loss": 0.0, "step": 51700 }, { "epoch": 0.35675108832883745, "grad_norm": 0.0, "learning_rate": 6.43428287580978e-06, "loss": 0.0, "step": 51710 }, { "epoch": 0.3568200790633818, "grad_norm": 0.0, "learning_rate": 6.433592968464336e-06, "loss": 0.0, "step": 51720 }, { "epoch": 0.3568890697979261, "grad_norm": 0.32183560729026794, "learning_rate": 6.432903061118893e-06, "loss": 0.0001, "step": 51730 }, { "epoch": 0.3569580605324705, "grad_norm": 0.0, "learning_rate": 6.432213153773449e-06, "loss": 0.0, "step": 51740 }, { "epoch": 0.35702705126701484, "grad_norm": 8.300338749656078e-10, "learning_rate": 6.431523246428005e-06, "loss": 0.0, "step": 51750 }, { "epoch": 0.3570960420015592, "grad_norm": 0.0, "learning_rate": 6.430833339082561e-06, "loss": 0.0, "step": 51760 }, { "epoch": 0.35716503273610356, "grad_norm": 0.0, "learning_rate": 6.430143431737118e-06, "loss": 0.0, "step": 51770 }, { "epoch": 0.35723402347064787, "grad_norm": 0.000443589553469792, "learning_rate": 6.429453524391674e-06, "loss": 0.0, "step": 51780 }, { "epoch": 0.3573030142051922, "grad_norm": 0.00010486302926437929, "learning_rate": 6.428763617046231e-06, "loss": 0.0001, "step": 51790 }, { "epoch": 0.3573720049397366, "grad_norm": 0.0, "learning_rate": 6.428073709700787e-06, "loss": 0.0, "step": 51800 }, { "epoch": 0.35744099567428095, "grad_norm": 0.0, "learning_rate": 6.427383802355344e-06, "loss": 0.4902, "step": 51810 }, { "epoch": 0.3575099864088253, "grad_norm": 0.0, "learning_rate": 6.426693895009901e-06, "loss": 0.0, "step": 51820 }, { "epoch": 0.35757897714336967, "grad_norm": 0.0, "learning_rate": 6.426003987664458e-06, "loss": 0.0, "step": 51830 }, { "epoch": 0.357647967877914, "grad_norm": 0.004734083544462919, "learning_rate": 6.425314080319014e-06, "loss": 0.0, "step": 51840 }, { "epoch": 0.35771695861245834, "grad_norm": 0.0, "learning_rate": 6.424624172973571e-06, "loss": 0.0, "step": 51850 }, { "epoch": 0.3577859493470027, "grad_norm": 0.0, "learning_rate": 6.423934265628126e-06, "loss": 0.0, "step": 51860 }, { "epoch": 0.35785494008154706, "grad_norm": 0.0, "learning_rate": 6.423244358282683e-06, "loss": 0.0, "step": 51870 }, { "epoch": 0.3579239308160914, "grad_norm": 0.0, "learning_rate": 6.422554450937239e-06, "loss": 0.0, "step": 51880 }, { "epoch": 0.3579929215506357, "grad_norm": 0.0, "learning_rate": 6.421864543591796e-06, "loss": 0.0, "step": 51890 }, { "epoch": 0.3580619122851801, "grad_norm": 0.0, "learning_rate": 6.421174636246352e-06, "loss": 0.0, "step": 51900 }, { "epoch": 0.35813090301972444, "grad_norm": 0.0, "learning_rate": 6.420484728900909e-06, "loss": 0.0, "step": 51910 }, { "epoch": 0.3581998937542688, "grad_norm": 0.0, "learning_rate": 6.419794821555465e-06, "loss": 0.0, "step": 51920 }, { "epoch": 0.35826888448881317, "grad_norm": 9.824264735058819e-10, "learning_rate": 6.419104914210023e-06, "loss": 0.0001, "step": 51930 }, { "epoch": 0.3583378752233575, "grad_norm": 1.946612684378124e-07, "learning_rate": 6.418415006864579e-06, "loss": 0.0, "step": 51940 }, { "epoch": 0.35840686595790183, "grad_norm": 0.0, "learning_rate": 6.4177250995191356e-06, "loss": 0.0, "step": 51950 }, { "epoch": 0.3584758566924462, "grad_norm": 0.0, "learning_rate": 6.417035192173692e-06, "loss": 0.0, "step": 51960 }, { "epoch": 0.35854484742699055, "grad_norm": 0.0, "learning_rate": 6.4163452848282485e-06, "loss": 0.0, "step": 51970 }, { "epoch": 0.3586138381615349, "grad_norm": 0.0, "learning_rate": 6.415655377482804e-06, "loss": 0.0, "step": 51980 }, { "epoch": 0.3586828288960793, "grad_norm": 0.0, "learning_rate": 6.414965470137361e-06, "loss": 0.0, "step": 51990 }, { "epoch": 0.3587518196306236, "grad_norm": 3.0257751859608106e-05, "learning_rate": 6.414275562791917e-06, "loss": 0.0, "step": 52000 }, { "epoch": 0.35882081036516794, "grad_norm": 0.0, "learning_rate": 6.413585655446474e-06, "loss": 0.0, "step": 52010 }, { "epoch": 0.3588898010997123, "grad_norm": 0.0, "learning_rate": 6.41289574810103e-06, "loss": 0.0, "step": 52020 }, { "epoch": 0.35895879183425666, "grad_norm": 2.1959505829727277e-05, "learning_rate": 6.412205840755587e-06, "loss": 0.0, "step": 52030 }, { "epoch": 0.359027782568801, "grad_norm": 0.0, "learning_rate": 6.411515933410144e-06, "loss": 0.0, "step": 52040 }, { "epoch": 0.3590967733033454, "grad_norm": 0.0, "learning_rate": 6.4108260260647005e-06, "loss": 0.0, "step": 52050 }, { "epoch": 0.3591657640378897, "grad_norm": 0.0, "learning_rate": 6.410136118719257e-06, "loss": 0.7734, "step": 52060 }, { "epoch": 0.35923475477243405, "grad_norm": 0.0, "learning_rate": 6.4094462113738134e-06, "loss": 0.1903, "step": 52070 }, { "epoch": 0.3593037455069784, "grad_norm": 0.0008258084417320788, "learning_rate": 6.40875630402837e-06, "loss": 0.0, "step": 52080 }, { "epoch": 0.35937273624152277, "grad_norm": 0.0, "learning_rate": 6.408066396682926e-06, "loss": 0.0, "step": 52090 }, { "epoch": 0.35944172697606713, "grad_norm": 0.0, "learning_rate": 6.407376489337482e-06, "loss": 0.0, "step": 52100 }, { "epoch": 0.35951071771061144, "grad_norm": 0.0, "learning_rate": 6.4066865819920386e-06, "loss": 0.0, "step": 52110 }, { "epoch": 0.3595797084451558, "grad_norm": 9.867168193622433e-10, "learning_rate": 6.405996674646595e-06, "loss": 0.0, "step": 52120 }, { "epoch": 0.35964869917970016, "grad_norm": 0.0, "learning_rate": 6.4053067673011515e-06, "loss": 0.0001, "step": 52130 }, { "epoch": 0.3597176899142445, "grad_norm": 0.0, "learning_rate": 6.404616859955708e-06, "loss": 0.0003, "step": 52140 }, { "epoch": 0.3597866806487889, "grad_norm": 0.0, "learning_rate": 6.403926952610265e-06, "loss": 0.0, "step": 52150 }, { "epoch": 0.35985567138333324, "grad_norm": 0.0, "learning_rate": 6.403237045264822e-06, "loss": 0.0, "step": 52160 }, { "epoch": 0.35992466211787755, "grad_norm": 6.122380000306293e-06, "learning_rate": 6.402547137919378e-06, "loss": 0.0, "step": 52170 }, { "epoch": 0.3599936528524219, "grad_norm": 0.0, "learning_rate": 6.401857230573935e-06, "loss": 0.0, "step": 52180 }, { "epoch": 0.36006264358696627, "grad_norm": 0.0, "learning_rate": 6.401167323228491e-06, "loss": 0.0, "step": 52190 }, { "epoch": 0.36013163432151063, "grad_norm": 0.2074791043996811, "learning_rate": 6.400477415883047e-06, "loss": 0.0, "step": 52200 }, { "epoch": 0.360200625056055, "grad_norm": 0.0, "learning_rate": 6.3997875085376035e-06, "loss": 0.0, "step": 52210 }, { "epoch": 0.3602696157905993, "grad_norm": 2.339615292612507e-07, "learning_rate": 6.39909760119216e-06, "loss": 0.0, "step": 52220 }, { "epoch": 0.36033860652514366, "grad_norm": 1.2783141301042633e-07, "learning_rate": 6.3984076938467164e-06, "loss": 0.0, "step": 52230 }, { "epoch": 0.360407597259688, "grad_norm": 0.0, "learning_rate": 6.397717786501273e-06, "loss": 0.0001, "step": 52240 }, { "epoch": 0.3604765879942324, "grad_norm": 0.0, "learning_rate": 6.397027879155829e-06, "loss": 0.0, "step": 52250 }, { "epoch": 0.36054557872877674, "grad_norm": 0.0, "learning_rate": 6.396337971810387e-06, "loss": 0.0, "step": 52260 }, { "epoch": 0.3606145694633211, "grad_norm": 0.0, "learning_rate": 6.395648064464943e-06, "loss": 0.0, "step": 52270 }, { "epoch": 0.3606835601978654, "grad_norm": 0.0, "learning_rate": 6.3949581571195e-06, "loss": 0.0001, "step": 52280 }, { "epoch": 0.36075255093240977, "grad_norm": 0.0, "learning_rate": 6.394268249774056e-06, "loss": 0.0, "step": 52290 }, { "epoch": 0.3608215416669541, "grad_norm": 0.0, "learning_rate": 6.393578342428613e-06, "loss": 0.0, "step": 52300 }, { "epoch": 0.3608905324014985, "grad_norm": 0.0, "learning_rate": 6.392888435083169e-06, "loss": 0.0, "step": 52310 }, { "epoch": 0.36095952313604285, "grad_norm": 0.0, "learning_rate": 6.392198527737725e-06, "loss": 0.0, "step": 52320 }, { "epoch": 0.36102851387058715, "grad_norm": 0.0, "learning_rate": 6.391508620392281e-06, "loss": 0.0, "step": 52330 }, { "epoch": 0.3610975046051315, "grad_norm": 0.3596288859844208, "learning_rate": 6.390818713046838e-06, "loss": 0.0022, "step": 52340 }, { "epoch": 0.3611664953396759, "grad_norm": 0.0, "learning_rate": 6.390128805701394e-06, "loss": 0.0, "step": 52350 }, { "epoch": 0.36123548607422024, "grad_norm": 0.0, "learning_rate": 6.389438898355951e-06, "loss": 0.0014, "step": 52360 }, { "epoch": 0.3613044768087646, "grad_norm": 0.0, "learning_rate": 6.388748991010508e-06, "loss": 0.0, "step": 52370 }, { "epoch": 0.36137346754330896, "grad_norm": 0.0, "learning_rate": 6.388059083665065e-06, "loss": 0.0, "step": 52380 }, { "epoch": 0.36144245827785326, "grad_norm": 0.0, "learning_rate": 6.387369176319621e-06, "loss": 0.0001, "step": 52390 }, { "epoch": 0.3615114490123976, "grad_norm": 7.475036589710271e-09, "learning_rate": 6.386679268974178e-06, "loss": 0.0, "step": 52400 }, { "epoch": 0.361580439746942, "grad_norm": 0.36978551745414734, "learning_rate": 6.385989361628734e-06, "loss": 0.0001, "step": 52410 }, { "epoch": 0.36164943048148634, "grad_norm": 0.0, "learning_rate": 6.385299454283291e-06, "loss": 0.0, "step": 52420 }, { "epoch": 0.3617184212160307, "grad_norm": 0.0, "learning_rate": 6.384609546937846e-06, "loss": 0.0, "step": 52430 }, { "epoch": 0.361787411950575, "grad_norm": 435.36041259765625, "learning_rate": 6.383919639592403e-06, "loss": 0.1095, "step": 52440 }, { "epoch": 0.36185640268511937, "grad_norm": 0.0, "learning_rate": 6.383229732246959e-06, "loss": 0.0, "step": 52450 }, { "epoch": 0.36192539341966373, "grad_norm": 0.0, "learning_rate": 6.382539824901516e-06, "loss": 0.0009, "step": 52460 }, { "epoch": 0.3619943841542081, "grad_norm": 0.0, "learning_rate": 6.381849917556072e-06, "loss": 0.0001, "step": 52470 }, { "epoch": 0.36206337488875245, "grad_norm": 4.5022838435215817e-07, "learning_rate": 6.3811600102106295e-06, "loss": 0.0001, "step": 52480 }, { "epoch": 0.3621323656232968, "grad_norm": 2.1329526134650223e-05, "learning_rate": 6.380470102865186e-06, "loss": 0.0, "step": 52490 }, { "epoch": 0.3622013563578411, "grad_norm": 2.401577035016089e-07, "learning_rate": 6.3797801955197425e-06, "loss": 0.0, "step": 52500 }, { "epoch": 0.3622703470923855, "grad_norm": 1.8900733493865118e-06, "learning_rate": 6.379090288174299e-06, "loss": 0.0, "step": 52510 }, { "epoch": 0.36233933782692984, "grad_norm": 0.0, "learning_rate": 6.3784003808288555e-06, "loss": 0.0, "step": 52520 }, { "epoch": 0.3624083285614742, "grad_norm": 5.384059136304131e-07, "learning_rate": 6.377710473483412e-06, "loss": 0.0009, "step": 52530 }, { "epoch": 0.36247731929601856, "grad_norm": 0.0, "learning_rate": 6.377020566137968e-06, "loss": 0.015, "step": 52540 }, { "epoch": 0.36254631003056287, "grad_norm": 0.0, "learning_rate": 6.376330658792524e-06, "loss": 0.0001, "step": 52550 }, { "epoch": 0.36261530076510723, "grad_norm": 4.186577129416946e-08, "learning_rate": 6.375640751447081e-06, "loss": 0.0005, "step": 52560 }, { "epoch": 0.3626842914996516, "grad_norm": 0.0, "learning_rate": 6.374950844101637e-06, "loss": 0.0, "step": 52570 }, { "epoch": 0.36275328223419595, "grad_norm": 0.0, "learning_rate": 6.374260936756194e-06, "loss": 0.0005, "step": 52580 }, { "epoch": 0.3628222729687403, "grad_norm": 0.0, "learning_rate": 6.373571029410751e-06, "loss": 0.0, "step": 52590 }, { "epoch": 0.36289126370328467, "grad_norm": 0.0, "learning_rate": 6.3728811220653074e-06, "loss": 0.0, "step": 52600 }, { "epoch": 0.362960254437829, "grad_norm": 0.0, "learning_rate": 6.372191214719864e-06, "loss": 0.0, "step": 52610 }, { "epoch": 0.36302924517237334, "grad_norm": 0.0, "learning_rate": 6.37150130737442e-06, "loss": 0.0, "step": 52620 }, { "epoch": 0.3630982359069177, "grad_norm": 0.0, "learning_rate": 6.370811400028977e-06, "loss": 0.0, "step": 52630 }, { "epoch": 0.36316722664146206, "grad_norm": 1.384516716003418, "learning_rate": 6.370121492683533e-06, "loss": 0.5147, "step": 52640 }, { "epoch": 0.3632362173760064, "grad_norm": 0.0, "learning_rate": 6.36943158533809e-06, "loss": 0.0, "step": 52650 }, { "epoch": 0.3633052081105507, "grad_norm": 0.0, "learning_rate": 6.3687416779926455e-06, "loss": 0.0, "step": 52660 }, { "epoch": 0.3633741988450951, "grad_norm": 0.0, "learning_rate": 6.368051770647202e-06, "loss": 0.0, "step": 52670 }, { "epoch": 0.36344318957963945, "grad_norm": 5.1653405535034835e-06, "learning_rate": 6.3673618633017585e-06, "loss": 0.0, "step": 52680 }, { "epoch": 0.3635121803141838, "grad_norm": 0.0, "learning_rate": 6.366671955956315e-06, "loss": 0.0, "step": 52690 }, { "epoch": 0.36358117104872817, "grad_norm": 0.0, "learning_rate": 6.365982048610872e-06, "loss": 0.0, "step": 52700 }, { "epoch": 0.36365016178327253, "grad_norm": 0.0, "learning_rate": 6.365292141265429e-06, "loss": 0.0, "step": 52710 }, { "epoch": 0.36371915251781683, "grad_norm": 0.0, "learning_rate": 6.364602233919985e-06, "loss": 0.0, "step": 52720 }, { "epoch": 0.3637881432523612, "grad_norm": 0.0, "learning_rate": 6.363912326574542e-06, "loss": 0.0, "step": 52730 }, { "epoch": 0.36385713398690556, "grad_norm": 0.0, "learning_rate": 6.363222419229098e-06, "loss": 0.0001, "step": 52740 }, { "epoch": 0.3639261247214499, "grad_norm": 0.0, "learning_rate": 6.362532511883655e-06, "loss": 0.0, "step": 52750 }, { "epoch": 0.3639951154559943, "grad_norm": 0.0, "learning_rate": 6.361842604538211e-06, "loss": 0.0, "step": 52760 }, { "epoch": 0.3640641061905386, "grad_norm": 0.0, "learning_rate": 6.361152697192767e-06, "loss": 0.0, "step": 52770 }, { "epoch": 0.36413309692508294, "grad_norm": 0.0, "learning_rate": 6.360462789847323e-06, "loss": 0.0429, "step": 52780 }, { "epoch": 0.3642020876596273, "grad_norm": 0.0, "learning_rate": 6.35977288250188e-06, "loss": 0.0001, "step": 52790 }, { "epoch": 0.36427107839417167, "grad_norm": 0.0, "learning_rate": 6.359082975156436e-06, "loss": 0.0, "step": 52800 }, { "epoch": 0.364340069128716, "grad_norm": 1.2245102709584899e-07, "learning_rate": 6.358393067810994e-06, "loss": 0.0001, "step": 52810 }, { "epoch": 0.3644090598632604, "grad_norm": 0.0, "learning_rate": 6.35770316046555e-06, "loss": 0.0, "step": 52820 }, { "epoch": 0.3644780505978047, "grad_norm": 0.0, "learning_rate": 6.357013253120107e-06, "loss": 0.0, "step": 52830 }, { "epoch": 0.36454704133234905, "grad_norm": 0.0, "learning_rate": 6.356323345774663e-06, "loss": 0.1137, "step": 52840 }, { "epoch": 0.3646160320668934, "grad_norm": 0.0, "learning_rate": 6.35563343842922e-06, "loss": 0.0, "step": 52850 }, { "epoch": 0.3646850228014378, "grad_norm": 0.0, "learning_rate": 6.354943531083776e-06, "loss": 0.0, "step": 52860 }, { "epoch": 0.36475401353598214, "grad_norm": 0.0, "learning_rate": 6.354253623738333e-06, "loss": 0.0, "step": 52870 }, { "epoch": 0.36482300427052644, "grad_norm": 0.0, "learning_rate": 6.353563716392888e-06, "loss": 0.0, "step": 52880 }, { "epoch": 0.3648919950050708, "grad_norm": 0.0, "learning_rate": 6.352873809047445e-06, "loss": 0.0, "step": 52890 }, { "epoch": 0.36496098573961516, "grad_norm": 0.0, "learning_rate": 6.352183901702001e-06, "loss": 0.0, "step": 52900 }, { "epoch": 0.3650299764741595, "grad_norm": 0.0004221514973323792, "learning_rate": 6.351493994356558e-06, "loss": 0.0, "step": 52910 }, { "epoch": 0.3650989672087039, "grad_norm": 0.0, "learning_rate": 6.350804087011115e-06, "loss": 0.0, "step": 52920 }, { "epoch": 0.36516795794324824, "grad_norm": 0.0, "learning_rate": 6.350114179665672e-06, "loss": 0.35, "step": 52930 }, { "epoch": 0.36523694867779255, "grad_norm": 0.0, "learning_rate": 6.349424272320228e-06, "loss": 0.0, "step": 52940 }, { "epoch": 0.3653059394123369, "grad_norm": 0.0, "learning_rate": 6.348734364974785e-06, "loss": 0.2987, "step": 52950 }, { "epoch": 0.36537493014688127, "grad_norm": 0.0, "learning_rate": 6.348044457629341e-06, "loss": 0.0, "step": 52960 }, { "epoch": 0.36544392088142563, "grad_norm": 0.0, "learning_rate": 6.3473545502838976e-06, "loss": 0.0, "step": 52970 }, { "epoch": 0.36551291161597, "grad_norm": 0.0, "learning_rate": 6.346664642938454e-06, "loss": 0.0, "step": 52980 }, { "epoch": 0.3655819023505143, "grad_norm": 0.0, "learning_rate": 6.3459747355930106e-06, "loss": 0.0021, "step": 52990 }, { "epoch": 0.36565089308505866, "grad_norm": 0.0, "learning_rate": 6.345284828247566e-06, "loss": 0.0, "step": 53000 }, { "epoch": 0.365719883819603, "grad_norm": 0.0, "learning_rate": 6.344594920902123e-06, "loss": 0.0, "step": 53010 }, { "epoch": 0.3657888745541474, "grad_norm": 1.1275965334789362e-05, "learning_rate": 6.343905013556679e-06, "loss": 0.0, "step": 53020 }, { "epoch": 0.36585786528869174, "grad_norm": 0.0, "learning_rate": 6.3432151062112365e-06, "loss": 0.0, "step": 53030 }, { "epoch": 0.3659268560232361, "grad_norm": 2.156047562706931e-09, "learning_rate": 6.342525198865793e-06, "loss": 0.0377, "step": 53040 }, { "epoch": 0.3659958467577804, "grad_norm": 0.0, "learning_rate": 6.3418352915203495e-06, "loss": 0.1613, "step": 53050 }, { "epoch": 0.36606483749232477, "grad_norm": 0.0, "learning_rate": 6.341145384174906e-06, "loss": 0.0429, "step": 53060 }, { "epoch": 0.36613382822686913, "grad_norm": 0.0, "learning_rate": 6.3404554768294625e-06, "loss": 0.0, "step": 53070 }, { "epoch": 0.3662028189614135, "grad_norm": 0.0, "learning_rate": 6.339765569484019e-06, "loss": 0.0, "step": 53080 }, { "epoch": 0.36627180969595785, "grad_norm": 0.0024852491915225983, "learning_rate": 6.3390756621385755e-06, "loss": 0.3215, "step": 53090 }, { "epoch": 0.36634080043050216, "grad_norm": 0.0, "learning_rate": 6.338385754793132e-06, "loss": 0.0, "step": 53100 }, { "epoch": 0.3664097911650465, "grad_norm": 0.0, "learning_rate": 6.337695847447688e-06, "loss": 2.1703, "step": 53110 }, { "epoch": 0.3664787818995909, "grad_norm": 0.0, "learning_rate": 6.337005940102244e-06, "loss": 0.0, "step": 53120 }, { "epoch": 0.36654777263413524, "grad_norm": 0.0, "learning_rate": 6.3363160327568006e-06, "loss": 0.0033, "step": 53130 }, { "epoch": 0.3666167633686796, "grad_norm": 0.0, "learning_rate": 6.335626125411358e-06, "loss": 0.0, "step": 53140 }, { "epoch": 0.36668575410322396, "grad_norm": 0.0, "learning_rate": 6.334936218065914e-06, "loss": 0.0, "step": 53150 }, { "epoch": 0.36675474483776827, "grad_norm": 1.0293479135725647e-05, "learning_rate": 6.334246310720471e-06, "loss": 0.0, "step": 53160 }, { "epoch": 0.3668237355723126, "grad_norm": 262.7918701171875, "learning_rate": 6.333556403375027e-06, "loss": 0.0224, "step": 53170 }, { "epoch": 0.366892726306857, "grad_norm": 0.0, "learning_rate": 6.332866496029584e-06, "loss": 0.0, "step": 53180 }, { "epoch": 0.36696171704140135, "grad_norm": 0.0, "learning_rate": 6.33217658868414e-06, "loss": 0.0, "step": 53190 }, { "epoch": 0.3670307077759457, "grad_norm": 0.0, "learning_rate": 6.331486681338697e-06, "loss": 0.8742, "step": 53200 }, { "epoch": 0.36709969851049, "grad_norm": 0.10914280265569687, "learning_rate": 6.330796773993253e-06, "loss": 0.0, "step": 53210 }, { "epoch": 0.3671686892450344, "grad_norm": 2.665992155925778e-07, "learning_rate": 6.330106866647809e-06, "loss": 0.0, "step": 53220 }, { "epoch": 0.36723767997957873, "grad_norm": 0.0, "learning_rate": 6.3294169593023655e-06, "loss": 0.0, "step": 53230 }, { "epoch": 0.3673066707141231, "grad_norm": 0.0, "learning_rate": 6.328727051956922e-06, "loss": 0.0, "step": 53240 }, { "epoch": 0.36737566144866746, "grad_norm": 0.0021569826640188694, "learning_rate": 6.328037144611479e-06, "loss": 0.0, "step": 53250 }, { "epoch": 0.3674446521832118, "grad_norm": 0.0, "learning_rate": 6.327347237266036e-06, "loss": 0.0, "step": 53260 }, { "epoch": 0.3675136429177561, "grad_norm": 0.002401307225227356, "learning_rate": 6.326657329920592e-06, "loss": 0.0, "step": 53270 }, { "epoch": 0.3675826336523005, "grad_norm": 0.0, "learning_rate": 6.325967422575149e-06, "loss": 0.0, "step": 53280 }, { "epoch": 0.36765162438684484, "grad_norm": 0.0, "learning_rate": 6.325277515229705e-06, "loss": 0.0, "step": 53290 }, { "epoch": 0.3677206151213892, "grad_norm": 0.0, "learning_rate": 6.324587607884262e-06, "loss": 0.0, "step": 53300 }, { "epoch": 0.36778960585593357, "grad_norm": 0.0, "learning_rate": 6.323897700538818e-06, "loss": 0.0001, "step": 53310 }, { "epoch": 0.3678585965904779, "grad_norm": 0.0, "learning_rate": 6.323207793193375e-06, "loss": 0.0, "step": 53320 }, { "epoch": 0.36792758732502223, "grad_norm": 0.0, "learning_rate": 6.32251788584793e-06, "loss": 0.0, "step": 53330 }, { "epoch": 0.3679965780595666, "grad_norm": 0.0, "learning_rate": 6.321827978502487e-06, "loss": 0.0002, "step": 53340 }, { "epoch": 0.36806556879411095, "grad_norm": 0.0, "learning_rate": 6.321138071157043e-06, "loss": 0.0, "step": 53350 }, { "epoch": 0.3681345595286553, "grad_norm": 0.0, "learning_rate": 6.320448163811601e-06, "loss": 0.0, "step": 53360 }, { "epoch": 0.3682035502631997, "grad_norm": 0.0, "learning_rate": 6.319758256466157e-06, "loss": 0.0, "step": 53370 }, { "epoch": 0.368272540997744, "grad_norm": 0.0, "learning_rate": 6.319068349120714e-06, "loss": 0.0, "step": 53380 }, { "epoch": 0.36834153173228834, "grad_norm": 0.0, "learning_rate": 6.31837844177527e-06, "loss": 0.0918, "step": 53390 }, { "epoch": 0.3684105224668327, "grad_norm": 0.0, "learning_rate": 6.317688534429827e-06, "loss": 0.0493, "step": 53400 }, { "epoch": 0.36847951320137706, "grad_norm": 0.0, "learning_rate": 6.316998627084383e-06, "loss": 0.0, "step": 53410 }, { "epoch": 0.3685485039359214, "grad_norm": 0.0, "learning_rate": 6.31630871973894e-06, "loss": 0.0, "step": 53420 }, { "epoch": 0.3686174946704658, "grad_norm": 2.9677866564270516e-07, "learning_rate": 6.315618812393496e-06, "loss": 0.0222, "step": 53430 }, { "epoch": 0.3686864854050101, "grad_norm": 0.0, "learning_rate": 6.314928905048053e-06, "loss": 0.0, "step": 53440 }, { "epoch": 0.36875547613955445, "grad_norm": 1.6090236902236938, "learning_rate": 6.314238997702608e-06, "loss": 0.0017, "step": 53450 }, { "epoch": 0.3688244668740988, "grad_norm": 0.0, "learning_rate": 6.313549090357165e-06, "loss": 0.0006, "step": 53460 }, { "epoch": 0.36889345760864317, "grad_norm": 0.0, "learning_rate": 6.312859183011722e-06, "loss": 0.0009, "step": 53470 }, { "epoch": 0.36896244834318753, "grad_norm": 257.950439453125, "learning_rate": 6.312169275666279e-06, "loss": 0.044, "step": 53480 }, { "epoch": 0.36903143907773184, "grad_norm": 1421.5469970703125, "learning_rate": 6.311479368320835e-06, "loss": 0.3107, "step": 53490 }, { "epoch": 0.3691004298122762, "grad_norm": 0.0, "learning_rate": 6.3107894609753916e-06, "loss": 0.0, "step": 53500 }, { "epoch": 0.36916942054682056, "grad_norm": 0.0, "learning_rate": 6.310099553629948e-06, "loss": 0.0, "step": 53510 }, { "epoch": 0.3692384112813649, "grad_norm": 0.6843840479850769, "learning_rate": 6.3094096462845045e-06, "loss": 0.0004, "step": 53520 }, { "epoch": 0.3693074020159093, "grad_norm": 0.0, "learning_rate": 6.308719738939061e-06, "loss": 0.0, "step": 53530 }, { "epoch": 0.36937639275045364, "grad_norm": 4.340824375503871e-07, "learning_rate": 6.3080298315936175e-06, "loss": 0.0, "step": 53540 }, { "epoch": 0.36944538348499795, "grad_norm": 0.0, "learning_rate": 6.307339924248174e-06, "loss": 0.0, "step": 53550 }, { "epoch": 0.3695143742195423, "grad_norm": 0.0, "learning_rate": 6.30665001690273e-06, "loss": 0.1663, "step": 53560 }, { "epoch": 0.36958336495408667, "grad_norm": 8.709826215635985e-05, "learning_rate": 6.305960109557286e-06, "loss": 0.0, "step": 53570 }, { "epoch": 0.36965235568863103, "grad_norm": 0.0, "learning_rate": 6.305270202211844e-06, "loss": 0.0006, "step": 53580 }, { "epoch": 0.3697213464231754, "grad_norm": 1.2828556350541476e-07, "learning_rate": 6.3045802948664e-06, "loss": 0.0, "step": 53590 }, { "epoch": 0.3697903371577197, "grad_norm": 0.008203966543078423, "learning_rate": 6.3038903875209565e-06, "loss": 0.0, "step": 53600 }, { "epoch": 0.36985932789226406, "grad_norm": 0.0, "learning_rate": 6.303200480175513e-06, "loss": 0.0, "step": 53610 }, { "epoch": 0.3699283186268084, "grad_norm": 0.0, "learning_rate": 6.3025105728300694e-06, "loss": 0.0, "step": 53620 }, { "epoch": 0.3699973093613528, "grad_norm": 0.0, "learning_rate": 6.301820665484626e-06, "loss": 0.0377, "step": 53630 }, { "epoch": 0.37006630009589714, "grad_norm": 0.0, "learning_rate": 6.3011307581391824e-06, "loss": 0.0, "step": 53640 }, { "epoch": 0.3701352908304415, "grad_norm": 0.0, "learning_rate": 6.300440850793739e-06, "loss": 0.0, "step": 53650 }, { "epoch": 0.3702042815649858, "grad_norm": 0.0, "learning_rate": 6.299750943448295e-06, "loss": 0.0, "step": 53660 }, { "epoch": 0.37027327229953017, "grad_norm": 0.0, "learning_rate": 6.299061036102851e-06, "loss": 0.0001, "step": 53670 }, { "epoch": 0.3703422630340745, "grad_norm": 0.0, "learning_rate": 6.2983711287574075e-06, "loss": 0.0, "step": 53680 }, { "epoch": 0.3704112537686189, "grad_norm": 4.501118766597756e-08, "learning_rate": 6.297681221411966e-06, "loss": 0.0, "step": 53690 }, { "epoch": 0.37048024450316325, "grad_norm": 0.0, "learning_rate": 6.296991314066521e-06, "loss": 0.0, "step": 53700 }, { "epoch": 0.37054923523770755, "grad_norm": 0.0, "learning_rate": 6.296301406721078e-06, "loss": 0.0, "step": 53710 }, { "epoch": 0.3706182259722519, "grad_norm": 2.6968313804331956e-08, "learning_rate": 6.295611499375634e-06, "loss": 0.0002, "step": 53720 }, { "epoch": 0.3706872167067963, "grad_norm": 0.0, "learning_rate": 6.294921592030191e-06, "loss": 0.0, "step": 53730 }, { "epoch": 0.37075620744134064, "grad_norm": 0.0, "learning_rate": 6.294231684684747e-06, "loss": 0.0, "step": 53740 }, { "epoch": 0.370825198175885, "grad_norm": 7.549827500952233e-07, "learning_rate": 6.293541777339304e-06, "loss": 0.0002, "step": 53750 }, { "epoch": 0.37089418891042936, "grad_norm": 0.0, "learning_rate": 6.29285186999386e-06, "loss": 0.0, "step": 53760 }, { "epoch": 0.37096317964497366, "grad_norm": 2.8684544304269366e-06, "learning_rate": 6.292161962648417e-06, "loss": 0.0, "step": 53770 }, { "epoch": 0.371032170379518, "grad_norm": 0.17768219113349915, "learning_rate": 6.291472055302973e-06, "loss": 0.0008, "step": 53780 }, { "epoch": 0.3711011611140624, "grad_norm": 0.0, "learning_rate": 6.290782147957529e-06, "loss": 0.0, "step": 53790 }, { "epoch": 0.37117015184860674, "grad_norm": 0.0, "learning_rate": 6.290092240612087e-06, "loss": 0.0, "step": 53800 }, { "epoch": 0.3712391425831511, "grad_norm": 0.010918626561760902, "learning_rate": 6.289402333266643e-06, "loss": 0.0091, "step": 53810 }, { "epoch": 0.3713081333176954, "grad_norm": 0.0, "learning_rate": 6.288712425921199e-06, "loss": 0.0, "step": 53820 }, { "epoch": 0.37137712405223977, "grad_norm": 0.0005918614915572107, "learning_rate": 6.288022518575756e-06, "loss": 0.0005, "step": 53830 }, { "epoch": 0.37144611478678413, "grad_norm": 0.0, "learning_rate": 6.287332611230312e-06, "loss": 0.0, "step": 53840 }, { "epoch": 0.3715151055213285, "grad_norm": 0.0, "learning_rate": 6.286642703884869e-06, "loss": 0.0, "step": 53850 }, { "epoch": 0.37158409625587285, "grad_norm": 0.0, "learning_rate": 6.285952796539425e-06, "loss": 0.1163, "step": 53860 }, { "epoch": 0.3716530869904172, "grad_norm": 0.0, "learning_rate": 6.285262889193982e-06, "loss": 0.0, "step": 53870 }, { "epoch": 0.3717220777249615, "grad_norm": 0.0, "learning_rate": 6.284572981848538e-06, "loss": 0.0, "step": 53880 }, { "epoch": 0.3717910684595059, "grad_norm": 0.0, "learning_rate": 6.283883074503095e-06, "loss": 0.0, "step": 53890 }, { "epoch": 0.37186005919405024, "grad_norm": 0.0, "learning_rate": 6.28319316715765e-06, "loss": 0.0003, "step": 53900 }, { "epoch": 0.3719290499285946, "grad_norm": 0.0, "learning_rate": 6.2825032598122085e-06, "loss": 0.0001, "step": 53910 }, { "epoch": 0.37199804066313896, "grad_norm": 0.0, "learning_rate": 6.281813352466765e-06, "loss": 0.0, "step": 53920 }, { "epoch": 0.37206703139768327, "grad_norm": 0.0, "learning_rate": 6.281123445121321e-06, "loss": 0.0, "step": 53930 }, { "epoch": 0.37213602213222763, "grad_norm": 0.0, "learning_rate": 6.280433537775877e-06, "loss": 0.0, "step": 53940 }, { "epoch": 0.372205012866772, "grad_norm": 0.0012977931182831526, "learning_rate": 6.279743630430434e-06, "loss": 0.0005, "step": 53950 }, { "epoch": 0.37227400360131635, "grad_norm": 0.0, "learning_rate": 6.27905372308499e-06, "loss": 0.0, "step": 53960 }, { "epoch": 0.3723429943358607, "grad_norm": 0.0, "learning_rate": 6.278363815739547e-06, "loss": 0.0, "step": 53970 }, { "epoch": 0.37241198507040507, "grad_norm": 0.18349036574363708, "learning_rate": 6.277673908394103e-06, "loss": 0.0, "step": 53980 }, { "epoch": 0.3724809758049494, "grad_norm": 0.0, "learning_rate": 6.27698400104866e-06, "loss": 0.0, "step": 53990 }, { "epoch": 0.37254996653949374, "grad_norm": 0.0, "learning_rate": 6.276294093703216e-06, "loss": 0.0, "step": 54000 }, { "epoch": 0.3726189572740381, "grad_norm": 0.0, "learning_rate": 6.275604186357772e-06, "loss": 0.0, "step": 54010 }, { "epoch": 0.37268794800858246, "grad_norm": 2.8242279626056188e-08, "learning_rate": 6.27491427901233e-06, "loss": 0.0, "step": 54020 }, { "epoch": 0.3727569387431268, "grad_norm": 0.0, "learning_rate": 6.274224371666886e-06, "loss": 0.0, "step": 54030 }, { "epoch": 0.3728259294776711, "grad_norm": 0.0, "learning_rate": 6.273534464321442e-06, "loss": 0.0, "step": 54040 }, { "epoch": 0.3728949202122155, "grad_norm": 0.0, "learning_rate": 6.2728445569759985e-06, "loss": 0.0004, "step": 54050 }, { "epoch": 0.37296391094675985, "grad_norm": 0.0, "learning_rate": 6.272154649630555e-06, "loss": 0.0, "step": 54060 }, { "epoch": 0.3730329016813042, "grad_norm": 2.5350940902058028e-08, "learning_rate": 6.2714647422851115e-06, "loss": 0.0001, "step": 54070 }, { "epoch": 0.37310189241584857, "grad_norm": 4.99712826673715e-10, "learning_rate": 6.270774834939668e-06, "loss": 0.0, "step": 54080 }, { "epoch": 0.37317088315039293, "grad_norm": 2.5723872809635395e-08, "learning_rate": 6.2700849275942245e-06, "loss": 0.0, "step": 54090 }, { "epoch": 0.37323987388493723, "grad_norm": 2.6685615495125603e-08, "learning_rate": 6.269395020248781e-06, "loss": 0.0, "step": 54100 }, { "epoch": 0.3733088646194816, "grad_norm": 0.0, "learning_rate": 6.2687051129033375e-06, "loss": 0.0, "step": 54110 }, { "epoch": 0.37337785535402596, "grad_norm": 0.0, "learning_rate": 6.268015205557894e-06, "loss": 0.0, "step": 54120 }, { "epoch": 0.3734468460885703, "grad_norm": 0.0, "learning_rate": 6.267325298212451e-06, "loss": 0.0, "step": 54130 }, { "epoch": 0.3735158368231147, "grad_norm": 2.799781322479248, "learning_rate": 6.266635390867008e-06, "loss": 0.0005, "step": 54140 }, { "epoch": 0.373584827557659, "grad_norm": 3.888081209879601e-06, "learning_rate": 6.2659454835215634e-06, "loss": 0.0, "step": 54150 }, { "epoch": 0.37365381829220334, "grad_norm": 0.0, "learning_rate": 6.26525557617612e-06, "loss": 0.0, "step": 54160 }, { "epoch": 0.3737228090267477, "grad_norm": 0.0, "learning_rate": 6.264565668830676e-06, "loss": 0.0, "step": 54170 }, { "epoch": 0.37379179976129207, "grad_norm": 9.067513929039706e-06, "learning_rate": 6.263875761485233e-06, "loss": 0.0006, "step": 54180 }, { "epoch": 0.3738607904958364, "grad_norm": 0.0, "learning_rate": 6.263185854139789e-06, "loss": 0.0, "step": 54190 }, { "epoch": 0.3739297812303808, "grad_norm": 1.236219532074756e-06, "learning_rate": 6.262495946794346e-06, "loss": 0.0, "step": 54200 }, { "epoch": 0.3739987719649251, "grad_norm": 0.0, "learning_rate": 6.261806039448902e-06, "loss": 0.0041, "step": 54210 }, { "epoch": 0.37406776269946945, "grad_norm": 0.0, "learning_rate": 6.261116132103459e-06, "loss": 0.3545, "step": 54220 }, { "epoch": 0.3741367534340138, "grad_norm": 0.0, "learning_rate": 6.260426224758015e-06, "loss": 0.1688, "step": 54230 }, { "epoch": 0.3742057441685582, "grad_norm": 0.0, "learning_rate": 6.259736317412573e-06, "loss": 0.0001, "step": 54240 }, { "epoch": 0.37427473490310254, "grad_norm": 0.0, "learning_rate": 6.259046410067129e-06, "loss": 0.0726, "step": 54250 }, { "epoch": 0.37434372563764684, "grad_norm": 0.0, "learning_rate": 6.258356502721686e-06, "loss": 0.0, "step": 54260 }, { "epoch": 0.3744127163721912, "grad_norm": 0.0, "learning_rate": 6.257666595376241e-06, "loss": 0.0009, "step": 54270 }, { "epoch": 0.37448170710673556, "grad_norm": 0.0, "learning_rate": 6.256976688030798e-06, "loss": 0.0, "step": 54280 }, { "epoch": 0.3745506978412799, "grad_norm": 1.5801893823663704e-05, "learning_rate": 6.256286780685354e-06, "loss": 0.0, "step": 54290 }, { "epoch": 0.3746196885758243, "grad_norm": 0.0009169537806883454, "learning_rate": 6.255596873339911e-06, "loss": 0.0, "step": 54300 }, { "epoch": 0.37468867931036864, "grad_norm": 0.0, "learning_rate": 6.254906965994467e-06, "loss": 0.0, "step": 54310 }, { "epoch": 0.37475767004491295, "grad_norm": 0.0, "learning_rate": 6.254217058649024e-06, "loss": 0.0, "step": 54320 }, { "epoch": 0.3748266607794573, "grad_norm": 0.0, "learning_rate": 6.25352715130358e-06, "loss": 0.0, "step": 54330 }, { "epoch": 0.37489565151400167, "grad_norm": 0.0, "learning_rate": 6.252837243958137e-06, "loss": 0.0004, "step": 54340 }, { "epoch": 0.37496464224854603, "grad_norm": 0.0035362797789275646, "learning_rate": 6.252147336612694e-06, "loss": 0.0, "step": 54350 }, { "epoch": 0.3750336329830904, "grad_norm": 0.0, "learning_rate": 6.251457429267251e-06, "loss": 0.0, "step": 54360 }, { "epoch": 0.3751026237176347, "grad_norm": 0.004642609041184187, "learning_rate": 6.250767521921807e-06, "loss": 0.0, "step": 54370 }, { "epoch": 0.37517161445217906, "grad_norm": 1.1241195352340583e-05, "learning_rate": 6.250077614576363e-06, "loss": 0.0, "step": 54380 }, { "epoch": 0.3752406051867234, "grad_norm": 0.07726994901895523, "learning_rate": 6.249387707230919e-06, "loss": 0.0, "step": 54390 }, { "epoch": 0.3753095959212678, "grad_norm": 0.0, "learning_rate": 6.248697799885476e-06, "loss": 0.0008, "step": 54400 }, { "epoch": 0.37537858665581214, "grad_norm": 0.053300295025110245, "learning_rate": 6.248007892540032e-06, "loss": 0.0, "step": 54410 }, { "epoch": 0.3754475773903565, "grad_norm": 0.0, "learning_rate": 6.247317985194589e-06, "loss": 0.0, "step": 54420 }, { "epoch": 0.3755165681249008, "grad_norm": 0.0, "learning_rate": 6.246628077849145e-06, "loss": 0.0001, "step": 54430 }, { "epoch": 0.37558555885944517, "grad_norm": 0.0, "learning_rate": 6.245938170503702e-06, "loss": 0.0633, "step": 54440 }, { "epoch": 0.37565454959398953, "grad_norm": 0.0, "learning_rate": 6.245248263158258e-06, "loss": 0.0002, "step": 54450 }, { "epoch": 0.3757235403285339, "grad_norm": 0.0, "learning_rate": 6.2445583558128155e-06, "loss": 0.0, "step": 54460 }, { "epoch": 0.37579253106307825, "grad_norm": 0.09672283381223679, "learning_rate": 6.243868448467372e-06, "loss": 0.0008, "step": 54470 }, { "epoch": 0.37586152179762256, "grad_norm": 0.007992210797965527, "learning_rate": 6.2431785411219285e-06, "loss": 0.0, "step": 54480 }, { "epoch": 0.3759305125321669, "grad_norm": 0.0, "learning_rate": 6.242488633776484e-06, "loss": 0.0, "step": 54490 }, { "epoch": 0.3759995032667113, "grad_norm": 0.0, "learning_rate": 6.241867717165585e-06, "loss": 0.2797, "step": 54500 }, { "epoch": 0.37606849400125564, "grad_norm": 0.0, "learning_rate": 6.241177809820141e-06, "loss": 0.0, "step": 54510 }, { "epoch": 0.3761374847358, "grad_norm": 0.0, "learning_rate": 6.240487902474698e-06, "loss": 0.0, "step": 54520 }, { "epoch": 0.37620647547034436, "grad_norm": 0.0, "learning_rate": 6.239797995129254e-06, "loss": 0.0, "step": 54530 }, { "epoch": 0.37627546620488866, "grad_norm": 0.0, "learning_rate": 6.239108087783811e-06, "loss": 0.0, "step": 54540 }, { "epoch": 0.376344456939433, "grad_norm": 0.0, "learning_rate": 6.238418180438368e-06, "loss": 0.0, "step": 54550 }, { "epoch": 0.3764134476739774, "grad_norm": 0.0, "learning_rate": 6.2377282730929246e-06, "loss": 0.0, "step": 54560 }, { "epoch": 0.37648243840852175, "grad_norm": 0.0, "learning_rate": 6.237038365747481e-06, "loss": 0.0011, "step": 54570 }, { "epoch": 0.3765514291430661, "grad_norm": 3.5893006611331657e-07, "learning_rate": 6.2363484584020376e-06, "loss": 0.0, "step": 54580 }, { "epoch": 0.3766204198776104, "grad_norm": 5.55206991492696e-09, "learning_rate": 6.235658551056594e-06, "loss": 0.0, "step": 54590 }, { "epoch": 0.3766894106121548, "grad_norm": 4.77972412109375, "learning_rate": 6.23496864371115e-06, "loss": 0.001, "step": 54600 }, { "epoch": 0.37675840134669913, "grad_norm": 0.0, "learning_rate": 6.234278736365706e-06, "loss": 0.0, "step": 54610 }, { "epoch": 0.3768273920812435, "grad_norm": 0.17097824811935425, "learning_rate": 6.233588829020263e-06, "loss": 0.0, "step": 54620 }, { "epoch": 0.37689638281578786, "grad_norm": 0.0, "learning_rate": 6.232898921674819e-06, "loss": 0.0, "step": 54630 }, { "epoch": 0.3769653735503322, "grad_norm": 2.2251473637879826e-05, "learning_rate": 6.232209014329376e-06, "loss": 0.0002, "step": 54640 }, { "epoch": 0.3770343642848765, "grad_norm": 0.0, "learning_rate": 6.231519106983932e-06, "loss": 0.0, "step": 54650 }, { "epoch": 0.3771033550194209, "grad_norm": 0.0, "learning_rate": 6.2308291996384895e-06, "loss": 0.0, "step": 54660 }, { "epoch": 0.37717234575396524, "grad_norm": 0.0, "learning_rate": 6.230139292293046e-06, "loss": 0.0136, "step": 54670 }, { "epoch": 0.3772413364885096, "grad_norm": 0.0, "learning_rate": 6.2294493849476025e-06, "loss": 0.2169, "step": 54680 }, { "epoch": 0.37731032722305397, "grad_norm": 0.0, "learning_rate": 6.228759477602159e-06, "loss": 0.0, "step": 54690 }, { "epoch": 0.37737931795759827, "grad_norm": 0.2226845622062683, "learning_rate": 6.2280695702567154e-06, "loss": 0.0008, "step": 54700 }, { "epoch": 0.37744830869214263, "grad_norm": 6.639002094743773e-07, "learning_rate": 6.227379662911271e-06, "loss": 0.8344, "step": 54710 }, { "epoch": 0.377517299426687, "grad_norm": 0.0, "learning_rate": 6.2266897555658276e-06, "loss": 0.0, "step": 54720 }, { "epoch": 0.37758629016123135, "grad_norm": 2.3116963348002173e-05, "learning_rate": 6.225999848220384e-06, "loss": 0.0, "step": 54730 }, { "epoch": 0.3776552808957757, "grad_norm": 0.0, "learning_rate": 6.2253099408749405e-06, "loss": 0.0, "step": 54740 }, { "epoch": 0.3777242716303201, "grad_norm": 0.0, "learning_rate": 6.224620033529497e-06, "loss": 0.0, "step": 54750 }, { "epoch": 0.3777932623648644, "grad_norm": 0.0, "learning_rate": 6.2239301261840535e-06, "loss": 0.0, "step": 54760 }, { "epoch": 0.37786225309940874, "grad_norm": 0.0, "learning_rate": 6.223240218838611e-06, "loss": 0.0, "step": 54770 }, { "epoch": 0.3779312438339531, "grad_norm": 0.0, "learning_rate": 6.222550311493167e-06, "loss": 0.0, "step": 54780 }, { "epoch": 0.37800023456849746, "grad_norm": 1.8271359205245972, "learning_rate": 6.221860404147724e-06, "loss": 0.0006, "step": 54790 }, { "epoch": 0.3780692253030418, "grad_norm": 0.0, "learning_rate": 6.22117049680228e-06, "loss": 0.0, "step": 54800 }, { "epoch": 0.37813821603758613, "grad_norm": 0.0, "learning_rate": 6.220480589456837e-06, "loss": 0.0, "step": 54810 }, { "epoch": 0.3782072067721305, "grad_norm": 0.0, "learning_rate": 6.219790682111393e-06, "loss": 0.0727, "step": 54820 }, { "epoch": 0.37827619750667485, "grad_norm": 0.0, "learning_rate": 6.219100774765949e-06, "loss": 0.0013, "step": 54830 }, { "epoch": 0.3783451882412192, "grad_norm": 0.4376320540904999, "learning_rate": 6.2184108674205055e-06, "loss": 0.0, "step": 54840 }, { "epoch": 0.37841417897576357, "grad_norm": 0.004327361937612295, "learning_rate": 6.217720960075062e-06, "loss": 0.0, "step": 54850 }, { "epoch": 0.37848316971030793, "grad_norm": 0.0, "learning_rate": 6.2170310527296184e-06, "loss": 0.0725, "step": 54860 }, { "epoch": 0.37855216044485224, "grad_norm": 0.0, "learning_rate": 6.216341145384175e-06, "loss": 0.0, "step": 54870 }, { "epoch": 0.3786211511793966, "grad_norm": 0.0, "learning_rate": 6.215651238038732e-06, "loss": 0.0113, "step": 54880 }, { "epoch": 0.37869014191394096, "grad_norm": 0.0, "learning_rate": 6.214961330693289e-06, "loss": 0.0, "step": 54890 }, { "epoch": 0.3787591326484853, "grad_norm": 0.0, "learning_rate": 6.214271423347845e-06, "loss": 0.0002, "step": 54900 }, { "epoch": 0.3788281233830297, "grad_norm": 0.0, "learning_rate": 6.213581516002402e-06, "loss": 0.0002, "step": 54910 }, { "epoch": 0.378897114117574, "grad_norm": 0.0, "learning_rate": 6.212891608656958e-06, "loss": 0.0053, "step": 54920 }, { "epoch": 0.37896610485211835, "grad_norm": 0.0, "learning_rate": 6.212201701311515e-06, "loss": 0.0, "step": 54930 }, { "epoch": 0.3790350955866627, "grad_norm": 0.0, "learning_rate": 6.21151179396607e-06, "loss": 0.0, "step": 54940 }, { "epoch": 0.37910408632120707, "grad_norm": 0.0, "learning_rate": 6.210821886620627e-06, "loss": 0.0, "step": 54950 }, { "epoch": 0.37917307705575143, "grad_norm": 0.0, "learning_rate": 6.210131979275183e-06, "loss": 0.0, "step": 54960 }, { "epoch": 0.3792420677902958, "grad_norm": 0.0, "learning_rate": 6.20944207192974e-06, "loss": 0.0, "step": 54970 }, { "epoch": 0.3793110585248401, "grad_norm": 0.0, "learning_rate": 6.208752164584296e-06, "loss": 0.0, "step": 54980 }, { "epoch": 0.37938004925938446, "grad_norm": 0.0, "learning_rate": 6.208062257238854e-06, "loss": 0.0, "step": 54990 }, { "epoch": 0.3794490399939288, "grad_norm": 0.0, "learning_rate": 6.20737234989341e-06, "loss": 0.0, "step": 55000 }, { "epoch": 0.3795180307284732, "grad_norm": 0.0, "learning_rate": 6.206682442547967e-06, "loss": 0.0001, "step": 55010 }, { "epoch": 0.37958702146301754, "grad_norm": 0.00025303696747869253, "learning_rate": 6.205992535202523e-06, "loss": 0.0, "step": 55020 }, { "epoch": 0.37965601219756184, "grad_norm": 0.0, "learning_rate": 6.20530262785708e-06, "loss": 0.0, "step": 55030 }, { "epoch": 0.3797250029321062, "grad_norm": 0.0, "learning_rate": 6.204612720511636e-06, "loss": 0.0028, "step": 55040 }, { "epoch": 0.37979399366665056, "grad_norm": 0.0, "learning_rate": 6.203922813166192e-06, "loss": 1.2266, "step": 55050 }, { "epoch": 0.3798629844011949, "grad_norm": 0.0, "learning_rate": 6.203232905820748e-06, "loss": 0.0, "step": 55060 }, { "epoch": 0.3799319751357393, "grad_norm": 0.0, "learning_rate": 6.202542998475305e-06, "loss": 0.0005, "step": 55070 }, { "epoch": 0.38000096587028365, "grad_norm": 0.0, "learning_rate": 6.201853091129861e-06, "loss": 0.0, "step": 55080 }, { "epoch": 0.38006995660482795, "grad_norm": 0.0, "learning_rate": 6.201163183784418e-06, "loss": 0.0, "step": 55090 }, { "epoch": 0.3801389473393723, "grad_norm": 0.0, "learning_rate": 6.200473276438975e-06, "loss": 0.0006, "step": 55100 }, { "epoch": 0.3802079380739167, "grad_norm": 0.0003746901056729257, "learning_rate": 6.1997833690935315e-06, "loss": 0.0, "step": 55110 }, { "epoch": 0.38027692880846103, "grad_norm": 0.0, "learning_rate": 6.199093461748088e-06, "loss": 0.0001, "step": 55120 }, { "epoch": 0.3803459195430054, "grad_norm": 0.0, "learning_rate": 6.1984035544026445e-06, "loss": 0.0, "step": 55130 }, { "epoch": 0.3804149102775497, "grad_norm": 0.0, "learning_rate": 6.197713647057201e-06, "loss": 0.0, "step": 55140 }, { "epoch": 0.38048390101209406, "grad_norm": 0.0, "learning_rate": 6.1970237397117575e-06, "loss": 0.0, "step": 55150 }, { "epoch": 0.3805528917466384, "grad_norm": 0.0, "learning_rate": 6.196333832366314e-06, "loss": 0.4266, "step": 55160 }, { "epoch": 0.3806218824811828, "grad_norm": 0.0004689408524427563, "learning_rate": 6.19564392502087e-06, "loss": 0.0, "step": 55170 }, { "epoch": 0.38069087321572714, "grad_norm": 0.0, "learning_rate": 6.194954017675426e-06, "loss": 0.0, "step": 55180 }, { "epoch": 0.3807598639502715, "grad_norm": 0.0, "learning_rate": 6.194264110329983e-06, "loss": 0.1701, "step": 55190 }, { "epoch": 0.3808288546848158, "grad_norm": 0.0, "learning_rate": 6.193574202984539e-06, "loss": 0.0, "step": 55200 }, { "epoch": 0.38089784541936017, "grad_norm": 0.0, "learning_rate": 6.1928842956390964e-06, "loss": 0.0, "step": 55210 }, { "epoch": 0.38096683615390453, "grad_norm": 0.00835868064314127, "learning_rate": 6.192194388293653e-06, "loss": 0.1136, "step": 55220 }, { "epoch": 0.3810358268884489, "grad_norm": 4.671475437589834e-07, "learning_rate": 6.1915044809482094e-06, "loss": 0.0137, "step": 55230 }, { "epoch": 0.38110481762299325, "grad_norm": 0.0, "learning_rate": 6.190814573602766e-06, "loss": 0.0, "step": 55240 }, { "epoch": 0.38117380835753756, "grad_norm": 0.0, "learning_rate": 6.190124666257322e-06, "loss": 0.0029, "step": 55250 }, { "epoch": 0.3812427990920819, "grad_norm": 0.0, "learning_rate": 6.189434758911879e-06, "loss": 0.0, "step": 55260 }, { "epoch": 0.3813117898266263, "grad_norm": 0.0, "learning_rate": 6.188744851566435e-06, "loss": 0.0, "step": 55270 }, { "epoch": 0.38138078056117064, "grad_norm": 3.6447808327011444e-08, "learning_rate": 6.188054944220991e-06, "loss": 0.0, "step": 55280 }, { "epoch": 0.381449771295715, "grad_norm": 0.11424826085567474, "learning_rate": 6.1873650368755475e-06, "loss": 0.0, "step": 55290 }, { "epoch": 0.38151876203025936, "grad_norm": 0.0, "learning_rate": 6.186675129530104e-06, "loss": 0.0, "step": 55300 }, { "epoch": 0.38158775276480367, "grad_norm": 0.0, "learning_rate": 6.1859852221846605e-06, "loss": 0.0, "step": 55310 }, { "epoch": 0.38165674349934803, "grad_norm": 0.0017013035248965025, "learning_rate": 6.185295314839218e-06, "loss": 0.0, "step": 55320 }, { "epoch": 0.3817257342338924, "grad_norm": 9.270196699162625e-08, "learning_rate": 6.184605407493774e-06, "loss": 0.0, "step": 55330 }, { "epoch": 0.38179472496843675, "grad_norm": 0.0, "learning_rate": 6.183915500148331e-06, "loss": 0.0001, "step": 55340 }, { "epoch": 0.3818637157029811, "grad_norm": 0.0, "learning_rate": 6.183225592802887e-06, "loss": 0.0043, "step": 55350 }, { "epoch": 0.3819327064375254, "grad_norm": 0.006450190674513578, "learning_rate": 6.182535685457444e-06, "loss": 0.0, "step": 55360 }, { "epoch": 0.3820016971720698, "grad_norm": 0.0, "learning_rate": 6.181845778112e-06, "loss": 0.0, "step": 55370 }, { "epoch": 0.38207068790661414, "grad_norm": 0.0, "learning_rate": 6.181155870766557e-06, "loss": 0.0047, "step": 55380 }, { "epoch": 0.3821396786411585, "grad_norm": 0.0, "learning_rate": 6.180465963421112e-06, "loss": 0.0, "step": 55390 }, { "epoch": 0.38220866937570286, "grad_norm": 9.17267373168329e-10, "learning_rate": 6.179776056075669e-06, "loss": 0.0, "step": 55400 }, { "epoch": 0.3822776601102472, "grad_norm": 0.0, "learning_rate": 6.179086148730225e-06, "loss": 0.0, "step": 55410 }, { "epoch": 0.3823466508447915, "grad_norm": 0.0, "learning_rate": 6.178396241384782e-06, "loss": 0.0003, "step": 55420 }, { "epoch": 0.3824156415793359, "grad_norm": 0.0, "learning_rate": 6.177706334039339e-06, "loss": 0.0, "step": 55430 }, { "epoch": 0.38248463231388025, "grad_norm": 1.2796429871286819e-07, "learning_rate": 6.177016426693896e-06, "loss": 0.0, "step": 55440 }, { "epoch": 0.3825536230484246, "grad_norm": 0.0, "learning_rate": 6.176326519348452e-06, "loss": 0.0, "step": 55450 }, { "epoch": 0.38262261378296897, "grad_norm": 1.7372332461462747e-09, "learning_rate": 6.175636612003009e-06, "loss": 0.0, "step": 55460 }, { "epoch": 0.3826916045175133, "grad_norm": 0.22413499653339386, "learning_rate": 6.174946704657565e-06, "loss": 0.0, "step": 55470 }, { "epoch": 0.38276059525205763, "grad_norm": 0.0, "learning_rate": 6.174256797312122e-06, "loss": 0.0, "step": 55480 }, { "epoch": 0.382829585986602, "grad_norm": 0.0, "learning_rate": 6.173566889966678e-06, "loss": 0.0, "step": 55490 }, { "epoch": 0.38289857672114636, "grad_norm": 0.0, "learning_rate": 6.172876982621235e-06, "loss": 0.0, "step": 55500 }, { "epoch": 0.3829675674556907, "grad_norm": 0.0, "learning_rate": 6.17218707527579e-06, "loss": 0.0, "step": 55510 }, { "epoch": 0.3830365581902351, "grad_norm": 224.2919921875, "learning_rate": 6.171497167930347e-06, "loss": 0.3236, "step": 55520 }, { "epoch": 0.3831055489247794, "grad_norm": 0.2697772681713104, "learning_rate": 6.170807260584903e-06, "loss": 0.0001, "step": 55530 }, { "epoch": 0.38317453965932374, "grad_norm": 0.0, "learning_rate": 6.170117353239461e-06, "loss": 0.0004, "step": 55540 }, { "epoch": 0.3832435303938681, "grad_norm": 3.219560312572867e-05, "learning_rate": 6.169427445894017e-06, "loss": 0.0, "step": 55550 }, { "epoch": 0.38331252112841246, "grad_norm": 0.0, "learning_rate": 6.168737538548574e-06, "loss": 0.0, "step": 55560 }, { "epoch": 0.3833815118629568, "grad_norm": 4.1865173550093004e-09, "learning_rate": 6.16804763120313e-06, "loss": 0.0, "step": 55570 }, { "epoch": 0.38345050259750113, "grad_norm": 0.0, "learning_rate": 6.167357723857687e-06, "loss": 0.0, "step": 55580 }, { "epoch": 0.3835194933320455, "grad_norm": 0.0, "learning_rate": 6.166667816512243e-06, "loss": 0.0, "step": 55590 }, { "epoch": 0.38358848406658985, "grad_norm": 0.0, "learning_rate": 6.1659779091667996e-06, "loss": 0.1245, "step": 55600 }, { "epoch": 0.3836574748011342, "grad_norm": 0.0, "learning_rate": 6.165288001821356e-06, "loss": 0.0, "step": 55610 }, { "epoch": 0.3837264655356786, "grad_norm": 0.006121823564171791, "learning_rate": 6.164598094475912e-06, "loss": 0.0, "step": 55620 }, { "epoch": 0.38379545627022293, "grad_norm": 2.5452036922501975e-08, "learning_rate": 6.163908187130468e-06, "loss": 0.0, "step": 55630 }, { "epoch": 0.38386444700476724, "grad_norm": 0.0, "learning_rate": 6.163218279785025e-06, "loss": 0.0, "step": 55640 }, { "epoch": 0.3839334377393116, "grad_norm": 0.0, "learning_rate": 6.162528372439582e-06, "loss": 0.0, "step": 55650 }, { "epoch": 0.38400242847385596, "grad_norm": 0.0, "learning_rate": 6.1618384650941385e-06, "loss": 0.0, "step": 55660 }, { "epoch": 0.3840714192084003, "grad_norm": 1.4503708598567755e-06, "learning_rate": 6.161148557748695e-06, "loss": 0.0002, "step": 55670 }, { "epoch": 0.3841404099429447, "grad_norm": 0.0, "learning_rate": 6.1604586504032515e-06, "loss": 0.1825, "step": 55680 }, { "epoch": 0.384209400677489, "grad_norm": 0.0, "learning_rate": 6.159768743057808e-06, "loss": 0.0, "step": 55690 }, { "epoch": 0.38427839141203335, "grad_norm": 9.240924692122121e-10, "learning_rate": 6.1590788357123645e-06, "loss": 0.0, "step": 55700 }, { "epoch": 0.3843473821465777, "grad_norm": 0.0, "learning_rate": 6.158388928366921e-06, "loss": 0.0043, "step": 55710 }, { "epoch": 0.38441637288112207, "grad_norm": 5.683835411218752e-07, "learning_rate": 6.1576990210214775e-06, "loss": 0.0, "step": 55720 }, { "epoch": 0.38448536361566643, "grad_norm": 0.0, "learning_rate": 6.157009113676033e-06, "loss": 0.0, "step": 55730 }, { "epoch": 0.3845543543502108, "grad_norm": 5.987767053738935e-06, "learning_rate": 6.15631920633059e-06, "loss": 0.0, "step": 55740 }, { "epoch": 0.3846233450847551, "grad_norm": 2.0352820229163626e-06, "learning_rate": 6.155629298985146e-06, "loss": 0.0, "step": 55750 }, { "epoch": 0.38469233581929946, "grad_norm": 0.0, "learning_rate": 6.154939391639703e-06, "loss": 0.0, "step": 55760 }, { "epoch": 0.3847613265538438, "grad_norm": 0.0, "learning_rate": 6.15424948429426e-06, "loss": 0.0, "step": 55770 }, { "epoch": 0.3848303172883882, "grad_norm": 0.0, "learning_rate": 6.153559576948816e-06, "loss": 0.0, "step": 55780 }, { "epoch": 0.38489930802293254, "grad_norm": 0.0026983499992638826, "learning_rate": 6.152869669603373e-06, "loss": 0.1418, "step": 55790 }, { "epoch": 0.38496829875747685, "grad_norm": 0.0, "learning_rate": 6.152179762257929e-06, "loss": 0.0, "step": 55800 }, { "epoch": 0.3850372894920212, "grad_norm": 0.0, "learning_rate": 6.151489854912486e-06, "loss": 0.0, "step": 55810 }, { "epoch": 0.38510628022656557, "grad_norm": 168.53286743164062, "learning_rate": 6.150799947567042e-06, "loss": 0.0515, "step": 55820 }, { "epoch": 0.38517527096110993, "grad_norm": 0.13199575245380402, "learning_rate": 6.150110040221599e-06, "loss": 0.0, "step": 55830 }, { "epoch": 0.3852442616956543, "grad_norm": 0.0, "learning_rate": 6.149420132876155e-06, "loss": 0.0, "step": 55840 }, { "epoch": 0.38531325243019865, "grad_norm": 0.0, "learning_rate": 6.148730225530711e-06, "loss": 0.0, "step": 55850 }, { "epoch": 0.38538224316474295, "grad_norm": 0.0, "learning_rate": 6.1480403181852675e-06, "loss": 0.0, "step": 55860 }, { "epoch": 0.3854512338992873, "grad_norm": 0.0, "learning_rate": 6.147350410839825e-06, "loss": 0.0, "step": 55870 }, { "epoch": 0.3855202246338317, "grad_norm": 0.0, "learning_rate": 6.146660503494381e-06, "loss": 0.0086, "step": 55880 }, { "epoch": 0.38558921536837604, "grad_norm": 0.0, "learning_rate": 6.145970596148938e-06, "loss": 0.0, "step": 55890 }, { "epoch": 0.3856582061029204, "grad_norm": 0.0, "learning_rate": 6.145280688803494e-06, "loss": 0.0, "step": 55900 }, { "epoch": 0.3857271968374647, "grad_norm": 0.0, "learning_rate": 6.144590781458051e-06, "loss": 0.0235, "step": 55910 }, { "epoch": 0.38579618757200906, "grad_norm": 0.0, "learning_rate": 6.143900874112607e-06, "loss": 0.0011, "step": 55920 }, { "epoch": 0.3858651783065534, "grad_norm": 0.0, "learning_rate": 6.143210966767164e-06, "loss": 0.0002, "step": 55930 }, { "epoch": 0.3859341690410978, "grad_norm": 8.098172838799655e-05, "learning_rate": 6.14252105942172e-06, "loss": 0.0, "step": 55940 }, { "epoch": 0.38600315977564215, "grad_norm": 0.0, "learning_rate": 6.141831152076277e-06, "loss": 0.0, "step": 55950 }, { "epoch": 0.3860721505101865, "grad_norm": 0.0, "learning_rate": 6.141141244730832e-06, "loss": 0.0013, "step": 55960 }, { "epoch": 0.3861411412447308, "grad_norm": 0.0028657943475991488, "learning_rate": 6.140451337385389e-06, "loss": 0.0, "step": 55970 }, { "epoch": 0.3862101319792752, "grad_norm": 0.0, "learning_rate": 6.139761430039946e-06, "loss": 0.0024, "step": 55980 }, { "epoch": 0.38627912271381953, "grad_norm": 0.0, "learning_rate": 6.139071522694503e-06, "loss": 0.0001, "step": 55990 }, { "epoch": 0.3863481134483639, "grad_norm": 0.0, "learning_rate": 6.138381615349059e-06, "loss": 0.0, "step": 56000 }, { "epoch": 0.38641710418290826, "grad_norm": 0.0, "learning_rate": 6.137691708003616e-06, "loss": 0.0, "step": 56010 }, { "epoch": 0.38648609491745256, "grad_norm": 0.0, "learning_rate": 6.137001800658172e-06, "loss": 0.0, "step": 56020 }, { "epoch": 0.3865550856519969, "grad_norm": 0.0, "learning_rate": 6.136311893312729e-06, "loss": 0.0, "step": 56030 }, { "epoch": 0.3866240763865413, "grad_norm": 0.0, "learning_rate": 6.135621985967285e-06, "loss": 0.0, "step": 56040 }, { "epoch": 0.38669306712108564, "grad_norm": 0.0, "learning_rate": 6.134932078621842e-06, "loss": 0.0002, "step": 56050 }, { "epoch": 0.38676205785563, "grad_norm": 2.4157950306857856e-08, "learning_rate": 6.134242171276398e-06, "loss": 0.0105, "step": 56060 }, { "epoch": 0.38683104859017436, "grad_norm": 0.0, "learning_rate": 6.133552263930954e-06, "loss": 0.0, "step": 56070 }, { "epoch": 0.38690003932471867, "grad_norm": 0.0, "learning_rate": 6.13286235658551e-06, "loss": 0.0, "step": 56080 }, { "epoch": 0.38696903005926303, "grad_norm": 0.0, "learning_rate": 6.1321724492400684e-06, "loss": 0.0004, "step": 56090 }, { "epoch": 0.3870380207938074, "grad_norm": 0.0, "learning_rate": 6.131482541894624e-06, "loss": 0.0, "step": 56100 }, { "epoch": 0.38710701152835175, "grad_norm": 8.23408699943684e-05, "learning_rate": 6.1307926345491806e-06, "loss": 0.0, "step": 56110 }, { "epoch": 0.3871760022628961, "grad_norm": 0.0, "learning_rate": 6.130102727203737e-06, "loss": 0.0008, "step": 56120 }, { "epoch": 0.3872449929974404, "grad_norm": 0.0, "learning_rate": 6.1294128198582936e-06, "loss": 0.0, "step": 56130 }, { "epoch": 0.3873139837319848, "grad_norm": 0.0, "learning_rate": 6.12872291251285e-06, "loss": 0.0, "step": 56140 }, { "epoch": 0.38738297446652914, "grad_norm": 0.0, "learning_rate": 6.1280330051674065e-06, "loss": 0.0, "step": 56150 }, { "epoch": 0.3874519652010735, "grad_norm": 0.0, "learning_rate": 6.127343097821963e-06, "loss": 0.0, "step": 56160 }, { "epoch": 0.38752095593561786, "grad_norm": 1.6700771084288135e-05, "learning_rate": 6.1266531904765195e-06, "loss": 0.0, "step": 56170 }, { "epoch": 0.3875899466701622, "grad_norm": 0.0, "learning_rate": 6.125963283131075e-06, "loss": 0.0, "step": 56180 }, { "epoch": 0.3876589374047065, "grad_norm": 0.0, "learning_rate": 6.125273375785632e-06, "loss": 0.0, "step": 56190 }, { "epoch": 0.3877279281392509, "grad_norm": 0.0, "learning_rate": 6.12458346844019e-06, "loss": 0.0022, "step": 56200 }, { "epoch": 0.38779691887379525, "grad_norm": 0.0, "learning_rate": 6.1238935610947455e-06, "loss": 0.0, "step": 56210 }, { "epoch": 0.3878659096083396, "grad_norm": 0.0, "learning_rate": 6.123203653749302e-06, "loss": 0.0007, "step": 56220 }, { "epoch": 0.38793490034288397, "grad_norm": 1.635532259941101, "learning_rate": 6.1225137464038585e-06, "loss": 0.0003, "step": 56230 }, { "epoch": 0.3880038910774283, "grad_norm": 0.0, "learning_rate": 6.121823839058415e-06, "loss": 0.0, "step": 56240 }, { "epoch": 0.38807288181197264, "grad_norm": 1.7715757749670047e-09, "learning_rate": 6.1211339317129714e-06, "loss": 0.0, "step": 56250 }, { "epoch": 0.388141872546517, "grad_norm": 0.0, "learning_rate": 6.120444024367528e-06, "loss": 0.0, "step": 56260 }, { "epoch": 0.38821086328106136, "grad_norm": 0.0, "learning_rate": 6.119754117022084e-06, "loss": 0.0, "step": 56270 }, { "epoch": 0.3882798540156057, "grad_norm": 0.0, "learning_rate": 6.119064209676641e-06, "loss": 0.0, "step": 56280 }, { "epoch": 0.3883488447501501, "grad_norm": 0.06376428157091141, "learning_rate": 6.118374302331197e-06, "loss": 0.0, "step": 56290 }, { "epoch": 0.3884178354846944, "grad_norm": 6.970270987949334e-06, "learning_rate": 6.117684394985753e-06, "loss": 0.0001, "step": 56300 }, { "epoch": 0.38848682621923875, "grad_norm": 0.0, "learning_rate": 6.116994487640311e-06, "loss": 0.0, "step": 56310 }, { "epoch": 0.3885558169537831, "grad_norm": 1.4182120561599731, "learning_rate": 6.116304580294867e-06, "loss": 0.0002, "step": 56320 }, { "epoch": 0.38862480768832747, "grad_norm": 0.0004731070948764682, "learning_rate": 6.115614672949423e-06, "loss": 0.3451, "step": 56330 }, { "epoch": 0.38869379842287183, "grad_norm": 9.716685234195666e-10, "learning_rate": 6.11492476560398e-06, "loss": 0.0, "step": 56340 }, { "epoch": 0.38876278915741613, "grad_norm": 0.0, "learning_rate": 6.114234858258536e-06, "loss": 0.0, "step": 56350 }, { "epoch": 0.3888317798919605, "grad_norm": 0.0, "learning_rate": 6.113544950913093e-06, "loss": 0.0, "step": 56360 }, { "epoch": 0.38890077062650485, "grad_norm": 0.0, "learning_rate": 6.112855043567649e-06, "loss": 0.0001, "step": 56370 }, { "epoch": 0.3889697613610492, "grad_norm": 4.92419758302276e-08, "learning_rate": 6.112165136222206e-06, "loss": 0.2758, "step": 56380 }, { "epoch": 0.3890387520955936, "grad_norm": 0.0, "learning_rate": 6.111475228876762e-06, "loss": 0.0013, "step": 56390 }, { "epoch": 0.38910774283013794, "grad_norm": 0.00025528090191073716, "learning_rate": 6.110785321531319e-06, "loss": 0.0, "step": 56400 }, { "epoch": 0.38917673356468224, "grad_norm": 0.0, "learning_rate": 6.1100954141858744e-06, "loss": 0.0, "step": 56410 }, { "epoch": 0.3892457242992266, "grad_norm": 0.0, "learning_rate": 6.109405506840433e-06, "loss": 0.0, "step": 56420 }, { "epoch": 0.38931471503377096, "grad_norm": 0.0, "learning_rate": 6.108715599494989e-06, "loss": 0.0, "step": 56430 }, { "epoch": 0.3893837057683153, "grad_norm": 0.002434544963762164, "learning_rate": 6.108025692149545e-06, "loss": 0.0, "step": 56440 }, { "epoch": 0.3894526965028597, "grad_norm": 1.8608644625217607e-09, "learning_rate": 6.107335784804101e-06, "loss": 0.0003, "step": 56450 }, { "epoch": 0.389521687237404, "grad_norm": 0.0, "learning_rate": 6.106645877458658e-06, "loss": 0.0, "step": 56460 }, { "epoch": 0.38959067797194835, "grad_norm": 4.590802049619924e-08, "learning_rate": 6.105955970113214e-06, "loss": 0.0, "step": 56470 }, { "epoch": 0.3896596687064927, "grad_norm": 0.0, "learning_rate": 6.105266062767771e-06, "loss": 0.0, "step": 56480 }, { "epoch": 0.3897286594410371, "grad_norm": 0.0, "learning_rate": 6.104576155422327e-06, "loss": 0.0, "step": 56490 }, { "epoch": 0.38979765017558143, "grad_norm": 0.624225914478302, "learning_rate": 6.103886248076884e-06, "loss": 0.0048, "step": 56500 }, { "epoch": 0.3898666409101258, "grad_norm": 0.0, "learning_rate": 6.10319634073144e-06, "loss": 0.0056, "step": 56510 }, { "epoch": 0.3899356316446701, "grad_norm": 0.0, "learning_rate": 6.102506433385996e-06, "loss": 0.0, "step": 56520 }, { "epoch": 0.39000462237921446, "grad_norm": 0.0, "learning_rate": 6.101816526040554e-06, "loss": 0.0015, "step": 56530 }, { "epoch": 0.3900736131137588, "grad_norm": 0.0, "learning_rate": 6.1011266186951105e-06, "loss": 0.0, "step": 56540 }, { "epoch": 0.3901426038483032, "grad_norm": 4.329888392273773e-10, "learning_rate": 6.100436711349666e-06, "loss": 0.0, "step": 56550 }, { "epoch": 0.39021159458284754, "grad_norm": 0.11823488026857376, "learning_rate": 6.099746804004223e-06, "loss": 0.0, "step": 56560 }, { "epoch": 0.39028058531739185, "grad_norm": 0.0, "learning_rate": 6.099056896658779e-06, "loss": 0.0, "step": 56570 }, { "epoch": 0.3903495760519362, "grad_norm": 0.0019926249515265226, "learning_rate": 6.098366989313336e-06, "loss": 0.0, "step": 56580 }, { "epoch": 0.39041856678648057, "grad_norm": 0.0, "learning_rate": 6.097677081967892e-06, "loss": 0.0, "step": 56590 }, { "epoch": 0.39048755752102493, "grad_norm": 0.0, "learning_rate": 6.096987174622449e-06, "loss": 0.0, "step": 56600 }, { "epoch": 0.3905565482555693, "grad_norm": 0.0, "learning_rate": 6.096297267277005e-06, "loss": 0.0, "step": 56610 }, { "epoch": 0.39062553899011365, "grad_norm": 0.0, "learning_rate": 6.095607359931562e-06, "loss": 0.0, "step": 56620 }, { "epoch": 0.39069452972465796, "grad_norm": 0.0, "learning_rate": 6.094917452586118e-06, "loss": 0.0004, "step": 56630 }, { "epoch": 0.3907635204592023, "grad_norm": 0.0, "learning_rate": 6.094227545240675e-06, "loss": 0.0008, "step": 56640 }, { "epoch": 0.3908325111937467, "grad_norm": 0.0, "learning_rate": 6.093537637895232e-06, "loss": 0.0, "step": 56650 }, { "epoch": 0.39090150192829104, "grad_norm": 0.0, "learning_rate": 6.0928477305497875e-06, "loss": 0.0, "step": 56660 }, { "epoch": 0.3909704926628354, "grad_norm": 0.0, "learning_rate": 6.092157823204344e-06, "loss": 0.0, "step": 56670 }, { "epoch": 0.3910394833973797, "grad_norm": 0.07848416268825531, "learning_rate": 6.0914679158589005e-06, "loss": 0.0, "step": 56680 }, { "epoch": 0.39110847413192407, "grad_norm": 0.0, "learning_rate": 6.090778008513457e-06, "loss": 0.0, "step": 56690 }, { "epoch": 0.3911774648664684, "grad_norm": 0.0, "learning_rate": 6.0900881011680135e-06, "loss": 0.0, "step": 56700 }, { "epoch": 0.3912464556010128, "grad_norm": 4.518360963157164e-10, "learning_rate": 6.08939819382257e-06, "loss": 0.0, "step": 56710 }, { "epoch": 0.39131544633555715, "grad_norm": 4.793644237111039e-08, "learning_rate": 6.0887082864771265e-06, "loss": 0.0372, "step": 56720 }, { "epoch": 0.3913844370701015, "grad_norm": 0.0, "learning_rate": 6.088018379131683e-06, "loss": 0.0, "step": 56730 }, { "epoch": 0.3914534278046458, "grad_norm": 0.0, "learning_rate": 6.0873284717862395e-06, "loss": 0.0, "step": 56740 }, { "epoch": 0.3915224185391902, "grad_norm": 0.0, "learning_rate": 6.086638564440797e-06, "loss": 0.0, "step": 56750 }, { "epoch": 0.39159140927373454, "grad_norm": 0.0, "learning_rate": 6.085948657095353e-06, "loss": 0.0, "step": 56760 }, { "epoch": 0.3916604000082789, "grad_norm": 0.0, "learning_rate": 6.08525874974991e-06, "loss": 0.0001, "step": 56770 }, { "epoch": 0.39172939074282326, "grad_norm": 0.0, "learning_rate": 6.0845688424044654e-06, "loss": 0.0, "step": 56780 }, { "epoch": 0.39179838147736756, "grad_norm": 3.010267164427205e-07, "learning_rate": 6.083878935059022e-06, "loss": 0.0, "step": 56790 }, { "epoch": 0.3918673722119119, "grad_norm": 0.0, "learning_rate": 6.083189027713578e-06, "loss": 0.0, "step": 56800 }, { "epoch": 0.3919363629464563, "grad_norm": 0.0, "learning_rate": 6.082499120368135e-06, "loss": 0.0, "step": 56810 }, { "epoch": 0.39200535368100065, "grad_norm": 0.0, "learning_rate": 6.081809213022691e-06, "loss": 0.0, "step": 56820 }, { "epoch": 0.392074344415545, "grad_norm": 0.0, "learning_rate": 6.081119305677248e-06, "loss": 0.0, "step": 56830 }, { "epoch": 0.39214333515008937, "grad_norm": 9.613947622710839e-05, "learning_rate": 6.080429398331804e-06, "loss": 0.0, "step": 56840 }, { "epoch": 0.3922123258846337, "grad_norm": 0.0, "learning_rate": 6.079739490986361e-06, "loss": 0.0, "step": 56850 }, { "epoch": 0.39228131661917803, "grad_norm": 0.0, "learning_rate": 6.079049583640918e-06, "loss": 0.0, "step": 56860 }, { "epoch": 0.3923503073537224, "grad_norm": 0.0, "learning_rate": 6.078359676295475e-06, "loss": 0.0, "step": 56870 }, { "epoch": 0.39241929808826675, "grad_norm": 0.0, "learning_rate": 6.077669768950031e-06, "loss": 0.0, "step": 56880 }, { "epoch": 0.3924882888228111, "grad_norm": 0.0, "learning_rate": 6.076979861604587e-06, "loss": 0.0, "step": 56890 }, { "epoch": 0.3925572795573554, "grad_norm": 0.0, "learning_rate": 6.076289954259143e-06, "loss": 0.1911, "step": 56900 }, { "epoch": 0.3926262702918998, "grad_norm": 2.7228872776031494, "learning_rate": 6.0756000469137e-06, "loss": 0.0003, "step": 56910 }, { "epoch": 0.39269526102644414, "grad_norm": 0.0, "learning_rate": 6.074910139568256e-06, "loss": 0.0, "step": 56920 }, { "epoch": 0.3927642517609885, "grad_norm": 0.0, "learning_rate": 6.074220232222813e-06, "loss": 0.0, "step": 56930 }, { "epoch": 0.39283324249553286, "grad_norm": 0.0, "learning_rate": 6.073530324877369e-06, "loss": 0.0002, "step": 56940 }, { "epoch": 0.3929022332300772, "grad_norm": 4.683251009218736e-10, "learning_rate": 6.072840417531926e-06, "loss": 0.0, "step": 56950 }, { "epoch": 0.39297122396462153, "grad_norm": 0.0, "learning_rate": 6.072150510186482e-06, "loss": 0.0001, "step": 56960 }, { "epoch": 0.3930402146991659, "grad_norm": 0.0, "learning_rate": 6.07146060284104e-06, "loss": 0.0, "step": 56970 }, { "epoch": 0.39310920543371025, "grad_norm": 0.0, "learning_rate": 6.070770695495596e-06, "loss": 0.0, "step": 56980 }, { "epoch": 0.3931781961682546, "grad_norm": 235.7193603515625, "learning_rate": 6.0700807881501526e-06, "loss": 0.0758, "step": 56990 }, { "epoch": 0.393247186902799, "grad_norm": 0.0004806480719707906, "learning_rate": 6.069390880804708e-06, "loss": 0.0, "step": 57000 }, { "epoch": 0.3933161776373433, "grad_norm": 0.00013156459317542613, "learning_rate": 6.068700973459265e-06, "loss": 0.0009, "step": 57010 }, { "epoch": 0.39338516837188764, "grad_norm": 0.0, "learning_rate": 6.068011066113821e-06, "loss": 0.0, "step": 57020 }, { "epoch": 0.393454159106432, "grad_norm": 0.0, "learning_rate": 6.067321158768378e-06, "loss": 0.0, "step": 57030 }, { "epoch": 0.39352314984097636, "grad_norm": 0.0, "learning_rate": 6.066631251422934e-06, "loss": 0.0, "step": 57040 }, { "epoch": 0.3935921405755207, "grad_norm": 0.0, "learning_rate": 6.065941344077491e-06, "loss": 0.0, "step": 57050 }, { "epoch": 0.3936611313100651, "grad_norm": 1.2601099014282227, "learning_rate": 6.065251436732047e-06, "loss": 0.0002, "step": 57060 }, { "epoch": 0.3937301220446094, "grad_norm": 0.0, "learning_rate": 6.064561529386604e-06, "loss": 0.0, "step": 57070 }, { "epoch": 0.39379911277915375, "grad_norm": 0.0, "learning_rate": 6.063871622041161e-06, "loss": 0.0, "step": 57080 }, { "epoch": 0.3938681035136981, "grad_norm": 0.0006193273002281785, "learning_rate": 6.0631817146957175e-06, "loss": 0.0052, "step": 57090 }, { "epoch": 0.39393709424824247, "grad_norm": 4.476706783052009e-10, "learning_rate": 6.062491807350274e-06, "loss": 0.0, "step": 57100 }, { "epoch": 0.39400608498278683, "grad_norm": 0.0, "learning_rate": 6.0618019000048305e-06, "loss": 0.0013, "step": 57110 }, { "epoch": 0.39407507571733114, "grad_norm": 0.0, "learning_rate": 6.061111992659386e-06, "loss": 0.0, "step": 57120 }, { "epoch": 0.3941440664518755, "grad_norm": 0.0, "learning_rate": 6.060422085313943e-06, "loss": 0.0, "step": 57130 }, { "epoch": 0.39421305718641986, "grad_norm": 0.0, "learning_rate": 6.059732177968499e-06, "loss": 0.0001, "step": 57140 }, { "epoch": 0.3942820479209642, "grad_norm": 0.0, "learning_rate": 6.0590422706230556e-06, "loss": 0.0005, "step": 57150 }, { "epoch": 0.3943510386555086, "grad_norm": 4.48142495335091e-10, "learning_rate": 6.058352363277612e-06, "loss": 0.0, "step": 57160 }, { "epoch": 0.39442002939005294, "grad_norm": 0.0, "learning_rate": 6.0576624559321686e-06, "loss": 0.0, "step": 57170 }, { "epoch": 0.39448902012459724, "grad_norm": 0.0, "learning_rate": 6.056972548586725e-06, "loss": 0.0, "step": 57180 }, { "epoch": 0.3945580108591416, "grad_norm": 0.0, "learning_rate": 6.056282641241282e-06, "loss": 0.0043, "step": 57190 }, { "epoch": 0.39462700159368597, "grad_norm": 0.0001748184149619192, "learning_rate": 6.055592733895839e-06, "loss": 0.0726, "step": 57200 }, { "epoch": 0.3946959923282303, "grad_norm": 1.9876635633409023e-05, "learning_rate": 6.054902826550395e-06, "loss": 0.0, "step": 57210 }, { "epoch": 0.3947649830627747, "grad_norm": 1.5168774325502454e-07, "learning_rate": 6.054212919204952e-06, "loss": 0.0, "step": 57220 }, { "epoch": 0.394833973797319, "grad_norm": 2.2972858459979761e-07, "learning_rate": 6.0535230118595075e-06, "loss": 0.0, "step": 57230 }, { "epoch": 0.39490296453186335, "grad_norm": 0.0, "learning_rate": 6.052833104514064e-06, "loss": 0.0, "step": 57240 }, { "epoch": 0.3949719552664077, "grad_norm": 0.0, "learning_rate": 6.0521431971686205e-06, "loss": 0.1383, "step": 57250 }, { "epoch": 0.3950409460009521, "grad_norm": 0.0010140405502170324, "learning_rate": 6.051453289823177e-06, "loss": 0.0, "step": 57260 }, { "epoch": 0.39510993673549644, "grad_norm": 0.0, "learning_rate": 6.0507633824777335e-06, "loss": 0.0, "step": 57270 }, { "epoch": 0.3951789274700408, "grad_norm": 0.0, "learning_rate": 6.05007347513229e-06, "loss": 0.0, "step": 57280 }, { "epoch": 0.3952479182045851, "grad_norm": 2.835417012292396e-09, "learning_rate": 6.0493835677868464e-06, "loss": 0.0, "step": 57290 }, { "epoch": 0.39531690893912946, "grad_norm": 0.011000720784068108, "learning_rate": 6.048693660441404e-06, "loss": 0.0, "step": 57300 }, { "epoch": 0.3953858996736738, "grad_norm": 0.0049164132215082645, "learning_rate": 6.04800375309596e-06, "loss": 0.0, "step": 57310 }, { "epoch": 0.3954548904082182, "grad_norm": 0.0, "learning_rate": 6.047313845750517e-06, "loss": 0.0, "step": 57320 }, { "epoch": 0.39552388114276255, "grad_norm": 0.0, "learning_rate": 6.046623938405073e-06, "loss": 0.0839, "step": 57330 }, { "epoch": 0.39559287187730685, "grad_norm": 0.0, "learning_rate": 6.045934031059629e-06, "loss": 0.0001, "step": 57340 }, { "epoch": 0.3956618626118512, "grad_norm": 0.0, "learning_rate": 6.045244123714185e-06, "loss": 0.0, "step": 57350 }, { "epoch": 0.3957308533463956, "grad_norm": 0.0, "learning_rate": 6.044554216368742e-06, "loss": 0.0, "step": 57360 }, { "epoch": 0.39579984408093993, "grad_norm": 0.0, "learning_rate": 6.043864309023298e-06, "loss": 0.0, "step": 57370 }, { "epoch": 0.3958688348154843, "grad_norm": 0.0, "learning_rate": 6.043174401677855e-06, "loss": 0.0, "step": 57380 }, { "epoch": 0.39593782555002865, "grad_norm": 0.0, "learning_rate": 6.042484494332411e-06, "loss": 0.0614, "step": 57390 }, { "epoch": 0.39600681628457296, "grad_norm": 0.0, "learning_rate": 6.041794586986968e-06, "loss": 0.0, "step": 57400 }, { "epoch": 0.3960758070191173, "grad_norm": 0.0, "learning_rate": 6.041104679641525e-06, "loss": 0.0, "step": 57410 }, { "epoch": 0.3961447977536617, "grad_norm": 0.0, "learning_rate": 6.040414772296082e-06, "loss": 0.0, "step": 57420 }, { "epoch": 0.39621378848820604, "grad_norm": 0.0, "learning_rate": 6.039724864950638e-06, "loss": 0.0, "step": 57430 }, { "epoch": 0.3962827792227504, "grad_norm": 0.0, "learning_rate": 6.039034957605195e-06, "loss": 0.0, "step": 57440 }, { "epoch": 0.3963517699572947, "grad_norm": 0.0, "learning_rate": 6.038345050259751e-06, "loss": 0.0285, "step": 57450 }, { "epoch": 0.39642076069183907, "grad_norm": 0.0, "learning_rate": 6.037655142914307e-06, "loss": 0.0, "step": 57460 }, { "epoch": 0.39648975142638343, "grad_norm": 0.0, "learning_rate": 6.036965235568863e-06, "loss": 0.0, "step": 57470 }, { "epoch": 0.3965587421609278, "grad_norm": 0.0, "learning_rate": 6.03627532822342e-06, "loss": 0.0, "step": 57480 }, { "epoch": 0.39662773289547215, "grad_norm": 0.0, "learning_rate": 6.035585420877976e-06, "loss": 0.0, "step": 57490 }, { "epoch": 0.3966967236300165, "grad_norm": 0.0, "learning_rate": 6.034895513532533e-06, "loss": 0.1054, "step": 57500 }, { "epoch": 0.3967657143645608, "grad_norm": 0.0, "learning_rate": 6.034205606187089e-06, "loss": 0.0018, "step": 57510 }, { "epoch": 0.3968347050991052, "grad_norm": 0.0, "learning_rate": 6.0335156988416466e-06, "loss": 0.0018, "step": 57520 }, { "epoch": 0.39690369583364954, "grad_norm": 0.0, "learning_rate": 6.032825791496203e-06, "loss": 0.0004, "step": 57530 }, { "epoch": 0.3969726865681939, "grad_norm": 0.0, "learning_rate": 6.0321358841507595e-06, "loss": 0.0, "step": 57540 }, { "epoch": 0.39704167730273826, "grad_norm": 0.001273843110539019, "learning_rate": 6.031445976805316e-06, "loss": 0.0, "step": 57550 }, { "epoch": 0.39711066803728257, "grad_norm": 3.107517443368124e-08, "learning_rate": 6.0307560694598725e-06, "loss": 0.0, "step": 57560 }, { "epoch": 0.3971796587718269, "grad_norm": 0.0, "learning_rate": 6.030066162114428e-06, "loss": 0.0, "step": 57570 }, { "epoch": 0.3972486495063713, "grad_norm": 0.0, "learning_rate": 6.029376254768985e-06, "loss": 0.0057, "step": 57580 }, { "epoch": 0.39731764024091565, "grad_norm": 0.0, "learning_rate": 6.028686347423541e-06, "loss": 0.0001, "step": 57590 }, { "epoch": 0.39738663097546, "grad_norm": 0.0, "learning_rate": 6.027996440078098e-06, "loss": 0.0001, "step": 57600 }, { "epoch": 0.39745562171000437, "grad_norm": 29.133642196655273, "learning_rate": 6.027306532732654e-06, "loss": 0.0071, "step": 57610 }, { "epoch": 0.3975246124445487, "grad_norm": 0.0, "learning_rate": 6.026616625387211e-06, "loss": 0.0, "step": 57620 }, { "epoch": 0.39759360317909304, "grad_norm": 0.0, "learning_rate": 6.025926718041768e-06, "loss": 0.0002, "step": 57630 }, { "epoch": 0.3976625939136374, "grad_norm": 0.0, "learning_rate": 6.0252368106963244e-06, "loss": 0.0001, "step": 57640 }, { "epoch": 0.39773158464818176, "grad_norm": 1.2381713531794958e-05, "learning_rate": 6.024546903350881e-06, "loss": 0.0, "step": 57650 }, { "epoch": 0.3978005753827261, "grad_norm": 0.0, "learning_rate": 6.0238569960054374e-06, "loss": 0.0, "step": 57660 }, { "epoch": 0.3978695661172704, "grad_norm": 0.0, "learning_rate": 6.023167088659994e-06, "loss": 0.0, "step": 57670 }, { "epoch": 0.3979385568518148, "grad_norm": 8.026274755934537e-09, "learning_rate": 6.0224771813145496e-06, "loss": 0.0002, "step": 57680 }, { "epoch": 0.39800754758635914, "grad_norm": 0.0, "learning_rate": 6.021787273969106e-06, "loss": 0.0, "step": 57690 }, { "epoch": 0.3980765383209035, "grad_norm": 0.0, "learning_rate": 6.0210973666236625e-06, "loss": 0.0, "step": 57700 }, { "epoch": 0.39814552905544787, "grad_norm": 0.009298167191445827, "learning_rate": 6.020407459278219e-06, "loss": 0.0, "step": 57710 }, { "epoch": 0.3982145197899922, "grad_norm": 1.642198913032189e-05, "learning_rate": 6.0197175519327755e-06, "loss": 0.0, "step": 57720 }, { "epoch": 0.39828351052453653, "grad_norm": 0.0, "learning_rate": 6.019027644587332e-06, "loss": 0.0, "step": 57730 }, { "epoch": 0.3983525012590809, "grad_norm": 0.0, "learning_rate": 6.018337737241889e-06, "loss": 0.0, "step": 57740 }, { "epoch": 0.39842149199362525, "grad_norm": 0.0, "learning_rate": 6.017647829896446e-06, "loss": 0.0, "step": 57750 }, { "epoch": 0.3984904827281696, "grad_norm": 0.0, "learning_rate": 6.016957922551002e-06, "loss": 0.0, "step": 57760 }, { "epoch": 0.398559473462714, "grad_norm": 0.02610335871577263, "learning_rate": 6.016268015205559e-06, "loss": 0.0, "step": 57770 }, { "epoch": 0.3986284641972583, "grad_norm": 0.0, "learning_rate": 6.015578107860115e-06, "loss": 0.0, "step": 57780 }, { "epoch": 0.39869745493180264, "grad_norm": 0.0, "learning_rate": 6.014888200514671e-06, "loss": 0.0, "step": 57790 }, { "epoch": 0.398766445666347, "grad_norm": 2.4291327918035677e-06, "learning_rate": 6.0141982931692274e-06, "loss": 0.0, "step": 57800 }, { "epoch": 0.39883543640089136, "grad_norm": 1.2050320741252563e-08, "learning_rate": 6.013508385823784e-06, "loss": 0.0, "step": 57810 }, { "epoch": 0.3989044271354357, "grad_norm": 0.0, "learning_rate": 6.0128184784783404e-06, "loss": 0.0, "step": 57820 }, { "epoch": 0.3989734178699801, "grad_norm": 0.0, "learning_rate": 6.012128571132897e-06, "loss": 0.0, "step": 57830 }, { "epoch": 0.3990424086045244, "grad_norm": 280.42071533203125, "learning_rate": 6.011438663787453e-06, "loss": 0.0536, "step": 57840 }, { "epoch": 0.39911139933906875, "grad_norm": 0.0, "learning_rate": 6.010748756442011e-06, "loss": 0.0213, "step": 57850 }, { "epoch": 0.3991803900736131, "grad_norm": 0.0, "learning_rate": 6.010058849096567e-06, "loss": 0.0, "step": 57860 }, { "epoch": 0.3992493808081575, "grad_norm": 0.0, "learning_rate": 6.009368941751124e-06, "loss": 0.0, "step": 57870 }, { "epoch": 0.39931837154270183, "grad_norm": 0.0, "learning_rate": 6.00867903440568e-06, "loss": 0.0, "step": 57880 }, { "epoch": 0.39938736227724614, "grad_norm": 0.0, "learning_rate": 6.007989127060237e-06, "loss": 0.0, "step": 57890 }, { "epoch": 0.3994563530117905, "grad_norm": 0.0, "learning_rate": 6.007299219714793e-06, "loss": 0.0, "step": 57900 }, { "epoch": 0.39952534374633486, "grad_norm": 0.0, "learning_rate": 6.006609312369349e-06, "loss": 0.0, "step": 57910 }, { "epoch": 0.3995943344808792, "grad_norm": 0.13083402812480927, "learning_rate": 6.005919405023905e-06, "loss": 0.0, "step": 57920 }, { "epoch": 0.3996633252154236, "grad_norm": 0.0, "learning_rate": 6.005229497678462e-06, "loss": 0.0, "step": 57930 }, { "epoch": 0.39973231594996794, "grad_norm": 9.836826211540028e-05, "learning_rate": 6.004539590333018e-06, "loss": 0.0, "step": 57940 }, { "epoch": 0.39980130668451225, "grad_norm": 0.0, "learning_rate": 6.003849682987575e-06, "loss": 0.0, "step": 57950 }, { "epoch": 0.3998702974190566, "grad_norm": 0.0, "learning_rate": 6.003159775642132e-06, "loss": 0.0, "step": 57960 }, { "epoch": 0.39993928815360097, "grad_norm": 0.0, "learning_rate": 6.002469868296689e-06, "loss": 0.0, "step": 57970 }, { "epoch": 0.40000827888814533, "grad_norm": 0.0, "learning_rate": 6.001779960951245e-06, "loss": 0.0, "step": 57980 }, { "epoch": 0.4000772696226897, "grad_norm": 0.0, "learning_rate": 6.001090053605802e-06, "loss": 0.0, "step": 57990 }, { "epoch": 0.400146260357234, "grad_norm": 1.2928440810355823e-05, "learning_rate": 6.000400146260358e-06, "loss": 0.0, "step": 58000 }, { "epoch": 0.40021525109177836, "grad_norm": 0.0, "learning_rate": 5.999710238914915e-06, "loss": 0.0, "step": 58010 }, { "epoch": 0.4002842418263227, "grad_norm": 0.046858225017786026, "learning_rate": 5.99902033156947e-06, "loss": 0.0, "step": 58020 }, { "epoch": 0.4003532325608671, "grad_norm": 2.005464693866088e-06, "learning_rate": 5.998330424224027e-06, "loss": 0.0, "step": 58030 }, { "epoch": 0.40042222329541144, "grad_norm": 1.0044138321063656e-07, "learning_rate": 5.997640516878583e-06, "loss": 0.0, "step": 58040 }, { "epoch": 0.4004912140299558, "grad_norm": 0.00011691428517224267, "learning_rate": 5.99695060953314e-06, "loss": 0.0, "step": 58050 }, { "epoch": 0.4005602047645001, "grad_norm": 0.0, "learning_rate": 5.996260702187696e-06, "loss": 0.0, "step": 58060 }, { "epoch": 0.40062919549904447, "grad_norm": 0.0, "learning_rate": 5.9955707948422535e-06, "loss": 0.0, "step": 58070 }, { "epoch": 0.4006981862335888, "grad_norm": 4.244372121320339e-06, "learning_rate": 5.99488088749681e-06, "loss": 0.0, "step": 58080 }, { "epoch": 0.4007671769681332, "grad_norm": 0.0, "learning_rate": 5.9941909801513665e-06, "loss": 0.0, "step": 58090 }, { "epoch": 0.40083616770267755, "grad_norm": 0.0003876199189107865, "learning_rate": 5.993501072805923e-06, "loss": 0.0002, "step": 58100 }, { "epoch": 0.40090515843722185, "grad_norm": 0.003690573154017329, "learning_rate": 5.9928111654604795e-06, "loss": 0.0, "step": 58110 }, { "epoch": 0.4009741491717662, "grad_norm": 0.0, "learning_rate": 5.992121258115036e-06, "loss": 0.0, "step": 58120 }, { "epoch": 0.4010431399063106, "grad_norm": 0.008085214532911777, "learning_rate": 5.991431350769592e-06, "loss": 0.0, "step": 58130 }, { "epoch": 0.40111213064085494, "grad_norm": 0.0, "learning_rate": 5.990741443424148e-06, "loss": 0.002, "step": 58140 }, { "epoch": 0.4011811213753993, "grad_norm": 4.208536211081082e-06, "learning_rate": 5.990051536078705e-06, "loss": 0.0, "step": 58150 }, { "epoch": 0.40125011210994366, "grad_norm": 0.19480331242084503, "learning_rate": 5.989361628733261e-06, "loss": 0.0165, "step": 58160 }, { "epoch": 0.40131910284448796, "grad_norm": 4.850200525652326e-07, "learning_rate": 5.988671721387818e-06, "loss": 0.0, "step": 58170 }, { "epoch": 0.4013880935790323, "grad_norm": 0.0, "learning_rate": 5.987981814042375e-06, "loss": 0.0, "step": 58180 }, { "epoch": 0.4014570843135767, "grad_norm": 0.08359970152378082, "learning_rate": 5.987291906696931e-06, "loss": 0.0001, "step": 58190 }, { "epoch": 0.40152607504812104, "grad_norm": 6.887652830300794e-07, "learning_rate": 5.986601999351488e-06, "loss": 0.0, "step": 58200 }, { "epoch": 0.4015950657826654, "grad_norm": 0.0013147451682016253, "learning_rate": 5.985912092006044e-06, "loss": 0.0, "step": 58210 }, { "epoch": 0.4016640565172097, "grad_norm": 0.0, "learning_rate": 5.985222184660601e-06, "loss": 0.0, "step": 58220 }, { "epoch": 0.40173304725175407, "grad_norm": 0.0, "learning_rate": 5.984532277315157e-06, "loss": 0.0, "step": 58230 }, { "epoch": 0.40180203798629843, "grad_norm": 0.0, "learning_rate": 5.983842369969714e-06, "loss": 0.0, "step": 58240 }, { "epoch": 0.4018710287208428, "grad_norm": 0.0, "learning_rate": 5.9831524626242695e-06, "loss": 0.0, "step": 58250 }, { "epoch": 0.40194001945538715, "grad_norm": 0.0, "learning_rate": 5.982462555278826e-06, "loss": 0.0, "step": 58260 }, { "epoch": 0.4020090101899315, "grad_norm": 0.0, "learning_rate": 5.9817726479333825e-06, "loss": 0.0, "step": 58270 }, { "epoch": 0.4020780009244758, "grad_norm": 0.0, "learning_rate": 5.981082740587939e-06, "loss": 0.0, "step": 58280 }, { "epoch": 0.4021469916590202, "grad_norm": 0.0, "learning_rate": 5.980392833242496e-06, "loss": 0.0002, "step": 58290 }, { "epoch": 0.40221598239356454, "grad_norm": 0.0, "learning_rate": 5.979702925897053e-06, "loss": 0.0, "step": 58300 }, { "epoch": 0.4022849731281089, "grad_norm": 9.228948329109699e-06, "learning_rate": 5.979013018551609e-06, "loss": 0.0001, "step": 58310 }, { "epoch": 0.40235396386265326, "grad_norm": 0.0, "learning_rate": 5.978323111206166e-06, "loss": 0.0007, "step": 58320 }, { "epoch": 0.4024229545971976, "grad_norm": 0.5624728798866272, "learning_rate": 5.977633203860722e-06, "loss": 0.0005, "step": 58330 }, { "epoch": 0.40249194533174193, "grad_norm": 0.0, "learning_rate": 5.976943296515279e-06, "loss": 0.0, "step": 58340 }, { "epoch": 0.4025609360662863, "grad_norm": 0.0, "learning_rate": 5.976253389169835e-06, "loss": 0.0, "step": 58350 }, { "epoch": 0.40262992680083065, "grad_norm": 7.086819096002728e-05, "learning_rate": 5.975563481824391e-06, "loss": 0.0, "step": 58360 }, { "epoch": 0.402698917535375, "grad_norm": 0.0, "learning_rate": 5.974873574478947e-06, "loss": 0.0, "step": 58370 }, { "epoch": 0.4027679082699194, "grad_norm": 0.0, "learning_rate": 5.974183667133504e-06, "loss": 0.0, "step": 58380 }, { "epoch": 0.4028368990044637, "grad_norm": 0.0, "learning_rate": 5.97349375978806e-06, "loss": 0.0, "step": 58390 }, { "epoch": 0.40290588973900804, "grad_norm": 0.0003629494458436966, "learning_rate": 5.972803852442618e-06, "loss": 0.0001, "step": 58400 }, { "epoch": 0.4029748804735524, "grad_norm": 0.0, "learning_rate": 5.972113945097174e-06, "loss": 0.0, "step": 58410 }, { "epoch": 0.40304387120809676, "grad_norm": 0.0, "learning_rate": 5.971424037751731e-06, "loss": 0.0, "step": 58420 }, { "epoch": 0.4031128619426411, "grad_norm": 4.6831974032102153e-07, "learning_rate": 5.970734130406287e-06, "loss": 0.012, "step": 58430 }, { "epoch": 0.4031818526771855, "grad_norm": 0.3016344904899597, "learning_rate": 5.970044223060844e-06, "loss": 0.0001, "step": 58440 }, { "epoch": 0.4032508434117298, "grad_norm": 0.0, "learning_rate": 5.9693543157154e-06, "loss": 0.0, "step": 58450 }, { "epoch": 0.40331983414627415, "grad_norm": 0.0, "learning_rate": 5.968664408369957e-06, "loss": 0.0, "step": 58460 }, { "epoch": 0.4033888248808185, "grad_norm": 0.0, "learning_rate": 5.967974501024512e-06, "loss": 0.0, "step": 58470 }, { "epoch": 0.40345781561536287, "grad_norm": 0.0, "learning_rate": 5.967284593679069e-06, "loss": 0.0, "step": 58480 }, { "epoch": 0.40352680634990723, "grad_norm": 0.0, "learning_rate": 5.966594686333625e-06, "loss": 0.0, "step": 58490 }, { "epoch": 0.40359579708445154, "grad_norm": 0.0, "learning_rate": 5.965904778988182e-06, "loss": 0.0009, "step": 58500 }, { "epoch": 0.4036647878189959, "grad_norm": 0.0, "learning_rate": 5.965214871642739e-06, "loss": 0.0002, "step": 58510 }, { "epoch": 0.40373377855354026, "grad_norm": 0.0, "learning_rate": 5.964524964297296e-06, "loss": 0.0, "step": 58520 }, { "epoch": 0.4038027692880846, "grad_norm": 7.296505941667419e-07, "learning_rate": 5.963835056951852e-06, "loss": 0.0, "step": 58530 }, { "epoch": 0.403871760022629, "grad_norm": 0.0, "learning_rate": 5.963145149606409e-06, "loss": 0.0, "step": 58540 }, { "epoch": 0.40394075075717334, "grad_norm": 0.0, "learning_rate": 5.962455242260965e-06, "loss": 0.0, "step": 58550 }, { "epoch": 0.40400974149171764, "grad_norm": 0.0, "learning_rate": 5.9617653349155216e-06, "loss": 0.0002, "step": 58560 }, { "epoch": 0.404078732226262, "grad_norm": 0.0, "learning_rate": 5.961075427570078e-06, "loss": 0.0, "step": 58570 }, { "epoch": 0.40414772296080637, "grad_norm": 2.373796269239392e-05, "learning_rate": 5.9603855202246345e-06, "loss": 0.0111, "step": 58580 }, { "epoch": 0.4042167136953507, "grad_norm": 0.0, "learning_rate": 5.95969561287919e-06, "loss": 0.0, "step": 58590 }, { "epoch": 0.4042857044298951, "grad_norm": 0.0, "learning_rate": 5.959005705533747e-06, "loss": 0.0, "step": 58600 }, { "epoch": 0.4043546951644394, "grad_norm": 6.994843261054484e-06, "learning_rate": 5.958315798188303e-06, "loss": 0.0, "step": 58610 }, { "epoch": 0.40442368589898375, "grad_norm": 0.0, "learning_rate": 5.9576258908428605e-06, "loss": 0.0, "step": 58620 }, { "epoch": 0.4044926766335281, "grad_norm": 0.0, "learning_rate": 5.956935983497417e-06, "loss": 0.0, "step": 58630 }, { "epoch": 0.4045616673680725, "grad_norm": 0.0, "learning_rate": 5.9562460761519735e-06, "loss": 0.0, "step": 58640 }, { "epoch": 0.40463065810261684, "grad_norm": 0.0, "learning_rate": 5.95555616880653e-06, "loss": 0.0, "step": 58650 }, { "epoch": 0.4046996488371612, "grad_norm": 0.0, "learning_rate": 5.9548662614610865e-06, "loss": 0.0, "step": 58660 }, { "epoch": 0.4047686395717055, "grad_norm": 2.2283454248483991e-10, "learning_rate": 5.954176354115643e-06, "loss": 0.0, "step": 58670 }, { "epoch": 0.40483763030624986, "grad_norm": 0.0, "learning_rate": 5.9534864467701994e-06, "loss": 0.0, "step": 58680 }, { "epoch": 0.4049066210407942, "grad_norm": 0.0, "learning_rate": 5.952796539424756e-06, "loss": 0.0003, "step": 58690 }, { "epoch": 0.4049756117753386, "grad_norm": 0.0, "learning_rate": 5.952106632079312e-06, "loss": 0.0, "step": 58700 }, { "epoch": 0.40504460250988295, "grad_norm": 0.0, "learning_rate": 5.951416724733868e-06, "loss": 0.0, "step": 58710 }, { "epoch": 0.40511359324442725, "grad_norm": 0.0, "learning_rate": 5.9507268173884246e-06, "loss": 0.0025, "step": 58720 }, { "epoch": 0.4051825839789716, "grad_norm": 0.0, "learning_rate": 5.950036910042982e-06, "loss": 0.0003, "step": 58730 }, { "epoch": 0.40525157471351597, "grad_norm": 0.0, "learning_rate": 5.949347002697538e-06, "loss": 0.0, "step": 58740 }, { "epoch": 0.40532056544806033, "grad_norm": 0.0, "learning_rate": 5.948657095352095e-06, "loss": 0.0007, "step": 58750 }, { "epoch": 0.4053895561826047, "grad_norm": 4.398719610776425e-08, "learning_rate": 5.947967188006651e-06, "loss": 0.0259, "step": 58760 }, { "epoch": 0.40545854691714905, "grad_norm": 0.0, "learning_rate": 5.947277280661208e-06, "loss": 0.0129, "step": 58770 }, { "epoch": 0.40552753765169336, "grad_norm": 0.0, "learning_rate": 5.946587373315764e-06, "loss": 0.0, "step": 58780 }, { "epoch": 0.4055965283862377, "grad_norm": 0.0, "learning_rate": 5.945897465970321e-06, "loss": 0.0001, "step": 58790 }, { "epoch": 0.4056655191207821, "grad_norm": 0.577299952507019, "learning_rate": 5.945207558624877e-06, "loss": 0.0001, "step": 58800 }, { "epoch": 0.40573450985532644, "grad_norm": 0.0, "learning_rate": 5.944586642013977e-06, "loss": 0.1159, "step": 58810 }, { "epoch": 0.4058035005898708, "grad_norm": 0.0, "learning_rate": 5.943896734668535e-06, "loss": 0.0002, "step": 58820 }, { "epoch": 0.4058724913244151, "grad_norm": 6.334661483764648, "learning_rate": 5.943206827323091e-06, "loss": 0.0011, "step": 58830 }, { "epoch": 0.40594148205895947, "grad_norm": 0.0, "learning_rate": 5.9425169199776475e-06, "loss": 0.0, "step": 58840 }, { "epoch": 0.40601047279350383, "grad_norm": 0.0, "learning_rate": 5.941827012632204e-06, "loss": 0.0, "step": 58850 }, { "epoch": 0.4060794635280482, "grad_norm": 0.0, "learning_rate": 5.9411371052867605e-06, "loss": 0.0021, "step": 58860 }, { "epoch": 0.40614845426259255, "grad_norm": 4.860330093947596e-08, "learning_rate": 5.940447197941317e-06, "loss": 0.0, "step": 58870 }, { "epoch": 0.4062174449971369, "grad_norm": 229.7114715576172, "learning_rate": 5.9397572905958734e-06, "loss": 0.048, "step": 58880 }, { "epoch": 0.4062864357316812, "grad_norm": 5.710251116397558e-08, "learning_rate": 5.93906738325043e-06, "loss": 0.0015, "step": 58890 }, { "epoch": 0.4063554264662256, "grad_norm": 0.0, "learning_rate": 5.938377475904986e-06, "loss": 0.0, "step": 58900 }, { "epoch": 0.40642441720076994, "grad_norm": 0.0, "learning_rate": 5.937687568559543e-06, "loss": 0.0, "step": 58910 }, { "epoch": 0.4064934079353143, "grad_norm": 0.0, "learning_rate": 5.9369976612140985e-06, "loss": 0.0693, "step": 58920 }, { "epoch": 0.40656239866985866, "grad_norm": 4.417508581155971e-10, "learning_rate": 5.936307753868657e-06, "loss": 0.0, "step": 58930 }, { "epoch": 0.40663138940440297, "grad_norm": 0.0, "learning_rate": 5.935617846523213e-06, "loss": 0.0, "step": 58940 }, { "epoch": 0.4067003801389473, "grad_norm": 0.0, "learning_rate": 5.934927939177769e-06, "loss": 0.0003, "step": 58950 }, { "epoch": 0.4067693708734917, "grad_norm": 0.0, "learning_rate": 5.934238031832325e-06, "loss": 0.0875, "step": 58960 }, { "epoch": 0.40683836160803605, "grad_norm": 0.0, "learning_rate": 5.933548124486882e-06, "loss": 0.0, "step": 58970 }, { "epoch": 0.4069073523425804, "grad_norm": 0.0, "learning_rate": 5.932858217141438e-06, "loss": 0.0009, "step": 58980 }, { "epoch": 0.40697634307712477, "grad_norm": 0.0, "learning_rate": 5.932168309795995e-06, "loss": 0.0, "step": 58990 }, { "epoch": 0.4070453338116691, "grad_norm": 0.0, "learning_rate": 5.931478402450551e-06, "loss": 0.0, "step": 59000 }, { "epoch": 0.40711432454621344, "grad_norm": 0.0, "learning_rate": 5.930788495105108e-06, "loss": 0.0, "step": 59010 }, { "epoch": 0.4071833152807578, "grad_norm": 0.0, "learning_rate": 5.930098587759664e-06, "loss": 0.1049, "step": 59020 }, { "epoch": 0.40725230601530216, "grad_norm": 8.108017921447754, "learning_rate": 5.92940868041422e-06, "loss": 0.0011, "step": 59030 }, { "epoch": 0.4073212967498465, "grad_norm": 0.0, "learning_rate": 5.928718773068778e-06, "loss": 0.0, "step": 59040 }, { "epoch": 0.4073902874843908, "grad_norm": 2.02359977663491e-08, "learning_rate": 5.928028865723335e-06, "loss": 0.2521, "step": 59050 }, { "epoch": 0.4074592782189352, "grad_norm": 2.163207000194234e-06, "learning_rate": 5.92733895837789e-06, "loss": 0.0387, "step": 59060 }, { "epoch": 0.40752826895347954, "grad_norm": 0.0, "learning_rate": 5.926649051032447e-06, "loss": 0.0147, "step": 59070 }, { "epoch": 0.4075972596880239, "grad_norm": 0.007901228964328766, "learning_rate": 5.925959143687003e-06, "loss": 0.0, "step": 59080 }, { "epoch": 0.40766625042256827, "grad_norm": 0.0, "learning_rate": 5.92526923634156e-06, "loss": 0.0, "step": 59090 }, { "epoch": 0.4077352411571126, "grad_norm": 0.0, "learning_rate": 5.924579328996116e-06, "loss": 0.0048, "step": 59100 }, { "epoch": 0.40780423189165693, "grad_norm": 0.0, "learning_rate": 5.923889421650673e-06, "loss": 0.0, "step": 59110 }, { "epoch": 0.4078732226262013, "grad_norm": 0.0, "learning_rate": 5.923199514305229e-06, "loss": 0.007, "step": 59120 }, { "epoch": 0.40794221336074565, "grad_norm": 0.0, "learning_rate": 5.922509606959786e-06, "loss": 0.0, "step": 59130 }, { "epoch": 0.40801120409529, "grad_norm": 0.0, "learning_rate": 5.921819699614342e-06, "loss": 0.0005, "step": 59140 }, { "epoch": 0.4080801948298344, "grad_norm": 0.0, "learning_rate": 5.9211297922688995e-06, "loss": 0.0, "step": 59150 }, { "epoch": 0.4081491855643787, "grad_norm": 0.0, "learning_rate": 5.920439884923456e-06, "loss": 0.0, "step": 59160 }, { "epoch": 0.40821817629892304, "grad_norm": 4.628163408071373e-10, "learning_rate": 5.919749977578012e-06, "loss": 0.0, "step": 59170 }, { "epoch": 0.4082871670334674, "grad_norm": 0.0007107885321602225, "learning_rate": 5.919060070232568e-06, "loss": 0.0, "step": 59180 }, { "epoch": 0.40835615776801176, "grad_norm": 0.0, "learning_rate": 5.918370162887125e-06, "loss": 0.0, "step": 59190 }, { "epoch": 0.4084251485025561, "grad_norm": 0.0, "learning_rate": 5.917680255541681e-06, "loss": 0.0001, "step": 59200 }, { "epoch": 0.4084941392371005, "grad_norm": 0.0, "learning_rate": 5.916990348196238e-06, "loss": 0.0, "step": 59210 }, { "epoch": 0.4085631299716448, "grad_norm": 0.0, "learning_rate": 5.916300440850794e-06, "loss": 0.0002, "step": 59220 }, { "epoch": 0.40863212070618915, "grad_norm": 8.786363303592282e-10, "learning_rate": 5.915610533505351e-06, "loss": 0.0, "step": 59230 }, { "epoch": 0.4087011114407335, "grad_norm": 0.0, "learning_rate": 5.914920626159907e-06, "loss": 0.0, "step": 59240 }, { "epoch": 0.40877010217527787, "grad_norm": 0.00012982876796741039, "learning_rate": 5.9142307188144636e-06, "loss": 0.0001, "step": 59250 }, { "epoch": 0.40883909290982223, "grad_norm": 0.0, "learning_rate": 5.913540811469021e-06, "loss": 0.0, "step": 59260 }, { "epoch": 0.40890808364436654, "grad_norm": 0.0, "learning_rate": 5.912850904123577e-06, "loss": 0.0, "step": 59270 }, { "epoch": 0.4089770743789109, "grad_norm": 0.0, "learning_rate": 5.912160996778134e-06, "loss": 0.0, "step": 59280 }, { "epoch": 0.40904606511345526, "grad_norm": 0.0, "learning_rate": 5.9114710894326895e-06, "loss": 0.0002, "step": 59290 }, { "epoch": 0.4091150558479996, "grad_norm": 0.00011850119335576892, "learning_rate": 5.910781182087246e-06, "loss": 0.0032, "step": 59300 }, { "epoch": 0.409184046582544, "grad_norm": 0.0, "learning_rate": 5.9100912747418025e-06, "loss": 0.0, "step": 59310 }, { "epoch": 0.40925303731708834, "grad_norm": 0.0, "learning_rate": 5.909401367396359e-06, "loss": 0.0, "step": 59320 }, { "epoch": 0.40932202805163265, "grad_norm": 0.0003999821492470801, "learning_rate": 5.9087114600509155e-06, "loss": 0.0, "step": 59330 }, { "epoch": 0.409391018786177, "grad_norm": 0.0, "learning_rate": 5.908021552705472e-06, "loss": 0.0088, "step": 59340 }, { "epoch": 0.40946000952072137, "grad_norm": 0.0, "learning_rate": 5.9073316453600285e-06, "loss": 0.0, "step": 59350 }, { "epoch": 0.40952900025526573, "grad_norm": 9.623555342841428e-06, "learning_rate": 5.906641738014585e-06, "loss": 0.0, "step": 59360 }, { "epoch": 0.4095979909898101, "grad_norm": 0.0, "learning_rate": 5.905951830669142e-06, "loss": 0.0, "step": 59370 }, { "epoch": 0.4096669817243544, "grad_norm": 0.0, "learning_rate": 5.905261923323699e-06, "loss": 0.0, "step": 59380 }, { "epoch": 0.40973597245889876, "grad_norm": 0.0, "learning_rate": 5.904572015978255e-06, "loss": 0.0, "step": 59390 }, { "epoch": 0.4098049631934431, "grad_norm": 0.0, "learning_rate": 5.903882108632811e-06, "loss": 0.0051, "step": 59400 }, { "epoch": 0.4098739539279875, "grad_norm": 2.372068905742708e-07, "learning_rate": 5.903192201287367e-06, "loss": 0.0, "step": 59410 }, { "epoch": 0.40994294466253184, "grad_norm": 0.4272094964981079, "learning_rate": 5.902502293941924e-06, "loss": 0.0001, "step": 59420 }, { "epoch": 0.4100119353970762, "grad_norm": 0.0, "learning_rate": 5.90181238659648e-06, "loss": 0.0, "step": 59430 }, { "epoch": 0.4100809261316205, "grad_norm": 0.0, "learning_rate": 5.901122479251037e-06, "loss": 0.0002, "step": 59440 }, { "epoch": 0.41014991686616487, "grad_norm": 0.0, "learning_rate": 5.900432571905593e-06, "loss": 0.0, "step": 59450 }, { "epoch": 0.4102189076007092, "grad_norm": 0.0, "learning_rate": 5.89974266456015e-06, "loss": 0.0006, "step": 59460 }, { "epoch": 0.4102878983352536, "grad_norm": 4.763747174507671e-10, "learning_rate": 5.899052757214706e-06, "loss": 0.482, "step": 59470 }, { "epoch": 0.41035688906979795, "grad_norm": 0.0, "learning_rate": 5.898362849869264e-06, "loss": 0.1172, "step": 59480 }, { "epoch": 0.41042587980434225, "grad_norm": 0.002104072831571102, "learning_rate": 5.89767294252382e-06, "loss": 0.001, "step": 59490 }, { "epoch": 0.4104948705388866, "grad_norm": 0.0, "learning_rate": 5.896983035178377e-06, "loss": 0.0, "step": 59500 }, { "epoch": 0.410563861273431, "grad_norm": 0.0, "learning_rate": 5.896293127832932e-06, "loss": 0.0, "step": 59510 }, { "epoch": 0.41063285200797534, "grad_norm": 0.0, "learning_rate": 5.895603220487489e-06, "loss": 0.0, "step": 59520 }, { "epoch": 0.4107018427425197, "grad_norm": 0.0, "learning_rate": 5.894913313142045e-06, "loss": 0.0, "step": 59530 }, { "epoch": 0.41077083347706406, "grad_norm": 0.0, "learning_rate": 5.894223405796602e-06, "loss": 0.0, "step": 59540 }, { "epoch": 0.41083982421160836, "grad_norm": 0.0, "learning_rate": 5.893533498451158e-06, "loss": 0.0, "step": 59550 }, { "epoch": 0.4109088149461527, "grad_norm": 0.0, "learning_rate": 5.892843591105715e-06, "loss": 0.0, "step": 59560 }, { "epoch": 0.4109778056806971, "grad_norm": 0.0, "learning_rate": 5.892153683760271e-06, "loss": 0.0003, "step": 59570 }, { "epoch": 0.41104679641524144, "grad_norm": 0.0, "learning_rate": 5.891463776414828e-06, "loss": 0.0, "step": 59580 }, { "epoch": 0.4111157871497858, "grad_norm": 0.0, "learning_rate": 5.890773869069385e-06, "loss": 0.0, "step": 59590 }, { "epoch": 0.4111847778843301, "grad_norm": 0.00011904434359166771, "learning_rate": 5.890083961723942e-06, "loss": 0.0, "step": 59600 }, { "epoch": 0.41125376861887447, "grad_norm": 0.0, "learning_rate": 5.889394054378498e-06, "loss": 0.0, "step": 59610 }, { "epoch": 0.41132275935341883, "grad_norm": 0.0, "learning_rate": 5.8887041470330546e-06, "loss": 0.0, "step": 59620 }, { "epoch": 0.4113917500879632, "grad_norm": 0.0, "learning_rate": 5.88801423968761e-06, "loss": 0.0, "step": 59630 }, { "epoch": 0.41146074082250755, "grad_norm": 9.936236165231094e-05, "learning_rate": 5.887324332342167e-06, "loss": 0.0, "step": 59640 }, { "epoch": 0.4115297315570519, "grad_norm": 2.134287058197515e-08, "learning_rate": 5.886634424996723e-06, "loss": 0.0, "step": 59650 }, { "epoch": 0.4115987222915962, "grad_norm": 0.0, "learning_rate": 5.88594451765128e-06, "loss": 0.0, "step": 59660 }, { "epoch": 0.4116677130261406, "grad_norm": 0.0, "learning_rate": 5.885254610305836e-06, "loss": 0.0, "step": 59670 }, { "epoch": 0.41173670376068494, "grad_norm": 0.0, "learning_rate": 5.884564702960393e-06, "loss": 0.0001, "step": 59680 }, { "epoch": 0.4118056944952293, "grad_norm": 0.0, "learning_rate": 5.883874795614949e-06, "loss": 0.0, "step": 59690 }, { "epoch": 0.41187468522977366, "grad_norm": 0.0, "learning_rate": 5.8831848882695065e-06, "loss": 0.0, "step": 59700 }, { "epoch": 0.41194367596431797, "grad_norm": 0.0, "learning_rate": 5.882494980924063e-06, "loss": 0.0003, "step": 59710 }, { "epoch": 0.41201266669886233, "grad_norm": 0.0, "learning_rate": 5.8818050735786195e-06, "loss": 0.0, "step": 59720 }, { "epoch": 0.4120816574334067, "grad_norm": 0.0, "learning_rate": 5.881115166233176e-06, "loss": 0.0, "step": 59730 }, { "epoch": 0.41215064816795105, "grad_norm": 0.0, "learning_rate": 5.880425258887732e-06, "loss": 0.0, "step": 59740 }, { "epoch": 0.4122196389024954, "grad_norm": 0.0, "learning_rate": 5.879735351542288e-06, "loss": 0.0, "step": 59750 }, { "epoch": 0.41228862963703977, "grad_norm": 675.55224609375, "learning_rate": 5.879045444196845e-06, "loss": 0.5651, "step": 59760 }, { "epoch": 0.4123576203715841, "grad_norm": 0.0, "learning_rate": 5.878355536851401e-06, "loss": 0.0313, "step": 59770 }, { "epoch": 0.41242661110612844, "grad_norm": 0.0, "learning_rate": 5.8776656295059576e-06, "loss": 0.0, "step": 59780 }, { "epoch": 0.4124956018406728, "grad_norm": 0.0, "learning_rate": 5.876975722160514e-06, "loss": 0.0018, "step": 59790 }, { "epoch": 0.41256459257521716, "grad_norm": 0.0, "learning_rate": 5.8762858148150705e-06, "loss": 0.0, "step": 59800 }, { "epoch": 0.4126335833097615, "grad_norm": 0.0, "learning_rate": 5.875595907469628e-06, "loss": 0.0, "step": 59810 }, { "epoch": 0.4127025740443058, "grad_norm": 3.3830306165327784e-06, "learning_rate": 5.874906000124184e-06, "loss": 0.0, "step": 59820 }, { "epoch": 0.4127715647788502, "grad_norm": 0.0, "learning_rate": 5.874216092778741e-06, "loss": 0.0, "step": 59830 }, { "epoch": 0.41284055551339455, "grad_norm": 0.0, "learning_rate": 5.873526185433297e-06, "loss": 0.0, "step": 59840 }, { "epoch": 0.4129095462479389, "grad_norm": 0.0, "learning_rate": 5.872836278087853e-06, "loss": 0.0153, "step": 59850 }, { "epoch": 0.41297853698248327, "grad_norm": 0.0, "learning_rate": 5.8721463707424095e-06, "loss": 0.0035, "step": 59860 }, { "epoch": 0.41304752771702763, "grad_norm": 8.570363263515901e-08, "learning_rate": 5.871456463396966e-06, "loss": 0.0, "step": 59870 }, { "epoch": 0.41311651845157193, "grad_norm": 0.0, "learning_rate": 5.8707665560515225e-06, "loss": 0.2245, "step": 59880 }, { "epoch": 0.4131855091861163, "grad_norm": 0.0, "learning_rate": 5.870076648706079e-06, "loss": 0.0, "step": 59890 }, { "epoch": 0.41325449992066066, "grad_norm": 0.0, "learning_rate": 5.8693867413606354e-06, "loss": 0.0, "step": 59900 }, { "epoch": 0.413323490655205, "grad_norm": 0.0, "learning_rate": 5.868696834015192e-06, "loss": 0.0001, "step": 59910 }, { "epoch": 0.4133924813897494, "grad_norm": 0.0014781218487769365, "learning_rate": 5.868006926669749e-06, "loss": 0.0, "step": 59920 }, { "epoch": 0.4134614721242937, "grad_norm": 0.0, "learning_rate": 5.867317019324306e-06, "loss": 0.0, "step": 59930 }, { "epoch": 0.41353046285883804, "grad_norm": 0.0, "learning_rate": 5.866627111978862e-06, "loss": 0.0002, "step": 59940 }, { "epoch": 0.4135994535933824, "grad_norm": 0.0, "learning_rate": 5.865937204633419e-06, "loss": 0.0, "step": 59950 }, { "epoch": 0.41366844432792677, "grad_norm": 0.0, "learning_rate": 5.865247297287975e-06, "loss": 0.0001, "step": 59960 }, { "epoch": 0.4137374350624711, "grad_norm": 0.0, "learning_rate": 5.864557389942531e-06, "loss": 0.0006, "step": 59970 }, { "epoch": 0.4138064257970155, "grad_norm": 45.626197814941406, "learning_rate": 5.863867482597087e-06, "loss": 0.0077, "step": 59980 }, { "epoch": 0.4138754165315598, "grad_norm": 102.7043228149414, "learning_rate": 5.863177575251644e-06, "loss": 0.0149, "step": 59990 }, { "epoch": 0.41394440726610415, "grad_norm": 0.0, "learning_rate": 5.8624876679062e-06, "loss": 0.0001, "step": 60000 } ], "logging_steps": 10, "max_steps": 144947, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }