{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 12464, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 6.7782012296174035, "learning_rate": 2.6737967914438503e-08, "loss": 1.3154, "step": 1 }, { "epoch": 0.0, "grad_norm": 7.401347519306018, "learning_rate": 5.3475935828877005e-08, "loss": 1.372, "step": 2 }, { "epoch": 0.0, "grad_norm": 6.5985544146511925, "learning_rate": 8.021390374331552e-08, "loss": 1.3674, "step": 3 }, { "epoch": 0.0, "grad_norm": 2.416751268109562, "learning_rate": 1.0695187165775401e-07, "loss": 1.3505, "step": 4 }, { "epoch": 0.0, "grad_norm": 7.499206370703229, "learning_rate": 1.3368983957219251e-07, "loss": 1.4123, "step": 5 }, { "epoch": 0.0, "grad_norm": 7.857202154933578, "learning_rate": 1.6042780748663104e-07, "loss": 1.4046, "step": 6 }, { "epoch": 0.0, "grad_norm": 7.066284844533282, "learning_rate": 1.8716577540106952e-07, "loss": 1.4246, "step": 7 }, { "epoch": 0.0, "grad_norm": 6.130085775570846, "learning_rate": 2.1390374331550802e-07, "loss": 1.2694, "step": 8 }, { "epoch": 0.0, "grad_norm": 7.260778613597487, "learning_rate": 2.4064171122994655e-07, "loss": 1.3858, "step": 9 }, { "epoch": 0.0, "grad_norm": 6.2677734603592175, "learning_rate": 2.6737967914438503e-07, "loss": 1.3965, "step": 10 }, { "epoch": 0.0, "grad_norm": 7.198162166213362, "learning_rate": 2.9411764705882356e-07, "loss": 1.4326, "step": 11 }, { "epoch": 0.0, "grad_norm": 7.548656471974227, "learning_rate": 3.208556149732621e-07, "loss": 1.4168, "step": 12 }, { "epoch": 0.0, "grad_norm": 6.9047660966760445, "learning_rate": 3.4759358288770056e-07, "loss": 1.3972, "step": 13 }, { "epoch": 0.0, "grad_norm": 7.068196959142517, "learning_rate": 3.7433155080213904e-07, "loss": 1.4404, "step": 14 }, { "epoch": 0.0, "grad_norm": 7.100133406123299, "learning_rate": 4.0106951871657757e-07, "loss": 1.37, "step": 15 }, { "epoch": 0.0, "grad_norm": 6.38010898431197, "learning_rate": 4.2780748663101604e-07, "loss": 1.4224, "step": 16 }, { "epoch": 0.0, "grad_norm": 6.970892738698906, "learning_rate": 4.5454545454545457e-07, "loss": 1.2825, "step": 17 }, { "epoch": 0.0, "grad_norm": 7.659361259195055, "learning_rate": 4.812834224598931e-07, "loss": 1.3848, "step": 18 }, { "epoch": 0.0, "grad_norm": 7.351085437989884, "learning_rate": 5.080213903743316e-07, "loss": 1.5052, "step": 19 }, { "epoch": 0.0, "grad_norm": 6.48655672103779, "learning_rate": 5.347593582887701e-07, "loss": 1.3905, "step": 20 }, { "epoch": 0.0, "grad_norm": 2.434512621861015, "learning_rate": 5.614973262032086e-07, "loss": 1.3759, "step": 21 }, { "epoch": 0.0, "grad_norm": 5.9997292246294105, "learning_rate": 5.882352941176471e-07, "loss": 1.3716, "step": 22 }, { "epoch": 0.0, "grad_norm": 6.23579727651579, "learning_rate": 6.149732620320856e-07, "loss": 1.3089, "step": 23 }, { "epoch": 0.0, "grad_norm": 7.287208076755039, "learning_rate": 6.417112299465242e-07, "loss": 1.4492, "step": 24 }, { "epoch": 0.0, "grad_norm": 6.8678880297581495, "learning_rate": 6.684491978609627e-07, "loss": 1.3663, "step": 25 }, { "epoch": 0.0, "grad_norm": 6.117495029815131, "learning_rate": 6.951871657754011e-07, "loss": 1.3751, "step": 26 }, { "epoch": 0.0, "grad_norm": 5.902439861327881, "learning_rate": 7.219251336898397e-07, "loss": 1.3404, "step": 27 }, { "epoch": 0.0, "grad_norm": 6.394737690984814, "learning_rate": 7.486631016042781e-07, "loss": 1.4086, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.40858594970431, "learning_rate": 7.754010695187167e-07, "loss": 1.385, "step": 29 }, { "epoch": 0.0, "grad_norm": 5.849514966612606, "learning_rate": 8.021390374331551e-07, "loss": 1.3826, "step": 30 }, { "epoch": 0.0, "grad_norm": 5.4908123251624765, "learning_rate": 8.288770053475937e-07, "loss": 1.3148, "step": 31 }, { "epoch": 0.0, "grad_norm": 5.81061860321785, "learning_rate": 8.556149732620321e-07, "loss": 1.3841, "step": 32 }, { "epoch": 0.0, "grad_norm": 5.012893559459193, "learning_rate": 8.823529411764707e-07, "loss": 1.3631, "step": 33 }, { "epoch": 0.0, "grad_norm": 5.76203032889139, "learning_rate": 9.090909090909091e-07, "loss": 1.3023, "step": 34 }, { "epoch": 0.0, "grad_norm": 5.576812547841188, "learning_rate": 9.358288770053477e-07, "loss": 1.3222, "step": 35 }, { "epoch": 0.0, "grad_norm": 5.660928583706245, "learning_rate": 9.625668449197862e-07, "loss": 1.3277, "step": 36 }, { "epoch": 0.0, "grad_norm": 4.714892183621894, "learning_rate": 9.893048128342248e-07, "loss": 1.3456, "step": 37 }, { "epoch": 0.0, "grad_norm": 4.303008621046351, "learning_rate": 1.0160427807486633e-06, "loss": 1.2667, "step": 38 }, { "epoch": 0.0, "grad_norm": 4.665456931966554, "learning_rate": 1.0427807486631017e-06, "loss": 1.3254, "step": 39 }, { "epoch": 0.0, "grad_norm": 4.145129369536635, "learning_rate": 1.0695187165775401e-06, "loss": 1.3165, "step": 40 }, { "epoch": 0.0, "grad_norm": 4.324919286220631, "learning_rate": 1.0962566844919787e-06, "loss": 1.3135, "step": 41 }, { "epoch": 0.0, "grad_norm": 3.8932822587645965, "learning_rate": 1.1229946524064172e-06, "loss": 1.3801, "step": 42 }, { "epoch": 0.0, "grad_norm": 3.7533254164181646, "learning_rate": 1.1497326203208558e-06, "loss": 1.3273, "step": 43 }, { "epoch": 0.0, "grad_norm": 3.6310745974008025, "learning_rate": 1.1764705882352942e-06, "loss": 1.2343, "step": 44 }, { "epoch": 0.0, "grad_norm": 3.316246853177768, "learning_rate": 1.2032085561497326e-06, "loss": 1.2036, "step": 45 }, { "epoch": 0.0, "grad_norm": 3.4306426605255624, "learning_rate": 1.2299465240641713e-06, "loss": 1.2371, "step": 46 }, { "epoch": 0.0, "grad_norm": 3.2239862034255538, "learning_rate": 1.2566844919786097e-06, "loss": 1.2778, "step": 47 }, { "epoch": 0.0, "grad_norm": 3.0936291849127606, "learning_rate": 1.2834224598930483e-06, "loss": 1.2062, "step": 48 }, { "epoch": 0.0, "grad_norm": 3.2666271086267358, "learning_rate": 1.3101604278074868e-06, "loss": 1.1664, "step": 49 }, { "epoch": 0.0, "grad_norm": 2.800576975918958, "learning_rate": 1.3368983957219254e-06, "loss": 1.2244, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.4502554235688105, "learning_rate": 1.3636363636363636e-06, "loss": 1.1918, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.3007937184620317, "learning_rate": 1.3903743315508022e-06, "loss": 1.1664, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.1931360501804686, "learning_rate": 1.4171122994652409e-06, "loss": 1.2175, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.111515784653167, "learning_rate": 1.4438502673796793e-06, "loss": 1.2425, "step": 54 }, { "epoch": 0.0, "grad_norm": 2.144436947205643, "learning_rate": 1.4705882352941177e-06, "loss": 1.1785, "step": 55 }, { "epoch": 0.0, "grad_norm": 2.1765083947098374, "learning_rate": 1.4973262032085562e-06, "loss": 1.2607, "step": 56 }, { "epoch": 0.0, "grad_norm": 2.07671033670113, "learning_rate": 1.5240641711229948e-06, "loss": 1.2282, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.5725341201883896, "learning_rate": 1.5508021390374334e-06, "loss": 1.3187, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.9335282059481156, "learning_rate": 1.5775401069518716e-06, "loss": 1.195, "step": 59 }, { "epoch": 0.0, "grad_norm": 2.009007179067198, "learning_rate": 1.6042780748663103e-06, "loss": 1.1317, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.9694706636693953, "learning_rate": 1.631016042780749e-06, "loss": 1.1673, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.9359575209049924, "learning_rate": 1.6577540106951873e-06, "loss": 1.1938, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.9420895367049347, "learning_rate": 1.684491978609626e-06, "loss": 1.2261, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.8773321072783626, "learning_rate": 1.7112299465240642e-06, "loss": 1.1594, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.9401452492937292, "learning_rate": 1.7379679144385028e-06, "loss": 1.1125, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.7415120315561607, "learning_rate": 1.7647058823529414e-06, "loss": 1.1536, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.77922139292567, "learning_rate": 1.7914438502673799e-06, "loss": 1.1667, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.943977056834105, "learning_rate": 1.8181818181818183e-06, "loss": 1.0904, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.6636556551761967, "learning_rate": 1.8449197860962567e-06, "loss": 1.1664, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.863746578896042, "learning_rate": 1.8716577540106954e-06, "loss": 1.1581, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.406990767360614, "learning_rate": 1.898395721925134e-06, "loss": 1.3855, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.7471377534749015, "learning_rate": 1.9251336898395724e-06, "loss": 1.1278, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.7772450562842448, "learning_rate": 1.951871657754011e-06, "loss": 1.0773, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.6416503558736697, "learning_rate": 1.9786096256684497e-06, "loss": 1.1633, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.7553053622221313, "learning_rate": 2.0053475935828877e-06, "loss": 1.0293, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.9311381021274994, "learning_rate": 2.0320855614973265e-06, "loss": 1.1742, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.7984719062272627, "learning_rate": 2.058823529411765e-06, "loss": 1.1453, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.9571998762030964, "learning_rate": 2.0855614973262034e-06, "loss": 1.1183, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.7348247070925025, "learning_rate": 2.112299465240642e-06, "loss": 1.0005, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.5404069057449405, "learning_rate": 2.1390374331550802e-06, "loss": 1.1682, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.6872028718730576, "learning_rate": 2.165775401069519e-06, "loss": 1.0626, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.708502165239471, "learning_rate": 2.1925133689839575e-06, "loss": 1.1357, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.6395311626671796, "learning_rate": 2.219251336898396e-06, "loss": 1.1261, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.7113249341359087, "learning_rate": 2.2459893048128343e-06, "loss": 1.128, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.614926585183808, "learning_rate": 2.2727272727272728e-06, "loss": 1.1306, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.8131438441645662, "learning_rate": 2.2994652406417116e-06, "loss": 1.1958, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.680183820477123, "learning_rate": 2.32620320855615e-06, "loss": 1.0452, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.750740867921424, "learning_rate": 2.3529411764705885e-06, "loss": 1.1231, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.6238086067073154, "learning_rate": 2.379679144385027e-06, "loss": 1.0713, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.6032620037399257, "learning_rate": 2.4064171122994653e-06, "loss": 1.1075, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.6454970916236602, "learning_rate": 2.433155080213904e-06, "loss": 1.1106, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.6997486806165423, "learning_rate": 2.4598930481283426e-06, "loss": 1.1078, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.5688177252105697, "learning_rate": 2.486631016042781e-06, "loss": 1.0592, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.6740780022823822, "learning_rate": 2.5133689839572194e-06, "loss": 1.0854, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.36462046075457, "learning_rate": 2.5401069518716583e-06, "loss": 1.3855, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.6674656216077521, "learning_rate": 2.5668449197860967e-06, "loss": 1.0836, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.1659189286585874, "learning_rate": 2.5935828877005347e-06, "loss": 1.3713, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.7331399996139798, "learning_rate": 2.6203208556149735e-06, "loss": 1.0612, "step": 98 }, { "epoch": 0.01, "grad_norm": 1.6904850517612737, "learning_rate": 2.647058823529412e-06, "loss": 1.1184, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.9867508790537807, "learning_rate": 2.673796791443851e-06, "loss": 1.3398, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.6174079261988488, "learning_rate": 2.7005347593582892e-06, "loss": 1.063, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.6733551851126403, "learning_rate": 2.7272727272727272e-06, "loss": 1.0697, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.6394187648579581, "learning_rate": 2.754010695187166e-06, "loss": 1.3216, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.3311306269101206, "learning_rate": 2.7807486631016045e-06, "loss": 1.0237, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.6841728277715124, "learning_rate": 2.807486631016043e-06, "loss": 1.0376, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.728148138182015, "learning_rate": 2.8342245989304818e-06, "loss": 1.0421, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.6563947480680483, "learning_rate": 2.8609625668449198e-06, "loss": 1.0944, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.7968837108862217, "learning_rate": 2.8877005347593586e-06, "loss": 1.0684, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.8330604511521116, "learning_rate": 2.914438502673797e-06, "loss": 1.0887, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.5449051036528985, "learning_rate": 2.9411764705882355e-06, "loss": 1.0419, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.325087987969179, "learning_rate": 2.9679144385026743e-06, "loss": 1.3336, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.26116633828592, "learning_rate": 2.9946524064171123e-06, "loss": 1.3216, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.2182907540723362, "learning_rate": 3.0213903743315507e-06, "loss": 1.2995, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.7007166762191699, "learning_rate": 3.0481283422459896e-06, "loss": 1.0124, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.7079128672425548, "learning_rate": 3.074866310160428e-06, "loss": 1.1157, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.8116776644026351, "learning_rate": 3.101604278074867e-06, "loss": 1.0315, "step": 116 }, { "epoch": 0.01, "grad_norm": 2.9250369310126723, "learning_rate": 3.128342245989305e-06, "loss": 1.0841, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.6158353263107859, "learning_rate": 3.1550802139037433e-06, "loss": 1.0489, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.6815143584372974, "learning_rate": 3.181818181818182e-06, "loss": 0.9644, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.660051493858394, "learning_rate": 3.2085561497326205e-06, "loss": 1.1633, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.6947857591941244, "learning_rate": 3.2352941176470594e-06, "loss": 1.0905, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.7580202446509152, "learning_rate": 3.262032085561498e-06, "loss": 1.0099, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.8053001612938018, "learning_rate": 3.288770053475936e-06, "loss": 1.0476, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.831633578791614, "learning_rate": 3.3155080213903747e-06, "loss": 1.1014, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.0423276707915021, "learning_rate": 3.342245989304813e-06, "loss": 1.2979, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.7816211885515854, "learning_rate": 3.368983957219252e-06, "loss": 1.0508, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.8065126762895813, "learning_rate": 3.3957219251336904e-06, "loss": 0.9765, "step": 127 }, { "epoch": 0.01, "grad_norm": 1.5990954510363118, "learning_rate": 3.4224598930481284e-06, "loss": 1.0768, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.585782750176935, "learning_rate": 3.449197860962567e-06, "loss": 1.0432, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.732520450159707, "learning_rate": 3.4759358288770056e-06, "loss": 1.0168, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.1258475246457988, "learning_rate": 3.5026737967914445e-06, "loss": 1.3368, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.7058790970058442, "learning_rate": 3.529411764705883e-06, "loss": 1.0296, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.6418877269220324, "learning_rate": 3.556149732620321e-06, "loss": 1.0455, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.5492695592309698, "learning_rate": 3.5828877005347597e-06, "loss": 0.9839, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.8799833208278507, "learning_rate": 3.609625668449198e-06, "loss": 1.1338, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.052951148669076, "learning_rate": 3.6363636363636366e-06, "loss": 1.3087, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.682742643586665, "learning_rate": 3.6631016042780754e-06, "loss": 1.0169, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.0155504941743332, "learning_rate": 3.6898395721925134e-06, "loss": 1.3072, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.6240264674493865, "learning_rate": 3.716577540106952e-06, "loss": 1.0004, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.7338249148680371, "learning_rate": 3.7433155080213907e-06, "loss": 1.1341, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.6660281755339907, "learning_rate": 3.770053475935829e-06, "loss": 1.0311, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.5784557321392956, "learning_rate": 3.796791443850268e-06, "loss": 1.0118, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.6200212404624486, "learning_rate": 3.8235294117647055e-06, "loss": 1.0138, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.7518680081010531, "learning_rate": 3.850267379679145e-06, "loss": 1.0325, "step": 144 }, { "epoch": 0.01, "grad_norm": 0.8853775007445426, "learning_rate": 3.877005347593583e-06, "loss": 1.3215, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.8546331448502054, "learning_rate": 3.903743315508022e-06, "loss": 1.0077, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.6513207288177008, "learning_rate": 3.93048128342246e-06, "loss": 1.0415, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.6789714170086183, "learning_rate": 3.957219251336899e-06, "loss": 1.0329, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.7581412197477366, "learning_rate": 3.983957219251337e-06, "loss": 0.9085, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.7460174171385607, "learning_rate": 4.010695187165775e-06, "loss": 1.02, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.6677576903886222, "learning_rate": 4.037433155080215e-06, "loss": 0.94, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.6149469889320314, "learning_rate": 4.064171122994653e-06, "loss": 1.027, "step": 152 }, { "epoch": 0.01, "grad_norm": 0.9416237721141347, "learning_rate": 4.0909090909090915e-06, "loss": 1.3041, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.6761266057035356, "learning_rate": 4.11764705882353e-06, "loss": 0.985, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.6845151349092773, "learning_rate": 4.144385026737968e-06, "loss": 1.0277, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.6456095057810234, "learning_rate": 4.171122994652407e-06, "loss": 0.9715, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.6427589795129955, "learning_rate": 4.197860962566845e-06, "loss": 0.9857, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.942636164299908, "learning_rate": 4.224598930481284e-06, "loss": 1.2801, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.6818495173237056, "learning_rate": 4.251336898395722e-06, "loss": 1.0736, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.673317004087276, "learning_rate": 4.2780748663101604e-06, "loss": 1.0476, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.6579551062032738, "learning_rate": 4.304812834224599e-06, "loss": 1.1425, "step": 161 }, { "epoch": 0.01, "grad_norm": 1.6402153038560567, "learning_rate": 4.331550802139038e-06, "loss": 1.0614, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.7889679705277794, "learning_rate": 4.3582887700534766e-06, "loss": 1.1135, "step": 163 }, { "epoch": 0.01, "grad_norm": 1.6118486078317327, "learning_rate": 4.385026737967915e-06, "loss": 1.0414, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.89673461751575, "learning_rate": 4.411764705882353e-06, "loss": 1.0867, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.7143734112255669, "learning_rate": 4.438502673796792e-06, "loss": 1.0716, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.5999482780004672, "learning_rate": 4.46524064171123e-06, "loss": 1.0208, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.7281296508062456, "learning_rate": 4.491978609625669e-06, "loss": 0.989, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.9433879335264962, "learning_rate": 4.518716577540107e-06, "loss": 1.3175, "step": 169 }, { "epoch": 0.01, "grad_norm": 1.6337703097025655, "learning_rate": 4.5454545454545455e-06, "loss": 1.1339, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.804087677783003, "learning_rate": 4.572192513368984e-06, "loss": 1.0788, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.6721395533867491, "learning_rate": 4.598930481283423e-06, "loss": 1.0327, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.6175828742311005, "learning_rate": 4.625668449197862e-06, "loss": 1.0366, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.6472291697591595, "learning_rate": 4.6524064171123e-06, "loss": 0.9914, "step": 174 }, { "epoch": 0.01, "grad_norm": 0.8964001300033594, "learning_rate": 4.6791443850267385e-06, "loss": 1.2853, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.5686764709503878, "learning_rate": 4.705882352941177e-06, "loss": 0.9891, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.7595835927415242, "learning_rate": 4.732620320855615e-06, "loss": 1.0183, "step": 177 }, { "epoch": 0.01, "grad_norm": 2.00075721370661, "learning_rate": 4.759358288770054e-06, "loss": 0.9857, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.8945538515945914, "learning_rate": 4.786096256684493e-06, "loss": 1.274, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.581446601392783, "learning_rate": 4.812834224598931e-06, "loss": 1.1132, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.7677277932551543, "learning_rate": 4.839572192513369e-06, "loss": 1.0201, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.601127301311296, "learning_rate": 4.866310160427808e-06, "loss": 0.9692, "step": 182 }, { "epoch": 0.01, "grad_norm": 1.7120255063263343, "learning_rate": 4.893048128342247e-06, "loss": 1.1433, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.7243519838011778, "learning_rate": 4.919786096256685e-06, "loss": 1.0281, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.9685886849863812, "learning_rate": 4.9465240641711236e-06, "loss": 1.0211, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.567250784740927, "learning_rate": 4.973262032085562e-06, "loss": 1.0142, "step": 186 }, { "epoch": 0.02, "grad_norm": 1.6965801728703, "learning_rate": 5e-06, "loss": 0.9994, "step": 187 }, { "epoch": 0.02, "grad_norm": 1.6182534867163023, "learning_rate": 5.026737967914439e-06, "loss": 1.0344, "step": 188 }, { "epoch": 0.02, "grad_norm": 1.839686587793268, "learning_rate": 5.053475935828877e-06, "loss": 1.0805, "step": 189 }, { "epoch": 0.02, "grad_norm": 0.8911490269228441, "learning_rate": 5.0802139037433165e-06, "loss": 1.3445, "step": 190 }, { "epoch": 0.02, "grad_norm": 0.9136077670033413, "learning_rate": 5.106951871657755e-06, "loss": 1.288, "step": 191 }, { "epoch": 0.02, "grad_norm": 1.7067331670819195, "learning_rate": 5.133689839572193e-06, "loss": 0.9915, "step": 192 }, { "epoch": 0.02, "grad_norm": 1.6831864211212457, "learning_rate": 5.160427807486631e-06, "loss": 1.0045, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.6391612895852137, "learning_rate": 5.187165775401069e-06, "loss": 1.0169, "step": 194 }, { "epoch": 0.02, "grad_norm": 1.8254036582210817, "learning_rate": 5.213903743315508e-06, "loss": 1.017, "step": 195 }, { "epoch": 0.02, "grad_norm": 1.8898312374429613, "learning_rate": 5.240641711229947e-06, "loss": 0.9066, "step": 196 }, { "epoch": 0.02, "grad_norm": 1.7309979971666078, "learning_rate": 5.2673796791443855e-06, "loss": 0.9044, "step": 197 }, { "epoch": 0.02, "grad_norm": 1.725961084471421, "learning_rate": 5.294117647058824e-06, "loss": 1.0472, "step": 198 }, { "epoch": 0.02, "grad_norm": 1.693516064454112, "learning_rate": 5.320855614973262e-06, "loss": 0.9931, "step": 199 }, { "epoch": 0.02, "grad_norm": 1.6025343266244938, "learning_rate": 5.347593582887702e-06, "loss": 0.9461, "step": 200 }, { "epoch": 0.02, "grad_norm": 1.7509743685523245, "learning_rate": 5.37433155080214e-06, "loss": 0.959, "step": 201 }, { "epoch": 0.02, "grad_norm": 1.8315515418222552, "learning_rate": 5.4010695187165785e-06, "loss": 1.016, "step": 202 }, { "epoch": 0.02, "grad_norm": 1.6106657161327298, "learning_rate": 5.427807486631016e-06, "loss": 1.0601, "step": 203 }, { "epoch": 0.02, "grad_norm": 1.6566123491722393, "learning_rate": 5.4545454545454545e-06, "loss": 1.0186, "step": 204 }, { "epoch": 0.02, "grad_norm": 1.6314345288305356, "learning_rate": 5.481283422459893e-06, "loss": 1.0039, "step": 205 }, { "epoch": 0.02, "grad_norm": 1.681194950959268, "learning_rate": 5.508021390374332e-06, "loss": 1.0076, "step": 206 }, { "epoch": 0.02, "grad_norm": 1.628406486640975, "learning_rate": 5.5347593582887706e-06, "loss": 1.0995, "step": 207 }, { "epoch": 0.02, "grad_norm": 1.740064519856547, "learning_rate": 5.561497326203209e-06, "loss": 0.9441, "step": 208 }, { "epoch": 0.02, "grad_norm": 1.700201957673078, "learning_rate": 5.588235294117647e-06, "loss": 1.005, "step": 209 }, { "epoch": 0.02, "grad_norm": 1.720457954350711, "learning_rate": 5.614973262032086e-06, "loss": 0.9218, "step": 210 }, { "epoch": 0.02, "grad_norm": 1.6059634958287854, "learning_rate": 5.641711229946525e-06, "loss": 0.9719, "step": 211 }, { "epoch": 0.02, "grad_norm": 0.9730707625064416, "learning_rate": 5.6684491978609635e-06, "loss": 1.3031, "step": 212 }, { "epoch": 0.02, "grad_norm": 0.9472987656004846, "learning_rate": 5.695187165775401e-06, "loss": 1.3051, "step": 213 }, { "epoch": 0.02, "grad_norm": 1.719301753728789, "learning_rate": 5.7219251336898395e-06, "loss": 1.0337, "step": 214 }, { "epoch": 0.02, "grad_norm": 0.862038263117422, "learning_rate": 5.748663101604278e-06, "loss": 1.2741, "step": 215 }, { "epoch": 0.02, "grad_norm": 1.8169719951778056, "learning_rate": 5.775401069518717e-06, "loss": 1.0638, "step": 216 }, { "epoch": 0.02, "grad_norm": 1.8240506806345749, "learning_rate": 5.802139037433156e-06, "loss": 0.9513, "step": 217 }, { "epoch": 0.02, "grad_norm": 1.711573112497114, "learning_rate": 5.828877005347594e-06, "loss": 0.8484, "step": 218 }, { "epoch": 0.02, "grad_norm": 1.6418927389674376, "learning_rate": 5.8556149732620325e-06, "loss": 0.9835, "step": 219 }, { "epoch": 0.02, "grad_norm": 1.6443398047697138, "learning_rate": 5.882352941176471e-06, "loss": 1.0394, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.6140767743927953, "learning_rate": 5.90909090909091e-06, "loss": 1.0312, "step": 221 }, { "epoch": 0.02, "grad_norm": 1.599461408294564, "learning_rate": 5.935828877005349e-06, "loss": 0.9694, "step": 222 }, { "epoch": 0.02, "grad_norm": 0.9712802482387606, "learning_rate": 5.962566844919787e-06, "loss": 1.2997, "step": 223 }, { "epoch": 0.02, "grad_norm": 1.6267235857564968, "learning_rate": 5.989304812834225e-06, "loss": 0.9027, "step": 224 }, { "epoch": 0.02, "grad_norm": 1.5585354363542179, "learning_rate": 6.016042780748663e-06, "loss": 0.935, "step": 225 }, { "epoch": 0.02, "grad_norm": 1.7750118993434965, "learning_rate": 6.0427807486631015e-06, "loss": 0.985, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.5802599756579798, "learning_rate": 6.069518716577541e-06, "loss": 1.0869, "step": 227 }, { "epoch": 0.02, "grad_norm": 0.8547978937904448, "learning_rate": 6.096256684491979e-06, "loss": 1.3026, "step": 228 }, { "epoch": 0.02, "grad_norm": 1.5899711735595479, "learning_rate": 6.122994652406418e-06, "loss": 0.911, "step": 229 }, { "epoch": 0.02, "grad_norm": 1.827550234078434, "learning_rate": 6.149732620320856e-06, "loss": 1.0285, "step": 230 }, { "epoch": 0.02, "grad_norm": 0.8757858970945072, "learning_rate": 6.176470588235295e-06, "loss": 1.284, "step": 231 }, { "epoch": 0.02, "grad_norm": 0.8692741927138358, "learning_rate": 6.203208556149734e-06, "loss": 1.2999, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.6783208799409806, "learning_rate": 6.229946524064172e-06, "loss": 0.9687, "step": 233 }, { "epoch": 0.02, "grad_norm": 1.6521359340339714, "learning_rate": 6.25668449197861e-06, "loss": 0.9094, "step": 234 }, { "epoch": 0.02, "grad_norm": 2.0490886608174566, "learning_rate": 6.283422459893048e-06, "loss": 1.0102, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.817985850694022, "learning_rate": 6.3101604278074865e-06, "loss": 1.0262, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.699216894618949, "learning_rate": 6.336898395721926e-06, "loss": 0.995, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.6359773881893027, "learning_rate": 6.363636363636364e-06, "loss": 0.9094, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.7047799148498088, "learning_rate": 6.390374331550803e-06, "loss": 1.0249, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.5496327081352343, "learning_rate": 6.417112299465241e-06, "loss": 0.9852, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.546985729807792, "learning_rate": 6.4438502673796795e-06, "loss": 0.988, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.6235319580404075, "learning_rate": 6.470588235294119e-06, "loss": 1.0401, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.7858326314118382, "learning_rate": 6.497326203208557e-06, "loss": 0.977, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.9215192342065, "learning_rate": 6.524064171122996e-06, "loss": 0.9439, "step": 244 }, { "epoch": 0.02, "grad_norm": 1.6234861968143297, "learning_rate": 6.550802139037433e-06, "loss": 0.9796, "step": 245 }, { "epoch": 0.02, "grad_norm": 1.5428677185434194, "learning_rate": 6.577540106951872e-06, "loss": 0.9237, "step": 246 }, { "epoch": 0.02, "grad_norm": 1.646665709681037, "learning_rate": 6.60427807486631e-06, "loss": 0.8975, "step": 247 }, { "epoch": 0.02, "grad_norm": 1.5202862255953613, "learning_rate": 6.631016042780749e-06, "loss": 1.0537, "step": 248 }, { "epoch": 0.02, "grad_norm": 1.7035913347433047, "learning_rate": 6.657754010695188e-06, "loss": 0.9191, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.0833285085287374, "learning_rate": 6.684491978609626e-06, "loss": 1.2878, "step": 250 }, { "epoch": 0.02, "grad_norm": 1.6996945195613555, "learning_rate": 6.711229946524065e-06, "loss": 1.0344, "step": 251 }, { "epoch": 0.02, "grad_norm": 1.7205883772778936, "learning_rate": 6.737967914438504e-06, "loss": 1.0155, "step": 252 }, { "epoch": 0.02, "grad_norm": 1.8879522750767714, "learning_rate": 6.764705882352942e-06, "loss": 0.8822, "step": 253 }, { "epoch": 0.02, "grad_norm": 1.6924076626852802, "learning_rate": 6.791443850267381e-06, "loss": 1.0087, "step": 254 }, { "epoch": 0.02, "grad_norm": 1.707080849706548, "learning_rate": 6.818181818181818e-06, "loss": 0.9837, "step": 255 }, { "epoch": 0.02, "grad_norm": 1.5250609261388683, "learning_rate": 6.844919786096257e-06, "loss": 0.9946, "step": 256 }, { "epoch": 0.02, "grad_norm": 0.8860791458013021, "learning_rate": 6.871657754010695e-06, "loss": 1.2398, "step": 257 }, { "epoch": 0.02, "grad_norm": 1.6893195031147463, "learning_rate": 6.898395721925134e-06, "loss": 0.9228, "step": 258 }, { "epoch": 0.02, "grad_norm": 1.7756464652648931, "learning_rate": 6.925133689839573e-06, "loss": 1.048, "step": 259 }, { "epoch": 0.02, "grad_norm": 1.57256545872689, "learning_rate": 6.951871657754011e-06, "loss": 0.8823, "step": 260 }, { "epoch": 0.02, "grad_norm": 1.6828440093329764, "learning_rate": 6.97860962566845e-06, "loss": 1.0102, "step": 261 }, { "epoch": 0.02, "grad_norm": 1.5037501274704808, "learning_rate": 7.005347593582889e-06, "loss": 0.9849, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.747158669018813, "learning_rate": 7.032085561497327e-06, "loss": 0.9902, "step": 263 }, { "epoch": 0.02, "grad_norm": 0.8311578597982793, "learning_rate": 7.058823529411766e-06, "loss": 1.2745, "step": 264 }, { "epoch": 0.02, "grad_norm": 1.5616266099103957, "learning_rate": 7.085561497326203e-06, "loss": 0.9442, "step": 265 }, { "epoch": 0.02, "grad_norm": 1.8932123161178773, "learning_rate": 7.112299465240642e-06, "loss": 0.9597, "step": 266 }, { "epoch": 0.02, "grad_norm": 1.7976550405472533, "learning_rate": 7.13903743315508e-06, "loss": 0.9656, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.7266421741458091, "learning_rate": 7.1657754010695195e-06, "loss": 1.0149, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.5830867914599367, "learning_rate": 7.192513368983958e-06, "loss": 0.9254, "step": 269 }, { "epoch": 0.02, "grad_norm": 1.6728946333943526, "learning_rate": 7.219251336898396e-06, "loss": 1.0008, "step": 270 }, { "epoch": 0.02, "grad_norm": 1.6960637229779523, "learning_rate": 7.245989304812835e-06, "loss": 0.9458, "step": 271 }, { "epoch": 0.02, "grad_norm": 1.5910388033396852, "learning_rate": 7.272727272727273e-06, "loss": 0.9572, "step": 272 }, { "epoch": 0.02, "grad_norm": 1.7273540990510239, "learning_rate": 7.2994652406417124e-06, "loss": 0.9379, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.6699737001101487, "learning_rate": 7.326203208556151e-06, "loss": 0.9862, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.0125117471817617, "learning_rate": 7.352941176470589e-06, "loss": 1.2909, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.8888979041409342, "learning_rate": 7.379679144385027e-06, "loss": 0.8539, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.6843254542057433, "learning_rate": 7.406417112299465e-06, "loss": 0.9129, "step": 277 }, { "epoch": 0.02, "grad_norm": 1.7150599174851449, "learning_rate": 7.433155080213904e-06, "loss": 0.9316, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.9344576564988232, "learning_rate": 7.459893048128343e-06, "loss": 0.9501, "step": 279 }, { "epoch": 0.02, "grad_norm": 1.722406243916013, "learning_rate": 7.486631016042781e-06, "loss": 0.9627, "step": 280 }, { "epoch": 0.02, "grad_norm": 1.6541527004561716, "learning_rate": 7.51336898395722e-06, "loss": 0.9587, "step": 281 }, { "epoch": 0.02, "grad_norm": 0.9612983022631244, "learning_rate": 7.540106951871658e-06, "loss": 1.2829, "step": 282 }, { "epoch": 0.02, "grad_norm": 1.6260207978616372, "learning_rate": 7.5668449197860975e-06, "loss": 0.999, "step": 283 }, { "epoch": 0.02, "grad_norm": 1.6983558234316496, "learning_rate": 7.593582887700536e-06, "loss": 0.9444, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.728134534297393, "learning_rate": 7.620320855614974e-06, "loss": 1.0072, "step": 285 }, { "epoch": 0.02, "grad_norm": 1.608036154103953, "learning_rate": 7.647058823529411e-06, "loss": 0.9667, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.5954525816110636, "learning_rate": 7.67379679144385e-06, "loss": 0.9614, "step": 287 }, { "epoch": 0.02, "grad_norm": 0.949390713502756, "learning_rate": 7.70053475935829e-06, "loss": 1.2866, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.7064434731267104, "learning_rate": 7.727272727272727e-06, "loss": 1.0574, "step": 289 }, { "epoch": 0.02, "grad_norm": 1.7750140327384567, "learning_rate": 7.754010695187166e-06, "loss": 0.8476, "step": 290 }, { "epoch": 0.02, "grad_norm": 1.8158884685606924, "learning_rate": 7.780748663101606e-06, "loss": 1.0228, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.5938517616645385, "learning_rate": 7.807486631016043e-06, "loss": 0.9129, "step": 292 }, { "epoch": 0.02, "grad_norm": 0.8891577501766359, "learning_rate": 7.834224598930483e-06, "loss": 1.3145, "step": 293 }, { "epoch": 0.02, "grad_norm": 1.5262988165709592, "learning_rate": 7.86096256684492e-06, "loss": 0.8343, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.5270838162567688, "learning_rate": 7.88770053475936e-06, "loss": 0.9989, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.548058369483167, "learning_rate": 7.914438502673799e-06, "loss": 0.9297, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.8684740476400858, "learning_rate": 7.941176470588236e-06, "loss": 0.9603, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.649763370830464, "learning_rate": 7.967914438502674e-06, "loss": 1.0185, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.7479959350069487, "learning_rate": 7.994652406417113e-06, "loss": 1.0523, "step": 299 }, { "epoch": 0.02, "grad_norm": 0.8553081083531295, "learning_rate": 8.02139037433155e-06, "loss": 1.261, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.5506693313123037, "learning_rate": 8.04812834224599e-06, "loss": 0.9031, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.7866068718090098, "learning_rate": 8.07486631016043e-06, "loss": 0.957, "step": 302 }, { "epoch": 0.02, "grad_norm": 0.8095175631029055, "learning_rate": 8.101604278074867e-06, "loss": 1.282, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.6361471081758008, "learning_rate": 8.128342245989306e-06, "loss": 1.0175, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.7497482368844108, "learning_rate": 8.155080213903744e-06, "loss": 0.8959, "step": 305 }, { "epoch": 0.02, "grad_norm": 0.8688365878907724, "learning_rate": 8.181818181818183e-06, "loss": 1.254, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.6560811189952516, "learning_rate": 8.20855614973262e-06, "loss": 0.9925, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.6814079201571874, "learning_rate": 8.23529411764706e-06, "loss": 0.9509, "step": 308 }, { "epoch": 0.02, "grad_norm": 0.8496793258592301, "learning_rate": 8.262032085561497e-06, "loss": 1.3053, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.605953265274145, "learning_rate": 8.288770053475937e-06, "loss": 0.8953, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.6042779172543353, "learning_rate": 8.315508021390374e-06, "loss": 1.0336, "step": 311 }, { "epoch": 0.03, "grad_norm": 0.8654892703516974, "learning_rate": 8.342245989304813e-06, "loss": 1.3111, "step": 312 }, { "epoch": 0.03, "grad_norm": 1.5472422647163915, "learning_rate": 8.368983957219253e-06, "loss": 0.8905, "step": 313 }, { "epoch": 0.03, "grad_norm": 1.7329507872119396, "learning_rate": 8.39572192513369e-06, "loss": 0.9063, "step": 314 }, { "epoch": 0.03, "grad_norm": 1.7122893310256027, "learning_rate": 8.42245989304813e-06, "loss": 1.0017, "step": 315 }, { "epoch": 0.03, "grad_norm": 1.6884654330228583, "learning_rate": 8.449197860962567e-06, "loss": 1.009, "step": 316 }, { "epoch": 0.03, "grad_norm": 1.916277685593287, "learning_rate": 8.475935828877005e-06, "loss": 0.9124, "step": 317 }, { "epoch": 0.03, "grad_norm": 0.8763280648151808, "learning_rate": 8.502673796791444e-06, "loss": 1.273, "step": 318 }, { "epoch": 0.03, "grad_norm": 1.667673774920465, "learning_rate": 8.529411764705883e-06, "loss": 0.9552, "step": 319 }, { "epoch": 0.03, "grad_norm": 0.8402045760269671, "learning_rate": 8.556149732620321e-06, "loss": 1.2751, "step": 320 }, { "epoch": 0.03, "grad_norm": 1.6896959178398792, "learning_rate": 8.58288770053476e-06, "loss": 0.9334, "step": 321 }, { "epoch": 0.03, "grad_norm": 1.8202310020228063, "learning_rate": 8.609625668449198e-06, "loss": 1.064, "step": 322 }, { "epoch": 0.03, "grad_norm": 1.657559430622702, "learning_rate": 8.636363636363637e-06, "loss": 0.9057, "step": 323 }, { "epoch": 0.03, "grad_norm": 1.7138438137155545, "learning_rate": 8.663101604278076e-06, "loss": 0.9105, "step": 324 }, { "epoch": 0.03, "grad_norm": 1.6971731752446384, "learning_rate": 8.689839572192514e-06, "loss": 0.9514, "step": 325 }, { "epoch": 0.03, "grad_norm": 0.9432398940495196, "learning_rate": 8.716577540106953e-06, "loss": 1.2672, "step": 326 }, { "epoch": 0.03, "grad_norm": 0.8971779119961532, "learning_rate": 8.743315508021392e-06, "loss": 1.3042, "step": 327 }, { "epoch": 0.03, "grad_norm": 1.7117447718424714, "learning_rate": 8.77005347593583e-06, "loss": 0.958, "step": 328 }, { "epoch": 0.03, "grad_norm": 0.8102997436760981, "learning_rate": 8.796791443850268e-06, "loss": 1.2761, "step": 329 }, { "epoch": 0.03, "grad_norm": 0.813828877687282, "learning_rate": 8.823529411764707e-06, "loss": 1.2846, "step": 330 }, { "epoch": 0.03, "grad_norm": 1.5449758393383635, "learning_rate": 8.850267379679144e-06, "loss": 1.0099, "step": 331 }, { "epoch": 0.03, "grad_norm": 1.7655860369688334, "learning_rate": 8.877005347593584e-06, "loss": 0.8669, "step": 332 }, { "epoch": 0.03, "grad_norm": 1.5785143616236195, "learning_rate": 8.903743315508023e-06, "loss": 0.8427, "step": 333 }, { "epoch": 0.03, "grad_norm": 1.6358973467391211, "learning_rate": 8.93048128342246e-06, "loss": 0.9039, "step": 334 }, { "epoch": 0.03, "grad_norm": 1.6286817818750994, "learning_rate": 8.9572192513369e-06, "loss": 0.9735, "step": 335 }, { "epoch": 0.03, "grad_norm": 1.6155763610544203, "learning_rate": 8.983957219251337e-06, "loss": 0.971, "step": 336 }, { "epoch": 0.03, "grad_norm": 1.7557332778993522, "learning_rate": 9.010695187165777e-06, "loss": 1.0301, "step": 337 }, { "epoch": 0.03, "grad_norm": 1.6047531042873184, "learning_rate": 9.037433155080214e-06, "loss": 0.9479, "step": 338 }, { "epoch": 0.03, "grad_norm": 1.8418487792719318, "learning_rate": 9.064171122994653e-06, "loss": 0.9315, "step": 339 }, { "epoch": 0.03, "grad_norm": 1.7893492458699531, "learning_rate": 9.090909090909091e-06, "loss": 0.9656, "step": 340 }, { "epoch": 0.03, "grad_norm": 1.6452995896562552, "learning_rate": 9.11764705882353e-06, "loss": 0.9057, "step": 341 }, { "epoch": 0.03, "grad_norm": 1.6859366686041781, "learning_rate": 9.144385026737968e-06, "loss": 0.9259, "step": 342 }, { "epoch": 0.03, "grad_norm": 1.0465615979633054, "learning_rate": 9.171122994652407e-06, "loss": 1.2266, "step": 343 }, { "epoch": 0.03, "grad_norm": 0.9021971524278636, "learning_rate": 9.197860962566846e-06, "loss": 1.2314, "step": 344 }, { "epoch": 0.03, "grad_norm": 0.8788333460307477, "learning_rate": 9.224598930481284e-06, "loss": 1.287, "step": 345 }, { "epoch": 0.03, "grad_norm": 1.7473589081901069, "learning_rate": 9.251336898395723e-06, "loss": 0.9958, "step": 346 }, { "epoch": 0.03, "grad_norm": 1.8215791072083818, "learning_rate": 9.278074866310161e-06, "loss": 0.8234, "step": 347 }, { "epoch": 0.03, "grad_norm": 1.6288486950596233, "learning_rate": 9.3048128342246e-06, "loss": 0.9347, "step": 348 }, { "epoch": 0.03, "grad_norm": 1.6628947162214622, "learning_rate": 9.331550802139038e-06, "loss": 0.8916, "step": 349 }, { "epoch": 0.03, "grad_norm": 1.7398928821579398, "learning_rate": 9.358288770053477e-06, "loss": 0.9581, "step": 350 }, { "epoch": 0.03, "grad_norm": 1.7300763952703848, "learning_rate": 9.385026737967915e-06, "loss": 0.9696, "step": 351 }, { "epoch": 0.03, "grad_norm": 1.7642186601242813, "learning_rate": 9.411764705882354e-06, "loss": 0.9615, "step": 352 }, { "epoch": 0.03, "grad_norm": 1.628333920910577, "learning_rate": 9.438502673796791e-06, "loss": 0.909, "step": 353 }, { "epoch": 0.03, "grad_norm": 1.5731596416651115, "learning_rate": 9.46524064171123e-06, "loss": 1.0685, "step": 354 }, { "epoch": 0.03, "grad_norm": 1.3955115534245028, "learning_rate": 9.49197860962567e-06, "loss": 1.3058, "step": 355 }, { "epoch": 0.03, "grad_norm": 1.60648085798287, "learning_rate": 9.518716577540108e-06, "loss": 0.9221, "step": 356 }, { "epoch": 0.03, "grad_norm": 1.666065988871153, "learning_rate": 9.545454545454547e-06, "loss": 0.9936, "step": 357 }, { "epoch": 0.03, "grad_norm": 1.6294006007537274, "learning_rate": 9.572192513368986e-06, "loss": 0.9437, "step": 358 }, { "epoch": 0.03, "grad_norm": 1.7483773104421196, "learning_rate": 9.598930481283422e-06, "loss": 0.892, "step": 359 }, { "epoch": 0.03, "grad_norm": 1.7247476441286762, "learning_rate": 9.625668449197861e-06, "loss": 0.971, "step": 360 }, { "epoch": 0.03, "grad_norm": 1.5821126524961222, "learning_rate": 9.6524064171123e-06, "loss": 0.8899, "step": 361 }, { "epoch": 0.03, "grad_norm": 1.5999572505257, "learning_rate": 9.679144385026738e-06, "loss": 0.9065, "step": 362 }, { "epoch": 0.03, "grad_norm": 1.9151428325252255, "learning_rate": 9.705882352941177e-06, "loss": 0.9727, "step": 363 }, { "epoch": 0.03, "grad_norm": 1.811509411319549, "learning_rate": 9.732620320855617e-06, "loss": 0.9686, "step": 364 }, { "epoch": 0.03, "grad_norm": 1.7867004700225126, "learning_rate": 9.759358288770054e-06, "loss": 0.8992, "step": 365 }, { "epoch": 0.03, "grad_norm": 1.6037002049017484, "learning_rate": 9.786096256684493e-06, "loss": 0.9359, "step": 366 }, { "epoch": 0.03, "grad_norm": 1.64679571661576, "learning_rate": 9.812834224598931e-06, "loss": 0.89, "step": 367 }, { "epoch": 0.03, "grad_norm": 1.6178854784673113, "learning_rate": 9.83957219251337e-06, "loss": 0.9553, "step": 368 }, { "epoch": 0.03, "grad_norm": 1.579457724331343, "learning_rate": 9.866310160427808e-06, "loss": 0.8985, "step": 369 }, { "epoch": 0.03, "grad_norm": 1.7374424163478515, "learning_rate": 9.893048128342247e-06, "loss": 0.9167, "step": 370 }, { "epoch": 0.03, "grad_norm": 1.684017113550306, "learning_rate": 9.919786096256685e-06, "loss": 0.8826, "step": 371 }, { "epoch": 0.03, "grad_norm": 1.701861404588261, "learning_rate": 9.946524064171124e-06, "loss": 0.91, "step": 372 }, { "epoch": 0.03, "grad_norm": 1.6090868520163608, "learning_rate": 9.973262032085562e-06, "loss": 0.8827, "step": 373 }, { "epoch": 0.03, "grad_norm": 1.6520436655859032, "learning_rate": 1e-05, "loss": 0.9056, "step": 374 }, { "epoch": 0.03, "grad_norm": 1.6243794348049438, "learning_rate": 9.999999831194285e-06, "loss": 0.9581, "step": 375 }, { "epoch": 0.03, "grad_norm": 1.551574413749861, "learning_rate": 9.999999324777145e-06, "loss": 0.9347, "step": 376 }, { "epoch": 0.03, "grad_norm": 1.390114445135984, "learning_rate": 9.99999848074862e-06, "loss": 1.2743, "step": 377 }, { "epoch": 0.03, "grad_norm": 1.787028272704262, "learning_rate": 9.999997299108763e-06, "loss": 0.9842, "step": 378 }, { "epoch": 0.03, "grad_norm": 2.1903372318951186, "learning_rate": 9.999995779857656e-06, "loss": 0.9636, "step": 379 }, { "epoch": 0.03, "grad_norm": 1.8018731055283046, "learning_rate": 9.9999939229954e-06, "loss": 0.9551, "step": 380 }, { "epoch": 0.03, "grad_norm": 0.9344489078516135, "learning_rate": 9.999991728522121e-06, "loss": 1.291, "step": 381 }, { "epoch": 0.03, "grad_norm": 1.6394082574765416, "learning_rate": 9.99998919643797e-06, "loss": 0.9651, "step": 382 }, { "epoch": 0.03, "grad_norm": 0.9172563410887771, "learning_rate": 9.999986326743111e-06, "loss": 1.2319, "step": 383 }, { "epoch": 0.03, "grad_norm": 1.6432180415049298, "learning_rate": 9.999983119437745e-06, "loss": 1.0143, "step": 384 }, { "epoch": 0.03, "grad_norm": 1.6638150027747303, "learning_rate": 9.999979574522085e-06, "loss": 0.9233, "step": 385 }, { "epoch": 0.03, "grad_norm": 1.545287995515525, "learning_rate": 9.99997569199637e-06, "loss": 0.953, "step": 386 }, { "epoch": 0.03, "grad_norm": 1.0220271690567286, "learning_rate": 9.999971471860864e-06, "loss": 1.2658, "step": 387 }, { "epoch": 0.03, "grad_norm": 1.6881111738570367, "learning_rate": 9.99996691411585e-06, "loss": 0.9825, "step": 388 }, { "epoch": 0.03, "grad_norm": 1.6124409104293327, "learning_rate": 9.99996201876164e-06, "loss": 0.9518, "step": 389 }, { "epoch": 0.03, "grad_norm": 1.534545140458619, "learning_rate": 9.99995678579856e-06, "loss": 0.9232, "step": 390 }, { "epoch": 0.03, "grad_norm": 1.5251052016625228, "learning_rate": 9.999951215226962e-06, "loss": 0.8441, "step": 391 }, { "epoch": 0.03, "grad_norm": 1.5963204254120489, "learning_rate": 9.999945307047228e-06, "loss": 0.9252, "step": 392 }, { "epoch": 0.03, "grad_norm": 1.6322385152938577, "learning_rate": 9.999939061259751e-06, "loss": 0.9571, "step": 393 }, { "epoch": 0.03, "grad_norm": 1.5995817861085302, "learning_rate": 9.999932477864958e-06, "loss": 0.9701, "step": 394 }, { "epoch": 0.03, "grad_norm": 1.6239612447743899, "learning_rate": 9.99992555686329e-06, "loss": 0.9427, "step": 395 }, { "epoch": 0.03, "grad_norm": 1.7108357788213928, "learning_rate": 9.999918298255215e-06, "loss": 1.0103, "step": 396 }, { "epoch": 0.03, "grad_norm": 1.7440732981296752, "learning_rate": 9.999910702041225e-06, "loss": 0.84, "step": 397 }, { "epoch": 0.03, "grad_norm": 1.6949964614460773, "learning_rate": 9.99990276822183e-06, "loss": 0.9718, "step": 398 }, { "epoch": 0.03, "grad_norm": 1.787382784168786, "learning_rate": 9.999894496797569e-06, "loss": 1.0048, "step": 399 }, { "epoch": 0.03, "grad_norm": 1.659609798714727, "learning_rate": 9.999885887768996e-06, "loss": 0.8776, "step": 400 }, { "epoch": 0.03, "grad_norm": 1.9228044140085423, "learning_rate": 9.999876941136697e-06, "loss": 0.9901, "step": 401 }, { "epoch": 0.03, "grad_norm": 1.7529652531809627, "learning_rate": 9.999867656901273e-06, "loss": 0.88, "step": 402 }, { "epoch": 0.03, "grad_norm": 1.5250613889327829, "learning_rate": 9.999858035063353e-06, "loss": 0.9798, "step": 403 }, { "epoch": 0.03, "grad_norm": 1.7619461159888476, "learning_rate": 9.999848075623584e-06, "loss": 0.9949, "step": 404 }, { "epoch": 0.03, "grad_norm": 1.0561991856729636, "learning_rate": 9.999837778582641e-06, "loss": 1.2836, "step": 405 }, { "epoch": 0.03, "grad_norm": 0.92681210621099, "learning_rate": 9.999827143941217e-06, "loss": 1.2826, "step": 406 }, { "epoch": 0.03, "grad_norm": 1.681315556539873, "learning_rate": 9.999816171700034e-06, "loss": 0.8997, "step": 407 }, { "epoch": 0.03, "grad_norm": 1.7495770520936509, "learning_rate": 9.999804861859828e-06, "loss": 0.8305, "step": 408 }, { "epoch": 0.03, "grad_norm": 1.7647805360266469, "learning_rate": 9.999793214421366e-06, "loss": 0.9522, "step": 409 }, { "epoch": 0.03, "grad_norm": 1.6329353867917615, "learning_rate": 9.999781229385433e-06, "loss": 0.9091, "step": 410 }, { "epoch": 0.03, "grad_norm": 1.7174912605692367, "learning_rate": 9.99976890675284e-06, "loss": 0.8806, "step": 411 }, { "epoch": 0.03, "grad_norm": 1.6010419763471635, "learning_rate": 9.999756246524416e-06, "loss": 0.9584, "step": 412 }, { "epoch": 0.03, "grad_norm": 1.747255689396597, "learning_rate": 9.99974324870102e-06, "loss": 0.8597, "step": 413 }, { "epoch": 0.03, "grad_norm": 1.9348825283079543, "learning_rate": 9.999729913283525e-06, "loss": 1.2965, "step": 414 }, { "epoch": 0.03, "grad_norm": 1.5696759401062859, "learning_rate": 9.999716240272834e-06, "loss": 0.8643, "step": 415 }, { "epoch": 0.03, "grad_norm": 1.520368120172788, "learning_rate": 9.99970222966987e-06, "loss": 0.9118, "step": 416 }, { "epoch": 0.03, "grad_norm": 1.1941450079809515, "learning_rate": 9.99968788147558e-06, "loss": 1.264, "step": 417 }, { "epoch": 0.03, "grad_norm": 1.6572101313148722, "learning_rate": 9.999673195690931e-06, "loss": 0.9812, "step": 418 }, { "epoch": 0.03, "grad_norm": 1.82528756433914, "learning_rate": 9.999658172316915e-06, "loss": 0.849, "step": 419 }, { "epoch": 0.03, "grad_norm": 1.806457508443531, "learning_rate": 9.999642811354545e-06, "loss": 0.9743, "step": 420 }, { "epoch": 0.03, "grad_norm": 1.7046973363366897, "learning_rate": 9.999627112804863e-06, "loss": 0.8863, "step": 421 }, { "epoch": 0.03, "grad_norm": 1.746524894819276, "learning_rate": 9.999611076668926e-06, "loss": 1.2741, "step": 422 }, { "epoch": 0.03, "grad_norm": 1.7425078545663912, "learning_rate": 9.999594702947817e-06, "loss": 0.9435, "step": 423 }, { "epoch": 0.03, "grad_norm": 1.767315942542136, "learning_rate": 9.999577991642639e-06, "loss": 0.9825, "step": 424 }, { "epoch": 0.03, "grad_norm": 1.6702346837860618, "learning_rate": 9.999560942754525e-06, "loss": 0.8508, "step": 425 }, { "epoch": 0.03, "grad_norm": 1.5734237744581654, "learning_rate": 9.999543556284623e-06, "loss": 0.9614, "step": 426 }, { "epoch": 0.03, "grad_norm": 1.9809453845491745, "learning_rate": 9.999525832234107e-06, "loss": 0.8673, "step": 427 }, { "epoch": 0.03, "grad_norm": 1.6435487971039735, "learning_rate": 9.999507770604177e-06, "loss": 0.9654, "step": 428 }, { "epoch": 0.03, "grad_norm": 1.5653056216237808, "learning_rate": 9.999489371396049e-06, "loss": 0.8478, "step": 429 }, { "epoch": 0.03, "grad_norm": 1.9815977599537147, "learning_rate": 9.999470634610966e-06, "loss": 0.9588, "step": 430 }, { "epoch": 0.03, "grad_norm": 1.7757758904493655, "learning_rate": 9.999451560250196e-06, "loss": 0.9365, "step": 431 }, { "epoch": 0.03, "grad_norm": 1.735111945701849, "learning_rate": 9.999432148315022e-06, "loss": 0.9867, "step": 432 }, { "epoch": 0.03, "grad_norm": 0.9591930291021465, "learning_rate": 9.999412398806758e-06, "loss": 1.2311, "step": 433 }, { "epoch": 0.03, "grad_norm": 1.5799794587699147, "learning_rate": 9.999392311726738e-06, "loss": 0.9355, "step": 434 }, { "epoch": 0.03, "grad_norm": 0.8333285339838076, "learning_rate": 9.999371887076317e-06, "loss": 1.2618, "step": 435 }, { "epoch": 0.03, "grad_norm": 0.799588790289206, "learning_rate": 9.999351124856873e-06, "loss": 1.2627, "step": 436 }, { "epoch": 0.04, "grad_norm": 1.700043263455576, "learning_rate": 9.999330025069812e-06, "loss": 1.0191, "step": 437 }, { "epoch": 0.04, "grad_norm": 1.5805374389348381, "learning_rate": 9.999308587716554e-06, "loss": 0.9201, "step": 438 }, { "epoch": 0.04, "grad_norm": 1.6951041251944257, "learning_rate": 9.99928681279855e-06, "loss": 0.885, "step": 439 }, { "epoch": 0.04, "grad_norm": 1.8731187741216433, "learning_rate": 9.999264700317268e-06, "loss": 0.897, "step": 440 }, { "epoch": 0.04, "grad_norm": 1.8296784188937354, "learning_rate": 9.999242250274201e-06, "loss": 0.9396, "step": 441 }, { "epoch": 0.04, "grad_norm": 1.1243571314785823, "learning_rate": 9.999219462670867e-06, "loss": 1.2621, "step": 442 }, { "epoch": 0.04, "grad_norm": 1.6923038597516902, "learning_rate": 9.999196337508804e-06, "loss": 0.9493, "step": 443 }, { "epoch": 0.04, "grad_norm": 1.607177321689406, "learning_rate": 9.999172874789572e-06, "loss": 0.862, "step": 444 }, { "epoch": 0.04, "grad_norm": 1.5891323735076752, "learning_rate": 9.999149074514757e-06, "loss": 0.9944, "step": 445 }, { "epoch": 0.04, "grad_norm": 0.8838148719690812, "learning_rate": 9.999124936685965e-06, "loss": 1.2631, "step": 446 }, { "epoch": 0.04, "grad_norm": 1.563782081104667, "learning_rate": 9.999100461304825e-06, "loss": 0.9901, "step": 447 }, { "epoch": 0.04, "grad_norm": 1.5336731531971337, "learning_rate": 9.999075648372991e-06, "loss": 0.8467, "step": 448 }, { "epoch": 0.04, "grad_norm": 1.5474563845348597, "learning_rate": 9.99905049789214e-06, "loss": 0.8726, "step": 449 }, { "epoch": 0.04, "grad_norm": 0.9850284613117485, "learning_rate": 9.999025009863967e-06, "loss": 1.2528, "step": 450 }, { "epoch": 0.04, "grad_norm": 1.5313709653756034, "learning_rate": 9.998999184290194e-06, "loss": 0.8875, "step": 451 }, { "epoch": 0.04, "grad_norm": 1.5741386583618306, "learning_rate": 9.998973021172564e-06, "loss": 0.8189, "step": 452 }, { "epoch": 0.04, "grad_norm": 1.5789443175873787, "learning_rate": 9.998946520512847e-06, "loss": 0.8695, "step": 453 }, { "epoch": 0.04, "grad_norm": 1.5684631550614898, "learning_rate": 9.99891968231283e-06, "loss": 0.9885, "step": 454 }, { "epoch": 0.04, "grad_norm": 0.9007688030773668, "learning_rate": 9.998892506574325e-06, "loss": 1.2603, "step": 455 }, { "epoch": 0.04, "grad_norm": 1.5583066302768747, "learning_rate": 9.998864993299167e-06, "loss": 0.8531, "step": 456 }, { "epoch": 0.04, "grad_norm": 1.6115227654509239, "learning_rate": 9.998837142489213e-06, "loss": 0.8908, "step": 457 }, { "epoch": 0.04, "grad_norm": 1.5672610972532492, "learning_rate": 9.998808954146347e-06, "loss": 0.8991, "step": 458 }, { "epoch": 0.04, "grad_norm": 1.3914220138293192, "learning_rate": 9.998780428272467e-06, "loss": 0.8893, "step": 459 }, { "epoch": 0.04, "grad_norm": 1.8793608060421403, "learning_rate": 9.998751564869504e-06, "loss": 0.8796, "step": 460 }, { "epoch": 0.04, "grad_norm": 1.5752520182530787, "learning_rate": 9.998722363939407e-06, "loss": 0.8969, "step": 461 }, { "epoch": 0.04, "grad_norm": 1.6165856956405618, "learning_rate": 9.998692825484142e-06, "loss": 0.9758, "step": 462 }, { "epoch": 0.04, "grad_norm": 1.6493681139196776, "learning_rate": 9.998662949505708e-06, "loss": 0.9003, "step": 463 }, { "epoch": 0.04, "grad_norm": 0.8764917987604242, "learning_rate": 9.998632736006124e-06, "loss": 1.2573, "step": 464 }, { "epoch": 0.04, "grad_norm": 1.573399940478985, "learning_rate": 9.998602184987425e-06, "loss": 0.8808, "step": 465 }, { "epoch": 0.04, "grad_norm": 1.6468968805043236, "learning_rate": 9.998571296451677e-06, "loss": 0.949, "step": 466 }, { "epoch": 0.04, "grad_norm": 1.986551763307262, "learning_rate": 9.998540070400966e-06, "loss": 0.8755, "step": 467 }, { "epoch": 0.04, "grad_norm": 1.757450531122584, "learning_rate": 9.998508506837398e-06, "loss": 0.8433, "step": 468 }, { "epoch": 0.04, "grad_norm": 1.9491703744019895, "learning_rate": 9.998476605763107e-06, "loss": 0.9387, "step": 469 }, { "epoch": 0.04, "grad_norm": 0.899977480061322, "learning_rate": 9.998444367180247e-06, "loss": 1.2475, "step": 470 }, { "epoch": 0.04, "grad_norm": 1.8549146049737377, "learning_rate": 9.998411791090992e-06, "loss": 0.9733, "step": 471 }, { "epoch": 0.04, "grad_norm": 1.8567296404077995, "learning_rate": 9.998378877497543e-06, "loss": 0.8655, "step": 472 }, { "epoch": 0.04, "grad_norm": 1.5299359127547578, "learning_rate": 9.998345626402124e-06, "loss": 0.8116, "step": 473 }, { "epoch": 0.04, "grad_norm": 1.6874725348895478, "learning_rate": 9.998312037806978e-06, "loss": 0.8258, "step": 474 }, { "epoch": 0.04, "grad_norm": 1.4494965684918637, "learning_rate": 9.998278111714374e-06, "loss": 0.9289, "step": 475 }, { "epoch": 0.04, "grad_norm": 1.6015726873260077, "learning_rate": 9.998243848126604e-06, "loss": 0.8999, "step": 476 }, { "epoch": 0.04, "grad_norm": 1.7656568248434399, "learning_rate": 9.998209247045978e-06, "loss": 0.8936, "step": 477 }, { "epoch": 0.04, "grad_norm": 1.6085268692275922, "learning_rate": 9.998174308474836e-06, "loss": 0.8998, "step": 478 }, { "epoch": 0.04, "grad_norm": 1.6942513697776738, "learning_rate": 9.998139032415534e-06, "loss": 0.8594, "step": 479 }, { "epoch": 0.04, "grad_norm": 0.9884116921183193, "learning_rate": 9.998103418870459e-06, "loss": 1.2684, "step": 480 }, { "epoch": 0.04, "grad_norm": 1.6595450239198377, "learning_rate": 9.998067467842009e-06, "loss": 0.9525, "step": 481 }, { "epoch": 0.04, "grad_norm": 0.8279241638857247, "learning_rate": 9.998031179332618e-06, "loss": 1.2366, "step": 482 }, { "epoch": 0.04, "grad_norm": 0.8613005337877088, "learning_rate": 9.99799455334473e-06, "loss": 1.235, "step": 483 }, { "epoch": 0.04, "grad_norm": 0.9029121265924763, "learning_rate": 9.997957589880823e-06, "loss": 1.2391, "step": 484 }, { "epoch": 0.04, "grad_norm": 0.8115637439600158, "learning_rate": 9.997920288943388e-06, "loss": 1.2395, "step": 485 }, { "epoch": 0.04, "grad_norm": 1.749345629544039, "learning_rate": 9.99788265053495e-06, "loss": 0.946, "step": 486 }, { "epoch": 0.04, "grad_norm": 1.7000515965620613, "learning_rate": 9.997844674658046e-06, "loss": 0.7775, "step": 487 }, { "epoch": 0.04, "grad_norm": 1.7343184224722692, "learning_rate": 9.99780636131524e-06, "loss": 0.9099, "step": 488 }, { "epoch": 0.04, "grad_norm": 1.589715732675177, "learning_rate": 9.997767710509123e-06, "loss": 0.8861, "step": 489 }, { "epoch": 0.04, "grad_norm": 1.5313660285620998, "learning_rate": 9.9977287222423e-06, "loss": 0.8574, "step": 490 }, { "epoch": 0.04, "grad_norm": 1.597332022101124, "learning_rate": 9.997689396517408e-06, "loss": 0.832, "step": 491 }, { "epoch": 0.04, "grad_norm": 1.6224510464653679, "learning_rate": 9.997649733337097e-06, "loss": 0.9924, "step": 492 }, { "epoch": 0.04, "grad_norm": 1.4748902636403407, "learning_rate": 9.99760973270405e-06, "loss": 1.2782, "step": 493 }, { "epoch": 0.04, "grad_norm": 1.162276791513521, "learning_rate": 9.997569394620965e-06, "loss": 1.2356, "step": 494 }, { "epoch": 0.04, "grad_norm": 1.8347024982060705, "learning_rate": 9.997528719090567e-06, "loss": 0.946, "step": 495 }, { "epoch": 0.04, "grad_norm": 1.64819431145681, "learning_rate": 9.997487706115604e-06, "loss": 0.9524, "step": 496 }, { "epoch": 0.04, "grad_norm": 1.8763102605397277, "learning_rate": 9.997446355698843e-06, "loss": 0.9231, "step": 497 }, { "epoch": 0.04, "grad_norm": 1.6694220706832703, "learning_rate": 9.997404667843076e-06, "loss": 0.8081, "step": 498 }, { "epoch": 0.04, "grad_norm": 1.8323873433359208, "learning_rate": 9.997362642551118e-06, "loss": 0.8759, "step": 499 }, { "epoch": 0.04, "grad_norm": 1.6738505520574647, "learning_rate": 9.99732027982581e-06, "loss": 0.9464, "step": 500 }, { "epoch": 0.04, "grad_norm": 1.653498491201669, "learning_rate": 9.997277579670007e-06, "loss": 0.8697, "step": 501 }, { "epoch": 0.04, "grad_norm": 1.537275229888343, "learning_rate": 9.997234542086595e-06, "loss": 0.8666, "step": 502 }, { "epoch": 0.04, "grad_norm": 1.5801384157809617, "learning_rate": 9.997191167078479e-06, "loss": 0.9432, "step": 503 }, { "epoch": 0.04, "grad_norm": 1.8937147518405208, "learning_rate": 9.99714745464859e-06, "loss": 0.8627, "step": 504 }, { "epoch": 0.04, "grad_norm": 1.7628763307939042, "learning_rate": 9.997103404799879e-06, "loss": 0.9767, "step": 505 }, { "epoch": 0.04, "grad_norm": 2.0991500160472127, "learning_rate": 9.99705901753532e-06, "loss": 1.2741, "step": 506 }, { "epoch": 0.04, "grad_norm": 1.8986050017446972, "learning_rate": 9.997014292857907e-06, "loss": 0.9643, "step": 507 }, { "epoch": 0.04, "grad_norm": 1.638679397836925, "learning_rate": 9.996969230770665e-06, "loss": 0.8487, "step": 508 }, { "epoch": 0.04, "grad_norm": 1.6360459056178525, "learning_rate": 9.996923831276632e-06, "loss": 0.952, "step": 509 }, { "epoch": 0.04, "grad_norm": 1.711916397036682, "learning_rate": 9.996878094378878e-06, "loss": 0.8952, "step": 510 }, { "epoch": 0.04, "grad_norm": 1.5641866021629862, "learning_rate": 9.996832020080488e-06, "loss": 0.8311, "step": 511 }, { "epoch": 0.04, "grad_norm": 0.9739476368531711, "learning_rate": 9.996785608384573e-06, "loss": 1.2281, "step": 512 }, { "epoch": 0.04, "grad_norm": 1.534359993741927, "learning_rate": 9.99673885929427e-06, "loss": 0.9411, "step": 513 }, { "epoch": 0.04, "grad_norm": 1.527861611324215, "learning_rate": 9.996691772812733e-06, "loss": 0.8473, "step": 514 }, { "epoch": 0.04, "grad_norm": 1.7207582918012991, "learning_rate": 9.996644348943141e-06, "loss": 0.9302, "step": 515 }, { "epoch": 0.04, "grad_norm": 1.6378467215926737, "learning_rate": 9.996596587688697e-06, "loss": 0.8768, "step": 516 }, { "epoch": 0.04, "grad_norm": 1.673046656354974, "learning_rate": 9.996548489052627e-06, "loss": 0.9053, "step": 517 }, { "epoch": 0.04, "grad_norm": 1.7510180395657364, "learning_rate": 9.996500053038176e-06, "loss": 0.8892, "step": 518 }, { "epoch": 0.04, "grad_norm": 1.3683029432047211, "learning_rate": 9.996451279648618e-06, "loss": 1.2197, "step": 519 }, { "epoch": 0.04, "grad_norm": 1.5062711123405232, "learning_rate": 9.996402168887243e-06, "loss": 0.9609, "step": 520 }, { "epoch": 0.04, "grad_norm": 1.5808593627357073, "learning_rate": 9.99635272075737e-06, "loss": 0.8981, "step": 521 }, { "epoch": 0.04, "grad_norm": 1.7561868963204466, "learning_rate": 9.996302935262337e-06, "loss": 0.9481, "step": 522 }, { "epoch": 0.04, "grad_norm": 1.6423204785829326, "learning_rate": 9.996252812405503e-06, "loss": 0.871, "step": 523 }, { "epoch": 0.04, "grad_norm": 1.749309031025904, "learning_rate": 9.996202352190256e-06, "loss": 0.8546, "step": 524 }, { "epoch": 0.04, "grad_norm": 1.6434650989279864, "learning_rate": 9.996151554620001e-06, "loss": 0.8435, "step": 525 }, { "epoch": 0.04, "grad_norm": 0.8571766691478683, "learning_rate": 9.996100419698168e-06, "loss": 1.2331, "step": 526 }, { "epoch": 0.04, "grad_norm": 1.608118832698862, "learning_rate": 9.996048947428212e-06, "loss": 0.8707, "step": 527 }, { "epoch": 0.04, "grad_norm": 1.6229247756000789, "learning_rate": 9.995997137813606e-06, "loss": 0.8998, "step": 528 }, { "epoch": 0.04, "grad_norm": 1.6887980655650596, "learning_rate": 9.995944990857848e-06, "loss": 0.8708, "step": 529 }, { "epoch": 0.04, "grad_norm": 1.8482670601679823, "learning_rate": 9.995892506564461e-06, "loss": 0.9478, "step": 530 }, { "epoch": 0.04, "grad_norm": 1.6494954313277905, "learning_rate": 9.99583968493699e-06, "loss": 0.8775, "step": 531 }, { "epoch": 0.04, "grad_norm": 0.9430354874142154, "learning_rate": 9.995786525978998e-06, "loss": 1.2639, "step": 532 }, { "epoch": 0.04, "grad_norm": 1.6602049074855336, "learning_rate": 9.995733029694077e-06, "loss": 0.8766, "step": 533 }, { "epoch": 0.04, "grad_norm": 0.810467583152964, "learning_rate": 9.99567919608584e-06, "loss": 1.1945, "step": 534 }, { "epoch": 0.04, "grad_norm": 1.5644503989725047, "learning_rate": 9.995625025157918e-06, "loss": 0.9197, "step": 535 }, { "epoch": 0.04, "grad_norm": 1.659735984300469, "learning_rate": 9.995570516913971e-06, "loss": 0.9214, "step": 536 }, { "epoch": 0.04, "grad_norm": 1.49522612022363, "learning_rate": 9.995515671357681e-06, "loss": 0.8636, "step": 537 }, { "epoch": 0.04, "grad_norm": 1.6669462794406056, "learning_rate": 9.995460488492749e-06, "loss": 0.9339, "step": 538 }, { "epoch": 0.04, "grad_norm": 1.5454763962002922, "learning_rate": 9.995404968322902e-06, "loss": 0.9158, "step": 539 }, { "epoch": 0.04, "grad_norm": 1.5025009251840864, "learning_rate": 9.99534911085189e-06, "loss": 0.9333, "step": 540 }, { "epoch": 0.04, "grad_norm": 1.5092647622177995, "learning_rate": 9.995292916083482e-06, "loss": 0.7782, "step": 541 }, { "epoch": 0.04, "grad_norm": 1.5360754095581055, "learning_rate": 9.995236384021474e-06, "loss": 0.8158, "step": 542 }, { "epoch": 0.04, "grad_norm": 1.0911125085420645, "learning_rate": 9.995179514669683e-06, "loss": 1.2155, "step": 543 }, { "epoch": 0.04, "grad_norm": 0.9846391808132821, "learning_rate": 9.995122308031951e-06, "loss": 1.2169, "step": 544 }, { "epoch": 0.04, "grad_norm": 2.119711133683519, "learning_rate": 9.995064764112135e-06, "loss": 0.9179, "step": 545 }, { "epoch": 0.04, "grad_norm": 0.8869390509546895, "learning_rate": 9.995006882914127e-06, "loss": 1.2032, "step": 546 }, { "epoch": 0.04, "grad_norm": 1.7609955593494857, "learning_rate": 9.994948664441832e-06, "loss": 0.9029, "step": 547 }, { "epoch": 0.04, "grad_norm": 1.6610543371469302, "learning_rate": 9.994890108699182e-06, "loss": 0.9351, "step": 548 }, { "epoch": 0.04, "grad_norm": 1.6123857058586843, "learning_rate": 9.99483121569013e-06, "loss": 0.865, "step": 549 }, { "epoch": 0.04, "grad_norm": 1.6083944492443754, "learning_rate": 9.994771985418653e-06, "loss": 0.8647, "step": 550 }, { "epoch": 0.04, "grad_norm": 1.5959016478107604, "learning_rate": 9.99471241788875e-06, "loss": 0.9241, "step": 551 }, { "epoch": 0.04, "grad_norm": 1.5919004567567854, "learning_rate": 9.994652513104443e-06, "loss": 0.855, "step": 552 }, { "epoch": 0.04, "grad_norm": 1.710211387541457, "learning_rate": 9.994592271069778e-06, "loss": 0.9691, "step": 553 }, { "epoch": 0.04, "grad_norm": 1.5042139723841543, "learning_rate": 9.994531691788822e-06, "loss": 0.9477, "step": 554 }, { "epoch": 0.04, "grad_norm": 1.5367310849988596, "learning_rate": 9.994470775265665e-06, "loss": 1.2088, "step": 555 }, { "epoch": 0.04, "grad_norm": 1.4831720639619212, "learning_rate": 9.99440952150442e-06, "loss": 0.8747, "step": 556 }, { "epoch": 0.04, "grad_norm": 1.6151909973235963, "learning_rate": 9.994347930509225e-06, "loss": 0.8923, "step": 557 }, { "epoch": 0.04, "grad_norm": 1.7065905303547673, "learning_rate": 9.994286002284238e-06, "loss": 0.8577, "step": 558 }, { "epoch": 0.04, "grad_norm": 1.6677749827378603, "learning_rate": 9.994223736833638e-06, "loss": 0.8453, "step": 559 }, { "epoch": 0.04, "grad_norm": 1.58826101692309, "learning_rate": 9.994161134161635e-06, "loss": 0.8675, "step": 560 }, { "epoch": 0.05, "grad_norm": 1.5648526352982406, "learning_rate": 9.994098194272449e-06, "loss": 0.8729, "step": 561 }, { "epoch": 0.05, "grad_norm": 1.6039288089554729, "learning_rate": 9.994034917170334e-06, "loss": 0.9103, "step": 562 }, { "epoch": 0.05, "grad_norm": 1.5969341875232201, "learning_rate": 9.993971302859561e-06, "loss": 0.8989, "step": 563 }, { "epoch": 0.05, "grad_norm": 1.6001751682564351, "learning_rate": 9.993907351344427e-06, "loss": 0.9074, "step": 564 }, { "epoch": 0.05, "grad_norm": 1.5152697728804674, "learning_rate": 9.993843062629252e-06, "loss": 0.8708, "step": 565 }, { "epoch": 0.05, "grad_norm": 1.2762873156748002, "learning_rate": 9.99377843671837e-06, "loss": 1.2133, "step": 566 }, { "epoch": 0.05, "grad_norm": 1.0488258187183748, "learning_rate": 9.993713473616151e-06, "loss": 1.2448, "step": 567 }, { "epoch": 0.05, "grad_norm": 0.8302823334761562, "learning_rate": 9.99364817332698e-06, "loss": 1.2179, "step": 568 }, { "epoch": 0.05, "grad_norm": 1.7478522933950016, "learning_rate": 9.993582535855265e-06, "loss": 0.8843, "step": 569 }, { "epoch": 0.05, "grad_norm": 1.8010008166730094, "learning_rate": 9.993516561205439e-06, "loss": 0.8445, "step": 570 }, { "epoch": 0.05, "grad_norm": 1.6580887441813017, "learning_rate": 9.993450249381955e-06, "loss": 0.8709, "step": 571 }, { "epoch": 0.05, "grad_norm": 1.6130303398140315, "learning_rate": 9.993383600389294e-06, "loss": 0.9551, "step": 572 }, { "epoch": 0.05, "grad_norm": 1.541957169675673, "learning_rate": 9.993316614231954e-06, "loss": 0.8371, "step": 573 }, { "epoch": 0.05, "grad_norm": 1.630107462392517, "learning_rate": 9.993249290914457e-06, "loss": 0.8282, "step": 574 }, { "epoch": 0.05, "grad_norm": 1.6110711383662422, "learning_rate": 9.993181630441352e-06, "loss": 0.8558, "step": 575 }, { "epoch": 0.05, "grad_norm": 1.6765435883047133, "learning_rate": 9.993113632817203e-06, "loss": 0.8896, "step": 576 }, { "epoch": 0.05, "grad_norm": 1.6571740147127099, "learning_rate": 9.993045298046605e-06, "loss": 0.9455, "step": 577 }, { "epoch": 0.05, "grad_norm": 1.5629925174600878, "learning_rate": 9.992976626134171e-06, "loss": 0.8485, "step": 578 }, { "epoch": 0.05, "grad_norm": 2.4406178321139556, "learning_rate": 9.99290761708454e-06, "loss": 1.2458, "step": 579 }, { "epoch": 0.05, "grad_norm": 1.6788244620966644, "learning_rate": 9.992838270902367e-06, "loss": 0.9033, "step": 580 }, { "epoch": 0.05, "grad_norm": 1.4964194969985083, "learning_rate": 9.99276858759234e-06, "loss": 0.8512, "step": 581 }, { "epoch": 0.05, "grad_norm": 1.6938564136054508, "learning_rate": 9.99269856715916e-06, "loss": 0.8532, "step": 582 }, { "epoch": 0.05, "grad_norm": 1.7802682045493063, "learning_rate": 9.992628209607556e-06, "loss": 0.9366, "step": 583 }, { "epoch": 0.05, "grad_norm": 1.486744420231548, "learning_rate": 9.992557514942278e-06, "loss": 0.8586, "step": 584 }, { "epoch": 0.05, "grad_norm": 1.580408205444079, "learning_rate": 9.992486483168103e-06, "loss": 0.9601, "step": 585 }, { "epoch": 0.05, "grad_norm": 1.5561523297814972, "learning_rate": 9.992415114289822e-06, "loss": 0.9062, "step": 586 }, { "epoch": 0.05, "grad_norm": 1.8403719743874054, "learning_rate": 9.992343408312258e-06, "loss": 0.7997, "step": 587 }, { "epoch": 0.05, "grad_norm": 1.6522894687190806, "learning_rate": 9.992271365240251e-06, "loss": 0.9172, "step": 588 }, { "epoch": 0.05, "grad_norm": 1.613680937844445, "learning_rate": 9.992198985078667e-06, "loss": 0.7513, "step": 589 }, { "epoch": 0.05, "grad_norm": 1.7951403310189302, "learning_rate": 9.992126267832392e-06, "loss": 0.907, "step": 590 }, { "epoch": 0.05, "grad_norm": 1.6666747091665577, "learning_rate": 9.992053213506333e-06, "loss": 0.946, "step": 591 }, { "epoch": 0.05, "grad_norm": 1.5174424251431535, "learning_rate": 9.99197982210543e-06, "loss": 0.7802, "step": 592 }, { "epoch": 0.05, "grad_norm": 1.57905431067547, "learning_rate": 9.991906093634633e-06, "loss": 0.8486, "step": 593 }, { "epoch": 0.05, "grad_norm": 0.927276188212811, "learning_rate": 9.991832028098923e-06, "loss": 1.2057, "step": 594 }, { "epoch": 0.05, "grad_norm": 1.5455666145700724, "learning_rate": 9.991757625503298e-06, "loss": 0.8989, "step": 595 }, { "epoch": 0.05, "grad_norm": 1.4946866303217154, "learning_rate": 9.991682885852784e-06, "loss": 0.9335, "step": 596 }, { "epoch": 0.05, "grad_norm": 1.4733671090024694, "learning_rate": 9.991607809152428e-06, "loss": 0.763, "step": 597 }, { "epoch": 0.05, "grad_norm": 1.7381609630529256, "learning_rate": 9.991532395407299e-06, "loss": 0.8846, "step": 598 }, { "epoch": 0.05, "grad_norm": 1.768201055694979, "learning_rate": 9.991456644622489e-06, "loss": 0.9364, "step": 599 }, { "epoch": 0.05, "grad_norm": 1.621717442507501, "learning_rate": 9.991380556803113e-06, "loss": 0.7931, "step": 600 }, { "epoch": 0.05, "grad_norm": 1.6440221177127945, "learning_rate": 9.991304131954307e-06, "loss": 0.8169, "step": 601 }, { "epoch": 0.05, "grad_norm": 1.5362828411071268, "learning_rate": 9.991227370081233e-06, "loss": 0.9469, "step": 602 }, { "epoch": 0.05, "grad_norm": 1.0037931545702514, "learning_rate": 9.991150271189074e-06, "loss": 1.1896, "step": 603 }, { "epoch": 0.05, "grad_norm": 0.8795664631760622, "learning_rate": 9.991072835283035e-06, "loss": 1.2511, "step": 604 }, { "epoch": 0.05, "grad_norm": 1.5754226940928284, "learning_rate": 9.990995062368346e-06, "loss": 0.8803, "step": 605 }, { "epoch": 0.05, "grad_norm": 1.5880314045473087, "learning_rate": 9.99091695245026e-06, "loss": 0.9296, "step": 606 }, { "epoch": 0.05, "grad_norm": 1.5410767803454686, "learning_rate": 9.990838505534047e-06, "loss": 0.8992, "step": 607 }, { "epoch": 0.05, "grad_norm": 1.704934495640686, "learning_rate": 9.990759721625005e-06, "loss": 0.8759, "step": 608 }, { "epoch": 0.05, "grad_norm": 1.0957230453121363, "learning_rate": 9.990680600728456e-06, "loss": 1.2116, "step": 609 }, { "epoch": 0.05, "grad_norm": 1.538624042535066, "learning_rate": 9.99060114284974e-06, "loss": 0.9854, "step": 610 }, { "epoch": 0.05, "grad_norm": 1.711206233378193, "learning_rate": 9.990521347994224e-06, "loss": 0.9945, "step": 611 }, { "epoch": 0.05, "grad_norm": 1.5498646427037646, "learning_rate": 9.990441216167295e-06, "loss": 0.8909, "step": 612 }, { "epoch": 0.05, "grad_norm": 1.5204349330275777, "learning_rate": 9.990360747374363e-06, "loss": 0.9561, "step": 613 }, { "epoch": 0.05, "grad_norm": 1.557888200095083, "learning_rate": 9.990279941620861e-06, "loss": 0.86, "step": 614 }, { "epoch": 0.05, "grad_norm": 1.5229902516586238, "learning_rate": 9.990198798912249e-06, "loss": 0.766, "step": 615 }, { "epoch": 0.05, "grad_norm": 0.8672973522704668, "learning_rate": 9.990117319254002e-06, "loss": 1.2268, "step": 616 }, { "epoch": 0.05, "grad_norm": 1.7336154199377132, "learning_rate": 9.990035502651624e-06, "loss": 0.8863, "step": 617 }, { "epoch": 0.05, "grad_norm": 1.5702541511971044, "learning_rate": 9.989953349110637e-06, "loss": 0.9203, "step": 618 }, { "epoch": 0.05, "grad_norm": 1.5675104621469005, "learning_rate": 9.98987085863659e-06, "loss": 0.8137, "step": 619 }, { "epoch": 0.05, "grad_norm": 1.7478097807567814, "learning_rate": 9.989788031235054e-06, "loss": 0.9069, "step": 620 }, { "epoch": 0.05, "grad_norm": 1.5816514734533278, "learning_rate": 9.989704866911617e-06, "loss": 0.9474, "step": 621 }, { "epoch": 0.05, "grad_norm": 1.5307604203283434, "learning_rate": 9.989621365671902e-06, "loss": 0.9027, "step": 622 }, { "epoch": 0.05, "grad_norm": 1.7099158693623115, "learning_rate": 9.98953752752154e-06, "loss": 0.8647, "step": 623 }, { "epoch": 0.05, "grad_norm": 1.6031767482718504, "learning_rate": 9.989453352466196e-06, "loss": 0.8469, "step": 624 }, { "epoch": 0.05, "grad_norm": 1.603681108229473, "learning_rate": 9.989368840511553e-06, "loss": 0.9328, "step": 625 }, { "epoch": 0.05, "grad_norm": 1.6542118265919625, "learning_rate": 9.989283991663316e-06, "loss": 0.9612, "step": 626 }, { "epoch": 0.05, "grad_norm": 1.7026090617826815, "learning_rate": 9.989198805927216e-06, "loss": 0.9237, "step": 627 }, { "epoch": 0.05, "grad_norm": 1.6802956360336565, "learning_rate": 9.989113283309003e-06, "loss": 0.8404, "step": 628 }, { "epoch": 0.05, "grad_norm": 1.7098201471561392, "learning_rate": 9.989027423814454e-06, "loss": 0.8346, "step": 629 }, { "epoch": 0.05, "grad_norm": 1.4249275244651514, "learning_rate": 9.988941227449365e-06, "loss": 0.8522, "step": 630 }, { "epoch": 0.05, "grad_norm": 0.9768677434350856, "learning_rate": 9.988854694219556e-06, "loss": 1.2524, "step": 631 }, { "epoch": 0.05, "grad_norm": 1.6397667580516584, "learning_rate": 9.98876782413087e-06, "loss": 0.9268, "step": 632 }, { "epoch": 0.05, "grad_norm": 0.8404328183385964, "learning_rate": 9.988680617189173e-06, "loss": 1.2356, "step": 633 }, { "epoch": 0.05, "grad_norm": 1.509760364395218, "learning_rate": 9.988593073400354e-06, "loss": 0.9461, "step": 634 }, { "epoch": 0.05, "grad_norm": 0.8403025964964629, "learning_rate": 9.988505192770324e-06, "loss": 1.2676, "step": 635 }, { "epoch": 0.05, "grad_norm": 1.5747824208245265, "learning_rate": 9.988416975305016e-06, "loss": 1.2729, "step": 636 }, { "epoch": 0.05, "grad_norm": 2.0050449092714855, "learning_rate": 9.988328421010387e-06, "loss": 0.8385, "step": 637 }, { "epoch": 0.05, "grad_norm": 1.5456888845915189, "learning_rate": 9.988239529892416e-06, "loss": 0.859, "step": 638 }, { "epoch": 0.05, "grad_norm": 1.5725474684614429, "learning_rate": 9.988150301957107e-06, "loss": 0.9187, "step": 639 }, { "epoch": 0.05, "grad_norm": 0.9406963468307236, "learning_rate": 9.988060737210483e-06, "loss": 1.2383, "step": 640 }, { "epoch": 0.05, "grad_norm": 1.5091036443477883, "learning_rate": 9.987970835658592e-06, "loss": 0.9659, "step": 641 }, { "epoch": 0.05, "grad_norm": 1.5248098770384597, "learning_rate": 9.987880597307504e-06, "loss": 0.8901, "step": 642 }, { "epoch": 0.05, "grad_norm": 1.4934607510261524, "learning_rate": 9.987790022163312e-06, "loss": 0.9549, "step": 643 }, { "epoch": 0.05, "grad_norm": 0.8669290362240009, "learning_rate": 9.987699110232134e-06, "loss": 1.2622, "step": 644 }, { "epoch": 0.05, "grad_norm": 1.555145461021801, "learning_rate": 9.987607861520107e-06, "loss": 0.8903, "step": 645 }, { "epoch": 0.05, "grad_norm": 0.8167948456544997, "learning_rate": 9.987516276033392e-06, "loss": 1.2434, "step": 646 }, { "epoch": 0.05, "grad_norm": 1.573000541361675, "learning_rate": 9.987424353778172e-06, "loss": 0.883, "step": 647 }, { "epoch": 0.05, "grad_norm": 1.6452898824730504, "learning_rate": 9.987332094760657e-06, "loss": 0.84, "step": 648 }, { "epoch": 0.05, "grad_norm": 1.5505766672269359, "learning_rate": 9.987239498987074e-06, "loss": 0.9677, "step": 649 }, { "epoch": 0.05, "grad_norm": 1.5597647220195894, "learning_rate": 9.987146566463677e-06, "loss": 0.8992, "step": 650 }, { "epoch": 0.05, "grad_norm": 1.7278327778950622, "learning_rate": 9.987053297196739e-06, "loss": 0.8776, "step": 651 }, { "epoch": 0.05, "grad_norm": 1.6095740584926126, "learning_rate": 9.986959691192558e-06, "loss": 0.8749, "step": 652 }, { "epoch": 0.05, "grad_norm": 1.658747598103537, "learning_rate": 9.986865748457457e-06, "loss": 0.8319, "step": 653 }, { "epoch": 0.05, "grad_norm": 2.374034712631668, "learning_rate": 9.986771468997775e-06, "loss": 0.9244, "step": 654 }, { "epoch": 0.05, "grad_norm": 1.03444190148565, "learning_rate": 9.986676852819883e-06, "loss": 1.2029, "step": 655 }, { "epoch": 0.05, "grad_norm": 0.9148144746449407, "learning_rate": 9.986581899930167e-06, "loss": 1.2383, "step": 656 }, { "epoch": 0.05, "grad_norm": 1.5606565181319547, "learning_rate": 9.986486610335038e-06, "loss": 0.9174, "step": 657 }, { "epoch": 0.05, "grad_norm": 0.8710684720213665, "learning_rate": 9.98639098404093e-06, "loss": 1.2552, "step": 658 }, { "epoch": 0.05, "grad_norm": 1.5312592018774447, "learning_rate": 9.986295021054302e-06, "loss": 0.9297, "step": 659 }, { "epoch": 0.05, "grad_norm": 1.7093262945412542, "learning_rate": 9.98619872138163e-06, "loss": 0.8883, "step": 660 }, { "epoch": 0.05, "grad_norm": 1.4895729487095428, "learning_rate": 9.986102085029422e-06, "loss": 0.8958, "step": 661 }, { "epoch": 0.05, "grad_norm": 1.0806053871780958, "learning_rate": 9.986005112004198e-06, "loss": 1.2015, "step": 662 }, { "epoch": 0.05, "grad_norm": 1.5223136808938662, "learning_rate": 9.985907802312509e-06, "loss": 0.9453, "step": 663 }, { "epoch": 0.05, "grad_norm": 1.559165079945979, "learning_rate": 9.985810155960921e-06, "loss": 0.9099, "step": 664 }, { "epoch": 0.05, "grad_norm": 1.6459652934260187, "learning_rate": 9.985712172956035e-06, "loss": 0.8524, "step": 665 }, { "epoch": 0.05, "grad_norm": 0.8286421654293442, "learning_rate": 9.985613853304459e-06, "loss": 1.2233, "step": 666 }, { "epoch": 0.05, "grad_norm": 0.9420887536025467, "learning_rate": 9.985515197012835e-06, "loss": 1.1933, "step": 667 }, { "epoch": 0.05, "grad_norm": 1.6595594912318345, "learning_rate": 9.985416204087828e-06, "loss": 0.915, "step": 668 }, { "epoch": 0.05, "grad_norm": 1.5888052057616477, "learning_rate": 9.985316874536117e-06, "loss": 0.9199, "step": 669 }, { "epoch": 0.05, "grad_norm": 1.839121537061138, "learning_rate": 9.985217208364413e-06, "loss": 0.888, "step": 670 }, { "epoch": 0.05, "grad_norm": 1.5907781771823426, "learning_rate": 9.985117205579442e-06, "loss": 0.9079, "step": 671 }, { "epoch": 0.05, "grad_norm": 0.9829742380070137, "learning_rate": 9.985016866187958e-06, "loss": 1.2284, "step": 672 }, { "epoch": 0.05, "grad_norm": 1.7103142340605464, "learning_rate": 9.984916190196736e-06, "loss": 0.9585, "step": 673 }, { "epoch": 0.05, "grad_norm": 1.5896713394004327, "learning_rate": 9.984815177612574e-06, "loss": 0.8648, "step": 674 }, { "epoch": 0.05, "grad_norm": 1.633415888881344, "learning_rate": 9.984713828442294e-06, "loss": 0.9065, "step": 675 }, { "epoch": 0.05, "grad_norm": 1.6350011716002886, "learning_rate": 9.984612142692738e-06, "loss": 0.9171, "step": 676 }, { "epoch": 0.05, "grad_norm": 1.667908928147892, "learning_rate": 9.984510120370771e-06, "loss": 0.8634, "step": 677 }, { "epoch": 0.05, "grad_norm": 1.6537885089182556, "learning_rate": 9.984407761483283e-06, "loss": 0.8037, "step": 678 }, { "epoch": 0.05, "grad_norm": 1.5561847863622056, "learning_rate": 9.984305066037186e-06, "loss": 0.9414, "step": 679 }, { "epoch": 0.05, "grad_norm": 1.5812995329230877, "learning_rate": 9.984202034039414e-06, "loss": 0.8331, "step": 680 }, { "epoch": 0.05, "grad_norm": 1.5989725026429866, "learning_rate": 9.984098665496923e-06, "loss": 0.8515, "step": 681 }, { "epoch": 0.05, "grad_norm": 1.5944374234149283, "learning_rate": 9.983994960416694e-06, "loss": 0.9365, "step": 682 }, { "epoch": 0.05, "grad_norm": 1.6772548456317202, "learning_rate": 9.983890918805727e-06, "loss": 0.8367, "step": 683 }, { "epoch": 0.05, "grad_norm": 1.5131124911678855, "learning_rate": 9.983786540671052e-06, "loss": 0.9453, "step": 684 }, { "epoch": 0.05, "grad_norm": 0.9755789630286947, "learning_rate": 9.98368182601971e-06, "loss": 1.2229, "step": 685 }, { "epoch": 0.06, "grad_norm": 1.555723489823332, "learning_rate": 9.983576774858776e-06, "loss": 0.8555, "step": 686 }, { "epoch": 0.06, "grad_norm": 1.6190411536338511, "learning_rate": 9.983471387195344e-06, "loss": 0.8755, "step": 687 }, { "epoch": 0.06, "grad_norm": 1.484149186152568, "learning_rate": 9.983365663036528e-06, "loss": 0.8814, "step": 688 }, { "epoch": 0.06, "grad_norm": 1.6786912403174066, "learning_rate": 9.983259602389469e-06, "loss": 0.826, "step": 689 }, { "epoch": 0.06, "grad_norm": 1.6068327199795442, "learning_rate": 9.983153205261324e-06, "loss": 0.8497, "step": 690 }, { "epoch": 0.06, "grad_norm": 1.5445774210221108, "learning_rate": 9.98304647165928e-06, "loss": 0.9258, "step": 691 }, { "epoch": 0.06, "grad_norm": 1.5315113041813724, "learning_rate": 9.982939401590545e-06, "loss": 0.9213, "step": 692 }, { "epoch": 0.06, "grad_norm": 1.5899543418513098, "learning_rate": 9.982831995062346e-06, "loss": 0.926, "step": 693 }, { "epoch": 0.06, "grad_norm": 1.6751246762494365, "learning_rate": 9.982724252081939e-06, "loss": 0.9669, "step": 694 }, { "epoch": 0.06, "grad_norm": 1.5516510769708267, "learning_rate": 9.982616172656594e-06, "loss": 0.8255, "step": 695 }, { "epoch": 0.06, "grad_norm": 1.706260872923349, "learning_rate": 9.982507756793613e-06, "loss": 0.8482, "step": 696 }, { "epoch": 0.06, "grad_norm": 1.598076169056707, "learning_rate": 9.982399004500317e-06, "loss": 0.9474, "step": 697 }, { "epoch": 0.06, "grad_norm": 1.2136121244158218, "learning_rate": 9.982289915784044e-06, "loss": 1.1819, "step": 698 }, { "epoch": 0.06, "grad_norm": 1.720504965562063, "learning_rate": 9.982180490652165e-06, "loss": 0.8769, "step": 699 }, { "epoch": 0.06, "grad_norm": 1.5383289037586392, "learning_rate": 9.982070729112068e-06, "loss": 0.9425, "step": 700 }, { "epoch": 0.06, "grad_norm": 1.5963427159188732, "learning_rate": 9.981960631171162e-06, "loss": 0.9602, "step": 701 }, { "epoch": 0.06, "grad_norm": 1.627023960602584, "learning_rate": 9.98185019683688e-06, "loss": 0.8973, "step": 702 }, { "epoch": 0.06, "grad_norm": 1.5683098037613818, "learning_rate": 9.981739426116683e-06, "loss": 0.9381, "step": 703 }, { "epoch": 0.06, "grad_norm": 1.5990424212653616, "learning_rate": 9.98162831901805e-06, "loss": 0.8936, "step": 704 }, { "epoch": 0.06, "grad_norm": 1.588155402693836, "learning_rate": 9.98151687554848e-06, "loss": 0.8985, "step": 705 }, { "epoch": 0.06, "grad_norm": 1.636514694246623, "learning_rate": 9.9814050957155e-06, "loss": 0.8852, "step": 706 }, { "epoch": 0.06, "grad_norm": 1.55467141455963, "learning_rate": 9.981292979526656e-06, "loss": 0.857, "step": 707 }, { "epoch": 0.06, "grad_norm": 1.6410517232357096, "learning_rate": 9.981180526989521e-06, "loss": 0.835, "step": 708 }, { "epoch": 0.06, "grad_norm": 1.580974143948201, "learning_rate": 9.981067738111688e-06, "loss": 0.9093, "step": 709 }, { "epoch": 0.06, "grad_norm": 1.5174794488558365, "learning_rate": 9.980954612900768e-06, "loss": 0.8953, "step": 710 }, { "epoch": 0.06, "grad_norm": 1.7920119434215045, "learning_rate": 9.980841151364405e-06, "loss": 0.898, "step": 711 }, { "epoch": 0.06, "grad_norm": 1.4628579953318983, "learning_rate": 9.980727353510257e-06, "loss": 1.215, "step": 712 }, { "epoch": 0.06, "grad_norm": 1.5887559776008755, "learning_rate": 9.980613219346012e-06, "loss": 0.8952, "step": 713 }, { "epoch": 0.06, "grad_norm": 1.5657538135950384, "learning_rate": 9.98049874887937e-06, "loss": 0.8115, "step": 714 }, { "epoch": 0.06, "grad_norm": 1.6039110285504001, "learning_rate": 9.980383942118066e-06, "loss": 0.8548, "step": 715 }, { "epoch": 0.06, "grad_norm": 1.5438521834723717, "learning_rate": 9.980268799069848e-06, "loss": 0.8248, "step": 716 }, { "epoch": 0.06, "grad_norm": 1.7472637221841802, "learning_rate": 9.980153319742494e-06, "loss": 0.9288, "step": 717 }, { "epoch": 0.06, "grad_norm": 1.6303226675408926, "learning_rate": 9.9800375041438e-06, "loss": 0.8262, "step": 718 }, { "epoch": 0.06, "grad_norm": 0.8730337927007267, "learning_rate": 9.979921352281585e-06, "loss": 1.2351, "step": 719 }, { "epoch": 0.06, "grad_norm": 1.666848112413282, "learning_rate": 9.979804864163695e-06, "loss": 0.8641, "step": 720 }, { "epoch": 0.06, "grad_norm": 1.6167557433585886, "learning_rate": 9.979688039797993e-06, "loss": 0.8687, "step": 721 }, { "epoch": 0.06, "grad_norm": 1.6495922239507568, "learning_rate": 9.979570879192365e-06, "loss": 0.8167, "step": 722 }, { "epoch": 0.06, "grad_norm": 1.6603340424215878, "learning_rate": 9.97945338235473e-06, "loss": 0.7958, "step": 723 }, { "epoch": 0.06, "grad_norm": 1.5799663922895817, "learning_rate": 9.979335549293013e-06, "loss": 0.9047, "step": 724 }, { "epoch": 0.06, "grad_norm": 1.5124978712176635, "learning_rate": 9.979217380015173e-06, "loss": 0.8814, "step": 725 }, { "epoch": 0.06, "grad_norm": 1.5965181659074317, "learning_rate": 9.979098874529192e-06, "loss": 0.7939, "step": 726 }, { "epoch": 0.06, "grad_norm": 1.5077987423420147, "learning_rate": 9.978980032843068e-06, "loss": 0.8388, "step": 727 }, { "epoch": 0.06, "grad_norm": 1.553440561187466, "learning_rate": 9.978860854964827e-06, "loss": 0.9127, "step": 728 }, { "epoch": 0.06, "grad_norm": 1.8264754858935193, "learning_rate": 9.978741340902518e-06, "loss": 0.8823, "step": 729 }, { "epoch": 0.06, "grad_norm": 1.5545555089631016, "learning_rate": 9.978621490664208e-06, "loss": 0.8589, "step": 730 }, { "epoch": 0.06, "grad_norm": 1.6266049848090909, "learning_rate": 9.978501304257991e-06, "loss": 0.9402, "step": 731 }, { "epoch": 0.06, "grad_norm": 1.5614390447484343, "learning_rate": 9.978380781691982e-06, "loss": 0.8536, "step": 732 }, { "epoch": 0.06, "grad_norm": 1.5554993156724606, "learning_rate": 9.978259922974318e-06, "loss": 0.8689, "step": 733 }, { "epoch": 0.06, "grad_norm": 1.5046641498486533, "learning_rate": 9.97813872811316e-06, "loss": 0.858, "step": 734 }, { "epoch": 0.06, "grad_norm": 1.606611499411753, "learning_rate": 9.978017197116694e-06, "loss": 0.8776, "step": 735 }, { "epoch": 0.06, "grad_norm": 1.5210110312767826, "learning_rate": 9.97789532999312e-06, "loss": 0.8236, "step": 736 }, { "epoch": 0.06, "grad_norm": 1.7465464581178427, "learning_rate": 9.977773126750677e-06, "loss": 0.9767, "step": 737 }, { "epoch": 0.06, "grad_norm": 1.6111250587942993, "learning_rate": 9.977650587397606e-06, "loss": 0.8865, "step": 738 }, { "epoch": 0.06, "grad_norm": 1.480647135578578, "learning_rate": 9.977527711942186e-06, "loss": 0.8715, "step": 739 }, { "epoch": 0.06, "grad_norm": 1.0666711139883507, "learning_rate": 9.977404500392711e-06, "loss": 1.2361, "step": 740 }, { "epoch": 0.06, "grad_norm": 1.5396949612043953, "learning_rate": 9.977280952757505e-06, "loss": 0.8523, "step": 741 }, { "epoch": 0.06, "grad_norm": 1.5749007841252676, "learning_rate": 9.977157069044907e-06, "loss": 0.869, "step": 742 }, { "epoch": 0.06, "grad_norm": 0.8209669877614383, "learning_rate": 9.977032849263284e-06, "loss": 1.2254, "step": 743 }, { "epoch": 0.06, "grad_norm": 1.5371445876897547, "learning_rate": 9.976908293421022e-06, "loss": 0.809, "step": 744 }, { "epoch": 0.06, "grad_norm": 1.6175345896676243, "learning_rate": 9.97678340152653e-06, "loss": 0.9048, "step": 745 }, { "epoch": 0.06, "grad_norm": 1.6654187789642825, "learning_rate": 9.976658173588244e-06, "loss": 0.9158, "step": 746 }, { "epoch": 0.06, "grad_norm": 1.5590356266672274, "learning_rate": 9.976532609614617e-06, "loss": 0.8143, "step": 747 }, { "epoch": 0.06, "grad_norm": 1.5892721295071321, "learning_rate": 9.97640670961413e-06, "loss": 1.0021, "step": 748 }, { "epoch": 0.06, "grad_norm": 1.5206359572073573, "learning_rate": 9.976280473595284e-06, "loss": 0.822, "step": 749 }, { "epoch": 0.06, "grad_norm": 1.4996376307617352, "learning_rate": 9.976153901566598e-06, "loss": 0.7713, "step": 750 }, { "epoch": 0.06, "grad_norm": 1.5643904527388357, "learning_rate": 9.976026993536625e-06, "loss": 0.9362, "step": 751 }, { "epoch": 0.06, "grad_norm": 1.5978878376660144, "learning_rate": 9.975899749513928e-06, "loss": 0.8737, "step": 752 }, { "epoch": 0.06, "grad_norm": 1.941335736585203, "learning_rate": 9.975772169507106e-06, "loss": 0.8129, "step": 753 }, { "epoch": 0.06, "grad_norm": 1.5498726969354564, "learning_rate": 9.975644253524766e-06, "loss": 0.8792, "step": 754 }, { "epoch": 0.06, "grad_norm": 1.2373839718074042, "learning_rate": 9.975516001575549e-06, "loss": 1.2587, "step": 755 }, { "epoch": 0.06, "grad_norm": 1.6190563426852662, "learning_rate": 9.975387413668115e-06, "loss": 0.9161, "step": 756 }, { "epoch": 0.06, "grad_norm": 0.9786834931376954, "learning_rate": 9.975258489811146e-06, "loss": 1.2285, "step": 757 }, { "epoch": 0.06, "grad_norm": 1.5904891919408752, "learning_rate": 9.975129230013347e-06, "loss": 0.9175, "step": 758 }, { "epoch": 0.06, "grad_norm": 1.6491689405453176, "learning_rate": 9.974999634283447e-06, "loss": 0.9621, "step": 759 }, { "epoch": 0.06, "grad_norm": 1.5425456155206803, "learning_rate": 9.974869702630193e-06, "loss": 0.8477, "step": 760 }, { "epoch": 0.06, "grad_norm": 1.6323690984462185, "learning_rate": 9.974739435062364e-06, "loss": 0.9159, "step": 761 }, { "epoch": 0.06, "grad_norm": 1.597473102522369, "learning_rate": 9.97460883158875e-06, "loss": 0.8274, "step": 762 }, { "epoch": 0.06, "grad_norm": 1.7591030012424824, "learning_rate": 9.974477892218175e-06, "loss": 0.9676, "step": 763 }, { "epoch": 0.06, "grad_norm": 1.4515123197981594, "learning_rate": 9.974346616959476e-06, "loss": 1.2239, "step": 764 }, { "epoch": 0.06, "grad_norm": 1.623752285977115, "learning_rate": 9.97421500582152e-06, "loss": 0.9116, "step": 765 }, { "epoch": 0.06, "grad_norm": 1.5259449040384303, "learning_rate": 9.974083058813192e-06, "loss": 0.8631, "step": 766 }, { "epoch": 0.06, "grad_norm": 1.0480510879441411, "learning_rate": 9.973950775943403e-06, "loss": 1.2349, "step": 767 }, { "epoch": 0.06, "grad_norm": 0.8704545487560228, "learning_rate": 9.973818157221084e-06, "loss": 1.1815, "step": 768 }, { "epoch": 0.06, "grad_norm": 1.5269801868692314, "learning_rate": 9.973685202655187e-06, "loss": 0.8548, "step": 769 }, { "epoch": 0.06, "grad_norm": 1.6544800795913106, "learning_rate": 9.973551912254696e-06, "loss": 0.7791, "step": 770 }, { "epoch": 0.06, "grad_norm": 1.5514811024722737, "learning_rate": 9.973418286028604e-06, "loss": 0.8721, "step": 771 }, { "epoch": 0.06, "grad_norm": 1.621469922705513, "learning_rate": 9.97328432398594e-06, "loss": 0.9063, "step": 772 }, { "epoch": 0.06, "grad_norm": 1.4964916349472792, "learning_rate": 9.973150026135743e-06, "loss": 1.2128, "step": 773 }, { "epoch": 0.06, "grad_norm": 1.341534462295757, "learning_rate": 9.973015392487087e-06, "loss": 1.2319, "step": 774 }, { "epoch": 0.06, "grad_norm": 1.5823867981938509, "learning_rate": 9.972880423049058e-06, "loss": 0.9171, "step": 775 }, { "epoch": 0.06, "grad_norm": 1.844438164131746, "learning_rate": 9.972745117830774e-06, "loss": 0.819, "step": 776 }, { "epoch": 0.06, "grad_norm": 1.612375097820019, "learning_rate": 9.972609476841368e-06, "loss": 0.8955, "step": 777 }, { "epoch": 0.06, "grad_norm": 1.5785070904723566, "learning_rate": 9.972473500089998e-06, "loss": 0.8625, "step": 778 }, { "epoch": 0.06, "grad_norm": 1.538494166835394, "learning_rate": 9.972337187585848e-06, "loss": 0.8907, "step": 779 }, { "epoch": 0.06, "grad_norm": 1.5283398002076578, "learning_rate": 9.972200539338122e-06, "loss": 1.2961, "step": 780 }, { "epoch": 0.06, "grad_norm": 1.5859564775424169, "learning_rate": 9.972063555356047e-06, "loss": 0.8669, "step": 781 }, { "epoch": 0.06, "grad_norm": 1.5090774919281724, "learning_rate": 9.971926235648868e-06, "loss": 0.835, "step": 782 }, { "epoch": 0.06, "grad_norm": 1.9445687642109515, "learning_rate": 9.971788580225864e-06, "loss": 0.8807, "step": 783 }, { "epoch": 0.06, "grad_norm": 1.6083399799055387, "learning_rate": 9.971650589096324e-06, "loss": 0.9281, "step": 784 }, { "epoch": 0.06, "grad_norm": 0.9813029896629548, "learning_rate": 9.971512262269568e-06, "loss": 1.2021, "step": 785 }, { "epoch": 0.06, "grad_norm": 1.6046692968507614, "learning_rate": 9.971373599754936e-06, "loss": 0.8743, "step": 786 }, { "epoch": 0.06, "grad_norm": 1.5651722015270195, "learning_rate": 9.971234601561793e-06, "loss": 0.8819, "step": 787 }, { "epoch": 0.06, "grad_norm": 1.537118391205696, "learning_rate": 9.97109526769952e-06, "loss": 0.8208, "step": 788 }, { "epoch": 0.06, "grad_norm": 1.5672592648201562, "learning_rate": 9.970955598177527e-06, "loss": 0.8875, "step": 789 }, { "epoch": 0.06, "grad_norm": 1.5678209152764904, "learning_rate": 9.970815593005248e-06, "loss": 0.8423, "step": 790 }, { "epoch": 0.06, "grad_norm": 1.531325471259386, "learning_rate": 9.970675252192133e-06, "loss": 0.8477, "step": 791 }, { "epoch": 0.06, "grad_norm": 1.656475536341903, "learning_rate": 9.970534575747658e-06, "loss": 0.8787, "step": 792 }, { "epoch": 0.06, "grad_norm": 1.6635111196453198, "learning_rate": 9.97039356368132e-06, "loss": 0.8332, "step": 793 }, { "epoch": 0.06, "grad_norm": 1.6422252576877494, "learning_rate": 9.970252216002647e-06, "loss": 0.9709, "step": 794 }, { "epoch": 0.06, "grad_norm": 1.6090937231293232, "learning_rate": 9.970110532721178e-06, "loss": 0.8753, "step": 795 }, { "epoch": 0.06, "grad_norm": 1.5462977286701667, "learning_rate": 9.96996851384648e-06, "loss": 0.8564, "step": 796 }, { "epoch": 0.06, "grad_norm": 1.3282387274437408, "learning_rate": 9.969826159388145e-06, "loss": 1.2532, "step": 797 }, { "epoch": 0.06, "grad_norm": 1.4674759499030112, "learning_rate": 9.969683469355781e-06, "loss": 0.8873, "step": 798 }, { "epoch": 0.06, "grad_norm": 1.5443975266399053, "learning_rate": 9.969540443759027e-06, "loss": 0.7424, "step": 799 }, { "epoch": 0.06, "grad_norm": 1.785355030482525, "learning_rate": 9.96939708260754e-06, "loss": 0.838, "step": 800 }, { "epoch": 0.06, "grad_norm": 1.4410309972787625, "learning_rate": 9.969253385910997e-06, "loss": 0.8712, "step": 801 }, { "epoch": 0.06, "grad_norm": 1.6673873896844216, "learning_rate": 9.969109353679104e-06, "loss": 0.8428, "step": 802 }, { "epoch": 0.06, "grad_norm": 1.595269550655003, "learning_rate": 9.968964985921584e-06, "loss": 0.8763, "step": 803 }, { "epoch": 0.06, "grad_norm": 1.5080419817774886, "learning_rate": 9.968820282648186e-06, "loss": 0.8848, "step": 804 }, { "epoch": 0.06, "grad_norm": 0.9376162111387027, "learning_rate": 9.96867524386868e-06, "loss": 1.215, "step": 805 }, { "epoch": 0.06, "grad_norm": 1.6630040066781884, "learning_rate": 9.96852986959286e-06, "loss": 0.8812, "step": 806 }, { "epoch": 0.06, "grad_norm": 0.8556451627071315, "learning_rate": 9.968384159830542e-06, "loss": 1.213, "step": 807 }, { "epoch": 0.06, "grad_norm": 1.7165800515064054, "learning_rate": 9.968238114591567e-06, "loss": 0.8601, "step": 808 }, { "epoch": 0.06, "grad_norm": 1.5021242657808525, "learning_rate": 9.96809173388579e-06, "loss": 0.838, "step": 809 }, { "epoch": 0.06, "grad_norm": 0.8262725675577803, "learning_rate": 9.967945017723102e-06, "loss": 1.2221, "step": 810 }, { "epoch": 0.07, "grad_norm": 0.823374721998848, "learning_rate": 9.967797966113404e-06, "loss": 1.1861, "step": 811 }, { "epoch": 0.07, "grad_norm": 1.5704877437669507, "learning_rate": 9.96765057906663e-06, "loss": 0.8668, "step": 812 }, { "epoch": 0.07, "grad_norm": 1.609655047484496, "learning_rate": 9.967502856592728e-06, "loss": 0.8939, "step": 813 }, { "epoch": 0.07, "grad_norm": 1.5023129617247735, "learning_rate": 9.967354798701676e-06, "loss": 0.8581, "step": 814 }, { "epoch": 0.07, "grad_norm": 1.5513696158423875, "learning_rate": 9.967206405403468e-06, "loss": 0.8089, "step": 815 }, { "epoch": 0.07, "grad_norm": 1.480325824586333, "learning_rate": 9.967057676708126e-06, "loss": 0.8899, "step": 816 }, { "epoch": 0.07, "grad_norm": 1.5791232959809558, "learning_rate": 9.966908612625693e-06, "loss": 0.898, "step": 817 }, { "epoch": 0.07, "grad_norm": 1.5435787004076655, "learning_rate": 9.966759213166231e-06, "loss": 0.8969, "step": 818 }, { "epoch": 0.07, "grad_norm": 1.491859491751482, "learning_rate": 9.96660947833983e-06, "loss": 0.9832, "step": 819 }, { "epoch": 0.07, "grad_norm": 1.6134884929501343, "learning_rate": 9.966459408156601e-06, "loss": 0.8548, "step": 820 }, { "epoch": 0.07, "grad_norm": 1.6129626840203988, "learning_rate": 9.966309002626676e-06, "loss": 0.8981, "step": 821 }, { "epoch": 0.07, "grad_norm": 0.9876694438889108, "learning_rate": 9.966158261760211e-06, "loss": 1.2057, "step": 822 }, { "epoch": 0.07, "grad_norm": 1.4481304968213926, "learning_rate": 9.966007185567383e-06, "loss": 0.8799, "step": 823 }, { "epoch": 0.07, "grad_norm": 0.8124541295541208, "learning_rate": 9.965855774058395e-06, "loss": 1.2072, "step": 824 }, { "epoch": 0.07, "grad_norm": 1.7162508680321158, "learning_rate": 9.96570402724347e-06, "loss": 0.8587, "step": 825 }, { "epoch": 0.07, "grad_norm": 1.5821215459936224, "learning_rate": 9.965551945132857e-06, "loss": 0.887, "step": 826 }, { "epoch": 0.07, "grad_norm": 1.56419392195959, "learning_rate": 9.965399527736819e-06, "loss": 0.8165, "step": 827 }, { "epoch": 0.07, "grad_norm": 1.6293281869621739, "learning_rate": 9.965246775065652e-06, "loss": 0.8051, "step": 828 }, { "epoch": 0.07, "grad_norm": 0.9726554641881016, "learning_rate": 9.965093687129669e-06, "loss": 1.225, "step": 829 }, { "epoch": 0.07, "grad_norm": 1.4965604333569407, "learning_rate": 9.964940263939206e-06, "loss": 0.8082, "step": 830 }, { "epoch": 0.07, "grad_norm": 1.696273361150681, "learning_rate": 9.964786505504624e-06, "loss": 0.8512, "step": 831 }, { "epoch": 0.07, "grad_norm": 1.5109942544994217, "learning_rate": 9.964632411836306e-06, "loss": 0.8501, "step": 832 }, { "epoch": 0.07, "grad_norm": 0.8587970481718141, "learning_rate": 9.964477982944654e-06, "loss": 1.1877, "step": 833 }, { "epoch": 0.07, "grad_norm": 1.5279342918380323, "learning_rate": 9.964323218840095e-06, "loss": 0.8103, "step": 834 }, { "epoch": 0.07, "grad_norm": 1.5595903501863457, "learning_rate": 9.964168119533084e-06, "loss": 0.8384, "step": 835 }, { "epoch": 0.07, "grad_norm": 1.5979927257460829, "learning_rate": 9.964012685034087e-06, "loss": 0.8243, "step": 836 }, { "epoch": 0.07, "grad_norm": 1.690872532926303, "learning_rate": 9.963856915353604e-06, "loss": 0.9339, "step": 837 }, { "epoch": 0.07, "grad_norm": 0.8790289567986185, "learning_rate": 9.963700810502154e-06, "loss": 1.2626, "step": 838 }, { "epoch": 0.07, "grad_norm": 1.465410231999522, "learning_rate": 9.96354437049027e-06, "loss": 0.9177, "step": 839 }, { "epoch": 0.07, "grad_norm": 1.4658161959685483, "learning_rate": 9.963387595328524e-06, "loss": 0.8501, "step": 840 }, { "epoch": 0.07, "grad_norm": 1.5831185515196096, "learning_rate": 9.963230485027498e-06, "loss": 0.8988, "step": 841 }, { "epoch": 0.07, "grad_norm": 1.468366848240649, "learning_rate": 9.963073039597798e-06, "loss": 0.8387, "step": 842 }, { "epoch": 0.07, "grad_norm": 1.5807375552592093, "learning_rate": 9.962915259050058e-06, "loss": 0.867, "step": 843 }, { "epoch": 0.07, "grad_norm": 1.6579131494058128, "learning_rate": 9.962757143394934e-06, "loss": 0.9019, "step": 844 }, { "epoch": 0.07, "grad_norm": 1.5243215409962914, "learning_rate": 9.962598692643098e-06, "loss": 0.91, "step": 845 }, { "epoch": 0.07, "grad_norm": 1.6755319096627121, "learning_rate": 9.96243990680525e-06, "loss": 0.9577, "step": 846 }, { "epoch": 0.07, "grad_norm": 1.5894058638274384, "learning_rate": 9.962280785892113e-06, "loss": 0.8963, "step": 847 }, { "epoch": 0.07, "grad_norm": 0.9345554922338601, "learning_rate": 9.962121329914432e-06, "loss": 1.2323, "step": 848 }, { "epoch": 0.07, "grad_norm": 1.65930238891711, "learning_rate": 9.96196153888297e-06, "loss": 0.8531, "step": 849 }, { "epoch": 0.07, "grad_norm": 1.5225564704758765, "learning_rate": 9.96180141280852e-06, "loss": 0.9075, "step": 850 }, { "epoch": 0.07, "grad_norm": 0.8542727834613469, "learning_rate": 9.961640951701892e-06, "loss": 1.23, "step": 851 }, { "epoch": 0.07, "grad_norm": 0.8160117491424631, "learning_rate": 9.961480155573921e-06, "loss": 1.1977, "step": 852 }, { "epoch": 0.07, "grad_norm": 1.5911479201817575, "learning_rate": 9.961319024435465e-06, "loss": 0.9465, "step": 853 }, { "epoch": 0.07, "grad_norm": 1.6384427925034404, "learning_rate": 9.961157558297404e-06, "loss": 0.8725, "step": 854 }, { "epoch": 0.07, "grad_norm": 1.4620914600178492, "learning_rate": 9.960995757170639e-06, "loss": 0.8515, "step": 855 }, { "epoch": 0.07, "grad_norm": 1.5610457928687695, "learning_rate": 9.9608336210661e-06, "loss": 0.9082, "step": 856 }, { "epoch": 0.07, "grad_norm": 1.5816539275334973, "learning_rate": 9.960671149994727e-06, "loss": 0.9179, "step": 857 }, { "epoch": 0.07, "grad_norm": 1.4298912907931096, "learning_rate": 9.960508343967497e-06, "loss": 0.88, "step": 858 }, { "epoch": 0.07, "grad_norm": 1.4559016872395851, "learning_rate": 9.960345202995401e-06, "loss": 0.8663, "step": 859 }, { "epoch": 0.07, "grad_norm": 1.5895417029091696, "learning_rate": 9.960181727089455e-06, "loss": 0.8654, "step": 860 }, { "epoch": 0.07, "grad_norm": 1.8876913838201386, "learning_rate": 9.960017916260695e-06, "loss": 0.8921, "step": 861 }, { "epoch": 0.07, "grad_norm": 1.6488919618165974, "learning_rate": 9.959853770520184e-06, "loss": 0.9062, "step": 862 }, { "epoch": 0.07, "grad_norm": 1.6468957590819309, "learning_rate": 9.959689289879003e-06, "loss": 0.7902, "step": 863 }, { "epoch": 0.07, "grad_norm": 1.636536067445133, "learning_rate": 9.959524474348263e-06, "loss": 1.0091, "step": 864 }, { "epoch": 0.07, "grad_norm": 1.505287671297433, "learning_rate": 9.95935932393909e-06, "loss": 0.8594, "step": 865 }, { "epoch": 0.07, "grad_norm": 1.5136664485681097, "learning_rate": 9.959193838662634e-06, "loss": 0.8617, "step": 866 }, { "epoch": 0.07, "grad_norm": 1.4405864271679185, "learning_rate": 9.95902801853007e-06, "loss": 0.8202, "step": 867 }, { "epoch": 0.07, "grad_norm": 1.6017390454120775, "learning_rate": 9.958861863552596e-06, "loss": 0.8785, "step": 868 }, { "epoch": 0.07, "grad_norm": 1.6065819357687168, "learning_rate": 9.958695373741428e-06, "loss": 0.7879, "step": 869 }, { "epoch": 0.07, "grad_norm": 1.132440165936126, "learning_rate": 9.958528549107812e-06, "loss": 1.2008, "step": 870 }, { "epoch": 0.07, "grad_norm": 1.5197884971071811, "learning_rate": 9.958361389663007e-06, "loss": 0.8491, "step": 871 }, { "epoch": 0.07, "grad_norm": 1.576604646765031, "learning_rate": 9.958193895418305e-06, "loss": 0.9584, "step": 872 }, { "epoch": 0.07, "grad_norm": 1.6245211033233342, "learning_rate": 9.958026066385014e-06, "loss": 0.8651, "step": 873 }, { "epoch": 0.07, "grad_norm": 0.876308172032014, "learning_rate": 9.957857902574464e-06, "loss": 1.2343, "step": 874 }, { "epoch": 0.07, "grad_norm": 1.5730547260320016, "learning_rate": 9.957689403998012e-06, "loss": 0.8485, "step": 875 }, { "epoch": 0.07, "grad_norm": 1.6183600606595228, "learning_rate": 9.957520570667036e-06, "loss": 1.0281, "step": 876 }, { "epoch": 0.07, "grad_norm": 1.5215872860332218, "learning_rate": 9.957351402592933e-06, "loss": 0.8868, "step": 877 }, { "epoch": 0.07, "grad_norm": 1.6198581805666281, "learning_rate": 9.95718189978713e-06, "loss": 0.8681, "step": 878 }, { "epoch": 0.07, "grad_norm": 1.499073540604396, "learning_rate": 9.95701206226107e-06, "loss": 0.8472, "step": 879 }, { "epoch": 0.07, "grad_norm": 1.0290245541061973, "learning_rate": 9.956841890026218e-06, "loss": 1.2433, "step": 880 }, { "epoch": 0.07, "grad_norm": 1.597372438886083, "learning_rate": 9.95667138309407e-06, "loss": 0.8655, "step": 881 }, { "epoch": 0.07, "grad_norm": 0.8670633449494914, "learning_rate": 9.956500541476135e-06, "loss": 1.2232, "step": 882 }, { "epoch": 0.07, "grad_norm": 1.8023267121904394, "learning_rate": 9.956329365183948e-06, "loss": 0.8918, "step": 883 }, { "epoch": 0.07, "grad_norm": 0.8403670938989496, "learning_rate": 9.956157854229072e-06, "loss": 1.2084, "step": 884 }, { "epoch": 0.07, "grad_norm": 1.5220605541357521, "learning_rate": 9.955986008623083e-06, "loss": 0.816, "step": 885 }, { "epoch": 0.07, "grad_norm": 1.5766209460689375, "learning_rate": 9.955813828377585e-06, "loss": 0.8964, "step": 886 }, { "epoch": 0.07, "grad_norm": 1.7968329731912558, "learning_rate": 9.955641313504208e-06, "loss": 0.8917, "step": 887 }, { "epoch": 0.07, "grad_norm": 2.31269975837571, "learning_rate": 9.955468464014595e-06, "loss": 0.9481, "step": 888 }, { "epoch": 0.07, "grad_norm": 1.565988950782471, "learning_rate": 9.955295279920422e-06, "loss": 0.9358, "step": 889 }, { "epoch": 0.07, "grad_norm": 1.5498484177096614, "learning_rate": 9.95512176123338e-06, "loss": 0.8871, "step": 890 }, { "epoch": 0.07, "grad_norm": 1.5764026661093893, "learning_rate": 9.954947907965186e-06, "loss": 0.9162, "step": 891 }, { "epoch": 0.07, "grad_norm": 1.4343010486300414, "learning_rate": 9.954773720127579e-06, "loss": 0.7625, "step": 892 }, { "epoch": 0.07, "grad_norm": 1.483432701379875, "learning_rate": 9.95459919773232e-06, "loss": 0.7859, "step": 893 }, { "epoch": 0.07, "grad_norm": 1.5006845700511382, "learning_rate": 9.954424340791195e-06, "loss": 0.865, "step": 894 }, { "epoch": 0.07, "grad_norm": 1.5432522428558055, "learning_rate": 9.95424914931601e-06, "loss": 0.8889, "step": 895 }, { "epoch": 0.07, "grad_norm": 1.508478572845355, "learning_rate": 9.954073623318593e-06, "loss": 0.9132, "step": 896 }, { "epoch": 0.07, "grad_norm": 1.685373554909562, "learning_rate": 9.953897762810796e-06, "loss": 0.926, "step": 897 }, { "epoch": 0.07, "grad_norm": 1.0509692178331513, "learning_rate": 9.953721567804496e-06, "loss": 1.1793, "step": 898 }, { "epoch": 0.07, "grad_norm": 1.6457077056726912, "learning_rate": 9.953545038311587e-06, "loss": 0.8336, "step": 899 }, { "epoch": 0.07, "grad_norm": 1.5051217144543587, "learning_rate": 9.95336817434399e-06, "loss": 0.8139, "step": 900 }, { "epoch": 0.07, "grad_norm": 1.509006638078777, "learning_rate": 9.953190975913648e-06, "loss": 0.813, "step": 901 }, { "epoch": 0.07, "grad_norm": 1.5407290794333866, "learning_rate": 9.953013443032524e-06, "loss": 0.8367, "step": 902 }, { "epoch": 0.07, "grad_norm": 2.148708886736417, "learning_rate": 9.952835575712607e-06, "loss": 0.883, "step": 903 }, { "epoch": 0.07, "grad_norm": 1.4351348165143027, "learning_rate": 9.952657373965908e-06, "loss": 0.8516, "step": 904 }, { "epoch": 0.07, "grad_norm": 0.9163739034550332, "learning_rate": 9.952478837804459e-06, "loss": 1.2375, "step": 905 }, { "epoch": 0.07, "grad_norm": 1.596995845901004, "learning_rate": 9.95229996724031e-06, "loss": 0.8367, "step": 906 }, { "epoch": 0.07, "grad_norm": 1.6241139398126248, "learning_rate": 9.952120762285546e-06, "loss": 0.9035, "step": 907 }, { "epoch": 0.07, "grad_norm": 1.7389202527739365, "learning_rate": 9.951941222952264e-06, "loss": 0.9981, "step": 908 }, { "epoch": 0.07, "grad_norm": 0.8881784023247613, "learning_rate": 9.951761349252587e-06, "loss": 1.164, "step": 909 }, { "epoch": 0.07, "grad_norm": 1.745527381650594, "learning_rate": 9.951581141198662e-06, "loss": 0.8893, "step": 910 }, { "epoch": 0.07, "grad_norm": 1.4603734816595164, "learning_rate": 9.951400598802655e-06, "loss": 0.8596, "step": 911 }, { "epoch": 0.07, "grad_norm": 1.6758564415412476, "learning_rate": 9.951219722076759e-06, "loss": 0.8865, "step": 912 }, { "epoch": 0.07, "grad_norm": 1.554165973561126, "learning_rate": 9.951038511033182e-06, "loss": 0.8569, "step": 913 }, { "epoch": 0.07, "grad_norm": 0.8294022262654263, "learning_rate": 9.950856965684167e-06, "loss": 1.2292, "step": 914 }, { "epoch": 0.07, "grad_norm": 1.5887727342285896, "learning_rate": 9.950675086041967e-06, "loss": 0.8965, "step": 915 }, { "epoch": 0.07, "grad_norm": 1.5873872244248364, "learning_rate": 9.950492872118867e-06, "loss": 0.8617, "step": 916 }, { "epoch": 0.07, "grad_norm": 1.4561416679150612, "learning_rate": 9.950310323927165e-06, "loss": 0.7397, "step": 917 }, { "epoch": 0.07, "grad_norm": 1.569213865174641, "learning_rate": 9.950127441479193e-06, "loss": 0.9019, "step": 918 }, { "epoch": 0.07, "grad_norm": 1.7220188100836231, "learning_rate": 9.949944224787296e-06, "loss": 0.907, "step": 919 }, { "epoch": 0.07, "grad_norm": 1.6776562914672004, "learning_rate": 9.949760673863846e-06, "loss": 0.9423, "step": 920 }, { "epoch": 0.07, "grad_norm": 1.4972164724285582, "learning_rate": 9.949576788721237e-06, "loss": 0.877, "step": 921 }, { "epoch": 0.07, "grad_norm": 1.443475702180261, "learning_rate": 9.949392569371886e-06, "loss": 0.7778, "step": 922 }, { "epoch": 0.07, "grad_norm": 0.860554014325369, "learning_rate": 9.949208015828232e-06, "loss": 1.1936, "step": 923 }, { "epoch": 0.07, "grad_norm": 1.6219505745666818, "learning_rate": 9.949023128102734e-06, "loss": 0.9293, "step": 924 }, { "epoch": 0.07, "grad_norm": 1.5469613469048205, "learning_rate": 9.948837906207878e-06, "loss": 0.819, "step": 925 }, { "epoch": 0.07, "grad_norm": 1.5759415908270265, "learning_rate": 9.948652350156172e-06, "loss": 0.8659, "step": 926 }, { "epoch": 0.07, "grad_norm": 2.1399759358035464, "learning_rate": 9.948466459960142e-06, "loss": 0.7979, "step": 927 }, { "epoch": 0.07, "grad_norm": 1.534666105384267, "learning_rate": 9.948280235632341e-06, "loss": 0.9091, "step": 928 }, { "epoch": 0.07, "grad_norm": 1.4610704585460546, "learning_rate": 9.948093677185345e-06, "loss": 0.8987, "step": 929 }, { "epoch": 0.07, "grad_norm": 1.519563061823506, "learning_rate": 9.947906784631749e-06, "loss": 0.8853, "step": 930 }, { "epoch": 0.07, "grad_norm": 1.6430968184663322, "learning_rate": 9.947719557984172e-06, "loss": 0.8796, "step": 931 }, { "epoch": 0.07, "grad_norm": 1.5331526850000736, "learning_rate": 9.947531997255256e-06, "loss": 0.9565, "step": 932 }, { "epoch": 0.07, "grad_norm": 1.4856893161716964, "learning_rate": 9.947344102457669e-06, "loss": 0.8796, "step": 933 }, { "epoch": 0.07, "grad_norm": 1.5657934606103434, "learning_rate": 9.947155873604093e-06, "loss": 0.951, "step": 934 }, { "epoch": 0.08, "grad_norm": 1.5821595327846476, "learning_rate": 9.946967310707241e-06, "loss": 0.7768, "step": 935 }, { "epoch": 0.08, "grad_norm": 1.6180640920062845, "learning_rate": 9.946778413779844e-06, "loss": 0.8243, "step": 936 }, { "epoch": 0.08, "grad_norm": 1.7297272143683036, "learning_rate": 9.946589182834657e-06, "loss": 0.9456, "step": 937 }, { "epoch": 0.08, "grad_norm": 1.9824388706503082, "learning_rate": 9.946399617884457e-06, "loss": 0.9136, "step": 938 }, { "epoch": 0.08, "grad_norm": 1.6628193105182445, "learning_rate": 9.946209718942046e-06, "loss": 0.8556, "step": 939 }, { "epoch": 0.08, "grad_norm": 1.6179862584998124, "learning_rate": 9.946019486020242e-06, "loss": 0.8255, "step": 940 }, { "epoch": 0.08, "grad_norm": 1.5413442363317122, "learning_rate": 9.945828919131894e-06, "loss": 0.9084, "step": 941 }, { "epoch": 0.08, "grad_norm": 1.631281259492192, "learning_rate": 9.94563801828987e-06, "loss": 0.7806, "step": 942 }, { "epoch": 0.08, "grad_norm": 1.5136807338796188, "learning_rate": 9.945446783507056e-06, "loss": 0.8918, "step": 943 }, { "epoch": 0.08, "grad_norm": 1.605564196752661, "learning_rate": 9.945255214796366e-06, "loss": 0.8638, "step": 944 }, { "epoch": 0.08, "grad_norm": 1.5496755674473295, "learning_rate": 9.945063312170736e-06, "loss": 0.9507, "step": 945 }, { "epoch": 0.08, "grad_norm": 1.5981523373121949, "learning_rate": 9.944871075643125e-06, "loss": 0.9116, "step": 946 }, { "epoch": 0.08, "grad_norm": 1.4576302343313638, "learning_rate": 9.944678505226511e-06, "loss": 0.8427, "step": 947 }, { "epoch": 0.08, "grad_norm": 1.6232678569968226, "learning_rate": 9.944485600933898e-06, "loss": 0.8038, "step": 948 }, { "epoch": 0.08, "grad_norm": 1.8700567164123805, "learning_rate": 9.94429236277831e-06, "loss": 0.8559, "step": 949 }, { "epoch": 0.08, "grad_norm": 1.6592827706886064, "learning_rate": 9.944098790772797e-06, "loss": 0.8687, "step": 950 }, { "epoch": 0.08, "grad_norm": 1.6709037132298505, "learning_rate": 9.943904884930427e-06, "loss": 0.922, "step": 951 }, { "epoch": 0.08, "grad_norm": 1.5381899241454413, "learning_rate": 9.943710645264295e-06, "loss": 0.8483, "step": 952 }, { "epoch": 0.08, "grad_norm": 1.5614291075175473, "learning_rate": 9.943516071787517e-06, "loss": 0.9734, "step": 953 }, { "epoch": 0.08, "grad_norm": 1.5961762762811158, "learning_rate": 9.943321164513229e-06, "loss": 0.8162, "step": 954 }, { "epoch": 0.08, "grad_norm": 1.0210905831306314, "learning_rate": 9.94312592345459e-06, "loss": 1.2453, "step": 955 }, { "epoch": 0.08, "grad_norm": 1.6169314694183436, "learning_rate": 9.942930348624788e-06, "loss": 0.9288, "step": 956 }, { "epoch": 0.08, "grad_norm": 1.4918095732996235, "learning_rate": 9.942734440037028e-06, "loss": 0.8517, "step": 957 }, { "epoch": 0.08, "grad_norm": 1.64156294740685, "learning_rate": 9.942538197704533e-06, "loss": 0.881, "step": 958 }, { "epoch": 0.08, "grad_norm": 0.8148778287909048, "learning_rate": 9.942341621640558e-06, "loss": 1.1763, "step": 959 }, { "epoch": 0.08, "grad_norm": 1.5743707770346411, "learning_rate": 9.942144711858374e-06, "loss": 0.9441, "step": 960 }, { "epoch": 0.08, "grad_norm": 1.586839715239287, "learning_rate": 9.94194746837128e-06, "loss": 0.8847, "step": 961 }, { "epoch": 0.08, "grad_norm": 1.604869335109098, "learning_rate": 9.941749891192594e-06, "loss": 0.8805, "step": 962 }, { "epoch": 0.08, "grad_norm": 1.637202468195142, "learning_rate": 9.941551980335653e-06, "loss": 0.7827, "step": 963 }, { "epoch": 0.08, "grad_norm": 1.374547597815223, "learning_rate": 9.941353735813824e-06, "loss": 0.7999, "step": 964 }, { "epoch": 0.08, "grad_norm": 1.597958564554555, "learning_rate": 9.94115515764049e-06, "loss": 0.8718, "step": 965 }, { "epoch": 0.08, "grad_norm": 0.9021014230172983, "learning_rate": 9.940956245829061e-06, "loss": 1.1964, "step": 966 }, { "epoch": 0.08, "grad_norm": 1.5312997885390878, "learning_rate": 9.940757000392971e-06, "loss": 0.8741, "step": 967 }, { "epoch": 0.08, "grad_norm": 1.5440300544113965, "learning_rate": 9.940557421345667e-06, "loss": 0.9808, "step": 968 }, { "epoch": 0.08, "grad_norm": 1.5445796018965658, "learning_rate": 9.94035750870063e-06, "loss": 0.8889, "step": 969 }, { "epoch": 0.08, "grad_norm": 1.6666604033169776, "learning_rate": 9.940157262471359e-06, "loss": 0.7208, "step": 970 }, { "epoch": 0.08, "grad_norm": 0.8842428952972322, "learning_rate": 9.939956682671372e-06, "loss": 1.2271, "step": 971 }, { "epoch": 0.08, "grad_norm": 1.7581795636181663, "learning_rate": 9.939755769314215e-06, "loss": 0.8037, "step": 972 }, { "epoch": 0.08, "grad_norm": 1.5391243599005493, "learning_rate": 9.93955452241345e-06, "loss": 0.8918, "step": 973 }, { "epoch": 0.08, "grad_norm": 1.5359442733189441, "learning_rate": 9.939352941982671e-06, "loss": 0.8655, "step": 974 }, { "epoch": 0.08, "grad_norm": 0.8062949802875369, "learning_rate": 9.939151028035487e-06, "loss": 1.2372, "step": 975 }, { "epoch": 0.08, "grad_norm": 1.5216981529084044, "learning_rate": 9.93894878058553e-06, "loss": 0.8752, "step": 976 }, { "epoch": 0.08, "grad_norm": 0.8032581072848206, "learning_rate": 9.938746199646458e-06, "loss": 1.1947, "step": 977 }, { "epoch": 0.08, "grad_norm": 1.5425433427122017, "learning_rate": 9.938543285231952e-06, "loss": 0.8646, "step": 978 }, { "epoch": 0.08, "grad_norm": 1.8766476602216222, "learning_rate": 9.938340037355709e-06, "loss": 0.8965, "step": 979 }, { "epoch": 0.08, "grad_norm": 1.5530331778329138, "learning_rate": 9.938136456031454e-06, "loss": 0.8668, "step": 980 }, { "epoch": 0.08, "grad_norm": 1.7312236697410663, "learning_rate": 9.937932541272935e-06, "loss": 0.9714, "step": 981 }, { "epoch": 0.08, "grad_norm": 1.53502276308394, "learning_rate": 9.937728293093918e-06, "loss": 0.8559, "step": 982 }, { "epoch": 0.08, "grad_norm": 0.873787567066599, "learning_rate": 9.937523711508196e-06, "loss": 1.1908, "step": 983 }, { "epoch": 0.08, "grad_norm": 1.583836588337417, "learning_rate": 9.937318796529583e-06, "loss": 0.93, "step": 984 }, { "epoch": 0.08, "grad_norm": 1.595658369403494, "learning_rate": 9.937113548171914e-06, "loss": 0.9399, "step": 985 }, { "epoch": 0.08, "grad_norm": 0.8016914893362079, "learning_rate": 9.93690796644905e-06, "loss": 1.1929, "step": 986 }, { "epoch": 0.08, "grad_norm": 1.6791446924191054, "learning_rate": 9.93670205137487e-06, "loss": 0.9495, "step": 987 }, { "epoch": 0.08, "grad_norm": 1.515781764995385, "learning_rate": 9.936495802963282e-06, "loss": 0.9064, "step": 988 }, { "epoch": 0.08, "grad_norm": 1.60690294009617, "learning_rate": 9.936289221228207e-06, "loss": 0.7737, "step": 989 }, { "epoch": 0.08, "grad_norm": 1.5146465229404158, "learning_rate": 9.936082306183598e-06, "loss": 0.8108, "step": 990 }, { "epoch": 0.08, "grad_norm": 0.8504574430651369, "learning_rate": 9.935875057843423e-06, "loss": 1.1846, "step": 991 }, { "epoch": 0.08, "grad_norm": 1.7182963666124904, "learning_rate": 9.935667476221678e-06, "loss": 1.002, "step": 992 }, { "epoch": 0.08, "grad_norm": 1.5884988118240264, "learning_rate": 9.93545956133238e-06, "loss": 0.7536, "step": 993 }, { "epoch": 0.08, "grad_norm": 1.7254944733755144, "learning_rate": 9.935251313189564e-06, "loss": 0.9031, "step": 994 }, { "epoch": 0.08, "grad_norm": 1.5504381889644234, "learning_rate": 9.935042731807297e-06, "loss": 0.8186, "step": 995 }, { "epoch": 0.08, "grad_norm": 1.5130669231709604, "learning_rate": 9.93483381719966e-06, "loss": 0.8472, "step": 996 }, { "epoch": 0.08, "grad_norm": 1.5185782026311627, "learning_rate": 9.934624569380757e-06, "loss": 0.8774, "step": 997 }, { "epoch": 0.08, "grad_norm": 1.5647256269570495, "learning_rate": 9.934414988364722e-06, "loss": 0.9294, "step": 998 }, { "epoch": 0.08, "grad_norm": 0.8972949851881172, "learning_rate": 9.934205074165703e-06, "loss": 1.1737, "step": 999 }, { "epoch": 0.08, "grad_norm": 1.665495518116886, "learning_rate": 9.933994826797874e-06, "loss": 0.9056, "step": 1000 }, { "epoch": 0.08, "grad_norm": 1.5531067781664374, "learning_rate": 9.933784246275432e-06, "loss": 0.877, "step": 1001 }, { "epoch": 0.08, "grad_norm": 1.53748757501442, "learning_rate": 9.933573332612597e-06, "loss": 0.8487, "step": 1002 }, { "epoch": 0.08, "grad_norm": 1.5843208437155938, "learning_rate": 9.933362085823607e-06, "loss": 0.8863, "step": 1003 }, { "epoch": 0.08, "grad_norm": 1.5492327815742086, "learning_rate": 9.93315050592273e-06, "loss": 0.9022, "step": 1004 }, { "epoch": 0.08, "grad_norm": 1.6243674850768384, "learning_rate": 9.93293859292425e-06, "loss": 0.8727, "step": 1005 }, { "epoch": 0.08, "grad_norm": 1.5174011916970498, "learning_rate": 9.932726346842479e-06, "loss": 0.863, "step": 1006 }, { "epoch": 0.08, "grad_norm": 1.6042732116648475, "learning_rate": 9.932513767691743e-06, "loss": 0.8812, "step": 1007 }, { "epoch": 0.08, "grad_norm": 1.5394724745453752, "learning_rate": 9.932300855486397e-06, "loss": 0.8525, "step": 1008 }, { "epoch": 0.08, "grad_norm": 1.5453099659151175, "learning_rate": 9.932087610240822e-06, "loss": 0.8639, "step": 1009 }, { "epoch": 0.08, "grad_norm": 1.7275330068236703, "learning_rate": 9.931874031969411e-06, "loss": 0.842, "step": 1010 }, { "epoch": 0.08, "grad_norm": 1.7201351796931614, "learning_rate": 9.93166012068659e-06, "loss": 0.8436, "step": 1011 }, { "epoch": 0.08, "grad_norm": 1.582786772520307, "learning_rate": 9.9314458764068e-06, "loss": 0.7336, "step": 1012 }, { "epoch": 0.08, "grad_norm": 0.9531460852511578, "learning_rate": 9.931231299144509e-06, "loss": 1.203, "step": 1013 }, { "epoch": 0.08, "grad_norm": 1.4798361473330093, "learning_rate": 9.931016388914203e-06, "loss": 0.9, "step": 1014 }, { "epoch": 0.08, "grad_norm": 1.600552365583872, "learning_rate": 9.930801145730395e-06, "loss": 0.8708, "step": 1015 }, { "epoch": 0.08, "grad_norm": 1.6394589132287902, "learning_rate": 9.93058556960762e-06, "loss": 0.8994, "step": 1016 }, { "epoch": 0.08, "grad_norm": 1.5195910122642113, "learning_rate": 9.930369660560432e-06, "loss": 0.7773, "step": 1017 }, { "epoch": 0.08, "grad_norm": 1.632863694125496, "learning_rate": 9.930153418603411e-06, "loss": 0.845, "step": 1018 }, { "epoch": 0.08, "grad_norm": 1.0662260827843473, "learning_rate": 9.929936843751158e-06, "loss": 1.2221, "step": 1019 }, { "epoch": 0.08, "grad_norm": 1.5664288412922165, "learning_rate": 9.929719936018296e-06, "loss": 0.941, "step": 1020 }, { "epoch": 0.08, "grad_norm": 1.5698506426366516, "learning_rate": 9.92950269541947e-06, "loss": 0.9052, "step": 1021 }, { "epoch": 0.08, "grad_norm": 0.8310566856172155, "learning_rate": 9.929285121969352e-06, "loss": 1.1871, "step": 1022 }, { "epoch": 0.08, "grad_norm": 0.8732153692891029, "learning_rate": 9.929067215682631e-06, "loss": 1.2318, "step": 1023 }, { "epoch": 0.08, "grad_norm": 2.046394045987021, "learning_rate": 9.92884897657402e-06, "loss": 0.9203, "step": 1024 }, { "epoch": 0.08, "grad_norm": 1.567175079880734, "learning_rate": 9.928630404658255e-06, "loss": 0.8439, "step": 1025 }, { "epoch": 0.08, "grad_norm": 0.9090350751602232, "learning_rate": 9.928411499950096e-06, "loss": 1.2207, "step": 1026 }, { "epoch": 0.08, "grad_norm": 1.5002832370544328, "learning_rate": 9.928192262464322e-06, "loss": 0.8756, "step": 1027 }, { "epoch": 0.08, "grad_norm": 1.5352448946562403, "learning_rate": 9.927972692215739e-06, "loss": 0.8771, "step": 1028 }, { "epoch": 0.08, "grad_norm": 1.6336150628605406, "learning_rate": 9.927752789219171e-06, "loss": 0.7678, "step": 1029 }, { "epoch": 0.08, "grad_norm": 1.6818809068422826, "learning_rate": 9.927532553489465e-06, "loss": 0.8266, "step": 1030 }, { "epoch": 0.08, "grad_norm": 1.5708989484033575, "learning_rate": 9.927311985041495e-06, "loss": 0.943, "step": 1031 }, { "epoch": 0.08, "grad_norm": 1.5027899322917295, "learning_rate": 9.927091083890152e-06, "loss": 0.8621, "step": 1032 }, { "epoch": 0.08, "grad_norm": 1.4992683125154507, "learning_rate": 9.926869850050353e-06, "loss": 0.7401, "step": 1033 }, { "epoch": 0.08, "grad_norm": 1.4824835062092483, "learning_rate": 9.926648283537037e-06, "loss": 0.8743, "step": 1034 }, { "epoch": 0.08, "grad_norm": 1.5354069234637797, "learning_rate": 9.926426384365162e-06, "loss": 0.8563, "step": 1035 }, { "epoch": 0.08, "grad_norm": 1.0987647945752776, "learning_rate": 9.926204152549711e-06, "loss": 1.1839, "step": 1036 }, { "epoch": 0.08, "grad_norm": 1.5525296555975288, "learning_rate": 9.925981588105695e-06, "loss": 0.8212, "step": 1037 }, { "epoch": 0.08, "grad_norm": 0.8568660608055731, "learning_rate": 9.925758691048134e-06, "loss": 1.2212, "step": 1038 }, { "epoch": 0.08, "grad_norm": 1.5258758737977265, "learning_rate": 9.925535461392086e-06, "loss": 0.8393, "step": 1039 }, { "epoch": 0.08, "grad_norm": 1.4817903993309793, "learning_rate": 9.92531189915262e-06, "loss": 0.845, "step": 1040 }, { "epoch": 0.08, "grad_norm": 1.6972602229571543, "learning_rate": 9.925088004344832e-06, "loss": 0.9327, "step": 1041 }, { "epoch": 0.08, "grad_norm": 0.909799237114476, "learning_rate": 9.92486377698384e-06, "loss": 1.2105, "step": 1042 }, { "epoch": 0.08, "grad_norm": 1.7024101955125066, "learning_rate": 9.924639217084783e-06, "loss": 0.9141, "step": 1043 }, { "epoch": 0.08, "grad_norm": 1.5110794413414614, "learning_rate": 9.924414324662827e-06, "loss": 0.9045, "step": 1044 }, { "epoch": 0.08, "grad_norm": 1.5066057699860382, "learning_rate": 9.924189099733155e-06, "loss": 0.7925, "step": 1045 }, { "epoch": 0.08, "grad_norm": 0.8702900988726628, "learning_rate": 9.923963542310975e-06, "loss": 1.1937, "step": 1046 }, { "epoch": 0.08, "grad_norm": 1.736162303163339, "learning_rate": 9.923737652411518e-06, "loss": 0.8332, "step": 1047 }, { "epoch": 0.08, "grad_norm": 1.5198858096184873, "learning_rate": 9.923511430050034e-06, "loss": 0.8473, "step": 1048 }, { "epoch": 0.08, "grad_norm": 1.4527317209393917, "learning_rate": 9.923284875241802e-06, "loss": 0.7719, "step": 1049 }, { "epoch": 0.08, "grad_norm": 1.4891813388016397, "learning_rate": 9.923057988002117e-06, "loss": 0.9117, "step": 1050 }, { "epoch": 0.08, "grad_norm": 0.8729613917353225, "learning_rate": 9.922830768346298e-06, "loss": 1.2137, "step": 1051 }, { "epoch": 0.08, "grad_norm": 1.4835843078850388, "learning_rate": 9.92260321628969e-06, "loss": 0.7782, "step": 1052 }, { "epoch": 0.08, "grad_norm": 0.786138353739257, "learning_rate": 9.922375331847658e-06, "loss": 1.1841, "step": 1053 }, { "epoch": 0.08, "grad_norm": 1.5722831660808285, "learning_rate": 9.922147115035585e-06, "loss": 0.8429, "step": 1054 }, { "epoch": 0.08, "grad_norm": 1.5294361807362953, "learning_rate": 9.921918565868887e-06, "loss": 0.9203, "step": 1055 }, { "epoch": 0.08, "grad_norm": 1.472894075752094, "learning_rate": 9.921689684362989e-06, "loss": 0.8603, "step": 1056 }, { "epoch": 0.08, "grad_norm": 1.464298819187418, "learning_rate": 9.921460470533352e-06, "loss": 0.8238, "step": 1057 }, { "epoch": 0.08, "grad_norm": 0.91131972026508, "learning_rate": 9.921230924395449e-06, "loss": 1.1832, "step": 1058 }, { "epoch": 0.08, "grad_norm": 0.8713129941443456, "learning_rate": 9.921001045964781e-06, "loss": 1.1831, "step": 1059 }, { "epoch": 0.09, "grad_norm": 1.6823759060121664, "learning_rate": 9.920770835256871e-06, "loss": 0.9078, "step": 1060 }, { "epoch": 0.09, "grad_norm": 1.6424460203350244, "learning_rate": 9.920540292287262e-06, "loss": 0.9276, "step": 1061 }, { "epoch": 0.09, "grad_norm": 1.5198904477845627, "learning_rate": 9.92030941707152e-06, "loss": 0.946, "step": 1062 }, { "epoch": 0.09, "grad_norm": 1.5076187627365758, "learning_rate": 9.920078209625235e-06, "loss": 0.8346, "step": 1063 }, { "epoch": 0.09, "grad_norm": 1.5020431667943692, "learning_rate": 9.91984666996402e-06, "loss": 0.9164, "step": 1064 }, { "epoch": 0.09, "grad_norm": 1.5395527010476282, "learning_rate": 9.919614798103507e-06, "loss": 0.8482, "step": 1065 }, { "epoch": 0.09, "grad_norm": 1.5533403247446196, "learning_rate": 9.919382594059355e-06, "loss": 0.903, "step": 1066 }, { "epoch": 0.09, "grad_norm": 1.5167212409535804, "learning_rate": 9.91915005784724e-06, "loss": 0.7482, "step": 1067 }, { "epoch": 0.09, "grad_norm": 1.2914236185044998, "learning_rate": 9.918917189482865e-06, "loss": 1.251, "step": 1068 }, { "epoch": 0.09, "grad_norm": 1.4555784770399571, "learning_rate": 9.918683988981955e-06, "loss": 0.7842, "step": 1069 }, { "epoch": 0.09, "grad_norm": 1.5160918002045114, "learning_rate": 9.918450456360252e-06, "loss": 0.7749, "step": 1070 }, { "epoch": 0.09, "grad_norm": 1.609977507953848, "learning_rate": 9.91821659163353e-06, "loss": 0.8249, "step": 1071 }, { "epoch": 0.09, "grad_norm": 1.579525264431761, "learning_rate": 9.917982394817576e-06, "loss": 0.9153, "step": 1072 }, { "epoch": 0.09, "grad_norm": 1.5418925623560698, "learning_rate": 9.917747865928206e-06, "loss": 0.7731, "step": 1073 }, { "epoch": 0.09, "grad_norm": 1.5288245582176816, "learning_rate": 9.917513004981253e-06, "loss": 0.9565, "step": 1074 }, { "epoch": 0.09, "grad_norm": 1.4619844993559827, "learning_rate": 9.917277811992581e-06, "loss": 0.8472, "step": 1075 }, { "epoch": 0.09, "grad_norm": 1.5730241206034477, "learning_rate": 9.917042286978064e-06, "loss": 0.8843, "step": 1076 }, { "epoch": 0.09, "grad_norm": 1.7141542324201973, "learning_rate": 9.91680642995361e-06, "loss": 0.8374, "step": 1077 }, { "epoch": 0.09, "grad_norm": 1.2169815829014885, "learning_rate": 9.916570240935141e-06, "loss": 1.1773, "step": 1078 }, { "epoch": 0.09, "grad_norm": 1.6097672967008727, "learning_rate": 9.916333719938608e-06, "loss": 0.9599, "step": 1079 }, { "epoch": 0.09, "grad_norm": 1.344097721529167, "learning_rate": 9.91609686697998e-06, "loss": 0.8377, "step": 1080 }, { "epoch": 0.09, "grad_norm": 1.6316441228779275, "learning_rate": 9.915859682075252e-06, "loss": 0.8202, "step": 1081 }, { "epoch": 0.09, "grad_norm": 1.5486254248979179, "learning_rate": 9.915622165240435e-06, "loss": 0.8237, "step": 1082 }, { "epoch": 0.09, "grad_norm": 1.6247933979351792, "learning_rate": 9.915384316491572e-06, "loss": 0.8637, "step": 1083 }, { "epoch": 0.09, "grad_norm": 1.459428823926388, "learning_rate": 9.915146135844718e-06, "loss": 0.82, "step": 1084 }, { "epoch": 0.09, "grad_norm": 1.5344320703812198, "learning_rate": 9.914907623315958e-06, "loss": 0.8219, "step": 1085 }, { "epoch": 0.09, "grad_norm": 1.4836243220108631, "learning_rate": 9.914668778921398e-06, "loss": 0.8555, "step": 1086 }, { "epoch": 0.09, "grad_norm": 1.5856890990603516, "learning_rate": 9.914429602677163e-06, "loss": 0.8011, "step": 1087 }, { "epoch": 0.09, "grad_norm": 1.5548503293413205, "learning_rate": 9.914190094599403e-06, "loss": 0.8195, "step": 1088 }, { "epoch": 0.09, "grad_norm": 1.6908458661639212, "learning_rate": 9.913950254704291e-06, "loss": 0.8285, "step": 1089 }, { "epoch": 0.09, "grad_norm": 1.630572128957167, "learning_rate": 9.913710083008021e-06, "loss": 0.8455, "step": 1090 }, { "epoch": 0.09, "grad_norm": 1.598683503942084, "learning_rate": 9.913469579526811e-06, "loss": 0.8437, "step": 1091 }, { "epoch": 0.09, "grad_norm": 1.5601603919710103, "learning_rate": 9.9132287442769e-06, "loss": 0.7611, "step": 1092 }, { "epoch": 0.09, "grad_norm": 1.6090610272391923, "learning_rate": 9.91298757727455e-06, "loss": 0.8685, "step": 1093 }, { "epoch": 0.09, "grad_norm": 1.5139364606929304, "learning_rate": 9.912746078536044e-06, "loss": 0.8942, "step": 1094 }, { "epoch": 0.09, "grad_norm": 1.5366755768643061, "learning_rate": 9.912504248077688e-06, "loss": 0.7907, "step": 1095 }, { "epoch": 0.09, "grad_norm": 1.4509300835573726, "learning_rate": 9.912262085915813e-06, "loss": 0.8253, "step": 1096 }, { "epoch": 0.09, "grad_norm": 1.5753821866681386, "learning_rate": 9.91201959206677e-06, "loss": 0.9057, "step": 1097 }, { "epoch": 0.09, "grad_norm": 1.6530624321200773, "learning_rate": 9.911776766546931e-06, "loss": 0.8657, "step": 1098 }, { "epoch": 0.09, "grad_norm": 1.5248727947254477, "learning_rate": 9.911533609372694e-06, "loss": 0.8792, "step": 1099 }, { "epoch": 0.09, "grad_norm": 1.6866733858683758, "learning_rate": 9.911290120560477e-06, "loss": 0.8792, "step": 1100 }, { "epoch": 0.09, "grad_norm": 1.171399721776323, "learning_rate": 9.911046300126719e-06, "loss": 1.2299, "step": 1101 }, { "epoch": 0.09, "grad_norm": 1.6891617334926026, "learning_rate": 9.910802148087887e-06, "loss": 0.8796, "step": 1102 }, { "epoch": 0.09, "grad_norm": 1.535237975178472, "learning_rate": 9.910557664460464e-06, "loss": 0.8837, "step": 1103 }, { "epoch": 0.09, "grad_norm": 1.5129704526911545, "learning_rate": 9.910312849260959e-06, "loss": 0.958, "step": 1104 }, { "epoch": 0.09, "grad_norm": 0.9220043339547658, "learning_rate": 9.9100677025059e-06, "loss": 1.2046, "step": 1105 }, { "epoch": 0.09, "grad_norm": 1.784846926630246, "learning_rate": 9.909822224211845e-06, "loss": 0.9383, "step": 1106 }, { "epoch": 0.09, "grad_norm": 1.514460963986215, "learning_rate": 9.909576414395367e-06, "loss": 0.8208, "step": 1107 }, { "epoch": 0.09, "grad_norm": 1.521264286141833, "learning_rate": 9.909330273073062e-06, "loss": 0.9664, "step": 1108 }, { "epoch": 0.09, "grad_norm": 1.5200117206841075, "learning_rate": 9.90908380026155e-06, "loss": 0.9416, "step": 1109 }, { "epoch": 0.09, "grad_norm": 0.8792569407413832, "learning_rate": 9.908836995977475e-06, "loss": 1.2216, "step": 1110 }, { "epoch": 0.09, "grad_norm": 1.570973813922727, "learning_rate": 9.908589860237503e-06, "loss": 0.9358, "step": 1111 }, { "epoch": 0.09, "grad_norm": 1.550819864566989, "learning_rate": 9.908342393058317e-06, "loss": 0.85, "step": 1112 }, { "epoch": 0.09, "grad_norm": 2.118708779521882, "learning_rate": 9.908094594456631e-06, "loss": 0.8399, "step": 1113 }, { "epoch": 0.09, "grad_norm": 1.6512662053032676, "learning_rate": 9.907846464449174e-06, "loss": 0.8493, "step": 1114 }, { "epoch": 0.09, "grad_norm": 1.62276289173124, "learning_rate": 9.907598003052701e-06, "loss": 0.9437, "step": 1115 }, { "epoch": 0.09, "grad_norm": 1.5674265861098502, "learning_rate": 9.90734921028399e-06, "loss": 0.8414, "step": 1116 }, { "epoch": 0.09, "grad_norm": 1.5451079032907697, "learning_rate": 9.907100086159838e-06, "loss": 0.7977, "step": 1117 }, { "epoch": 0.09, "grad_norm": 1.4785922671367702, "learning_rate": 9.906850630697068e-06, "loss": 0.8055, "step": 1118 }, { "epoch": 0.09, "grad_norm": 1.587094961471265, "learning_rate": 9.906600843912523e-06, "loss": 0.8646, "step": 1119 }, { "epoch": 0.09, "grad_norm": 1.4526678098687626, "learning_rate": 9.90635072582307e-06, "loss": 0.8927, "step": 1120 }, { "epoch": 0.09, "grad_norm": 1.5463857958282818, "learning_rate": 9.906100276445596e-06, "loss": 0.7829, "step": 1121 }, { "epoch": 0.09, "grad_norm": 1.1807653472659774, "learning_rate": 9.905849495797014e-06, "loss": 1.2406, "step": 1122 }, { "epoch": 0.09, "grad_norm": 1.63948515815072, "learning_rate": 9.905598383894254e-06, "loss": 0.8754, "step": 1123 }, { "epoch": 0.09, "grad_norm": 1.5223887128561004, "learning_rate": 9.905346940754274e-06, "loss": 0.7713, "step": 1124 }, { "epoch": 0.09, "grad_norm": 1.4300524512584327, "learning_rate": 9.905095166394055e-06, "loss": 0.8, "step": 1125 }, { "epoch": 0.09, "grad_norm": 1.624434536546282, "learning_rate": 9.90484306083059e-06, "loss": 0.8801, "step": 1126 }, { "epoch": 0.09, "grad_norm": 1.4194621891840205, "learning_rate": 9.90459062408091e-06, "loss": 0.8817, "step": 1127 }, { "epoch": 0.09, "grad_norm": 1.0209018765780313, "learning_rate": 9.904337856162054e-06, "loss": 1.2278, "step": 1128 }, { "epoch": 0.09, "grad_norm": 1.5574826714228565, "learning_rate": 9.904084757091091e-06, "loss": 0.8016, "step": 1129 }, { "epoch": 0.09, "grad_norm": 1.4955509123193762, "learning_rate": 9.903831326885112e-06, "loss": 0.8899, "step": 1130 }, { "epoch": 0.09, "grad_norm": 1.6916403146279122, "learning_rate": 9.903577565561229e-06, "loss": 0.9612, "step": 1131 }, { "epoch": 0.09, "grad_norm": 1.478638540478408, "learning_rate": 9.903323473136575e-06, "loss": 0.8891, "step": 1132 }, { "epoch": 0.09, "grad_norm": 1.4542566163402708, "learning_rate": 9.90306904962831e-06, "loss": 0.8421, "step": 1133 }, { "epoch": 0.09, "grad_norm": 1.5171907465491326, "learning_rate": 9.902814295053608e-06, "loss": 0.9647, "step": 1134 }, { "epoch": 0.09, "grad_norm": 1.4377286820578683, "learning_rate": 9.902559209429676e-06, "loss": 0.8935, "step": 1135 }, { "epoch": 0.09, "grad_norm": 1.506615842362728, "learning_rate": 9.902303792773736e-06, "loss": 0.8391, "step": 1136 }, { "epoch": 0.09, "grad_norm": 1.6930207740617287, "learning_rate": 9.902048045103031e-06, "loss": 0.9168, "step": 1137 }, { "epoch": 0.09, "grad_norm": 1.6149285982630328, "learning_rate": 9.901791966434838e-06, "loss": 0.8755, "step": 1138 }, { "epoch": 0.09, "grad_norm": 0.9078728509504543, "learning_rate": 9.90153555678644e-06, "loss": 1.1742, "step": 1139 }, { "epoch": 0.09, "grad_norm": 1.639294434120774, "learning_rate": 9.901278816175152e-06, "loss": 0.9032, "step": 1140 }, { "epoch": 0.09, "grad_norm": 1.5834602250884953, "learning_rate": 9.90102174461831e-06, "loss": 0.8432, "step": 1141 }, { "epoch": 0.09, "grad_norm": 1.728071903217369, "learning_rate": 9.900764342133277e-06, "loss": 0.9073, "step": 1142 }, { "epoch": 0.09, "grad_norm": 1.5617181078667028, "learning_rate": 9.900506608737427e-06, "loss": 0.8592, "step": 1143 }, { "epoch": 0.09, "grad_norm": 0.8670616800731159, "learning_rate": 9.900248544448166e-06, "loss": 1.2156, "step": 1144 }, { "epoch": 0.09, "grad_norm": 1.5264199845585185, "learning_rate": 9.899990149282917e-06, "loss": 0.8329, "step": 1145 }, { "epoch": 0.09, "grad_norm": 1.5763591753268233, "learning_rate": 9.899731423259129e-06, "loss": 0.7854, "step": 1146 }, { "epoch": 0.09, "grad_norm": 1.5581618396775654, "learning_rate": 9.899472366394272e-06, "loss": 0.8635, "step": 1147 }, { "epoch": 0.09, "grad_norm": 1.524802436119131, "learning_rate": 9.899212978705836e-06, "loss": 0.8511, "step": 1148 }, { "epoch": 0.09, "grad_norm": 0.9461813365221302, "learning_rate": 9.89895326021134e-06, "loss": 1.2242, "step": 1149 }, { "epoch": 0.09, "grad_norm": 1.5934375961221818, "learning_rate": 9.898693210928315e-06, "loss": 0.8575, "step": 1150 }, { "epoch": 0.09, "grad_norm": 0.8177545336906779, "learning_rate": 9.898432830874324e-06, "loss": 1.1945, "step": 1151 }, { "epoch": 0.09, "grad_norm": 1.6507124546737442, "learning_rate": 9.898172120066947e-06, "loss": 0.9113, "step": 1152 }, { "epoch": 0.09, "grad_norm": 1.4963074703378922, "learning_rate": 9.897911078523787e-06, "loss": 0.8705, "step": 1153 }, { "epoch": 0.09, "grad_norm": 1.621155849917538, "learning_rate": 9.897649706262474e-06, "loss": 0.8986, "step": 1154 }, { "epoch": 0.09, "grad_norm": 1.5692567111905145, "learning_rate": 9.897388003300652e-06, "loss": 0.895, "step": 1155 }, { "epoch": 0.09, "grad_norm": 1.645268669549263, "learning_rate": 9.897125969655993e-06, "loss": 0.9499, "step": 1156 }, { "epoch": 0.09, "grad_norm": 1.481465500600821, "learning_rate": 9.896863605346191e-06, "loss": 0.8011, "step": 1157 }, { "epoch": 0.09, "grad_norm": 1.062999489797315, "learning_rate": 9.89660091038896e-06, "loss": 1.1873, "step": 1158 }, { "epoch": 0.09, "grad_norm": 1.6463121471874338, "learning_rate": 9.896337884802041e-06, "loss": 0.9552, "step": 1159 }, { "epoch": 0.09, "grad_norm": 1.5696978312823517, "learning_rate": 9.89607452860319e-06, "loss": 0.838, "step": 1160 }, { "epoch": 0.09, "grad_norm": 0.801389252910241, "learning_rate": 9.895810841810191e-06, "loss": 1.1902, "step": 1161 }, { "epoch": 0.09, "grad_norm": 1.5817728664880302, "learning_rate": 9.895546824440852e-06, "loss": 0.8703, "step": 1162 }, { "epoch": 0.09, "grad_norm": 1.5451104085043792, "learning_rate": 9.895282476512995e-06, "loss": 0.8254, "step": 1163 }, { "epoch": 0.09, "grad_norm": 1.718846582803934, "learning_rate": 9.89501779804447e-06, "loss": 0.8769, "step": 1164 }, { "epoch": 0.09, "grad_norm": 1.6278556824204597, "learning_rate": 9.894752789053153e-06, "loss": 0.8371, "step": 1165 }, { "epoch": 0.09, "grad_norm": 1.4303309211167965, "learning_rate": 9.894487449556934e-06, "loss": 0.7878, "step": 1166 }, { "epoch": 0.09, "grad_norm": 1.5587649644443693, "learning_rate": 9.894221779573729e-06, "loss": 0.8388, "step": 1167 }, { "epoch": 0.09, "grad_norm": 1.4743462116990227, "learning_rate": 9.89395577912148e-06, "loss": 0.9034, "step": 1168 }, { "epoch": 0.09, "grad_norm": 1.6285272101420099, "learning_rate": 9.893689448218146e-06, "loss": 0.9346, "step": 1169 }, { "epoch": 0.09, "grad_norm": 0.8609240438837328, "learning_rate": 9.89342278688171e-06, "loss": 1.1954, "step": 1170 }, { "epoch": 0.09, "grad_norm": 0.8379287176936888, "learning_rate": 9.893155795130177e-06, "loss": 1.1985, "step": 1171 }, { "epoch": 0.09, "grad_norm": 1.5995032803149256, "learning_rate": 9.892888472981577e-06, "loss": 0.8818, "step": 1172 }, { "epoch": 0.09, "grad_norm": 1.59624011749449, "learning_rate": 9.892620820453959e-06, "loss": 0.8093, "step": 1173 }, { "epoch": 0.09, "grad_norm": 0.8662669558907233, "learning_rate": 9.892352837565395e-06, "loss": 1.1694, "step": 1174 }, { "epoch": 0.09, "grad_norm": 0.8646642927054429, "learning_rate": 9.89208452433398e-06, "loss": 1.195, "step": 1175 }, { "epoch": 0.09, "grad_norm": 1.4936008096193696, "learning_rate": 9.891815880777834e-06, "loss": 0.8531, "step": 1176 }, { "epoch": 0.09, "grad_norm": 1.5847014248547993, "learning_rate": 9.891546906915091e-06, "loss": 0.9319, "step": 1177 }, { "epoch": 0.09, "grad_norm": 0.8423058678231164, "learning_rate": 9.891277602763916e-06, "loss": 1.2195, "step": 1178 }, { "epoch": 0.09, "grad_norm": 1.4170008341534437, "learning_rate": 9.891007968342493e-06, "loss": 0.809, "step": 1179 }, { "epoch": 0.09, "grad_norm": 1.5350499010970065, "learning_rate": 9.890738003669029e-06, "loss": 0.7639, "step": 1180 }, { "epoch": 0.09, "grad_norm": 1.470641389458826, "learning_rate": 9.89046770876175e-06, "loss": 0.7469, "step": 1181 }, { "epoch": 0.09, "grad_norm": 1.5180644769971037, "learning_rate": 9.89019708363891e-06, "loss": 0.8169, "step": 1182 }, { "epoch": 0.09, "grad_norm": 0.896302808603409, "learning_rate": 9.88992612831878e-06, "loss": 1.2118, "step": 1183 }, { "epoch": 0.09, "grad_norm": 1.4363488405314828, "learning_rate": 9.889654842819658e-06, "loss": 0.8802, "step": 1184 }, { "epoch": 0.1, "grad_norm": 1.4963303480368686, "learning_rate": 9.88938322715986e-06, "loss": 0.8113, "step": 1185 }, { "epoch": 0.1, "grad_norm": 1.5079940885764052, "learning_rate": 9.889111281357725e-06, "loss": 0.8417, "step": 1186 }, { "epoch": 0.1, "grad_norm": 1.4420228775634831, "learning_rate": 9.888839005431615e-06, "loss": 0.7966, "step": 1187 }, { "epoch": 0.1, "grad_norm": 1.4217592483158636, "learning_rate": 9.888566399399918e-06, "loss": 0.8243, "step": 1188 }, { "epoch": 0.1, "grad_norm": 0.9596237029887802, "learning_rate": 9.88829346328104e-06, "loss": 1.1973, "step": 1189 }, { "epoch": 0.1, "grad_norm": 1.5807244492235648, "learning_rate": 9.888020197093409e-06, "loss": 0.7818, "step": 1190 }, { "epoch": 0.1, "grad_norm": 1.6094023180268835, "learning_rate": 9.887746600855477e-06, "loss": 0.9041, "step": 1191 }, { "epoch": 0.1, "grad_norm": 1.4315510844110082, "learning_rate": 9.887472674585718e-06, "loss": 0.8847, "step": 1192 }, { "epoch": 0.1, "grad_norm": 1.6143849420957126, "learning_rate": 9.887198418302629e-06, "loss": 0.8722, "step": 1193 }, { "epoch": 0.1, "grad_norm": 0.8736413049553033, "learning_rate": 9.886923832024726e-06, "loss": 1.1919, "step": 1194 }, { "epoch": 0.1, "grad_norm": 1.4876563566682297, "learning_rate": 9.886648915770553e-06, "loss": 0.8599, "step": 1195 }, { "epoch": 0.1, "grad_norm": 1.4922819788171222, "learning_rate": 9.886373669558669e-06, "loss": 0.8139, "step": 1196 }, { "epoch": 0.1, "grad_norm": 1.5551088110903455, "learning_rate": 9.886098093407664e-06, "loss": 0.7953, "step": 1197 }, { "epoch": 0.1, "grad_norm": 1.5044351452307718, "learning_rate": 9.885822187336142e-06, "loss": 0.872, "step": 1198 }, { "epoch": 0.1, "grad_norm": 1.5884547010397598, "learning_rate": 9.885545951362733e-06, "loss": 0.8428, "step": 1199 }, { "epoch": 0.1, "grad_norm": 1.505602855132224, "learning_rate": 9.88526938550609e-06, "loss": 0.7956, "step": 1200 }, { "epoch": 0.1, "grad_norm": 1.5422232321602634, "learning_rate": 9.884992489784888e-06, "loss": 0.7777, "step": 1201 }, { "epoch": 0.1, "grad_norm": 1.5526181719588474, "learning_rate": 9.884715264217823e-06, "loss": 0.9298, "step": 1202 }, { "epoch": 0.1, "grad_norm": 1.4935116108163888, "learning_rate": 9.884437708823614e-06, "loss": 0.7999, "step": 1203 }, { "epoch": 0.1, "grad_norm": 1.5376788293044548, "learning_rate": 9.884159823621e-06, "loss": 0.7795, "step": 1204 }, { "epoch": 0.1, "grad_norm": 0.9331977794353743, "learning_rate": 9.883881608628748e-06, "loss": 1.2014, "step": 1205 }, { "epoch": 0.1, "grad_norm": 0.8756825225718817, "learning_rate": 9.883603063865642e-06, "loss": 1.1707, "step": 1206 }, { "epoch": 0.1, "grad_norm": 1.5765964141280344, "learning_rate": 9.88332418935049e-06, "loss": 0.8703, "step": 1207 }, { "epoch": 0.1, "grad_norm": 0.853175936019261, "learning_rate": 9.883044985102122e-06, "loss": 1.1873, "step": 1208 }, { "epoch": 0.1, "grad_norm": 1.6361721109061373, "learning_rate": 9.882765451139391e-06, "loss": 0.8048, "step": 1209 }, { "epoch": 0.1, "grad_norm": 0.8840592149869639, "learning_rate": 9.882485587481172e-06, "loss": 1.2146, "step": 1210 }, { "epoch": 0.1, "grad_norm": 1.5680679306183298, "learning_rate": 9.882205394146362e-06, "loss": 0.8792, "step": 1211 }, { "epoch": 0.1, "grad_norm": 1.6423825607086926, "learning_rate": 9.881924871153879e-06, "loss": 0.9045, "step": 1212 }, { "epoch": 0.1, "grad_norm": 1.6278985205730483, "learning_rate": 9.881644018522665e-06, "loss": 0.8729, "step": 1213 }, { "epoch": 0.1, "grad_norm": 1.5592678817310412, "learning_rate": 9.881362836271686e-06, "loss": 0.7889, "step": 1214 }, { "epoch": 0.1, "grad_norm": 0.9323916723592967, "learning_rate": 9.881081324419925e-06, "loss": 1.148, "step": 1215 }, { "epoch": 0.1, "grad_norm": 1.498353559239422, "learning_rate": 9.880799482986392e-06, "loss": 0.865, "step": 1216 }, { "epoch": 0.1, "grad_norm": 1.741714375667812, "learning_rate": 9.880517311990118e-06, "loss": 0.8332, "step": 1217 }, { "epoch": 0.1, "grad_norm": 1.531632018796856, "learning_rate": 9.880234811450154e-06, "loss": 0.8458, "step": 1218 }, { "epoch": 0.1, "grad_norm": 1.547892073304275, "learning_rate": 9.879951981385577e-06, "loss": 0.7555, "step": 1219 }, { "epoch": 0.1, "grad_norm": 1.4623411094987229, "learning_rate": 9.879668821815484e-06, "loss": 0.8779, "step": 1220 }, { "epoch": 0.1, "grad_norm": 0.9117430969888959, "learning_rate": 9.879385332758994e-06, "loss": 1.2091, "step": 1221 }, { "epoch": 0.1, "grad_norm": 1.5910541555968742, "learning_rate": 9.879101514235248e-06, "loss": 0.8156, "step": 1222 }, { "epoch": 0.1, "grad_norm": 1.530674721681562, "learning_rate": 9.878817366263412e-06, "loss": 0.8341, "step": 1223 }, { "epoch": 0.1, "grad_norm": 1.537143374975027, "learning_rate": 9.878532888862671e-06, "loss": 0.8712, "step": 1224 }, { "epoch": 0.1, "grad_norm": 1.438194046880818, "learning_rate": 9.878248082052233e-06, "loss": 0.8724, "step": 1225 }, { "epoch": 0.1, "grad_norm": 1.469020163955216, "learning_rate": 9.87796294585133e-06, "loss": 0.8774, "step": 1226 }, { "epoch": 0.1, "grad_norm": 0.9045402846986408, "learning_rate": 9.877677480279215e-06, "loss": 1.1877, "step": 1227 }, { "epoch": 0.1, "grad_norm": 0.8485465763884734, "learning_rate": 9.877391685355164e-06, "loss": 1.1939, "step": 1228 }, { "epoch": 0.1, "grad_norm": 1.5349571510053912, "learning_rate": 9.877105561098473e-06, "loss": 0.8907, "step": 1229 }, { "epoch": 0.1, "grad_norm": 1.5231731842172158, "learning_rate": 9.876819107528461e-06, "loss": 0.835, "step": 1230 }, { "epoch": 0.1, "grad_norm": 1.6079268720640238, "learning_rate": 9.876532324664473e-06, "loss": 0.8379, "step": 1231 }, { "epoch": 0.1, "grad_norm": 1.4404650649228092, "learning_rate": 9.87624521252587e-06, "loss": 0.7746, "step": 1232 }, { "epoch": 0.1, "grad_norm": 1.573042300152001, "learning_rate": 9.875957771132042e-06, "loss": 0.8227, "step": 1233 }, { "epoch": 0.1, "grad_norm": 1.4782603594155683, "learning_rate": 9.875670000502394e-06, "loss": 0.8051, "step": 1234 }, { "epoch": 0.1, "grad_norm": 1.5490029843574642, "learning_rate": 9.87538190065636e-06, "loss": 0.8982, "step": 1235 }, { "epoch": 0.1, "grad_norm": 1.520006558494078, "learning_rate": 9.875093471613392e-06, "loss": 0.7148, "step": 1236 }, { "epoch": 0.1, "grad_norm": 1.5517389488432893, "learning_rate": 9.874804713392964e-06, "loss": 0.8479, "step": 1237 }, { "epoch": 0.1, "grad_norm": 1.2903052314724248, "learning_rate": 9.874515626014576e-06, "loss": 1.1894, "step": 1238 }, { "epoch": 0.1, "grad_norm": 1.1237688510319628, "learning_rate": 9.874226209497745e-06, "loss": 1.2088, "step": 1239 }, { "epoch": 0.1, "grad_norm": 1.588593261587381, "learning_rate": 9.873936463862017e-06, "loss": 0.8539, "step": 1240 }, { "epoch": 0.1, "grad_norm": 1.5673792588622841, "learning_rate": 9.873646389126954e-06, "loss": 0.8058, "step": 1241 }, { "epoch": 0.1, "grad_norm": 1.956677178028655, "learning_rate": 9.873355985312141e-06, "loss": 0.8613, "step": 1242 }, { "epoch": 0.1, "grad_norm": 1.5887984839563987, "learning_rate": 9.873065252437189e-06, "loss": 0.8421, "step": 1243 }, { "epoch": 0.1, "grad_norm": 1.6146774662495709, "learning_rate": 9.872774190521727e-06, "loss": 0.8584, "step": 1244 }, { "epoch": 0.1, "grad_norm": 1.9486224620946708, "learning_rate": 9.87248279958541e-06, "loss": 0.7851, "step": 1245 }, { "epoch": 0.1, "grad_norm": 1.5886960173019136, "learning_rate": 9.872191079647913e-06, "loss": 0.9202, "step": 1246 }, { "epoch": 0.1, "grad_norm": 1.5278281640509506, "learning_rate": 9.871899030728932e-06, "loss": 0.8494, "step": 1247 }, { "epoch": 0.1, "grad_norm": 1.595886011184496, "learning_rate": 9.871606652848191e-06, "loss": 0.849, "step": 1248 }, { "epoch": 0.1, "grad_norm": 1.5471501250115154, "learning_rate": 9.871313946025427e-06, "loss": 0.7933, "step": 1249 }, { "epoch": 0.1, "grad_norm": 1.5653286903242623, "learning_rate": 9.871020910280408e-06, "loss": 0.9047, "step": 1250 }, { "epoch": 0.1, "grad_norm": 2.4825047931837663, "learning_rate": 9.87072754563292e-06, "loss": 1.2111, "step": 1251 }, { "epoch": 0.1, "grad_norm": 1.438869923833787, "learning_rate": 9.870433852102766e-06, "loss": 0.7566, "step": 1252 }, { "epoch": 0.1, "grad_norm": 1.4688938720814233, "learning_rate": 9.870139829709784e-06, "loss": 0.8079, "step": 1253 }, { "epoch": 0.1, "grad_norm": 1.5547898485997957, "learning_rate": 9.869845478473826e-06, "loss": 0.7872, "step": 1254 }, { "epoch": 0.1, "grad_norm": 1.7793062260624724, "learning_rate": 9.869550798414763e-06, "loss": 0.8893, "step": 1255 }, { "epoch": 0.1, "grad_norm": 1.6369441462934284, "learning_rate": 9.869255789552496e-06, "loss": 0.9565, "step": 1256 }, { "epoch": 0.1, "grad_norm": 3.026470008140896, "learning_rate": 9.868960451906945e-06, "loss": 0.8732, "step": 1257 }, { "epoch": 0.1, "grad_norm": 1.5542806129200308, "learning_rate": 9.868664785498049e-06, "loss": 0.8932, "step": 1258 }, { "epoch": 0.1, "grad_norm": 1.5747825171958425, "learning_rate": 9.868368790345777e-06, "loss": 0.8428, "step": 1259 }, { "epoch": 0.1, "grad_norm": 1.5981271931048509, "learning_rate": 9.868072466470109e-06, "loss": 0.8302, "step": 1260 }, { "epoch": 0.1, "grad_norm": 0.936162562503447, "learning_rate": 9.867775813891056e-06, "loss": 1.1908, "step": 1261 }, { "epoch": 0.1, "grad_norm": 0.9071525844636011, "learning_rate": 9.867478832628652e-06, "loss": 1.1804, "step": 1262 }, { "epoch": 0.1, "grad_norm": 1.5562349094598218, "learning_rate": 9.867181522702945e-06, "loss": 0.8913, "step": 1263 }, { "epoch": 0.1, "grad_norm": 1.4443955872843803, "learning_rate": 9.866883884134012e-06, "loss": 0.9088, "step": 1264 }, { "epoch": 0.1, "grad_norm": 1.4809995842689774, "learning_rate": 9.866585916941951e-06, "loss": 0.9139, "step": 1265 }, { "epoch": 0.1, "grad_norm": 1.496507161727315, "learning_rate": 9.866287621146882e-06, "loss": 0.8198, "step": 1266 }, { "epoch": 0.1, "grad_norm": 1.4955253721812956, "learning_rate": 9.865988996768945e-06, "loss": 0.8259, "step": 1267 }, { "epoch": 0.1, "grad_norm": 1.6666385141954962, "learning_rate": 9.865690043828302e-06, "loss": 0.9153, "step": 1268 }, { "epoch": 0.1, "grad_norm": 1.5171236315540129, "learning_rate": 9.865390762345143e-06, "loss": 0.8697, "step": 1269 }, { "epoch": 0.1, "grad_norm": 1.6354602865317125, "learning_rate": 9.865091152339674e-06, "loss": 0.9007, "step": 1270 }, { "epoch": 0.1, "grad_norm": 1.5689026535447894, "learning_rate": 9.864791213832125e-06, "loss": 0.8492, "step": 1271 }, { "epoch": 0.1, "grad_norm": 1.5659722753495828, "learning_rate": 9.86449094684275e-06, "loss": 0.86, "step": 1272 }, { "epoch": 0.1, "grad_norm": 1.4771556642637091, "learning_rate": 9.864190351391822e-06, "loss": 0.8549, "step": 1273 }, { "epoch": 0.1, "grad_norm": 1.3284379296792292, "learning_rate": 9.863889427499641e-06, "loss": 1.1914, "step": 1274 }, { "epoch": 0.1, "grad_norm": 1.591508108745014, "learning_rate": 9.863588175186522e-06, "loss": 0.8972, "step": 1275 }, { "epoch": 0.1, "grad_norm": 1.5414392588606192, "learning_rate": 9.863286594472809e-06, "loss": 0.872, "step": 1276 }, { "epoch": 0.1, "grad_norm": 1.667243685766783, "learning_rate": 9.862984685378864e-06, "loss": 0.738, "step": 1277 }, { "epoch": 0.1, "grad_norm": 1.5744172694909626, "learning_rate": 9.862682447925073e-06, "loss": 0.8596, "step": 1278 }, { "epoch": 0.1, "grad_norm": 1.6953900439190548, "learning_rate": 9.862379882131844e-06, "loss": 0.932, "step": 1279 }, { "epoch": 0.1, "grad_norm": 1.5298377238644922, "learning_rate": 9.862076988019609e-06, "loss": 0.9113, "step": 1280 }, { "epoch": 0.1, "grad_norm": 1.4729571933496919, "learning_rate": 9.861773765608816e-06, "loss": 0.8325, "step": 1281 }, { "epoch": 0.1, "grad_norm": 1.6070729794782581, "learning_rate": 9.86147021491994e-06, "loss": 0.8781, "step": 1282 }, { "epoch": 0.1, "grad_norm": 0.9221028350118031, "learning_rate": 9.86116633597348e-06, "loss": 1.1878, "step": 1283 }, { "epoch": 0.1, "grad_norm": 1.5225635092739078, "learning_rate": 9.860862128789954e-06, "loss": 0.8356, "step": 1284 }, { "epoch": 0.1, "grad_norm": 1.5351457652965883, "learning_rate": 9.860557593389901e-06, "loss": 0.8227, "step": 1285 }, { "epoch": 0.1, "grad_norm": 1.6052197914722888, "learning_rate": 9.860252729793885e-06, "loss": 0.8858, "step": 1286 }, { "epoch": 0.1, "grad_norm": 0.8158616010700757, "learning_rate": 9.859947538022493e-06, "loss": 1.1954, "step": 1287 }, { "epoch": 0.1, "grad_norm": 1.4108127920660092, "learning_rate": 9.859642018096326e-06, "loss": 0.828, "step": 1288 }, { "epoch": 0.1, "grad_norm": 1.5418038115503787, "learning_rate": 9.859336170036022e-06, "loss": 0.9367, "step": 1289 }, { "epoch": 0.1, "grad_norm": 1.546929474515776, "learning_rate": 9.859029993862225e-06, "loss": 0.9134, "step": 1290 }, { "epoch": 0.1, "grad_norm": 1.7300549261260982, "learning_rate": 9.858723489595613e-06, "loss": 0.9416, "step": 1291 }, { "epoch": 0.1, "grad_norm": 1.6836381900352337, "learning_rate": 9.858416657256883e-06, "loss": 0.901, "step": 1292 }, { "epoch": 0.1, "grad_norm": 4.289018750690642, "learning_rate": 9.85810949686675e-06, "loss": 0.82, "step": 1293 }, { "epoch": 0.1, "grad_norm": 0.9242258865474157, "learning_rate": 9.857802008445953e-06, "loss": 1.2152, "step": 1294 }, { "epoch": 0.1, "grad_norm": 1.4736484530680223, "learning_rate": 9.857494192015258e-06, "loss": 0.9157, "step": 1295 }, { "epoch": 0.1, "grad_norm": 1.6566500046015487, "learning_rate": 9.857186047595448e-06, "loss": 0.8569, "step": 1296 }, { "epoch": 0.1, "grad_norm": 1.7055677039529196, "learning_rate": 9.85687757520733e-06, "loss": 0.8578, "step": 1297 }, { "epoch": 0.1, "grad_norm": 1.4070556213737988, "learning_rate": 9.85656877487173e-06, "loss": 0.7889, "step": 1298 }, { "epoch": 0.1, "grad_norm": 1.4129318827285564, "learning_rate": 9.856259646609502e-06, "loss": 0.7902, "step": 1299 }, { "epoch": 0.1, "grad_norm": 0.9754546732047004, "learning_rate": 9.85595019044152e-06, "loss": 1.1929, "step": 1300 }, { "epoch": 0.1, "grad_norm": 1.6490818380054377, "learning_rate": 9.855640406388673e-06, "loss": 0.929, "step": 1301 }, { "epoch": 0.1, "grad_norm": 1.6112090513989958, "learning_rate": 9.855330294471886e-06, "loss": 0.7994, "step": 1302 }, { "epoch": 0.1, "grad_norm": 1.5926109815547944, "learning_rate": 9.855019854712094e-06, "loss": 0.8724, "step": 1303 }, { "epoch": 0.1, "grad_norm": 0.9784351763031909, "learning_rate": 9.854709087130261e-06, "loss": 1.2002, "step": 1304 }, { "epoch": 0.1, "grad_norm": 1.6299940449234447, "learning_rate": 9.85439799174737e-06, "loss": 0.8581, "step": 1305 }, { "epoch": 0.1, "grad_norm": 1.5349689049540285, "learning_rate": 9.854086568584425e-06, "loss": 0.8728, "step": 1306 }, { "epoch": 0.1, "grad_norm": 1.687897348710694, "learning_rate": 9.853774817662453e-06, "loss": 0.7981, "step": 1307 }, { "epoch": 0.1, "grad_norm": 1.4854390251710325, "learning_rate": 9.85346273900251e-06, "loss": 0.8525, "step": 1308 }, { "epoch": 0.11, "grad_norm": 1.5643721308759893, "learning_rate": 9.853150332625663e-06, "loss": 0.8589, "step": 1309 }, { "epoch": 0.11, "grad_norm": 1.467465638269256, "learning_rate": 9.85283759855301e-06, "loss": 0.8462, "step": 1310 }, { "epoch": 0.11, "grad_norm": 1.5261348890192472, "learning_rate": 9.852524536805663e-06, "loss": 0.8514, "step": 1311 }, { "epoch": 0.11, "grad_norm": 1.5259279326559416, "learning_rate": 9.852211147404765e-06, "loss": 0.8678, "step": 1312 }, { "epoch": 0.11, "grad_norm": 1.5371570267510344, "learning_rate": 9.851897430371475e-06, "loss": 0.8571, "step": 1313 }, { "epoch": 0.11, "grad_norm": 1.450758035026696, "learning_rate": 9.851583385726975e-06, "loss": 0.8335, "step": 1314 }, { "epoch": 0.11, "grad_norm": 1.641953556168877, "learning_rate": 9.851269013492473e-06, "loss": 0.9522, "step": 1315 }, { "epoch": 0.11, "grad_norm": 1.431164765337068, "learning_rate": 9.850954313689193e-06, "loss": 0.8876, "step": 1316 }, { "epoch": 0.11, "grad_norm": 1.49865279944874, "learning_rate": 9.850639286338385e-06, "loss": 0.8355, "step": 1317 }, { "epoch": 0.11, "grad_norm": 1.4538855531246735, "learning_rate": 9.850323931461321e-06, "loss": 0.8226, "step": 1318 }, { "epoch": 0.11, "grad_norm": 1.5798041629490462, "learning_rate": 9.850008249079295e-06, "loss": 0.8621, "step": 1319 }, { "epoch": 0.11, "grad_norm": 1.464627903690832, "learning_rate": 9.84969223921362e-06, "loss": 0.8131, "step": 1320 }, { "epoch": 0.11, "grad_norm": 0.9260955724761041, "learning_rate": 9.849375901885636e-06, "loss": 1.1874, "step": 1321 }, { "epoch": 0.11, "grad_norm": 0.8986381017899657, "learning_rate": 9.849059237116702e-06, "loss": 1.1666, "step": 1322 }, { "epoch": 0.11, "grad_norm": 1.533701088336803, "learning_rate": 9.848742244928202e-06, "loss": 0.8333, "step": 1323 }, { "epoch": 0.11, "grad_norm": 1.4644545161832554, "learning_rate": 9.848424925341537e-06, "loss": 0.7996, "step": 1324 }, { "epoch": 0.11, "grad_norm": 0.9247337474623049, "learning_rate": 9.848107278378136e-06, "loss": 1.2204, "step": 1325 }, { "epoch": 0.11, "grad_norm": 1.6635449759521905, "learning_rate": 9.847789304059445e-06, "loss": 0.998, "step": 1326 }, { "epoch": 0.11, "grad_norm": 1.479228470814932, "learning_rate": 9.847471002406935e-06, "loss": 0.8962, "step": 1327 }, { "epoch": 0.11, "grad_norm": 0.9279910086761578, "learning_rate": 9.847152373442096e-06, "loss": 1.1762, "step": 1328 }, { "epoch": 0.11, "grad_norm": 1.4333696439023775, "learning_rate": 9.846833417186448e-06, "loss": 0.848, "step": 1329 }, { "epoch": 0.11, "grad_norm": 1.5276918188363302, "learning_rate": 9.846514133661524e-06, "loss": 0.8787, "step": 1330 }, { "epoch": 0.11, "grad_norm": 1.5809737046192498, "learning_rate": 9.846194522888884e-06, "loss": 0.8455, "step": 1331 }, { "epoch": 0.11, "grad_norm": 1.4855007643805271, "learning_rate": 9.845874584890106e-06, "loss": 0.7949, "step": 1332 }, { "epoch": 0.11, "grad_norm": 1.5016534184499017, "learning_rate": 9.845554319686799e-06, "loss": 0.8553, "step": 1333 }, { "epoch": 0.11, "grad_norm": 1.0583734228798338, "learning_rate": 9.84523372730058e-06, "loss": 1.1944, "step": 1334 }, { "epoch": 0.11, "grad_norm": 1.4948105985914302, "learning_rate": 9.844912807753105e-06, "loss": 0.8048, "step": 1335 }, { "epoch": 0.11, "grad_norm": 1.549981597895823, "learning_rate": 9.844591561066035e-06, "loss": 0.8597, "step": 1336 }, { "epoch": 0.11, "grad_norm": 1.5397088050533996, "learning_rate": 9.844269987261066e-06, "loss": 0.8023, "step": 1337 }, { "epoch": 0.11, "grad_norm": 1.4976514792560012, "learning_rate": 9.843948086359912e-06, "loss": 0.8148, "step": 1338 }, { "epoch": 0.11, "grad_norm": 1.5970778161381713, "learning_rate": 9.843625858384305e-06, "loss": 0.9347, "step": 1339 }, { "epoch": 0.11, "grad_norm": 1.4376779464012985, "learning_rate": 9.843303303356005e-06, "loss": 0.8763, "step": 1340 }, { "epoch": 0.11, "grad_norm": 0.9373975731809215, "learning_rate": 9.84298042129679e-06, "loss": 1.1849, "step": 1341 }, { "epoch": 0.11, "grad_norm": 1.6350707255511154, "learning_rate": 9.842657212228464e-06, "loss": 0.9309, "step": 1342 }, { "epoch": 0.11, "grad_norm": 1.5039739292986363, "learning_rate": 9.84233367617285e-06, "loss": 0.8529, "step": 1343 }, { "epoch": 0.11, "grad_norm": 1.5865616158002578, "learning_rate": 9.842009813151793e-06, "loss": 0.9316, "step": 1344 }, { "epoch": 0.11, "grad_norm": 1.508166700126012, "learning_rate": 9.841685623187162e-06, "loss": 0.8487, "step": 1345 }, { "epoch": 0.11, "grad_norm": 1.582670270272094, "learning_rate": 9.841361106300846e-06, "loss": 0.9394, "step": 1346 }, { "epoch": 0.11, "grad_norm": 1.4243645307350397, "learning_rate": 9.841036262514756e-06, "loss": 0.8543, "step": 1347 }, { "epoch": 0.11, "grad_norm": 1.4596442543767099, "learning_rate": 9.84071109185083e-06, "loss": 0.8383, "step": 1348 }, { "epoch": 0.11, "grad_norm": 1.5103195211903802, "learning_rate": 9.840385594331022e-06, "loss": 0.8385, "step": 1349 }, { "epoch": 0.11, "grad_norm": 1.0356195386223297, "learning_rate": 9.84005976997731e-06, "loss": 1.1976, "step": 1350 }, { "epoch": 0.11, "grad_norm": 1.610580585463436, "learning_rate": 9.839733618811695e-06, "loss": 0.7779, "step": 1351 }, { "epoch": 0.11, "grad_norm": 1.4362731185806386, "learning_rate": 9.839407140856199e-06, "loss": 0.8681, "step": 1352 }, { "epoch": 0.11, "grad_norm": 1.684087985304583, "learning_rate": 9.839080336132867e-06, "loss": 0.9215, "step": 1353 }, { "epoch": 0.11, "grad_norm": 1.6142422860259236, "learning_rate": 9.838753204663766e-06, "loss": 0.8348, "step": 1354 }, { "epoch": 0.11, "grad_norm": 1.5927316913953349, "learning_rate": 9.838425746470984e-06, "loss": 0.8466, "step": 1355 }, { "epoch": 0.11, "grad_norm": 1.6364811177703218, "learning_rate": 9.838097961576632e-06, "loss": 0.9265, "step": 1356 }, { "epoch": 0.11, "grad_norm": 1.4531917493494853, "learning_rate": 9.837769850002842e-06, "loss": 0.8171, "step": 1357 }, { "epoch": 0.11, "grad_norm": 1.5843963754970225, "learning_rate": 9.837441411771771e-06, "loss": 0.8449, "step": 1358 }, { "epoch": 0.11, "grad_norm": 1.0211902152211878, "learning_rate": 9.837112646905593e-06, "loss": 1.2037, "step": 1359 }, { "epoch": 0.11, "grad_norm": 1.4799796641075675, "learning_rate": 9.836783555426508e-06, "loss": 0.7816, "step": 1360 }, { "epoch": 0.11, "grad_norm": 1.5104608898802458, "learning_rate": 9.836454137356739e-06, "loss": 0.8397, "step": 1361 }, { "epoch": 0.11, "grad_norm": 1.5986710279649745, "learning_rate": 9.836124392718526e-06, "loss": 0.8931, "step": 1362 }, { "epoch": 0.11, "grad_norm": 1.5327145870916294, "learning_rate": 9.835794321534136e-06, "loss": 0.7759, "step": 1363 }, { "epoch": 0.11, "grad_norm": 1.6431471114587126, "learning_rate": 9.835463923825854e-06, "loss": 0.8484, "step": 1364 }, { "epoch": 0.11, "grad_norm": 1.4390853701463031, "learning_rate": 9.835133199615994e-06, "loss": 0.8117, "step": 1365 }, { "epoch": 0.11, "grad_norm": 1.4472150698218422, "learning_rate": 9.834802148926883e-06, "loss": 0.71, "step": 1366 }, { "epoch": 0.11, "grad_norm": 1.0461701179326943, "learning_rate": 9.834470771780875e-06, "loss": 1.2117, "step": 1367 }, { "epoch": 0.11, "grad_norm": 0.9118172566859502, "learning_rate": 9.834139068200346e-06, "loss": 1.1839, "step": 1368 }, { "epoch": 0.11, "grad_norm": 1.5478388917734731, "learning_rate": 9.833807038207693e-06, "loss": 0.8947, "step": 1369 }, { "epoch": 0.11, "grad_norm": 1.6115358619188718, "learning_rate": 9.833474681825334e-06, "loss": 0.8313, "step": 1370 }, { "epoch": 0.11, "grad_norm": 1.605303375468737, "learning_rate": 9.833141999075715e-06, "loss": 0.9124, "step": 1371 }, { "epoch": 0.11, "grad_norm": 1.5676137176313567, "learning_rate": 9.832808989981296e-06, "loss": 0.8648, "step": 1372 }, { "epoch": 0.11, "grad_norm": 2.2447719994848394, "learning_rate": 9.832475654564562e-06, "loss": 0.8052, "step": 1373 }, { "epoch": 0.11, "grad_norm": 1.5951523608946647, "learning_rate": 9.832141992848022e-06, "loss": 0.8798, "step": 1374 }, { "epoch": 0.11, "grad_norm": 1.4909927414318729, "learning_rate": 9.831808004854207e-06, "loss": 0.8175, "step": 1375 }, { "epoch": 0.11, "grad_norm": 1.5350463398282166, "learning_rate": 9.831473690605664e-06, "loss": 0.8448, "step": 1376 }, { "epoch": 0.11, "grad_norm": 1.619771994945871, "learning_rate": 9.831139050124972e-06, "loss": 0.9307, "step": 1377 }, { "epoch": 0.11, "grad_norm": 1.5795033045615345, "learning_rate": 9.830804083434722e-06, "loss": 0.9465, "step": 1378 }, { "epoch": 0.11, "grad_norm": 1.7465772773765422, "learning_rate": 9.830468790557536e-06, "loss": 1.1725, "step": 1379 }, { "epoch": 0.11, "grad_norm": 1.4366774512116443, "learning_rate": 9.83013317151605e-06, "loss": 1.1944, "step": 1380 }, { "epoch": 0.11, "grad_norm": 1.54944758466393, "learning_rate": 9.829797226332928e-06, "loss": 0.8501, "step": 1381 }, { "epoch": 0.11, "grad_norm": 0.8108328247645301, "learning_rate": 9.829460955030854e-06, "loss": 1.168, "step": 1382 }, { "epoch": 0.11, "grad_norm": 0.9471264346778719, "learning_rate": 9.829124357632533e-06, "loss": 1.167, "step": 1383 }, { "epoch": 0.11, "grad_norm": 1.5824945124402159, "learning_rate": 9.828787434160694e-06, "loss": 0.7757, "step": 1384 }, { "epoch": 0.11, "grad_norm": 1.6296089234138476, "learning_rate": 9.828450184638082e-06, "loss": 0.781, "step": 1385 }, { "epoch": 0.11, "grad_norm": 1.5168811759476448, "learning_rate": 9.828112609087477e-06, "loss": 0.8173, "step": 1386 }, { "epoch": 0.11, "grad_norm": 1.4641586970613358, "learning_rate": 9.827774707531667e-06, "loss": 0.8237, "step": 1387 }, { "epoch": 0.11, "grad_norm": 1.5652592366624505, "learning_rate": 9.827436479993468e-06, "loss": 0.824, "step": 1388 }, { "epoch": 0.11, "grad_norm": 1.5380373868838944, "learning_rate": 9.827097926495722e-06, "loss": 0.7754, "step": 1389 }, { "epoch": 0.11, "grad_norm": 1.498154961271221, "learning_rate": 9.826759047061283e-06, "loss": 0.8721, "step": 1390 }, { "epoch": 0.11, "grad_norm": 1.520452054808714, "learning_rate": 9.826419841713038e-06, "loss": 0.8127, "step": 1391 }, { "epoch": 0.11, "grad_norm": 1.4999675509528054, "learning_rate": 9.826080310473888e-06, "loss": 0.8486, "step": 1392 }, { "epoch": 0.11, "grad_norm": 1.5869869206789395, "learning_rate": 9.825740453366761e-06, "loss": 0.8818, "step": 1393 }, { "epoch": 0.11, "grad_norm": 1.6242947500917713, "learning_rate": 9.825400270414602e-06, "loss": 0.8402, "step": 1394 }, { "epoch": 0.11, "grad_norm": 1.479651042019983, "learning_rate": 9.825059761640386e-06, "loss": 0.7545, "step": 1395 }, { "epoch": 0.11, "grad_norm": 1.5197623788977601, "learning_rate": 9.824718927067098e-06, "loss": 0.7946, "step": 1396 }, { "epoch": 0.11, "grad_norm": 1.4493370271122041, "learning_rate": 9.824377766717758e-06, "loss": 0.8174, "step": 1397 }, { "epoch": 0.11, "grad_norm": 1.6071322849901584, "learning_rate": 9.824036280615398e-06, "loss": 0.8973, "step": 1398 }, { "epoch": 0.11, "grad_norm": 1.5205270050291608, "learning_rate": 9.82369446878308e-06, "loss": 0.8092, "step": 1399 }, { "epoch": 0.11, "grad_norm": 1.4170956171852072, "learning_rate": 9.823352331243881e-06, "loss": 0.8179, "step": 1400 }, { "epoch": 0.11, "grad_norm": 1.495142648736861, "learning_rate": 9.823009868020901e-06, "loss": 0.8784, "step": 1401 }, { "epoch": 0.11, "grad_norm": 1.7685272216454342, "learning_rate": 9.822667079137268e-06, "loss": 0.9017, "step": 1402 }, { "epoch": 0.11, "grad_norm": 1.5183626075829533, "learning_rate": 9.822323964616125e-06, "loss": 0.7997, "step": 1403 }, { "epoch": 0.11, "grad_norm": 1.7470646161691972, "learning_rate": 9.821980524480641e-06, "loss": 1.1873, "step": 1404 }, { "epoch": 0.11, "grad_norm": 1.429186462052708, "learning_rate": 9.821636758754007e-06, "loss": 0.8153, "step": 1405 }, { "epoch": 0.11, "grad_norm": 1.1084619603492618, "learning_rate": 9.821292667459435e-06, "loss": 1.1843, "step": 1406 }, { "epoch": 0.11, "grad_norm": 0.8988174886017172, "learning_rate": 9.820948250620155e-06, "loss": 1.1764, "step": 1407 }, { "epoch": 0.11, "grad_norm": 1.6693095102999058, "learning_rate": 9.820603508259425e-06, "loss": 0.8289, "step": 1408 }, { "epoch": 0.11, "grad_norm": 1.6867170159085598, "learning_rate": 9.820258440400525e-06, "loss": 0.8981, "step": 1409 }, { "epoch": 0.11, "grad_norm": 1.1837697770917035, "learning_rate": 9.819913047066752e-06, "loss": 1.1551, "step": 1410 }, { "epoch": 0.11, "grad_norm": 1.47722073073164, "learning_rate": 9.81956732828143e-06, "loss": 0.8417, "step": 1411 }, { "epoch": 0.11, "grad_norm": 1.296421444259182, "learning_rate": 9.8192212840679e-06, "loss": 1.1881, "step": 1412 }, { "epoch": 0.11, "grad_norm": 1.687541674674507, "learning_rate": 9.818874914449528e-06, "loss": 0.8185, "step": 1413 }, { "epoch": 0.11, "grad_norm": 1.4710423676489746, "learning_rate": 9.818528219449705e-06, "loss": 0.8521, "step": 1414 }, { "epoch": 0.11, "grad_norm": 1.616671725725048, "learning_rate": 9.818181199091838e-06, "loss": 0.7511, "step": 1415 }, { "epoch": 0.11, "grad_norm": 1.4261890079336377, "learning_rate": 9.817833853399358e-06, "loss": 0.7955, "step": 1416 }, { "epoch": 0.11, "grad_norm": 1.6409360040228838, "learning_rate": 9.81748618239572e-06, "loss": 0.9141, "step": 1417 }, { "epoch": 0.11, "grad_norm": 1.6216273692196497, "learning_rate": 9.817138186104401e-06, "loss": 0.76, "step": 1418 }, { "epoch": 0.11, "grad_norm": 1.063935879852533, "learning_rate": 9.816789864548893e-06, "loss": 1.2028, "step": 1419 }, { "epoch": 0.11, "grad_norm": 1.5800870537974188, "learning_rate": 9.816441217752721e-06, "loss": 0.9143, "step": 1420 }, { "epoch": 0.11, "grad_norm": 1.5537999519646133, "learning_rate": 9.816092245739426e-06, "loss": 0.8306, "step": 1421 }, { "epoch": 0.11, "grad_norm": 1.4393604420606185, "learning_rate": 9.815742948532568e-06, "loss": 0.8153, "step": 1422 }, { "epoch": 0.11, "grad_norm": 1.5679317888267674, "learning_rate": 9.815393326155734e-06, "loss": 0.8041, "step": 1423 }, { "epoch": 0.11, "grad_norm": 1.5260223643283375, "learning_rate": 9.81504337863253e-06, "loss": 0.8428, "step": 1424 }, { "epoch": 0.11, "grad_norm": 1.5373357530411829, "learning_rate": 9.81469310598659e-06, "loss": 0.8429, "step": 1425 }, { "epoch": 0.11, "grad_norm": 1.5631309644465936, "learning_rate": 9.814342508241561e-06, "loss": 0.8683, "step": 1426 }, { "epoch": 0.11, "grad_norm": 1.4509390970918752, "learning_rate": 9.813991585421118e-06, "loss": 0.8087, "step": 1427 }, { "epoch": 0.11, "grad_norm": 1.1233512121323803, "learning_rate": 9.813640337548955e-06, "loss": 1.1819, "step": 1428 }, { "epoch": 0.11, "grad_norm": 1.5336450112502695, "learning_rate": 9.813288764648787e-06, "loss": 0.829, "step": 1429 }, { "epoch": 0.11, "grad_norm": 1.545749266813217, "learning_rate": 9.812936866744358e-06, "loss": 0.7875, "step": 1430 }, { "epoch": 0.11, "grad_norm": 1.4431320648369386, "learning_rate": 9.812584643859426e-06, "loss": 0.843, "step": 1431 }, { "epoch": 0.11, "grad_norm": 1.5258267217906833, "learning_rate": 9.812232096017773e-06, "loss": 0.8933, "step": 1432 }, { "epoch": 0.11, "grad_norm": 1.5303516472823941, "learning_rate": 9.811879223243207e-06, "loss": 0.8757, "step": 1433 }, { "epoch": 0.12, "grad_norm": 1.4277562074627048, "learning_rate": 9.81152602555955e-06, "loss": 0.7984, "step": 1434 }, { "epoch": 0.12, "grad_norm": 1.5397463552019395, "learning_rate": 9.811172502990656e-06, "loss": 0.7918, "step": 1435 }, { "epoch": 0.12, "grad_norm": 1.4516082156336023, "learning_rate": 9.810818655560393e-06, "loss": 0.7881, "step": 1436 }, { "epoch": 0.12, "grad_norm": 1.4768224070579294, "learning_rate": 9.810464483292653e-06, "loss": 0.8238, "step": 1437 }, { "epoch": 0.12, "grad_norm": 1.5003525044063502, "learning_rate": 9.81010998621135e-06, "loss": 0.8339, "step": 1438 }, { "epoch": 0.12, "grad_norm": 1.489859004665234, "learning_rate": 9.809755164340423e-06, "loss": 0.8019, "step": 1439 }, { "epoch": 0.12, "grad_norm": 1.0518296593142906, "learning_rate": 9.80940001770383e-06, "loss": 1.1595, "step": 1440 }, { "epoch": 0.12, "grad_norm": 0.8766815480270564, "learning_rate": 9.80904454632555e-06, "loss": 1.1913, "step": 1441 }, { "epoch": 0.12, "grad_norm": 1.5368395043072665, "learning_rate": 9.808688750229584e-06, "loss": 0.7743, "step": 1442 }, { "epoch": 0.12, "grad_norm": 1.4537216444180296, "learning_rate": 9.808332629439961e-06, "loss": 0.7998, "step": 1443 }, { "epoch": 0.12, "grad_norm": 1.7008063387753622, "learning_rate": 9.80797618398072e-06, "loss": 0.8099, "step": 1444 }, { "epoch": 0.12, "grad_norm": 1.4677348412520916, "learning_rate": 9.807619413875937e-06, "loss": 0.7965, "step": 1445 }, { "epoch": 0.12, "grad_norm": 1.5208447333326456, "learning_rate": 9.807262319149695e-06, "loss": 0.8557, "step": 1446 }, { "epoch": 0.12, "grad_norm": 1.42775197658619, "learning_rate": 9.80690489982611e-06, "loss": 0.7997, "step": 1447 }, { "epoch": 0.12, "grad_norm": 1.4833874630850366, "learning_rate": 9.806547155929315e-06, "loss": 0.8494, "step": 1448 }, { "epoch": 0.12, "grad_norm": 1.6021441839810697, "learning_rate": 9.806189087483465e-06, "loss": 0.8959, "step": 1449 }, { "epoch": 0.12, "grad_norm": 1.4769055730644622, "learning_rate": 9.805830694512736e-06, "loss": 0.8331, "step": 1450 }, { "epoch": 0.12, "grad_norm": 1.7147908340655862, "learning_rate": 9.80547197704133e-06, "loss": 1.1938, "step": 1451 }, { "epoch": 0.12, "grad_norm": 1.5508334248365785, "learning_rate": 9.805112935093469e-06, "loss": 0.8128, "step": 1452 }, { "epoch": 0.12, "grad_norm": 1.652445688019367, "learning_rate": 9.804753568693395e-06, "loss": 0.8933, "step": 1453 }, { "epoch": 0.12, "grad_norm": 1.5706119466869262, "learning_rate": 9.804393877865373e-06, "loss": 0.7907, "step": 1454 }, { "epoch": 0.12, "grad_norm": 1.4910739003796312, "learning_rate": 9.80403386263369e-06, "loss": 0.8202, "step": 1455 }, { "epoch": 0.12, "grad_norm": 1.4921426614704876, "learning_rate": 9.803673523022655e-06, "loss": 0.818, "step": 1456 }, { "epoch": 0.12, "grad_norm": 1.6236117416339702, "learning_rate": 9.8033128590566e-06, "loss": 0.8422, "step": 1457 }, { "epoch": 0.12, "grad_norm": 0.9816857878795604, "learning_rate": 9.802951870759878e-06, "loss": 1.1497, "step": 1458 }, { "epoch": 0.12, "grad_norm": 1.4825176137391896, "learning_rate": 9.802590558156863e-06, "loss": 0.8794, "step": 1459 }, { "epoch": 0.12, "grad_norm": 1.64229382344146, "learning_rate": 9.80222892127195e-06, "loss": 0.9127, "step": 1460 }, { "epoch": 0.12, "grad_norm": 1.4356716503948197, "learning_rate": 9.801866960129561e-06, "loss": 0.7799, "step": 1461 }, { "epoch": 0.12, "grad_norm": 1.572036977908545, "learning_rate": 9.801504674754134e-06, "loss": 0.8608, "step": 1462 }, { "epoch": 0.12, "grad_norm": 1.5817357085829638, "learning_rate": 9.801142065170132e-06, "loss": 0.9045, "step": 1463 }, { "epoch": 0.12, "grad_norm": 1.495132733617185, "learning_rate": 9.80077913140204e-06, "loss": 0.8792, "step": 1464 }, { "epoch": 0.12, "grad_norm": 1.4955302647396005, "learning_rate": 9.800415873474363e-06, "loss": 0.8211, "step": 1465 }, { "epoch": 0.12, "grad_norm": 1.5802661811514742, "learning_rate": 9.80005229141163e-06, "loss": 0.8688, "step": 1466 }, { "epoch": 0.12, "grad_norm": 0.8842380797754821, "learning_rate": 9.799688385238388e-06, "loss": 1.1488, "step": 1467 }, { "epoch": 0.12, "grad_norm": 1.5840031616198111, "learning_rate": 9.799324154979215e-06, "loss": 0.7911, "step": 1468 }, { "epoch": 0.12, "grad_norm": 0.7999383203826839, "learning_rate": 9.798959600658697e-06, "loss": 1.1829, "step": 1469 }, { "epoch": 0.12, "grad_norm": 1.512162564849784, "learning_rate": 9.798594722301455e-06, "loss": 0.8454, "step": 1470 }, { "epoch": 0.12, "grad_norm": 1.6902123568426313, "learning_rate": 9.798229519932125e-06, "loss": 0.8018, "step": 1471 }, { "epoch": 0.12, "grad_norm": 1.4787318483175864, "learning_rate": 9.797863993575365e-06, "loss": 0.744, "step": 1472 }, { "epoch": 0.12, "grad_norm": 1.506274356964622, "learning_rate": 9.797498143255859e-06, "loss": 0.6985, "step": 1473 }, { "epoch": 0.12, "grad_norm": 1.6210371067957121, "learning_rate": 9.797131968998305e-06, "loss": 0.9201, "step": 1474 }, { "epoch": 0.12, "grad_norm": 0.9276205140553885, "learning_rate": 9.796765470827435e-06, "loss": 1.2148, "step": 1475 }, { "epoch": 0.12, "grad_norm": 1.5595260770203656, "learning_rate": 9.796398648767989e-06, "loss": 0.8049, "step": 1476 }, { "epoch": 0.12, "grad_norm": 1.4807356023861347, "learning_rate": 9.79603150284474e-06, "loss": 0.7859, "step": 1477 }, { "epoch": 0.12, "grad_norm": 1.4144421454948075, "learning_rate": 9.795664033082476e-06, "loss": 0.8067, "step": 1478 }, { "epoch": 0.12, "grad_norm": 1.5984690238716062, "learning_rate": 9.795296239506011e-06, "loss": 0.8363, "step": 1479 }, { "epoch": 0.12, "grad_norm": 1.7086357408694295, "learning_rate": 9.794928122140179e-06, "loss": 0.8154, "step": 1480 }, { "epoch": 0.12, "grad_norm": 1.6046329493160347, "learning_rate": 9.794559681009837e-06, "loss": 0.7742, "step": 1481 }, { "epoch": 0.12, "grad_norm": 1.4861206995305734, "learning_rate": 9.794190916139861e-06, "loss": 0.8119, "step": 1482 }, { "epoch": 0.12, "grad_norm": 1.646525136148379, "learning_rate": 9.79382182755515e-06, "loss": 0.8068, "step": 1483 }, { "epoch": 0.12, "grad_norm": 1.5592221437211087, "learning_rate": 9.79345241528063e-06, "loss": 0.8823, "step": 1484 }, { "epoch": 0.12, "grad_norm": 1.385837525038439, "learning_rate": 9.79308267934124e-06, "loss": 0.7938, "step": 1485 }, { "epoch": 0.12, "grad_norm": 1.5882318770697132, "learning_rate": 9.79271261976195e-06, "loss": 0.8718, "step": 1486 }, { "epoch": 0.12, "grad_norm": 0.9924734439766923, "learning_rate": 9.792342236567743e-06, "loss": 1.1529, "step": 1487 }, { "epoch": 0.12, "grad_norm": 1.5180753555449649, "learning_rate": 9.79197152978363e-06, "loss": 0.8271, "step": 1488 }, { "epoch": 0.12, "grad_norm": 1.5575423371826715, "learning_rate": 9.791600499434642e-06, "loss": 0.8882, "step": 1489 }, { "epoch": 0.12, "grad_norm": 1.5807953831444395, "learning_rate": 9.791229145545832e-06, "loss": 0.8461, "step": 1490 }, { "epoch": 0.12, "grad_norm": 0.9030204631492412, "learning_rate": 9.790857468142274e-06, "loss": 1.19, "step": 1491 }, { "epoch": 0.12, "grad_norm": 1.4742362590822566, "learning_rate": 9.790485467249065e-06, "loss": 0.8686, "step": 1492 }, { "epoch": 0.12, "grad_norm": 1.6644699931148292, "learning_rate": 9.790113142891323e-06, "loss": 0.9355, "step": 1493 }, { "epoch": 0.12, "grad_norm": 1.4882826852943436, "learning_rate": 9.789740495094186e-06, "loss": 0.7823, "step": 1494 }, { "epoch": 0.12, "grad_norm": 0.8543869463904564, "learning_rate": 9.789367523882822e-06, "loss": 1.1814, "step": 1495 }, { "epoch": 0.12, "grad_norm": 1.460642725609597, "learning_rate": 9.78899422928241e-06, "loss": 0.8275, "step": 1496 }, { "epoch": 0.12, "grad_norm": 1.4911956750238113, "learning_rate": 9.788620611318157e-06, "loss": 0.8702, "step": 1497 }, { "epoch": 0.12, "grad_norm": 1.6122796984548178, "learning_rate": 9.788246670015289e-06, "loss": 0.9353, "step": 1498 }, { "epoch": 0.12, "grad_norm": 0.8684723600441826, "learning_rate": 9.787872405399059e-06, "loss": 1.1727, "step": 1499 }, { "epoch": 0.12, "grad_norm": 1.4535354234294837, "learning_rate": 9.787497817494734e-06, "loss": 0.8486, "step": 1500 }, { "epoch": 0.12, "grad_norm": 0.8607205042600619, "learning_rate": 9.78712290632761e-06, "loss": 1.1782, "step": 1501 }, { "epoch": 0.12, "grad_norm": 0.8005297584029993, "learning_rate": 9.786747671923003e-06, "loss": 1.1533, "step": 1502 }, { "epoch": 0.12, "grad_norm": 1.4322755033143728, "learning_rate": 9.786372114306244e-06, "loss": 0.8055, "step": 1503 }, { "epoch": 0.12, "grad_norm": 1.6949315201653319, "learning_rate": 9.785996233502697e-06, "loss": 0.8137, "step": 1504 }, { "epoch": 0.12, "grad_norm": 1.5014190562268706, "learning_rate": 9.785620029537741e-06, "loss": 0.7736, "step": 1505 }, { "epoch": 0.12, "grad_norm": 1.4009907262020476, "learning_rate": 9.785243502436776e-06, "loss": 0.8824, "step": 1506 }, { "epoch": 0.12, "grad_norm": 1.5103592923137423, "learning_rate": 9.78486665222523e-06, "loss": 0.892, "step": 1507 }, { "epoch": 0.12, "grad_norm": 1.7978399952160373, "learning_rate": 9.784489478928545e-06, "loss": 0.8985, "step": 1508 }, { "epoch": 0.12, "grad_norm": 1.5970410404323987, "learning_rate": 9.784111982572188e-06, "loss": 0.8033, "step": 1509 }, { "epoch": 0.12, "grad_norm": 1.1592982381505892, "learning_rate": 9.783734163181653e-06, "loss": 1.158, "step": 1510 }, { "epoch": 0.12, "grad_norm": 1.4491139836061901, "learning_rate": 9.783356020782448e-06, "loss": 0.8536, "step": 1511 }, { "epoch": 0.12, "grad_norm": 1.5309159256057085, "learning_rate": 9.782977555400106e-06, "loss": 0.9239, "step": 1512 }, { "epoch": 0.12, "grad_norm": 1.4467469986087207, "learning_rate": 9.782598767060186e-06, "loss": 0.8158, "step": 1513 }, { "epoch": 0.12, "grad_norm": 1.4916075589874864, "learning_rate": 9.782219655788257e-06, "loss": 0.8621, "step": 1514 }, { "epoch": 0.12, "grad_norm": 1.5996329867560244, "learning_rate": 9.781840221609922e-06, "loss": 0.8734, "step": 1515 }, { "epoch": 0.12, "grad_norm": 1.4481352930461173, "learning_rate": 9.781460464550802e-06, "loss": 0.8206, "step": 1516 }, { "epoch": 0.12, "grad_norm": 1.5387136935042627, "learning_rate": 9.781080384636539e-06, "loss": 0.821, "step": 1517 }, { "epoch": 0.12, "grad_norm": 1.4524478710416522, "learning_rate": 9.780699981892793e-06, "loss": 0.8188, "step": 1518 }, { "epoch": 0.12, "grad_norm": 1.0502036669747203, "learning_rate": 9.780319256345255e-06, "loss": 1.1834, "step": 1519 }, { "epoch": 0.12, "grad_norm": 1.4827645758652848, "learning_rate": 9.77993820801963e-06, "loss": 0.7462, "step": 1520 }, { "epoch": 0.12, "grad_norm": 1.5676902291255275, "learning_rate": 9.779556836941646e-06, "loss": 0.7887, "step": 1521 }, { "epoch": 0.12, "grad_norm": 1.570302620741426, "learning_rate": 9.779175143137055e-06, "loss": 0.8601, "step": 1522 }, { "epoch": 0.12, "grad_norm": 1.5130651986180583, "learning_rate": 9.778793126631632e-06, "loss": 0.7974, "step": 1523 }, { "epoch": 0.12, "grad_norm": 1.5460217504723186, "learning_rate": 9.778410787451168e-06, "loss": 0.8215, "step": 1524 }, { "epoch": 0.12, "grad_norm": 1.5482815896105826, "learning_rate": 9.778028125621481e-06, "loss": 0.8432, "step": 1525 }, { "epoch": 0.12, "grad_norm": 1.425089310779897, "learning_rate": 9.777645141168411e-06, "loss": 0.8456, "step": 1526 }, { "epoch": 0.12, "grad_norm": 1.447066841273666, "learning_rate": 9.777261834117814e-06, "loss": 0.802, "step": 1527 }, { "epoch": 0.12, "grad_norm": 1.6027690940761827, "learning_rate": 9.776878204495574e-06, "loss": 0.8732, "step": 1528 }, { "epoch": 0.12, "grad_norm": 1.5018584687698269, "learning_rate": 9.776494252327597e-06, "loss": 0.7605, "step": 1529 }, { "epoch": 0.12, "grad_norm": 1.0469217417949142, "learning_rate": 9.776109977639804e-06, "loss": 1.148, "step": 1530 }, { "epoch": 0.12, "grad_norm": 1.4886294184825286, "learning_rate": 9.775725380458145e-06, "loss": 0.7644, "step": 1531 }, { "epoch": 0.12, "grad_norm": 1.541150848069218, "learning_rate": 9.775340460808589e-06, "loss": 0.8111, "step": 1532 }, { "epoch": 0.12, "grad_norm": 1.4055461771970452, "learning_rate": 9.774955218717123e-06, "loss": 0.7777, "step": 1533 }, { "epoch": 0.12, "grad_norm": 1.4823678578145854, "learning_rate": 9.774569654209764e-06, "loss": 0.7948, "step": 1534 }, { "epoch": 0.12, "grad_norm": 0.827097810162567, "learning_rate": 9.774183767312545e-06, "loss": 1.1593, "step": 1535 }, { "epoch": 0.12, "grad_norm": 0.8379624305681712, "learning_rate": 9.77379755805152e-06, "loss": 1.1903, "step": 1536 }, { "epoch": 0.12, "grad_norm": 1.429181293296838, "learning_rate": 9.773411026452768e-06, "loss": 0.8137, "step": 1537 }, { "epoch": 0.12, "grad_norm": 1.3976240793968402, "learning_rate": 9.773024172542389e-06, "loss": 0.8866, "step": 1538 }, { "epoch": 0.12, "grad_norm": 1.5318989016343512, "learning_rate": 9.772636996346503e-06, "loss": 0.8699, "step": 1539 }, { "epoch": 0.12, "grad_norm": 1.6253614944609107, "learning_rate": 9.772249497891254e-06, "loss": 0.8171, "step": 1540 }, { "epoch": 0.12, "grad_norm": 1.0469555977503313, "learning_rate": 9.771861677202804e-06, "loss": 1.1813, "step": 1541 }, { "epoch": 0.12, "grad_norm": 1.6177812148047643, "learning_rate": 9.771473534307345e-06, "loss": 0.8568, "step": 1542 }, { "epoch": 0.12, "grad_norm": 1.4767819250951477, "learning_rate": 9.771085069231082e-06, "loss": 0.9074, "step": 1543 }, { "epoch": 0.12, "grad_norm": 1.5602706941923075, "learning_rate": 9.770696282000245e-06, "loss": 0.8657, "step": 1544 }, { "epoch": 0.12, "grad_norm": 1.511199648796229, "learning_rate": 9.770307172641088e-06, "loss": 0.8177, "step": 1545 }, { "epoch": 0.12, "grad_norm": 1.5269963836368348, "learning_rate": 9.76991774117988e-06, "loss": 0.793, "step": 1546 }, { "epoch": 0.12, "grad_norm": 1.010890221253354, "learning_rate": 9.76952798764292e-06, "loss": 1.2296, "step": 1547 }, { "epoch": 0.12, "grad_norm": 1.6302748602203143, "learning_rate": 9.769137912056523e-06, "loss": 0.9168, "step": 1548 }, { "epoch": 0.12, "grad_norm": 0.8426255534483389, "learning_rate": 9.76874751444703e-06, "loss": 1.1568, "step": 1549 }, { "epoch": 0.12, "grad_norm": 1.5922851810237115, "learning_rate": 9.7683567948408e-06, "loss": 0.7692, "step": 1550 }, { "epoch": 0.12, "grad_norm": 1.566652154364714, "learning_rate": 9.767965753264218e-06, "loss": 0.7822, "step": 1551 }, { "epoch": 0.12, "grad_norm": 1.4300587126065634, "learning_rate": 9.767574389743683e-06, "loss": 0.7466, "step": 1552 }, { "epoch": 0.12, "grad_norm": 1.558900846418108, "learning_rate": 9.767182704305625e-06, "loss": 0.8102, "step": 1553 }, { "epoch": 0.12, "grad_norm": 1.4941730998603502, "learning_rate": 9.766790696976489e-06, "loss": 0.8531, "step": 1554 }, { "epoch": 0.12, "grad_norm": 1.5629299857961287, "learning_rate": 9.766398367782744e-06, "loss": 0.8043, "step": 1555 }, { "epoch": 0.12, "grad_norm": 1.529365923869202, "learning_rate": 9.766005716750884e-06, "loss": 0.8151, "step": 1556 }, { "epoch": 0.12, "grad_norm": 1.515383482519452, "learning_rate": 9.76561274390742e-06, "loss": 0.8664, "step": 1557 }, { "epoch": 0.12, "grad_norm": 1.6261631614332202, "learning_rate": 9.765219449278885e-06, "loss": 0.8843, "step": 1558 }, { "epoch": 0.13, "grad_norm": 1.419538000836456, "learning_rate": 9.764825832891837e-06, "loss": 0.7278, "step": 1559 }, { "epoch": 0.13, "grad_norm": 1.0606748374123418, "learning_rate": 9.764431894772855e-06, "loss": 1.1815, "step": 1560 }, { "epoch": 0.13, "grad_norm": 1.513236066606246, "learning_rate": 9.764037634948536e-06, "loss": 0.8176, "step": 1561 }, { "epoch": 0.13, "grad_norm": 0.8597541750032723, "learning_rate": 9.7636430534455e-06, "loss": 1.155, "step": 1562 }, { "epoch": 0.13, "grad_norm": 1.6011073045904582, "learning_rate": 9.763248150290394e-06, "loss": 0.844, "step": 1563 }, { "epoch": 0.13, "grad_norm": 1.5256822446620584, "learning_rate": 9.762852925509882e-06, "loss": 0.911, "step": 1564 }, { "epoch": 0.13, "grad_norm": 1.618662847792621, "learning_rate": 9.762457379130649e-06, "loss": 0.8191, "step": 1565 }, { "epoch": 0.13, "grad_norm": 1.601523173857137, "learning_rate": 9.762061511179404e-06, "loss": 0.8384, "step": 1566 }, { "epoch": 0.13, "grad_norm": 1.4883050780964193, "learning_rate": 9.761665321682875e-06, "loss": 0.7799, "step": 1567 }, { "epoch": 0.13, "grad_norm": 1.562189650546599, "learning_rate": 9.761268810667817e-06, "loss": 0.8477, "step": 1568 }, { "epoch": 0.13, "grad_norm": 1.193667133727517, "learning_rate": 9.760871978161e-06, "loss": 1.2182, "step": 1569 }, { "epoch": 0.13, "grad_norm": 1.5791481548647144, "learning_rate": 9.760474824189222e-06, "loss": 0.7709, "step": 1570 }, { "epoch": 0.13, "grad_norm": 1.5643770932118597, "learning_rate": 9.760077348779298e-06, "loss": 0.8382, "step": 1571 }, { "epoch": 0.13, "grad_norm": 1.7273908665768951, "learning_rate": 9.759679551958068e-06, "loss": 0.8113, "step": 1572 }, { "epoch": 0.13, "grad_norm": 0.8131383980359979, "learning_rate": 9.759281433752389e-06, "loss": 1.141, "step": 1573 }, { "epoch": 0.13, "grad_norm": 1.6165712698132797, "learning_rate": 9.758882994189145e-06, "loss": 0.8206, "step": 1574 }, { "epoch": 0.13, "grad_norm": 1.7753753442074367, "learning_rate": 9.75848423329524e-06, "loss": 0.8438, "step": 1575 }, { "epoch": 0.13, "grad_norm": 1.5264704262010795, "learning_rate": 9.7580851510976e-06, "loss": 0.8779, "step": 1576 }, { "epoch": 0.13, "grad_norm": 1.631113433724481, "learning_rate": 9.757685747623169e-06, "loss": 0.7932, "step": 1577 }, { "epoch": 0.13, "grad_norm": 1.5479028858885668, "learning_rate": 9.757286022898918e-06, "loss": 0.885, "step": 1578 }, { "epoch": 0.13, "grad_norm": 1.4445042117203173, "learning_rate": 9.756885976951835e-06, "loss": 0.8372, "step": 1579 }, { "epoch": 0.13, "grad_norm": 1.5081997649156735, "learning_rate": 9.756485609808934e-06, "loss": 0.8447, "step": 1580 }, { "epoch": 0.13, "grad_norm": 1.4155712115763956, "learning_rate": 9.75608492149725e-06, "loss": 0.866, "step": 1581 }, { "epoch": 0.13, "grad_norm": 1.6566785977571656, "learning_rate": 9.755683912043836e-06, "loss": 0.8761, "step": 1582 }, { "epoch": 0.13, "grad_norm": 1.6474805475883003, "learning_rate": 9.755282581475769e-06, "loss": 0.854, "step": 1583 }, { "epoch": 0.13, "grad_norm": 1.4853855333542811, "learning_rate": 9.754880929820149e-06, "loss": 0.7515, "step": 1584 }, { "epoch": 0.13, "grad_norm": 1.5044393710760613, "learning_rate": 9.754478957104094e-06, "loss": 0.838, "step": 1585 }, { "epoch": 0.13, "grad_norm": 1.6174230905492268, "learning_rate": 9.75407666335475e-06, "loss": 0.862, "step": 1586 }, { "epoch": 0.13, "grad_norm": 1.1999052919767088, "learning_rate": 9.75367404859928e-06, "loss": 1.1896, "step": 1587 }, { "epoch": 0.13, "grad_norm": 0.9775660498868867, "learning_rate": 9.753271112864866e-06, "loss": 1.1426, "step": 1588 }, { "epoch": 0.13, "grad_norm": 1.5607453626554773, "learning_rate": 9.752867856178719e-06, "loss": 0.8262, "step": 1589 }, { "epoch": 0.13, "grad_norm": 1.5355587884381978, "learning_rate": 9.752464278568066e-06, "loss": 0.833, "step": 1590 }, { "epoch": 0.13, "grad_norm": 1.552507424276035, "learning_rate": 9.752060380060156e-06, "loss": 0.8163, "step": 1591 }, { "epoch": 0.13, "grad_norm": 1.5448047551595698, "learning_rate": 9.751656160682265e-06, "loss": 0.9331, "step": 1592 }, { "epoch": 0.13, "grad_norm": 1.4905546644728287, "learning_rate": 9.751251620461683e-06, "loss": 0.8142, "step": 1593 }, { "epoch": 0.13, "grad_norm": 1.4920693436656194, "learning_rate": 9.75084675942573e-06, "loss": 0.8839, "step": 1594 }, { "epoch": 0.13, "grad_norm": 1.8640971351172513, "learning_rate": 9.750441577601738e-06, "loss": 1.1824, "step": 1595 }, { "epoch": 0.13, "grad_norm": 1.6794332669502086, "learning_rate": 9.750036075017068e-06, "loss": 0.8747, "step": 1596 }, { "epoch": 0.13, "grad_norm": 1.5443385615478114, "learning_rate": 9.7496302516991e-06, "loss": 0.8118, "step": 1597 }, { "epoch": 0.13, "grad_norm": 1.6498921883344098, "learning_rate": 9.749224107675239e-06, "loss": 0.8177, "step": 1598 }, { "epoch": 0.13, "grad_norm": 1.4392998257719518, "learning_rate": 9.748817642972905e-06, "loss": 0.8275, "step": 1599 }, { "epoch": 0.13, "grad_norm": 1.5418585095034405, "learning_rate": 9.748410857619547e-06, "loss": 0.8588, "step": 1600 }, { "epoch": 0.13, "grad_norm": 1.468319085881354, "learning_rate": 9.748003751642628e-06, "loss": 0.8244, "step": 1601 }, { "epoch": 0.13, "grad_norm": 1.5544739155997838, "learning_rate": 9.747596325069638e-06, "loss": 0.8278, "step": 1602 }, { "epoch": 0.13, "grad_norm": 1.5857674438921594, "learning_rate": 9.747188577928089e-06, "loss": 0.9021, "step": 1603 }, { "epoch": 0.13, "grad_norm": 1.6330246634932593, "learning_rate": 9.746780510245512e-06, "loss": 0.8882, "step": 1604 }, { "epoch": 0.13, "grad_norm": 2.020640255631947, "learning_rate": 9.74637212204946e-06, "loss": 0.9223, "step": 1605 }, { "epoch": 0.13, "grad_norm": 1.4763635857374666, "learning_rate": 9.745963413367511e-06, "loss": 0.8214, "step": 1606 }, { "epoch": 0.13, "grad_norm": 1.522480455458243, "learning_rate": 9.74555438422726e-06, "loss": 0.8077, "step": 1607 }, { "epoch": 0.13, "grad_norm": 1.545304232439133, "learning_rate": 9.745145034656325e-06, "loss": 0.8768, "step": 1608 }, { "epoch": 0.13, "grad_norm": 1.462005431515541, "learning_rate": 9.744735364682347e-06, "loss": 0.7482, "step": 1609 }, { "epoch": 0.13, "grad_norm": 1.5309857620212377, "learning_rate": 9.744325374332986e-06, "loss": 0.8582, "step": 1610 }, { "epoch": 0.13, "grad_norm": 1.5750460300317697, "learning_rate": 9.74391506363593e-06, "loss": 0.843, "step": 1611 }, { "epoch": 0.13, "grad_norm": 1.506357040075656, "learning_rate": 9.74350443261888e-06, "loss": 0.8294, "step": 1612 }, { "epoch": 0.13, "grad_norm": 1.6059452791451545, "learning_rate": 9.743093481309563e-06, "loss": 0.8354, "step": 1613 }, { "epoch": 0.13, "grad_norm": 1.5212094461268337, "learning_rate": 9.742682209735727e-06, "loss": 0.8456, "step": 1614 }, { "epoch": 0.13, "grad_norm": 1.4914197812146246, "learning_rate": 9.742270617925148e-06, "loss": 0.8993, "step": 1615 }, { "epoch": 0.13, "grad_norm": 1.4445695564865404, "learning_rate": 9.741858705905609e-06, "loss": 0.8626, "step": 1616 }, { "epoch": 0.13, "grad_norm": 1.5311151263751501, "learning_rate": 9.74144647370493e-06, "loss": 0.8097, "step": 1617 }, { "epoch": 0.13, "grad_norm": 1.5540372202517865, "learning_rate": 9.741033921350941e-06, "loss": 0.8272, "step": 1618 }, { "epoch": 0.13, "grad_norm": 0.902374857213976, "learning_rate": 9.740621048871501e-06, "loss": 1.1535, "step": 1619 }, { "epoch": 0.13, "grad_norm": 1.516597058394549, "learning_rate": 9.74020785629449e-06, "loss": 0.8848, "step": 1620 }, { "epoch": 0.13, "grad_norm": 1.4783985389734733, "learning_rate": 9.739794343647802e-06, "loss": 0.8706, "step": 1621 }, { "epoch": 0.13, "grad_norm": 1.4985533830083768, "learning_rate": 9.739380510959365e-06, "loss": 0.8503, "step": 1622 }, { "epoch": 0.13, "grad_norm": 1.5211375808335061, "learning_rate": 9.738966358257116e-06, "loss": 0.9254, "step": 1623 }, { "epoch": 0.13, "grad_norm": 1.5229077503549107, "learning_rate": 9.738551885569022e-06, "loss": 0.8359, "step": 1624 }, { "epoch": 0.13, "grad_norm": 0.820993167648609, "learning_rate": 9.738137092923072e-06, "loss": 1.1883, "step": 1625 }, { "epoch": 0.13, "grad_norm": 1.562891885063231, "learning_rate": 9.73772198034727e-06, "loss": 0.8385, "step": 1626 }, { "epoch": 0.13, "grad_norm": 0.8270678503712992, "learning_rate": 9.737306547869645e-06, "loss": 1.1241, "step": 1627 }, { "epoch": 0.13, "grad_norm": 1.4828856995959205, "learning_rate": 9.73689079551825e-06, "loss": 0.8357, "step": 1628 }, { "epoch": 0.13, "grad_norm": 1.5924636544676867, "learning_rate": 9.736474723321159e-06, "loss": 0.8112, "step": 1629 }, { "epoch": 0.13, "grad_norm": 1.4639223366458662, "learning_rate": 9.736058331306461e-06, "loss": 0.8454, "step": 1630 }, { "epoch": 0.13, "grad_norm": 1.541009111588045, "learning_rate": 9.735641619502277e-06, "loss": 0.809, "step": 1631 }, { "epoch": 0.13, "grad_norm": 1.437058264494758, "learning_rate": 9.735224587936743e-06, "loss": 0.7504, "step": 1632 }, { "epoch": 0.13, "grad_norm": 1.490602647157641, "learning_rate": 9.734807236638015e-06, "loss": 0.8573, "step": 1633 }, { "epoch": 0.13, "grad_norm": 1.4525107881689685, "learning_rate": 9.734389565634277e-06, "loss": 0.7874, "step": 1634 }, { "epoch": 0.13, "grad_norm": 1.575529978469036, "learning_rate": 9.733971574953726e-06, "loss": 0.7654, "step": 1635 }, { "epoch": 0.13, "grad_norm": 2.004436012358606, "learning_rate": 9.733553264624593e-06, "loss": 0.8332, "step": 1636 }, { "epoch": 0.13, "grad_norm": 0.978151023509133, "learning_rate": 9.73313463467512e-06, "loss": 1.173, "step": 1637 }, { "epoch": 0.13, "grad_norm": 1.4578954697664561, "learning_rate": 9.732715685133572e-06, "loss": 0.7895, "step": 1638 }, { "epoch": 0.13, "grad_norm": 1.5855147013660655, "learning_rate": 9.732296416028239e-06, "loss": 0.8316, "step": 1639 }, { "epoch": 0.13, "grad_norm": 0.8309499242629935, "learning_rate": 9.731876827387433e-06, "loss": 1.1713, "step": 1640 }, { "epoch": 0.13, "grad_norm": 1.5135406747756008, "learning_rate": 9.73145691923948e-06, "loss": 0.852, "step": 1641 }, { "epoch": 0.13, "grad_norm": 1.6245707033690688, "learning_rate": 9.73103669161274e-06, "loss": 0.8243, "step": 1642 }, { "epoch": 0.13, "grad_norm": 1.450746219526346, "learning_rate": 9.730616144535581e-06, "loss": 0.839, "step": 1643 }, { "epoch": 0.13, "grad_norm": 1.5363749401661804, "learning_rate": 9.730195278036405e-06, "loss": 0.8135, "step": 1644 }, { "epoch": 0.13, "grad_norm": 1.5532327193979731, "learning_rate": 9.729774092143627e-06, "loss": 0.8142, "step": 1645 }, { "epoch": 0.13, "grad_norm": 1.477348693834012, "learning_rate": 9.729352586885687e-06, "loss": 0.8481, "step": 1646 }, { "epoch": 0.13, "grad_norm": 0.982004071184762, "learning_rate": 9.728930762291046e-06, "loss": 1.1643, "step": 1647 }, { "epoch": 0.13, "grad_norm": 0.9184945549533521, "learning_rate": 9.728508618388186e-06, "loss": 1.2233, "step": 1648 }, { "epoch": 0.13, "grad_norm": 0.8206029141756019, "learning_rate": 9.728086155205614e-06, "loss": 1.1858, "step": 1649 }, { "epoch": 0.13, "grad_norm": 1.6320246874260242, "learning_rate": 9.727663372771852e-06, "loss": 0.9328, "step": 1650 }, { "epoch": 0.13, "grad_norm": 1.5676439956669872, "learning_rate": 9.727240271115448e-06, "loss": 0.7884, "step": 1651 }, { "epoch": 0.13, "grad_norm": 1.9871983160944695, "learning_rate": 9.726816850264971e-06, "loss": 0.7755, "step": 1652 }, { "epoch": 0.13, "grad_norm": 1.4911241582228956, "learning_rate": 9.726393110249012e-06, "loss": 0.825, "step": 1653 }, { "epoch": 0.13, "grad_norm": 1.4602639356893825, "learning_rate": 9.725969051096185e-06, "loss": 0.8068, "step": 1654 }, { "epoch": 0.13, "grad_norm": 1.3886021944492761, "learning_rate": 9.725544672835118e-06, "loss": 1.1916, "step": 1655 }, { "epoch": 0.13, "grad_norm": 1.5131917405124988, "learning_rate": 9.72511997549447e-06, "loss": 0.8899, "step": 1656 }, { "epoch": 0.13, "grad_norm": 1.4705134975539087, "learning_rate": 9.724694959102918e-06, "loss": 0.8234, "step": 1657 }, { "epoch": 0.13, "grad_norm": 1.128559687342653, "learning_rate": 9.724269623689158e-06, "loss": 1.1929, "step": 1658 }, { "epoch": 0.13, "grad_norm": 1.5025403322874118, "learning_rate": 9.72384396928191e-06, "loss": 0.81, "step": 1659 }, { "epoch": 0.13, "grad_norm": 1.555435411350934, "learning_rate": 9.723417995909915e-06, "loss": 0.8151, "step": 1660 }, { "epoch": 0.13, "grad_norm": 1.514204807186842, "learning_rate": 9.722991703601936e-06, "loss": 0.8844, "step": 1661 }, { "epoch": 0.13, "grad_norm": 1.5702605979905422, "learning_rate": 9.72256509238676e-06, "loss": 0.7571, "step": 1662 }, { "epoch": 0.13, "grad_norm": 1.5417940807373565, "learning_rate": 9.722138162293187e-06, "loss": 0.8097, "step": 1663 }, { "epoch": 0.13, "grad_norm": 1.1852843789942191, "learning_rate": 9.721710913350048e-06, "loss": 1.1835, "step": 1664 }, { "epoch": 0.13, "grad_norm": 1.5278967487269908, "learning_rate": 9.721283345586191e-06, "loss": 0.8881, "step": 1665 }, { "epoch": 0.13, "grad_norm": 1.5557144690578057, "learning_rate": 9.720855459030489e-06, "loss": 0.9165, "step": 1666 }, { "epoch": 0.13, "grad_norm": 1.475850216630695, "learning_rate": 9.720427253711831e-06, "loss": 0.8597, "step": 1667 }, { "epoch": 0.13, "grad_norm": 0.8964989655734535, "learning_rate": 9.719998729659129e-06, "loss": 1.1653, "step": 1668 }, { "epoch": 0.13, "grad_norm": 1.38238178685736, "learning_rate": 9.71956988690132e-06, "loss": 0.7778, "step": 1669 }, { "epoch": 0.13, "grad_norm": 1.7232355346277546, "learning_rate": 9.719140725467362e-06, "loss": 0.8164, "step": 1670 }, { "epoch": 0.13, "grad_norm": 1.5585898877866649, "learning_rate": 9.718711245386232e-06, "loss": 0.8672, "step": 1671 }, { "epoch": 0.13, "grad_norm": 1.3973544877900228, "learning_rate": 9.718281446686926e-06, "loss": 0.8284, "step": 1672 }, { "epoch": 0.13, "grad_norm": 1.4752334985864726, "learning_rate": 9.717851329398469e-06, "loss": 0.7674, "step": 1673 }, { "epoch": 0.13, "grad_norm": 1.5296708480556587, "learning_rate": 9.717420893549902e-06, "loss": 0.8824, "step": 1674 }, { "epoch": 0.13, "grad_norm": 1.548199121009184, "learning_rate": 9.71699013917029e-06, "loss": 0.8609, "step": 1675 }, { "epoch": 0.13, "grad_norm": 1.0407842058765289, "learning_rate": 9.716559066288716e-06, "loss": 1.1901, "step": 1676 }, { "epoch": 0.13, "grad_norm": 1.4881735800219105, "learning_rate": 9.716127674934291e-06, "loss": 0.7915, "step": 1677 }, { "epoch": 0.13, "grad_norm": 0.8098622496458227, "learning_rate": 9.715695965136139e-06, "loss": 1.1799, "step": 1678 }, { "epoch": 0.13, "grad_norm": 1.466229642879152, "learning_rate": 9.715263936923413e-06, "loss": 0.8702, "step": 1679 }, { "epoch": 0.13, "grad_norm": 1.589982846154434, "learning_rate": 9.714831590325286e-06, "loss": 0.8377, "step": 1680 }, { "epoch": 0.13, "grad_norm": 1.4760628580750468, "learning_rate": 9.714398925370946e-06, "loss": 0.8133, "step": 1681 }, { "epoch": 0.13, "grad_norm": 1.4681820268432904, "learning_rate": 9.713965942089612e-06, "loss": 0.8062, "step": 1682 }, { "epoch": 0.14, "grad_norm": 1.5914508007456492, "learning_rate": 9.71353264051052e-06, "loss": 0.9552, "step": 1683 }, { "epoch": 0.14, "grad_norm": 1.4149153146506217, "learning_rate": 9.713099020662922e-06, "loss": 0.8842, "step": 1684 }, { "epoch": 0.14, "grad_norm": 1.3767279424585575, "learning_rate": 9.712665082576104e-06, "loss": 0.7917, "step": 1685 }, { "epoch": 0.14, "grad_norm": 1.6455039654789445, "learning_rate": 9.712230826279363e-06, "loss": 0.793, "step": 1686 }, { "epoch": 0.14, "grad_norm": 1.5443356530905545, "learning_rate": 9.71179625180202e-06, "loss": 0.9405, "step": 1687 }, { "epoch": 0.14, "grad_norm": 1.5945932195345813, "learning_rate": 9.711361359173422e-06, "loss": 0.8455, "step": 1688 }, { "epoch": 0.14, "grad_norm": 1.213131833909528, "learning_rate": 9.71092614842293e-06, "loss": 1.1983, "step": 1689 }, { "epoch": 0.14, "grad_norm": 1.5333978683809595, "learning_rate": 9.710490619579933e-06, "loss": 0.8661, "step": 1690 }, { "epoch": 0.14, "grad_norm": 1.534402845248015, "learning_rate": 9.710054772673839e-06, "loss": 0.8026, "step": 1691 }, { "epoch": 0.14, "grad_norm": 1.4759418252359835, "learning_rate": 9.709618607734075e-06, "loss": 0.8948, "step": 1692 }, { "epoch": 0.14, "grad_norm": 0.8173881197474613, "learning_rate": 9.709182124790094e-06, "loss": 1.1829, "step": 1693 }, { "epoch": 0.14, "grad_norm": 1.49791759692554, "learning_rate": 9.708745323871369e-06, "loss": 0.7922, "step": 1694 }, { "epoch": 0.14, "grad_norm": 1.3852993818674915, "learning_rate": 9.708308205007391e-06, "loss": 0.7752, "step": 1695 }, { "epoch": 0.14, "grad_norm": 1.6008010360542064, "learning_rate": 9.707870768227677e-06, "loss": 0.7751, "step": 1696 }, { "epoch": 0.14, "grad_norm": 0.881584166135359, "learning_rate": 9.707433013561765e-06, "loss": 1.1321, "step": 1697 }, { "epoch": 0.14, "grad_norm": 1.4908590197680862, "learning_rate": 9.706994941039209e-06, "loss": 0.7502, "step": 1698 }, { "epoch": 0.14, "grad_norm": 1.3632819123284918, "learning_rate": 9.706556550689593e-06, "loss": 0.8297, "step": 1699 }, { "epoch": 0.14, "grad_norm": 1.500361643935751, "learning_rate": 9.706117842542517e-06, "loss": 0.7485, "step": 1700 }, { "epoch": 0.14, "grad_norm": 1.412177840869574, "learning_rate": 9.705678816627601e-06, "loss": 0.8444, "step": 1701 }, { "epoch": 0.14, "grad_norm": 1.4053669329528855, "learning_rate": 9.705239472974495e-06, "loss": 0.7415, "step": 1702 }, { "epoch": 0.14, "grad_norm": 1.6369343494640611, "learning_rate": 9.704799811612858e-06, "loss": 0.8382, "step": 1703 }, { "epoch": 0.14, "grad_norm": 1.4399529872397383, "learning_rate": 9.704359832572378e-06, "loss": 0.7943, "step": 1704 }, { "epoch": 0.14, "grad_norm": 0.8819670372697286, "learning_rate": 9.703919535882767e-06, "loss": 1.1698, "step": 1705 }, { "epoch": 0.14, "grad_norm": 0.8284847659341459, "learning_rate": 9.703478921573753e-06, "loss": 1.1445, "step": 1706 }, { "epoch": 0.14, "grad_norm": 0.8097446502843204, "learning_rate": 9.703037989675088e-06, "loss": 1.1868, "step": 1707 }, { "epoch": 0.14, "grad_norm": 1.5347071011631346, "learning_rate": 9.702596740216541e-06, "loss": 0.8518, "step": 1708 }, { "epoch": 0.14, "grad_norm": 1.5870662861378397, "learning_rate": 9.702155173227911e-06, "loss": 0.8512, "step": 1709 }, { "epoch": 0.14, "grad_norm": 1.5379043403593504, "learning_rate": 9.70171328873901e-06, "loss": 0.8222, "step": 1710 }, { "epoch": 0.14, "grad_norm": 0.845065133551567, "learning_rate": 9.701271086779678e-06, "loss": 1.156, "step": 1711 }, { "epoch": 0.14, "grad_norm": 1.5488072125359502, "learning_rate": 9.700828567379772e-06, "loss": 0.9055, "step": 1712 }, { "epoch": 0.14, "grad_norm": 1.6891927046268491, "learning_rate": 9.700385730569171e-06, "loss": 0.8476, "step": 1713 }, { "epoch": 0.14, "grad_norm": 1.4939929915660417, "learning_rate": 9.699942576377779e-06, "loss": 0.7776, "step": 1714 }, { "epoch": 0.14, "grad_norm": 1.5925000552209982, "learning_rate": 9.699499104835514e-06, "loss": 0.9079, "step": 1715 }, { "epoch": 0.14, "grad_norm": 1.5722214073225893, "learning_rate": 9.699055315972328e-06, "loss": 0.8073, "step": 1716 }, { "epoch": 0.14, "grad_norm": 1.5545814010449013, "learning_rate": 9.698611209818178e-06, "loss": 0.8066, "step": 1717 }, { "epoch": 0.14, "grad_norm": 0.8806955295773359, "learning_rate": 9.698166786403057e-06, "loss": 1.1803, "step": 1718 }, { "epoch": 0.14, "grad_norm": 1.4686405393031277, "learning_rate": 9.697722045756973e-06, "loss": 0.7562, "step": 1719 }, { "epoch": 0.14, "grad_norm": 1.5699556054544272, "learning_rate": 9.697276987909951e-06, "loss": 0.8435, "step": 1720 }, { "epoch": 0.14, "grad_norm": 1.5128329517989345, "learning_rate": 9.696831612892048e-06, "loss": 0.8269, "step": 1721 }, { "epoch": 0.14, "grad_norm": 0.8431566538279116, "learning_rate": 9.696385920733335e-06, "loss": 1.1715, "step": 1722 }, { "epoch": 0.14, "grad_norm": 1.7200554272170396, "learning_rate": 9.695939911463904e-06, "loss": 0.829, "step": 1723 }, { "epoch": 0.14, "grad_norm": 1.6162504723424296, "learning_rate": 9.695493585113873e-06, "loss": 0.7914, "step": 1724 }, { "epoch": 0.14, "grad_norm": 0.8270729569141857, "learning_rate": 9.695046941713379e-06, "loss": 1.1894, "step": 1725 }, { "epoch": 0.14, "grad_norm": 1.4696685078097274, "learning_rate": 9.694599981292578e-06, "loss": 0.7616, "step": 1726 }, { "epoch": 0.14, "grad_norm": 1.6114130894525227, "learning_rate": 9.694152703881653e-06, "loss": 0.7942, "step": 1727 }, { "epoch": 0.14, "grad_norm": 1.5874158109834735, "learning_rate": 9.693705109510803e-06, "loss": 0.8807, "step": 1728 }, { "epoch": 0.14, "grad_norm": 1.5242717956570906, "learning_rate": 9.693257198210251e-06, "loss": 0.8851, "step": 1729 }, { "epoch": 0.14, "grad_norm": 1.4181308437409006, "learning_rate": 9.69280897001024e-06, "loss": 0.8883, "step": 1730 }, { "epoch": 0.14, "grad_norm": 1.6331227105813944, "learning_rate": 9.69236042494104e-06, "loss": 0.8319, "step": 1731 }, { "epoch": 0.14, "grad_norm": 1.4522987714088507, "learning_rate": 9.691911563032932e-06, "loss": 0.8675, "step": 1732 }, { "epoch": 0.14, "grad_norm": 1.440827238388004, "learning_rate": 9.691462384316226e-06, "loss": 0.8664, "step": 1733 }, { "epoch": 0.14, "grad_norm": 1.6223928363189393, "learning_rate": 9.691012888821254e-06, "loss": 0.9025, "step": 1734 }, { "epoch": 0.14, "grad_norm": 1.464324892960736, "learning_rate": 9.690563076578364e-06, "loss": 0.8914, "step": 1735 }, { "epoch": 0.14, "grad_norm": 1.5829757379714613, "learning_rate": 9.690112947617929e-06, "loss": 0.7316, "step": 1736 }, { "epoch": 0.14, "grad_norm": 1.4012009351665144, "learning_rate": 9.689662501970343e-06, "loss": 0.7813, "step": 1737 }, { "epoch": 0.14, "grad_norm": 0.981365538394024, "learning_rate": 9.689211739666023e-06, "loss": 1.2022, "step": 1738 }, { "epoch": 0.14, "grad_norm": 1.4941257252165787, "learning_rate": 9.688760660735403e-06, "loss": 0.8789, "step": 1739 }, { "epoch": 0.14, "grad_norm": 1.663006714337196, "learning_rate": 9.688309265208941e-06, "loss": 0.8431, "step": 1740 }, { "epoch": 0.14, "grad_norm": 0.8469568396807724, "learning_rate": 9.687857553117119e-06, "loss": 1.1719, "step": 1741 }, { "epoch": 0.14, "grad_norm": 1.520666432172101, "learning_rate": 9.687405524490433e-06, "loss": 0.7752, "step": 1742 }, { "epoch": 0.14, "grad_norm": 1.464014520658535, "learning_rate": 9.686953179359408e-06, "loss": 0.8272, "step": 1743 }, { "epoch": 0.14, "grad_norm": 1.5526223490259756, "learning_rate": 9.686500517754589e-06, "loss": 0.7765, "step": 1744 }, { "epoch": 0.14, "grad_norm": 1.5800301252872575, "learning_rate": 9.686047539706536e-06, "loss": 0.903, "step": 1745 }, { "epoch": 0.14, "grad_norm": 1.561692851940971, "learning_rate": 9.68559424524584e-06, "loss": 0.7937, "step": 1746 }, { "epoch": 0.14, "grad_norm": 0.8583825366753453, "learning_rate": 9.685140634403106e-06, "loss": 1.1621, "step": 1747 }, { "epoch": 0.14, "grad_norm": 0.8801039464378387, "learning_rate": 9.684686707208962e-06, "loss": 1.1957, "step": 1748 }, { "epoch": 0.14, "grad_norm": 1.4569117609816349, "learning_rate": 9.68423246369406e-06, "loss": 0.8619, "step": 1749 }, { "epoch": 0.14, "grad_norm": 1.4406207939514377, "learning_rate": 9.68377790388907e-06, "loss": 0.7772, "step": 1750 }, { "epoch": 0.14, "grad_norm": 1.5253511878443835, "learning_rate": 9.683323027824687e-06, "loss": 0.7999, "step": 1751 }, { "epoch": 0.14, "grad_norm": 0.8365798442417645, "learning_rate": 9.682867835531624e-06, "loss": 1.1777, "step": 1752 }, { "epoch": 0.14, "grad_norm": 1.5742170997246374, "learning_rate": 9.682412327040617e-06, "loss": 0.8897, "step": 1753 }, { "epoch": 0.14, "grad_norm": 1.668988513159721, "learning_rate": 9.681956502382423e-06, "loss": 0.9119, "step": 1754 }, { "epoch": 0.14, "grad_norm": 1.5716584749091955, "learning_rate": 9.681500361587818e-06, "loss": 0.9084, "step": 1755 }, { "epoch": 0.14, "grad_norm": 1.3578454896936722, "learning_rate": 9.681043904687605e-06, "loss": 0.7841, "step": 1756 }, { "epoch": 0.14, "grad_norm": 1.6065361944319299, "learning_rate": 9.680587131712605e-06, "loss": 0.9119, "step": 1757 }, { "epoch": 0.14, "grad_norm": 1.5205484398859332, "learning_rate": 9.680130042693657e-06, "loss": 0.835, "step": 1758 }, { "epoch": 0.14, "grad_norm": 1.5306054478117836, "learning_rate": 9.679672637661627e-06, "loss": 0.8301, "step": 1759 }, { "epoch": 0.14, "grad_norm": 1.3510576427864385, "learning_rate": 9.6792149166474e-06, "loss": 0.8059, "step": 1760 }, { "epoch": 0.14, "grad_norm": 1.5781590120740918, "learning_rate": 9.678756879681884e-06, "loss": 0.7731, "step": 1761 }, { "epoch": 0.14, "grad_norm": 1.6688549350329707, "learning_rate": 9.678298526796002e-06, "loss": 0.8226, "step": 1762 }, { "epoch": 0.14, "grad_norm": 1.5862792942823696, "learning_rate": 9.677839858020709e-06, "loss": 0.8524, "step": 1763 }, { "epoch": 0.14, "grad_norm": 1.4487138889108717, "learning_rate": 9.677380873386968e-06, "loss": 0.8052, "step": 1764 }, { "epoch": 0.14, "grad_norm": 1.6050776691542659, "learning_rate": 9.676921572925777e-06, "loss": 0.8389, "step": 1765 }, { "epoch": 0.14, "grad_norm": 1.4527464183103627, "learning_rate": 9.676461956668148e-06, "loss": 0.7924, "step": 1766 }, { "epoch": 0.14, "grad_norm": 1.6100323349956902, "learning_rate": 9.676002024645114e-06, "loss": 0.8305, "step": 1767 }, { "epoch": 0.14, "grad_norm": 1.4744206179733308, "learning_rate": 9.675541776887731e-06, "loss": 0.8078, "step": 1768 }, { "epoch": 0.14, "grad_norm": 0.9540264302670897, "learning_rate": 9.675081213427076e-06, "loss": 1.1727, "step": 1769 }, { "epoch": 0.14, "grad_norm": 1.5586735225640893, "learning_rate": 9.674620334294246e-06, "loss": 0.8913, "step": 1770 }, { "epoch": 0.14, "grad_norm": 1.6136987113322039, "learning_rate": 9.674159139520363e-06, "loss": 0.8612, "step": 1771 }, { "epoch": 0.14, "grad_norm": 1.6435939542342457, "learning_rate": 9.673697629136566e-06, "loss": 0.8432, "step": 1772 }, { "epoch": 0.14, "grad_norm": 1.6254283917331394, "learning_rate": 9.673235803174018e-06, "loss": 0.8067, "step": 1773 }, { "epoch": 0.14, "grad_norm": 0.9389270769073029, "learning_rate": 9.672773661663903e-06, "loss": 1.1524, "step": 1774 }, { "epoch": 0.14, "grad_norm": 1.4837075164280675, "learning_rate": 9.672311204637426e-06, "loss": 0.8768, "step": 1775 }, { "epoch": 0.14, "grad_norm": 1.5767991968162345, "learning_rate": 9.671848432125812e-06, "loss": 0.8347, "step": 1776 }, { "epoch": 0.14, "grad_norm": 1.6535477450131855, "learning_rate": 9.671385344160309e-06, "loss": 0.8516, "step": 1777 }, { "epoch": 0.14, "grad_norm": 1.513176100383834, "learning_rate": 9.670921940772186e-06, "loss": 0.7918, "step": 1778 }, { "epoch": 0.14, "grad_norm": 1.5335213234117717, "learning_rate": 9.670458221992733e-06, "loss": 0.9053, "step": 1779 }, { "epoch": 0.14, "grad_norm": 1.4993628500123712, "learning_rate": 9.66999418785326e-06, "loss": 0.8964, "step": 1780 }, { "epoch": 0.14, "grad_norm": 1.5679626871207248, "learning_rate": 9.669529838385102e-06, "loss": 0.8191, "step": 1781 }, { "epoch": 0.14, "grad_norm": 1.4816658730262284, "learning_rate": 9.669065173619612e-06, "loss": 0.8692, "step": 1782 }, { "epoch": 0.14, "grad_norm": 0.9700913678395203, "learning_rate": 9.668600193588165e-06, "loss": 1.1645, "step": 1783 }, { "epoch": 0.14, "grad_norm": 1.468160328747445, "learning_rate": 9.668134898322157e-06, "loss": 0.8437, "step": 1784 }, { "epoch": 0.14, "grad_norm": 1.5107270157570762, "learning_rate": 9.667669287853006e-06, "loss": 0.875, "step": 1785 }, { "epoch": 0.14, "grad_norm": 1.5435556474230339, "learning_rate": 9.667203362212152e-06, "loss": 0.9047, "step": 1786 }, { "epoch": 0.14, "grad_norm": 1.5510476452922575, "learning_rate": 9.666737121431055e-06, "loss": 0.8127, "step": 1787 }, { "epoch": 0.14, "grad_norm": 1.553092568257882, "learning_rate": 9.666270565541196e-06, "loss": 0.822, "step": 1788 }, { "epoch": 0.14, "grad_norm": 0.9546332086625076, "learning_rate": 9.665803694574079e-06, "loss": 1.1728, "step": 1789 }, { "epoch": 0.14, "grad_norm": 1.5084168363121073, "learning_rate": 9.665336508561225e-06, "loss": 0.8489, "step": 1790 }, { "epoch": 0.14, "grad_norm": 1.6698513623819695, "learning_rate": 9.664869007534185e-06, "loss": 0.9352, "step": 1791 }, { "epoch": 0.14, "grad_norm": 1.6591394351454614, "learning_rate": 9.664401191524522e-06, "loss": 0.7893, "step": 1792 }, { "epoch": 0.14, "grad_norm": 1.5129262070241574, "learning_rate": 9.663933060563824e-06, "loss": 0.8436, "step": 1793 }, { "epoch": 0.14, "grad_norm": 1.5192580700707576, "learning_rate": 9.663464614683702e-06, "loss": 0.8233, "step": 1794 }, { "epoch": 0.14, "grad_norm": 1.6460297487561517, "learning_rate": 9.662995853915785e-06, "loss": 0.8645, "step": 1795 }, { "epoch": 0.14, "grad_norm": 1.7052106840703782, "learning_rate": 9.662526778291725e-06, "loss": 0.8707, "step": 1796 }, { "epoch": 0.14, "grad_norm": 1.5208134976902332, "learning_rate": 9.662057387843196e-06, "loss": 0.7294, "step": 1797 }, { "epoch": 0.14, "grad_norm": 1.5846465073453435, "learning_rate": 9.66158768260189e-06, "loss": 0.8496, "step": 1798 }, { "epoch": 0.14, "grad_norm": 1.6143837844637892, "learning_rate": 9.661117662599527e-06, "loss": 0.9072, "step": 1799 }, { "epoch": 0.14, "grad_norm": 1.5528409262348821, "learning_rate": 9.66064732786784e-06, "loss": 0.8341, "step": 1800 }, { "epoch": 0.14, "grad_norm": 1.6034731646820828, "learning_rate": 9.660176678438588e-06, "loss": 0.7606, "step": 1801 }, { "epoch": 0.14, "grad_norm": 1.1485660012523888, "learning_rate": 9.659705714343551e-06, "loss": 1.1838, "step": 1802 }, { "epoch": 0.14, "grad_norm": 0.942463094180893, "learning_rate": 9.659234435614529e-06, "loss": 1.1757, "step": 1803 }, { "epoch": 0.14, "grad_norm": 1.5729860579640866, "learning_rate": 9.658762842283343e-06, "loss": 0.8553, "step": 1804 }, { "epoch": 0.14, "grad_norm": 1.6418064827736678, "learning_rate": 9.658290934381837e-06, "loss": 0.8452, "step": 1805 }, { "epoch": 0.14, "grad_norm": 1.4643190953413758, "learning_rate": 9.657818711941877e-06, "loss": 0.8631, "step": 1806 }, { "epoch": 0.14, "grad_norm": 1.465402551095835, "learning_rate": 9.657346174995346e-06, "loss": 0.7899, "step": 1807 }, { "epoch": 0.15, "grad_norm": 1.5352074145945847, "learning_rate": 9.656873323574152e-06, "loss": 0.8929, "step": 1808 }, { "epoch": 0.15, "grad_norm": 1.449831483293168, "learning_rate": 9.656400157710221e-06, "loss": 0.8194, "step": 1809 }, { "epoch": 0.15, "grad_norm": 1.5206170485420745, "learning_rate": 9.655926677435506e-06, "loss": 0.8191, "step": 1810 }, { "epoch": 0.15, "grad_norm": 1.5922879442628086, "learning_rate": 9.655452882781972e-06, "loss": 0.8223, "step": 1811 }, { "epoch": 0.15, "grad_norm": 1.9004144095111877, "learning_rate": 9.654978773781617e-06, "loss": 1.194, "step": 1812 }, { "epoch": 0.15, "grad_norm": 1.5918330055100864, "learning_rate": 9.65450435046645e-06, "loss": 0.9016, "step": 1813 }, { "epoch": 0.15, "grad_norm": 1.4864201664851162, "learning_rate": 9.654029612868507e-06, "loss": 0.8024, "step": 1814 }, { "epoch": 0.15, "grad_norm": 1.0799982012085143, "learning_rate": 9.653554561019843e-06, "loss": 1.2023, "step": 1815 }, { "epoch": 0.15, "grad_norm": 1.4693907542152291, "learning_rate": 9.653079194952532e-06, "loss": 0.8172, "step": 1816 }, { "epoch": 0.15, "grad_norm": 1.6978351899966562, "learning_rate": 9.652603514698674e-06, "loss": 0.859, "step": 1817 }, { "epoch": 0.15, "grad_norm": 1.5316583722538475, "learning_rate": 9.652127520290388e-06, "loss": 0.7953, "step": 1818 }, { "epoch": 0.15, "grad_norm": 1.5944718165226932, "learning_rate": 9.651651211759814e-06, "loss": 0.8765, "step": 1819 }, { "epoch": 0.15, "grad_norm": 1.691153840271507, "learning_rate": 9.651174589139115e-06, "loss": 0.9134, "step": 1820 }, { "epoch": 0.15, "grad_norm": 1.5928018952598135, "learning_rate": 9.650697652460471e-06, "loss": 1.1802, "step": 1821 }, { "epoch": 0.15, "grad_norm": 1.4575409701539865, "learning_rate": 9.650220401756088e-06, "loss": 0.791, "step": 1822 }, { "epoch": 0.15, "grad_norm": 1.420906813906397, "learning_rate": 9.649742837058189e-06, "loss": 0.8431, "step": 1823 }, { "epoch": 0.15, "grad_norm": 1.3935946370901644, "learning_rate": 9.649264958399022e-06, "loss": 0.836, "step": 1824 }, { "epoch": 0.15, "grad_norm": 1.6397407284761742, "learning_rate": 9.648786765810853e-06, "loss": 0.9505, "step": 1825 }, { "epoch": 0.15, "grad_norm": 1.004196427702683, "learning_rate": 9.648308259325973e-06, "loss": 1.1863, "step": 1826 }, { "epoch": 0.15, "grad_norm": 0.8371272119375583, "learning_rate": 9.647829438976689e-06, "loss": 1.1771, "step": 1827 }, { "epoch": 0.15, "grad_norm": 1.5494113910947722, "learning_rate": 9.647350304795333e-06, "loss": 0.8044, "step": 1828 }, { "epoch": 0.15, "grad_norm": 1.4829373228421028, "learning_rate": 9.646870856814259e-06, "loss": 0.7746, "step": 1829 }, { "epoch": 0.15, "grad_norm": 1.4223975837845726, "learning_rate": 9.646391095065838e-06, "loss": 0.7936, "step": 1830 }, { "epoch": 0.15, "grad_norm": 1.5277439315278891, "learning_rate": 9.645911019582467e-06, "loss": 0.885, "step": 1831 }, { "epoch": 0.15, "grad_norm": 1.4960155050835333, "learning_rate": 9.64543063039656e-06, "loss": 0.8184, "step": 1832 }, { "epoch": 0.15, "grad_norm": 1.6172671488436552, "learning_rate": 9.644949927540553e-06, "loss": 0.8108, "step": 1833 }, { "epoch": 0.15, "grad_norm": 1.6599409473511135, "learning_rate": 9.644468911046906e-06, "loss": 1.1598, "step": 1834 }, { "epoch": 0.15, "grad_norm": 1.6976516394521568, "learning_rate": 9.6439875809481e-06, "loss": 0.8224, "step": 1835 }, { "epoch": 0.15, "grad_norm": 1.4330036058999358, "learning_rate": 9.64350593727663e-06, "loss": 0.8043, "step": 1836 }, { "epoch": 0.15, "grad_norm": 1.4988459876749862, "learning_rate": 9.643023980065025e-06, "loss": 0.7638, "step": 1837 }, { "epoch": 0.15, "grad_norm": 1.3990705807693076, "learning_rate": 9.64254170934582e-06, "loss": 0.7856, "step": 1838 }, { "epoch": 0.15, "grad_norm": 1.45729867400443, "learning_rate": 9.642059125151586e-06, "loss": 0.8615, "step": 1839 }, { "epoch": 0.15, "grad_norm": 1.5661666821009343, "learning_rate": 9.641576227514903e-06, "loss": 0.8643, "step": 1840 }, { "epoch": 0.15, "grad_norm": 1.529652947545323, "learning_rate": 9.641093016468381e-06, "loss": 0.7785, "step": 1841 }, { "epoch": 0.15, "grad_norm": 1.5282067353219049, "learning_rate": 9.640609492044646e-06, "loss": 0.793, "step": 1842 }, { "epoch": 0.15, "grad_norm": 1.5961346165456891, "learning_rate": 9.640125654276347e-06, "loss": 0.8773, "step": 1843 }, { "epoch": 0.15, "grad_norm": 1.608136513525393, "learning_rate": 9.639641503196152e-06, "loss": 0.8363, "step": 1844 }, { "epoch": 0.15, "grad_norm": 1.6011860828587843, "learning_rate": 9.639157038836755e-06, "loss": 0.8059, "step": 1845 }, { "epoch": 0.15, "grad_norm": 1.7847742494734866, "learning_rate": 9.638672261230866e-06, "loss": 0.8634, "step": 1846 }, { "epoch": 0.15, "grad_norm": 1.6085087575125747, "learning_rate": 9.638187170411218e-06, "loss": 0.7925, "step": 1847 }, { "epoch": 0.15, "grad_norm": 1.6105788528642824, "learning_rate": 9.637701766410568e-06, "loss": 0.8748, "step": 1848 }, { "epoch": 0.15, "grad_norm": 1.4956439065009206, "learning_rate": 9.63721604926169e-06, "loss": 0.7961, "step": 1849 }, { "epoch": 0.15, "grad_norm": 1.5730577589379264, "learning_rate": 9.63673001899738e-06, "loss": 0.8023, "step": 1850 }, { "epoch": 0.15, "grad_norm": 1.6750585084901053, "learning_rate": 9.636243675650456e-06, "loss": 0.903, "step": 1851 }, { "epoch": 0.15, "grad_norm": 1.5140501616418316, "learning_rate": 9.635757019253758e-06, "loss": 0.819, "step": 1852 }, { "epoch": 0.15, "grad_norm": 1.5488672183551293, "learning_rate": 9.635270049840146e-06, "loss": 0.8091, "step": 1853 }, { "epoch": 0.15, "grad_norm": 1.5512724062080712, "learning_rate": 9.634782767442501e-06, "loss": 0.8442, "step": 1854 }, { "epoch": 0.15, "grad_norm": 1.5016854700880238, "learning_rate": 9.634295172093727e-06, "loss": 0.8592, "step": 1855 }, { "epoch": 0.15, "grad_norm": 1.453975023028802, "learning_rate": 9.633807263826745e-06, "loss": 0.8873, "step": 1856 }, { "epoch": 0.15, "grad_norm": 1.4848079009089288, "learning_rate": 9.633319042674497e-06, "loss": 0.8333, "step": 1857 }, { "epoch": 0.15, "grad_norm": 1.5440876797994385, "learning_rate": 9.632830508669957e-06, "loss": 0.8307, "step": 1858 }, { "epoch": 0.15, "grad_norm": 1.7645069403788376, "learning_rate": 9.632341661846107e-06, "loss": 0.8262, "step": 1859 }, { "epoch": 0.15, "grad_norm": 1.566634535503002, "learning_rate": 9.631852502235954e-06, "loss": 0.7956, "step": 1860 }, { "epoch": 0.15, "grad_norm": 1.5596251130748158, "learning_rate": 9.631363029872529e-06, "loss": 0.8929, "step": 1861 }, { "epoch": 0.15, "grad_norm": 1.5179854677025637, "learning_rate": 9.630873244788884e-06, "loss": 0.805, "step": 1862 }, { "epoch": 0.15, "grad_norm": 1.090096791941229, "learning_rate": 9.630383147018086e-06, "loss": 1.1363, "step": 1863 }, { "epoch": 0.15, "grad_norm": 0.9430071435023304, "learning_rate": 9.629892736593231e-06, "loss": 1.1606, "step": 1864 }, { "epoch": 0.15, "grad_norm": 1.5453346312647547, "learning_rate": 9.629402013547432e-06, "loss": 0.8189, "step": 1865 }, { "epoch": 0.15, "grad_norm": 1.6004283871551463, "learning_rate": 9.628910977913821e-06, "loss": 0.8366, "step": 1866 }, { "epoch": 0.15, "grad_norm": 1.6779674885803768, "learning_rate": 9.628419629725558e-06, "loss": 0.855, "step": 1867 }, { "epoch": 0.15, "grad_norm": 1.1112261931216882, "learning_rate": 9.627927969015817e-06, "loss": 1.1756, "step": 1868 }, { "epoch": 0.15, "grad_norm": 1.617339152264514, "learning_rate": 9.627435995817799e-06, "loss": 0.8174, "step": 1869 }, { "epoch": 0.15, "grad_norm": 1.6176686794045037, "learning_rate": 9.62694371016472e-06, "loss": 0.8144, "step": 1870 }, { "epoch": 0.15, "grad_norm": 1.4551864310463187, "learning_rate": 9.62645111208982e-06, "loss": 0.8049, "step": 1871 }, { "epoch": 0.15, "grad_norm": 1.500558989547229, "learning_rate": 9.625958201626365e-06, "loss": 0.8802, "step": 1872 }, { "epoch": 0.15, "grad_norm": 1.4847802025673165, "learning_rate": 9.625464978807633e-06, "loss": 0.8953, "step": 1873 }, { "epoch": 0.15, "grad_norm": 1.6388772642875407, "learning_rate": 9.62497144366693e-06, "loss": 0.8518, "step": 1874 }, { "epoch": 0.15, "grad_norm": 1.581868826155573, "learning_rate": 9.624477596237577e-06, "loss": 0.8313, "step": 1875 }, { "epoch": 0.15, "grad_norm": 1.5557398423833004, "learning_rate": 9.623983436552924e-06, "loss": 0.762, "step": 1876 }, { "epoch": 0.15, "grad_norm": 1.606046016753044, "learning_rate": 9.623488964646334e-06, "loss": 0.8427, "step": 1877 }, { "epoch": 0.15, "grad_norm": 1.449851429012584, "learning_rate": 9.6229941805512e-06, "loss": 0.8233, "step": 1878 }, { "epoch": 0.15, "grad_norm": 1.4510607379891773, "learning_rate": 9.622499084300924e-06, "loss": 0.899, "step": 1879 }, { "epoch": 0.15, "grad_norm": 1.4704394143513242, "learning_rate": 9.622003675928943e-06, "loss": 0.7686, "step": 1880 }, { "epoch": 0.15, "grad_norm": 1.481190483383334, "learning_rate": 9.621507955468704e-06, "loss": 0.8059, "step": 1881 }, { "epoch": 0.15, "grad_norm": 1.5877354816884341, "learning_rate": 9.621011922953681e-06, "loss": 0.7473, "step": 1882 }, { "epoch": 0.15, "grad_norm": 1.6622911969097016, "learning_rate": 9.620515578417364e-06, "loss": 0.8036, "step": 1883 }, { "epoch": 0.15, "grad_norm": 1.5823729385497614, "learning_rate": 9.620018921893272e-06, "loss": 0.8257, "step": 1884 }, { "epoch": 0.15, "grad_norm": 1.5125782339126712, "learning_rate": 9.619521953414936e-06, "loss": 0.8121, "step": 1885 }, { "epoch": 0.15, "grad_norm": 1.7126401164519731, "learning_rate": 9.619024673015916e-06, "loss": 0.863, "step": 1886 }, { "epoch": 0.15, "grad_norm": 0.8957481621484896, "learning_rate": 9.618527080729789e-06, "loss": 1.1752, "step": 1887 }, { "epoch": 0.15, "grad_norm": 1.4585206774743154, "learning_rate": 9.618029176590152e-06, "loss": 0.8013, "step": 1888 }, { "epoch": 0.15, "grad_norm": 1.469168193796979, "learning_rate": 9.617530960630624e-06, "loss": 0.7789, "step": 1889 }, { "epoch": 0.15, "grad_norm": 1.5289400520077705, "learning_rate": 9.617032432884847e-06, "loss": 0.8235, "step": 1890 }, { "epoch": 0.15, "grad_norm": 1.5185767951338127, "learning_rate": 9.616533593386484e-06, "loss": 0.7904, "step": 1891 }, { "epoch": 0.15, "grad_norm": 1.444557590913687, "learning_rate": 9.616034442169214e-06, "loss": 0.8816, "step": 1892 }, { "epoch": 0.15, "grad_norm": 0.9029375239582471, "learning_rate": 9.615534979266745e-06, "loss": 1.1914, "step": 1893 }, { "epoch": 0.15, "grad_norm": 0.8304838852731932, "learning_rate": 9.6150352047128e-06, "loss": 1.1768, "step": 1894 }, { "epoch": 0.15, "grad_norm": 1.5806368107679758, "learning_rate": 9.614535118541126e-06, "loss": 0.9242, "step": 1895 }, { "epoch": 0.15, "grad_norm": 1.4347680283690143, "learning_rate": 9.614034720785488e-06, "loss": 0.7653, "step": 1896 }, { "epoch": 0.15, "grad_norm": 1.8296597903754166, "learning_rate": 9.613534011479675e-06, "loss": 0.7371, "step": 1897 }, { "epoch": 0.15, "grad_norm": 1.4590699707067372, "learning_rate": 9.613032990657495e-06, "loss": 0.8686, "step": 1898 }, { "epoch": 0.15, "grad_norm": 0.8609330094284481, "learning_rate": 9.612531658352782e-06, "loss": 1.137, "step": 1899 }, { "epoch": 0.15, "grad_norm": 1.5709311651799216, "learning_rate": 9.612030014599381e-06, "loss": 0.817, "step": 1900 }, { "epoch": 0.15, "grad_norm": 0.8627446563438778, "learning_rate": 9.61152805943117e-06, "loss": 1.154, "step": 1901 }, { "epoch": 0.15, "grad_norm": 0.808761666850305, "learning_rate": 9.611025792882038e-06, "loss": 1.1731, "step": 1902 }, { "epoch": 0.15, "grad_norm": 1.6537067174113689, "learning_rate": 9.6105232149859e-06, "loss": 0.8358, "step": 1903 }, { "epoch": 0.15, "grad_norm": 0.8006899805807882, "learning_rate": 9.610020325776694e-06, "loss": 1.1253, "step": 1904 }, { "epoch": 0.15, "grad_norm": 1.5995874645563164, "learning_rate": 9.609517125288373e-06, "loss": 0.8371, "step": 1905 }, { "epoch": 0.15, "grad_norm": 1.64395169708802, "learning_rate": 9.609013613554917e-06, "loss": 0.7476, "step": 1906 }, { "epoch": 0.15, "grad_norm": 1.3823231042324897, "learning_rate": 9.608509790610322e-06, "loss": 0.7927, "step": 1907 }, { "epoch": 0.15, "grad_norm": 1.4188303104669986, "learning_rate": 9.608005656488605e-06, "loss": 0.7651, "step": 1908 }, { "epoch": 0.15, "grad_norm": 0.8875494826376961, "learning_rate": 9.607501211223812e-06, "loss": 1.1497, "step": 1909 }, { "epoch": 0.15, "grad_norm": 1.6343710275658958, "learning_rate": 9.606996454850002e-06, "loss": 0.8216, "step": 1910 }, { "epoch": 0.15, "grad_norm": 0.8411611534455249, "learning_rate": 9.606491387401256e-06, "loss": 1.1532, "step": 1911 }, { "epoch": 0.15, "grad_norm": 1.4592087109381038, "learning_rate": 9.605986008911677e-06, "loss": 0.8018, "step": 1912 }, { "epoch": 0.15, "grad_norm": 1.4271140318066142, "learning_rate": 9.605480319415391e-06, "loss": 0.8076, "step": 1913 }, { "epoch": 0.15, "grad_norm": 1.4031341931286325, "learning_rate": 9.604974318946544e-06, "loss": 0.7655, "step": 1914 }, { "epoch": 0.15, "grad_norm": 1.5157193237988733, "learning_rate": 9.6044680075393e-06, "loss": 0.8691, "step": 1915 }, { "epoch": 0.15, "grad_norm": 1.6025534141134328, "learning_rate": 9.603961385227848e-06, "loss": 0.7923, "step": 1916 }, { "epoch": 0.15, "grad_norm": 1.4126408589981612, "learning_rate": 9.603454452046395e-06, "loss": 0.8057, "step": 1917 }, { "epoch": 0.15, "grad_norm": 1.5140043897353115, "learning_rate": 9.602947208029172e-06, "loss": 0.7787, "step": 1918 }, { "epoch": 0.15, "grad_norm": 1.4458827783841817, "learning_rate": 9.602439653210426e-06, "loss": 0.8325, "step": 1919 }, { "epoch": 0.15, "grad_norm": 1.5065544985895871, "learning_rate": 9.601931787624432e-06, "loss": 0.9228, "step": 1920 }, { "epoch": 0.15, "grad_norm": 1.5500625454414283, "learning_rate": 9.601423611305481e-06, "loss": 0.865, "step": 1921 }, { "epoch": 0.15, "grad_norm": 1.4859931888949134, "learning_rate": 9.600915124287886e-06, "loss": 0.8372, "step": 1922 }, { "epoch": 0.15, "grad_norm": 1.0559445035420005, "learning_rate": 9.600406326605983e-06, "loss": 1.2052, "step": 1923 }, { "epoch": 0.15, "grad_norm": 1.4571971603518485, "learning_rate": 9.599897218294122e-06, "loss": 0.9132, "step": 1924 }, { "epoch": 0.15, "grad_norm": 1.5753094972092905, "learning_rate": 9.599387799386684e-06, "loss": 0.8819, "step": 1925 }, { "epoch": 0.15, "grad_norm": 1.5823569573061436, "learning_rate": 9.598878069918064e-06, "loss": 0.8316, "step": 1926 }, { "epoch": 0.15, "grad_norm": 0.8271540781876586, "learning_rate": 9.598368029922681e-06, "loss": 1.134, "step": 1927 }, { "epoch": 0.15, "grad_norm": 1.6059994765595487, "learning_rate": 9.597857679434974e-06, "loss": 0.9165, "step": 1928 }, { "epoch": 0.15, "grad_norm": 1.5831001769555655, "learning_rate": 9.597347018489403e-06, "loss": 0.8513, "step": 1929 }, { "epoch": 0.15, "grad_norm": 1.4450721335626313, "learning_rate": 9.596836047120449e-06, "loss": 0.808, "step": 1930 }, { "epoch": 0.15, "grad_norm": 0.8980032014616626, "learning_rate": 9.596324765362614e-06, "loss": 1.154, "step": 1931 }, { "epoch": 0.16, "grad_norm": 1.539293914150005, "learning_rate": 9.59581317325042e-06, "loss": 0.8221, "step": 1932 }, { "epoch": 0.16, "grad_norm": 1.460131114780644, "learning_rate": 9.595301270818411e-06, "loss": 0.7284, "step": 1933 }, { "epoch": 0.16, "grad_norm": 1.5304059527857843, "learning_rate": 9.594789058101154e-06, "loss": 0.8338, "step": 1934 }, { "epoch": 0.16, "grad_norm": 1.4860589328315805, "learning_rate": 9.594276535133232e-06, "loss": 0.825, "step": 1935 }, { "epoch": 0.16, "grad_norm": 0.8856197892352309, "learning_rate": 9.593763701949253e-06, "loss": 1.2051, "step": 1936 }, { "epoch": 0.16, "grad_norm": 1.5130406344868286, "learning_rate": 9.593250558583846e-06, "loss": 0.8339, "step": 1937 }, { "epoch": 0.16, "grad_norm": 1.4716043320582513, "learning_rate": 9.592737105071657e-06, "loss": 0.8468, "step": 1938 }, { "epoch": 0.16, "grad_norm": 1.578091370244503, "learning_rate": 9.592223341447357e-06, "loss": 0.8179, "step": 1939 }, { "epoch": 0.16, "grad_norm": 1.619685237875716, "learning_rate": 9.591709267745635e-06, "loss": 0.8001, "step": 1940 }, { "epoch": 0.16, "grad_norm": 1.46787499085914, "learning_rate": 9.591194884001205e-06, "loss": 0.8854, "step": 1941 }, { "epoch": 0.16, "grad_norm": 1.4478472053033615, "learning_rate": 9.590680190248797e-06, "loss": 0.8025, "step": 1942 }, { "epoch": 0.16, "grad_norm": 0.8779506792836145, "learning_rate": 9.590165186523166e-06, "loss": 1.159, "step": 1943 }, { "epoch": 0.16, "grad_norm": 1.6402958016195528, "learning_rate": 9.589649872859086e-06, "loss": 0.8218, "step": 1944 }, { "epoch": 0.16, "grad_norm": 1.6222334347202898, "learning_rate": 9.589134249291352e-06, "loss": 0.879, "step": 1945 }, { "epoch": 0.16, "grad_norm": 1.5688963707710817, "learning_rate": 9.588618315854779e-06, "loss": 0.7228, "step": 1946 }, { "epoch": 0.16, "grad_norm": 1.444387335333108, "learning_rate": 9.588102072584204e-06, "loss": 0.7721, "step": 1947 }, { "epoch": 0.16, "grad_norm": 1.5416620701577475, "learning_rate": 9.587585519514487e-06, "loss": 0.8052, "step": 1948 }, { "epoch": 0.16, "grad_norm": 1.535094422655125, "learning_rate": 9.587068656680506e-06, "loss": 0.8476, "step": 1949 }, { "epoch": 0.16, "grad_norm": 1.5486971614978873, "learning_rate": 9.58655148411716e-06, "loss": 0.8101, "step": 1950 }, { "epoch": 0.16, "grad_norm": 0.9407853303983144, "learning_rate": 9.586034001859368e-06, "loss": 1.1375, "step": 1951 }, { "epoch": 0.16, "grad_norm": 0.8473184264588529, "learning_rate": 9.585516209942077e-06, "loss": 1.1524, "step": 1952 }, { "epoch": 0.16, "grad_norm": 1.4607168546118343, "learning_rate": 9.584998108400243e-06, "loss": 0.7828, "step": 1953 }, { "epoch": 0.16, "grad_norm": 1.451335229150967, "learning_rate": 9.584479697268854e-06, "loss": 0.7445, "step": 1954 }, { "epoch": 0.16, "grad_norm": 1.6259967175486456, "learning_rate": 9.583960976582914e-06, "loss": 0.8593, "step": 1955 }, { "epoch": 0.16, "grad_norm": 1.4611492870796858, "learning_rate": 9.583441946377445e-06, "loss": 0.827, "step": 1956 }, { "epoch": 0.16, "grad_norm": 1.5320261995963909, "learning_rate": 9.582922606687495e-06, "loss": 0.7932, "step": 1957 }, { "epoch": 0.16, "grad_norm": 1.6663199891705658, "learning_rate": 9.582402957548132e-06, "loss": 0.8263, "step": 1958 }, { "epoch": 0.16, "grad_norm": 1.5106437468927638, "learning_rate": 9.581882998994442e-06, "loss": 0.8054, "step": 1959 }, { "epoch": 0.16, "grad_norm": 1.4488080696130805, "learning_rate": 9.581362731061537e-06, "loss": 0.864, "step": 1960 }, { "epoch": 0.16, "grad_norm": 1.3518759642735945, "learning_rate": 9.580842153784542e-06, "loss": 1.1979, "step": 1961 }, { "epoch": 0.16, "grad_norm": 1.6557396991620612, "learning_rate": 9.580321267198611e-06, "loss": 0.7622, "step": 1962 }, { "epoch": 0.16, "grad_norm": 1.5576491340894745, "learning_rate": 9.579800071338915e-06, "loss": 0.8304, "step": 1963 }, { "epoch": 0.16, "grad_norm": 1.6421353511099428, "learning_rate": 9.579278566240646e-06, "loss": 0.865, "step": 1964 }, { "epoch": 0.16, "grad_norm": 1.5950115560258147, "learning_rate": 9.578756751939017e-06, "loss": 0.8593, "step": 1965 }, { "epoch": 0.16, "grad_norm": 1.4879709247103965, "learning_rate": 9.57823462846926e-06, "loss": 0.7878, "step": 1966 }, { "epoch": 0.16, "grad_norm": 1.4132387057288935, "learning_rate": 9.577712195866634e-06, "loss": 0.7245, "step": 1967 }, { "epoch": 0.16, "grad_norm": 1.8303345242631137, "learning_rate": 9.577189454166414e-06, "loss": 0.8199, "step": 1968 }, { "epoch": 0.16, "grad_norm": 1.504998426988508, "learning_rate": 9.576666403403894e-06, "loss": 0.7702, "step": 1969 }, { "epoch": 0.16, "grad_norm": 1.4171771564609978, "learning_rate": 9.576143043614393e-06, "loss": 0.8041, "step": 1970 }, { "epoch": 0.16, "grad_norm": 1.018158099347651, "learning_rate": 9.57561937483325e-06, "loss": 1.1838, "step": 1971 }, { "epoch": 0.16, "grad_norm": 1.6354530380802565, "learning_rate": 9.575095397095824e-06, "loss": 0.7886, "step": 1972 }, { "epoch": 0.16, "grad_norm": 1.4081521493242066, "learning_rate": 9.574571110437496e-06, "loss": 0.8637, "step": 1973 }, { "epoch": 0.16, "grad_norm": 1.4592120599050626, "learning_rate": 9.574046514893667e-06, "loss": 0.8088, "step": 1974 }, { "epoch": 0.16, "grad_norm": 1.5242720845036777, "learning_rate": 9.573521610499756e-06, "loss": 0.876, "step": 1975 }, { "epoch": 0.16, "grad_norm": 0.8648406607296378, "learning_rate": 9.572996397291209e-06, "loss": 1.1459, "step": 1976 }, { "epoch": 0.16, "grad_norm": 0.8023884409772494, "learning_rate": 9.572470875303488e-06, "loss": 1.1524, "step": 1977 }, { "epoch": 0.16, "grad_norm": 1.6096258897212612, "learning_rate": 9.571945044572079e-06, "loss": 0.8636, "step": 1978 }, { "epoch": 0.16, "grad_norm": 1.524697864588973, "learning_rate": 9.571418905132486e-06, "loss": 0.8322, "step": 1979 }, { "epoch": 0.16, "grad_norm": 1.6275365199180214, "learning_rate": 9.570892457020233e-06, "loss": 0.7787, "step": 1980 }, { "epoch": 0.16, "grad_norm": 1.5489770770693743, "learning_rate": 9.570365700270872e-06, "loss": 0.688, "step": 1981 }, { "epoch": 0.16, "grad_norm": 0.8687087857902865, "learning_rate": 9.569838634919968e-06, "loss": 1.1414, "step": 1982 }, { "epoch": 0.16, "grad_norm": 1.3907902823670681, "learning_rate": 9.569311261003108e-06, "loss": 0.7978, "step": 1983 }, { "epoch": 0.16, "grad_norm": 1.4801077999409677, "learning_rate": 9.568783578555904e-06, "loss": 0.7986, "step": 1984 }, { "epoch": 0.16, "grad_norm": 1.8247994134928776, "learning_rate": 9.568255587613986e-06, "loss": 0.9336, "step": 1985 }, { "epoch": 0.16, "grad_norm": 1.5290343716768693, "learning_rate": 9.567727288213005e-06, "loss": 0.7561, "step": 1986 }, { "epoch": 0.16, "grad_norm": 0.8962036807717707, "learning_rate": 9.567198680388632e-06, "loss": 1.1802, "step": 1987 }, { "epoch": 0.16, "grad_norm": 1.554517624306963, "learning_rate": 9.566669764176562e-06, "loss": 0.8261, "step": 1988 }, { "epoch": 0.16, "grad_norm": 1.4407902648746544, "learning_rate": 9.566140539612506e-06, "loss": 0.8345, "step": 1989 }, { "epoch": 0.16, "grad_norm": 1.478552519173747, "learning_rate": 9.565611006732201e-06, "loss": 0.8206, "step": 1990 }, { "epoch": 0.16, "grad_norm": 0.8269263857066178, "learning_rate": 9.5650811655714e-06, "loss": 1.1757, "step": 1991 }, { "epoch": 0.16, "grad_norm": 1.584076308274061, "learning_rate": 9.564551016165879e-06, "loss": 0.8713, "step": 1992 }, { "epoch": 0.16, "grad_norm": 1.6177442423537147, "learning_rate": 9.564020558551437e-06, "loss": 0.8274, "step": 1993 }, { "epoch": 0.16, "grad_norm": 1.564769845079321, "learning_rate": 9.56348979276389e-06, "loss": 0.7795, "step": 1994 }, { "epoch": 0.16, "grad_norm": 1.4469394368930757, "learning_rate": 9.562958718839078e-06, "loss": 0.7564, "step": 1995 }, { "epoch": 0.16, "grad_norm": 1.41775881739794, "learning_rate": 9.562427336812859e-06, "loss": 0.8132, "step": 1996 }, { "epoch": 0.16, "grad_norm": 0.8340506300875027, "learning_rate": 9.561895646721113e-06, "loss": 1.1485, "step": 1997 }, { "epoch": 0.16, "grad_norm": 1.4314700834740577, "learning_rate": 9.561363648599742e-06, "loss": 0.8653, "step": 1998 }, { "epoch": 0.16, "grad_norm": 1.5556179449093055, "learning_rate": 9.560831342484668e-06, "loss": 0.906, "step": 1999 }, { "epoch": 0.16, "grad_norm": 1.695931216928343, "learning_rate": 9.560298728411833e-06, "loss": 0.7533, "step": 2000 }, { "epoch": 0.16, "grad_norm": 1.5548177138660582, "learning_rate": 9.559765806417198e-06, "loss": 0.9132, "step": 2001 }, { "epoch": 0.16, "grad_norm": 1.4834800453069483, "learning_rate": 9.55923257653675e-06, "loss": 0.8037, "step": 2002 }, { "epoch": 0.16, "grad_norm": 0.842534846303849, "learning_rate": 9.558699038806494e-06, "loss": 1.1661, "step": 2003 }, { "epoch": 0.16, "grad_norm": 0.8147928577499463, "learning_rate": 9.558165193262455e-06, "loss": 1.1491, "step": 2004 }, { "epoch": 0.16, "grad_norm": 1.4651154921627292, "learning_rate": 9.557631039940678e-06, "loss": 0.7976, "step": 2005 }, { "epoch": 0.16, "grad_norm": 1.4775401430952735, "learning_rate": 9.557096578877232e-06, "loss": 0.8029, "step": 2006 }, { "epoch": 0.16, "grad_norm": 1.442471322137238, "learning_rate": 9.556561810108205e-06, "loss": 0.8657, "step": 2007 }, { "epoch": 0.16, "grad_norm": 1.440087798122106, "learning_rate": 9.556026733669706e-06, "loss": 0.7912, "step": 2008 }, { "epoch": 0.16, "grad_norm": 1.4522236870456824, "learning_rate": 9.555491349597862e-06, "loss": 0.7075, "step": 2009 }, { "epoch": 0.16, "grad_norm": 1.4086375758751186, "learning_rate": 9.554955657928828e-06, "loss": 0.8965, "step": 2010 }, { "epoch": 0.16, "grad_norm": 1.5526463637832908, "learning_rate": 9.55441965869877e-06, "loss": 0.8198, "step": 2011 }, { "epoch": 0.16, "grad_norm": 1.5405722404386109, "learning_rate": 9.553883351943882e-06, "loss": 0.8009, "step": 2012 }, { "epoch": 0.16, "grad_norm": 1.4334982610963374, "learning_rate": 9.55334673770038e-06, "loss": 0.8151, "step": 2013 }, { "epoch": 0.16, "grad_norm": 1.597102999347615, "learning_rate": 9.552809816004491e-06, "loss": 0.8327, "step": 2014 }, { "epoch": 0.16, "grad_norm": 1.5404945020064367, "learning_rate": 9.552272586892475e-06, "loss": 0.7972, "step": 2015 }, { "epoch": 0.16, "grad_norm": 1.4999958359113896, "learning_rate": 9.551735050400603e-06, "loss": 0.8994, "step": 2016 }, { "epoch": 0.16, "grad_norm": 1.6005243026581775, "learning_rate": 9.551197206565174e-06, "loss": 0.8585, "step": 2017 }, { "epoch": 0.16, "grad_norm": 1.1101390736515815, "learning_rate": 9.550659055422502e-06, "loss": 1.1717, "step": 2018 }, { "epoch": 0.16, "grad_norm": 1.5256614815794431, "learning_rate": 9.550120597008925e-06, "loss": 0.8467, "step": 2019 }, { "epoch": 0.16, "grad_norm": 1.5635654397022658, "learning_rate": 9.549581831360799e-06, "loss": 0.8884, "step": 2020 }, { "epoch": 0.16, "grad_norm": 1.5124055114417434, "learning_rate": 9.549042758514505e-06, "loss": 0.7685, "step": 2021 }, { "epoch": 0.16, "grad_norm": 1.515363090117947, "learning_rate": 9.548503378506444e-06, "loss": 0.8301, "step": 2022 }, { "epoch": 0.16, "grad_norm": 1.6609574982501256, "learning_rate": 9.547963691373033e-06, "loss": 0.7951, "step": 2023 }, { "epoch": 0.16, "grad_norm": 1.5170720984309314, "learning_rate": 9.547423697150714e-06, "loss": 0.914, "step": 2024 }, { "epoch": 0.16, "grad_norm": 1.5850238289351597, "learning_rate": 9.546883395875947e-06, "loss": 0.8434, "step": 2025 }, { "epoch": 0.16, "grad_norm": 1.4772370305066855, "learning_rate": 9.54634278758522e-06, "loss": 0.8794, "step": 2026 }, { "epoch": 0.16, "grad_norm": 1.5916768671297898, "learning_rate": 9.545801872315028e-06, "loss": 0.8886, "step": 2027 }, { "epoch": 0.16, "grad_norm": 1.5298054603189897, "learning_rate": 9.545260650101902e-06, "loss": 0.7431, "step": 2028 }, { "epoch": 0.16, "grad_norm": 1.4609699030287253, "learning_rate": 9.544719120982382e-06, "loss": 0.7705, "step": 2029 }, { "epoch": 0.16, "grad_norm": 1.6164060796216215, "learning_rate": 9.544177284993035e-06, "loss": 0.8011, "step": 2030 }, { "epoch": 0.16, "grad_norm": 1.5087461083376432, "learning_rate": 9.543635142170447e-06, "loss": 0.8729, "step": 2031 }, { "epoch": 0.16, "grad_norm": 1.6154532817564018, "learning_rate": 9.543092692551224e-06, "loss": 0.8245, "step": 2032 }, { "epoch": 0.16, "grad_norm": 2.1211807465923624, "learning_rate": 9.542549936171994e-06, "loss": 0.8555, "step": 2033 }, { "epoch": 0.16, "grad_norm": 1.7826255612234376, "learning_rate": 9.542006873069404e-06, "loss": 0.8081, "step": 2034 }, { "epoch": 0.16, "grad_norm": 1.6566557841116214, "learning_rate": 9.541463503280127e-06, "loss": 0.8851, "step": 2035 }, { "epoch": 0.16, "grad_norm": 1.6309681368643612, "learning_rate": 9.540919826840848e-06, "loss": 0.8091, "step": 2036 }, { "epoch": 0.16, "grad_norm": 1.5875453047972214, "learning_rate": 9.540375843788278e-06, "loss": 0.7335, "step": 2037 }, { "epoch": 0.16, "grad_norm": 1.5200056144487872, "learning_rate": 9.539831554159152e-06, "loss": 0.8098, "step": 2038 }, { "epoch": 0.16, "grad_norm": 1.5398000046571634, "learning_rate": 9.539286957990215e-06, "loss": 0.921, "step": 2039 }, { "epoch": 0.16, "grad_norm": 1.5068932575198775, "learning_rate": 9.538742055318243e-06, "loss": 0.8468, "step": 2040 }, { "epoch": 0.16, "grad_norm": 1.4437664856269383, "learning_rate": 9.538196846180033e-06, "loss": 0.8117, "step": 2041 }, { "epoch": 0.16, "grad_norm": 1.507050775408361, "learning_rate": 9.53765133061239e-06, "loss": 0.8139, "step": 2042 }, { "epoch": 0.16, "grad_norm": 1.4289315139188263, "learning_rate": 9.537105508652156e-06, "loss": 0.8306, "step": 2043 }, { "epoch": 0.16, "grad_norm": 1.4556253493228337, "learning_rate": 9.536559380336183e-06, "loss": 0.8621, "step": 2044 }, { "epoch": 0.16, "grad_norm": 1.410097251607193, "learning_rate": 9.536012945701345e-06, "loss": 0.7413, "step": 2045 }, { "epoch": 0.16, "grad_norm": 1.1795139289825596, "learning_rate": 9.535466204784542e-06, "loss": 1.1694, "step": 2046 }, { "epoch": 0.16, "grad_norm": 1.5666830304945485, "learning_rate": 9.53491915762269e-06, "loss": 0.7922, "step": 2047 }, { "epoch": 0.16, "grad_norm": 0.8816786053977199, "learning_rate": 9.534371804252727e-06, "loss": 1.1548, "step": 2048 }, { "epoch": 0.16, "grad_norm": 1.6087610094690001, "learning_rate": 9.533824144711612e-06, "loss": 0.8347, "step": 2049 }, { "epoch": 0.16, "grad_norm": 1.5539672774751516, "learning_rate": 9.533276179036324e-06, "loss": 0.7507, "step": 2050 }, { "epoch": 0.16, "grad_norm": 1.5538271987534389, "learning_rate": 9.532727907263861e-06, "loss": 0.8741, "step": 2051 }, { "epoch": 0.16, "grad_norm": 1.4663255725235147, "learning_rate": 9.532179329431243e-06, "loss": 0.7856, "step": 2052 }, { "epoch": 0.16, "grad_norm": 1.5870658117724739, "learning_rate": 9.531630445575516e-06, "loss": 0.803, "step": 2053 }, { "epoch": 0.16, "grad_norm": 1.538162675417869, "learning_rate": 9.53108125573374e-06, "loss": 0.7601, "step": 2054 }, { "epoch": 0.16, "grad_norm": 1.4335290445611213, "learning_rate": 9.530531759942994e-06, "loss": 0.8054, "step": 2055 }, { "epoch": 0.16, "grad_norm": 1.567707543091647, "learning_rate": 9.529981958240386e-06, "loss": 0.8285, "step": 2056 }, { "epoch": 0.17, "grad_norm": 1.5614285894465612, "learning_rate": 9.529431850663036e-06, "loss": 0.8436, "step": 2057 }, { "epoch": 0.17, "grad_norm": 1.5078563648487966, "learning_rate": 9.528881437248092e-06, "loss": 0.7465, "step": 2058 }, { "epoch": 0.17, "grad_norm": 1.548778924405887, "learning_rate": 9.528330718032716e-06, "loss": 0.8465, "step": 2059 }, { "epoch": 0.17, "grad_norm": 1.569953279471227, "learning_rate": 9.527779693054095e-06, "loss": 0.7959, "step": 2060 }, { "epoch": 0.17, "grad_norm": 1.6111020901195088, "learning_rate": 9.527228362349437e-06, "loss": 0.8646, "step": 2061 }, { "epoch": 0.17, "grad_norm": 1.5756714611813065, "learning_rate": 9.526676725955968e-06, "loss": 0.8022, "step": 2062 }, { "epoch": 0.17, "grad_norm": 1.5240010988885164, "learning_rate": 9.526124783910935e-06, "loss": 0.8453, "step": 2063 }, { "epoch": 0.17, "grad_norm": 1.4088776288386655, "learning_rate": 9.525572536251608e-06, "loss": 0.7902, "step": 2064 }, { "epoch": 0.17, "grad_norm": 1.4395932589334097, "learning_rate": 9.525019983015274e-06, "loss": 0.8042, "step": 2065 }, { "epoch": 0.17, "grad_norm": 1.4844657044914202, "learning_rate": 9.524467124239243e-06, "loss": 0.8279, "step": 2066 }, { "epoch": 0.17, "grad_norm": 1.6001718981232678, "learning_rate": 9.523913959960846e-06, "loss": 0.8858, "step": 2067 }, { "epoch": 0.17, "grad_norm": 1.459166915834659, "learning_rate": 9.523360490217435e-06, "loss": 0.8044, "step": 2068 }, { "epoch": 0.17, "grad_norm": 1.5885640927619404, "learning_rate": 9.52280671504638e-06, "loss": 0.8866, "step": 2069 }, { "epoch": 0.17, "grad_norm": 1.498360637011439, "learning_rate": 9.522252634485071e-06, "loss": 0.8446, "step": 2070 }, { "epoch": 0.17, "grad_norm": 1.6628163404921328, "learning_rate": 9.521698248570928e-06, "loss": 0.7474, "step": 2071 }, { "epoch": 0.17, "grad_norm": 1.383755394028347, "learning_rate": 9.521143557341378e-06, "loss": 0.7574, "step": 2072 }, { "epoch": 0.17, "grad_norm": 1.5464371876326106, "learning_rate": 9.520588560833876e-06, "loss": 0.7907, "step": 2073 }, { "epoch": 0.17, "grad_norm": 1.4848428431731366, "learning_rate": 9.520033259085897e-06, "loss": 0.752, "step": 2074 }, { "epoch": 0.17, "grad_norm": 1.4959677903249786, "learning_rate": 9.519477652134938e-06, "loss": 0.7615, "step": 2075 }, { "epoch": 0.17, "grad_norm": 1.54307098278177, "learning_rate": 9.518921740018512e-06, "loss": 0.9456, "step": 2076 }, { "epoch": 0.17, "grad_norm": 1.4165815111712976, "learning_rate": 9.518365522774157e-06, "loss": 1.1971, "step": 2077 }, { "epoch": 0.17, "grad_norm": 1.298176375964562, "learning_rate": 9.517809000439432e-06, "loss": 1.1785, "step": 2078 }, { "epoch": 0.17, "grad_norm": 1.5319971429975636, "learning_rate": 9.517252173051912e-06, "loss": 0.9188, "step": 2079 }, { "epoch": 0.17, "grad_norm": 1.5799540359746305, "learning_rate": 9.516695040649195e-06, "loss": 0.7559, "step": 2080 }, { "epoch": 0.17, "grad_norm": 1.5601980254238543, "learning_rate": 9.516137603268903e-06, "loss": 0.8696, "step": 2081 }, { "epoch": 0.17, "grad_norm": 1.558500343844507, "learning_rate": 9.515579860948672e-06, "loss": 0.8403, "step": 2082 }, { "epoch": 0.17, "grad_norm": 1.531770581325229, "learning_rate": 9.515021813726162e-06, "loss": 0.8491, "step": 2083 }, { "epoch": 0.17, "grad_norm": 1.4204828569392018, "learning_rate": 9.514463461639055e-06, "loss": 0.7953, "step": 2084 }, { "epoch": 0.17, "grad_norm": 1.6420364718889713, "learning_rate": 9.513904804725054e-06, "loss": 0.832, "step": 2085 }, { "epoch": 0.17, "grad_norm": 1.5693719784151614, "learning_rate": 9.513345843021878e-06, "loss": 0.8973, "step": 2086 }, { "epoch": 0.17, "grad_norm": 1.5859173024931585, "learning_rate": 9.51278657656727e-06, "loss": 0.7858, "step": 2087 }, { "epoch": 0.17, "grad_norm": 2.200025805776269, "learning_rate": 9.512227005398992e-06, "loss": 1.1956, "step": 2088 }, { "epoch": 0.17, "grad_norm": 1.4535402868958767, "learning_rate": 9.511667129554832e-06, "loss": 0.8228, "step": 2089 }, { "epoch": 0.17, "grad_norm": 1.484412985829688, "learning_rate": 9.511106949072588e-06, "loss": 0.8294, "step": 2090 }, { "epoch": 0.17, "grad_norm": 1.5227825115611187, "learning_rate": 9.510546463990089e-06, "loss": 0.9095, "step": 2091 }, { "epoch": 0.17, "grad_norm": 1.4716656227651383, "learning_rate": 9.509985674345179e-06, "loss": 0.7868, "step": 2092 }, { "epoch": 0.17, "grad_norm": 1.458081447545481, "learning_rate": 9.509424580175724e-06, "loss": 0.8232, "step": 2093 }, { "epoch": 0.17, "grad_norm": 1.1001736302014453, "learning_rate": 9.508863181519608e-06, "loss": 1.1835, "step": 2094 }, { "epoch": 0.17, "grad_norm": 1.561968626215087, "learning_rate": 9.50830147841474e-06, "loss": 0.7872, "step": 2095 }, { "epoch": 0.17, "grad_norm": 1.5122608393218788, "learning_rate": 9.507739470899048e-06, "loss": 0.8592, "step": 2096 }, { "epoch": 0.17, "grad_norm": 1.6332424973760133, "learning_rate": 9.50717715901048e-06, "loss": 0.8814, "step": 2097 }, { "epoch": 0.17, "grad_norm": 1.5522742721355947, "learning_rate": 9.506614542787003e-06, "loss": 0.8092, "step": 2098 }, { "epoch": 0.17, "grad_norm": 1.5900342227438522, "learning_rate": 9.506051622266608e-06, "loss": 0.8388, "step": 2099 }, { "epoch": 0.17, "grad_norm": 1.5877402561046388, "learning_rate": 9.505488397487303e-06, "loss": 0.8431, "step": 2100 }, { "epoch": 0.17, "grad_norm": 1.1499759375621594, "learning_rate": 9.504924868487118e-06, "loss": 1.1687, "step": 2101 }, { "epoch": 0.17, "grad_norm": 1.546693746661753, "learning_rate": 9.504361035304106e-06, "loss": 0.8611, "step": 2102 }, { "epoch": 0.17, "grad_norm": 1.0350998333260615, "learning_rate": 9.503796897976339e-06, "loss": 1.1279, "step": 2103 }, { "epoch": 0.17, "grad_norm": 0.878201323343734, "learning_rate": 9.503232456541904e-06, "loss": 1.1592, "step": 2104 }, { "epoch": 0.17, "grad_norm": 1.5677990365056014, "learning_rate": 9.502667711038917e-06, "loss": 0.7505, "step": 2105 }, { "epoch": 0.17, "grad_norm": 1.654728209652386, "learning_rate": 9.50210266150551e-06, "loss": 0.8595, "step": 2106 }, { "epoch": 0.17, "grad_norm": 1.5488854348199774, "learning_rate": 9.501537307979836e-06, "loss": 0.868, "step": 2107 }, { "epoch": 0.17, "grad_norm": 1.180044400332189, "learning_rate": 9.500971650500072e-06, "loss": 1.1492, "step": 2108 }, { "epoch": 0.17, "grad_norm": 1.5940741885667802, "learning_rate": 9.500405689104408e-06, "loss": 0.8597, "step": 2109 }, { "epoch": 0.17, "grad_norm": 1.5110415421957293, "learning_rate": 9.499839423831062e-06, "loss": 0.8315, "step": 2110 }, { "epoch": 0.17, "grad_norm": 1.5161124359375824, "learning_rate": 9.499272854718268e-06, "loss": 0.793, "step": 2111 }, { "epoch": 0.17, "grad_norm": 1.0760527603217243, "learning_rate": 9.498705981804283e-06, "loss": 1.132, "step": 2112 }, { "epoch": 0.17, "grad_norm": 1.5528258855269335, "learning_rate": 9.498138805127383e-06, "loss": 0.8544, "step": 2113 }, { "epoch": 0.17, "grad_norm": 1.5572580710714743, "learning_rate": 9.497571324725865e-06, "loss": 0.814, "step": 2114 }, { "epoch": 0.17, "grad_norm": 1.4474290273521566, "learning_rate": 9.497003540638047e-06, "loss": 0.8112, "step": 2115 }, { "epoch": 0.17, "grad_norm": 1.5296451017598425, "learning_rate": 9.496435452902268e-06, "loss": 0.8103, "step": 2116 }, { "epoch": 0.17, "grad_norm": 1.4298819709449386, "learning_rate": 9.495867061556884e-06, "loss": 0.8095, "step": 2117 }, { "epoch": 0.17, "grad_norm": 1.4829398011880368, "learning_rate": 9.495298366640276e-06, "loss": 0.8085, "step": 2118 }, { "epoch": 0.17, "grad_norm": 1.5206479717954375, "learning_rate": 9.494729368190843e-06, "loss": 0.7782, "step": 2119 }, { "epoch": 0.17, "grad_norm": 1.873404289918305, "learning_rate": 9.494160066247006e-06, "loss": 0.7789, "step": 2120 }, { "epoch": 0.17, "grad_norm": 1.0268484387428616, "learning_rate": 9.493590460847204e-06, "loss": 1.162, "step": 2121 }, { "epoch": 0.17, "grad_norm": 1.4513800902525658, "learning_rate": 9.4930205520299e-06, "loss": 0.744, "step": 2122 }, { "epoch": 0.17, "grad_norm": 1.5568742926042145, "learning_rate": 9.492450339833573e-06, "loss": 0.7672, "step": 2123 }, { "epoch": 0.17, "grad_norm": 0.8566521929923023, "learning_rate": 9.491879824296729e-06, "loss": 1.1548, "step": 2124 }, { "epoch": 0.17, "grad_norm": 1.5281890094016113, "learning_rate": 9.491309005457885e-06, "loss": 0.8647, "step": 2125 }, { "epoch": 0.17, "grad_norm": 1.4806339144556646, "learning_rate": 9.490737883355587e-06, "loss": 0.7954, "step": 2126 }, { "epoch": 0.17, "grad_norm": 1.5837364064120136, "learning_rate": 9.4901664580284e-06, "loss": 0.8453, "step": 2127 }, { "epoch": 0.17, "grad_norm": 1.5867057799872515, "learning_rate": 9.489594729514907e-06, "loss": 0.8119, "step": 2128 }, { "epoch": 0.17, "grad_norm": 1.5329971605249757, "learning_rate": 9.48902269785371e-06, "loss": 0.7895, "step": 2129 }, { "epoch": 0.17, "grad_norm": 1.631845253540148, "learning_rate": 9.488450363083435e-06, "loss": 0.8152, "step": 2130 }, { "epoch": 0.17, "grad_norm": 1.4279116263224285, "learning_rate": 9.48787772524273e-06, "loss": 0.8523, "step": 2131 }, { "epoch": 0.17, "grad_norm": 1.4580836107684407, "learning_rate": 9.487304784370257e-06, "loss": 0.7321, "step": 2132 }, { "epoch": 0.17, "grad_norm": 1.5188237211342017, "learning_rate": 9.486731540504705e-06, "loss": 0.759, "step": 2133 }, { "epoch": 0.17, "grad_norm": 1.676518276728722, "learning_rate": 9.48615799368478e-06, "loss": 0.8169, "step": 2134 }, { "epoch": 0.17, "grad_norm": 1.5291736490255858, "learning_rate": 9.48558414394921e-06, "loss": 0.8203, "step": 2135 }, { "epoch": 0.17, "grad_norm": 1.670853457846532, "learning_rate": 9.48500999133674e-06, "loss": 0.8245, "step": 2136 }, { "epoch": 0.17, "grad_norm": 1.4306486237015201, "learning_rate": 9.484435535886142e-06, "loss": 0.7851, "step": 2137 }, { "epoch": 0.17, "grad_norm": 1.413913750685564, "learning_rate": 9.4838607776362e-06, "loss": 0.682, "step": 2138 }, { "epoch": 0.17, "grad_norm": 1.3887091694760194, "learning_rate": 9.483285716625727e-06, "loss": 0.7608, "step": 2139 }, { "epoch": 0.17, "grad_norm": 1.2378729349356958, "learning_rate": 9.482710352893549e-06, "loss": 1.1491, "step": 2140 }, { "epoch": 0.17, "grad_norm": 2.329785060480567, "learning_rate": 9.48213468647852e-06, "loss": 0.8265, "step": 2141 }, { "epoch": 0.17, "grad_norm": 1.5982615918880068, "learning_rate": 9.481558717419506e-06, "loss": 0.8724, "step": 2142 }, { "epoch": 0.17, "grad_norm": 1.481082657944046, "learning_rate": 9.4809824457554e-06, "loss": 0.8105, "step": 2143 }, { "epoch": 0.17, "grad_norm": 1.488119328039974, "learning_rate": 9.480405871525114e-06, "loss": 0.7929, "step": 2144 }, { "epoch": 0.17, "grad_norm": 1.5183815142154768, "learning_rate": 9.479828994767577e-06, "loss": 0.8464, "step": 2145 }, { "epoch": 0.17, "grad_norm": 0.9162720706906529, "learning_rate": 9.479251815521745e-06, "loss": 1.1637, "step": 2146 }, { "epoch": 0.17, "grad_norm": 0.849114843188252, "learning_rate": 9.478674333826586e-06, "loss": 1.1692, "step": 2147 }, { "epoch": 0.17, "grad_norm": 1.3414491824864267, "learning_rate": 9.478096549721094e-06, "loss": 0.6869, "step": 2148 }, { "epoch": 0.17, "grad_norm": 1.4093935836935414, "learning_rate": 9.477518463244284e-06, "loss": 0.773, "step": 2149 }, { "epoch": 0.17, "grad_norm": 1.4876030850139856, "learning_rate": 9.476940074435189e-06, "loss": 0.7885, "step": 2150 }, { "epoch": 0.17, "grad_norm": 1.616464725075642, "learning_rate": 9.476361383332864e-06, "loss": 0.8187, "step": 2151 }, { "epoch": 0.17, "grad_norm": 1.6001166081300626, "learning_rate": 9.475782389976382e-06, "loss": 0.6992, "step": 2152 }, { "epoch": 0.17, "grad_norm": 1.8570326225769496, "learning_rate": 9.475203094404836e-06, "loss": 0.7656, "step": 2153 }, { "epoch": 0.17, "grad_norm": 1.4808079056633383, "learning_rate": 9.474623496657347e-06, "loss": 0.8221, "step": 2154 }, { "epoch": 0.17, "grad_norm": 1.061787492683681, "learning_rate": 9.474043596773048e-06, "loss": 1.1292, "step": 2155 }, { "epoch": 0.17, "grad_norm": 1.6177951181536774, "learning_rate": 9.473463394791093e-06, "loss": 0.8721, "step": 2156 }, { "epoch": 0.17, "grad_norm": 1.5480323673435714, "learning_rate": 9.472882890750662e-06, "loss": 0.8089, "step": 2157 }, { "epoch": 0.17, "grad_norm": 1.437392033160396, "learning_rate": 9.472302084690948e-06, "loss": 0.826, "step": 2158 }, { "epoch": 0.17, "grad_norm": 1.5559288800446636, "learning_rate": 9.471720976651173e-06, "loss": 0.7628, "step": 2159 }, { "epoch": 0.17, "grad_norm": 1.521667628788659, "learning_rate": 9.471139566670571e-06, "loss": 0.7587, "step": 2160 }, { "epoch": 0.17, "grad_norm": 1.5129018389789217, "learning_rate": 9.470557854788402e-06, "loss": 0.8344, "step": 2161 }, { "epoch": 0.17, "grad_norm": 1.4934008841497588, "learning_rate": 9.469975841043946e-06, "loss": 0.71, "step": 2162 }, { "epoch": 0.17, "grad_norm": 1.5490401054743221, "learning_rate": 9.469393525476498e-06, "loss": 0.837, "step": 2163 }, { "epoch": 0.17, "grad_norm": 1.5786207173715618, "learning_rate": 9.468810908125379e-06, "loss": 0.8031, "step": 2164 }, { "epoch": 0.17, "grad_norm": 1.4702982481981928, "learning_rate": 9.468227989029929e-06, "loss": 0.7858, "step": 2165 }, { "epoch": 0.17, "grad_norm": 1.5124586106922184, "learning_rate": 9.467644768229509e-06, "loss": 0.8133, "step": 2166 }, { "epoch": 0.17, "grad_norm": 0.9101090939486413, "learning_rate": 9.467061245763499e-06, "loss": 1.1778, "step": 2167 }, { "epoch": 0.17, "grad_norm": 1.5839117102355542, "learning_rate": 9.466477421671296e-06, "loss": 0.8423, "step": 2168 }, { "epoch": 0.17, "grad_norm": 1.5303065091084644, "learning_rate": 9.465893295992326e-06, "loss": 0.8137, "step": 2169 }, { "epoch": 0.17, "grad_norm": 1.4725048707969537, "learning_rate": 9.46530886876603e-06, "loss": 0.7229, "step": 2170 }, { "epoch": 0.17, "grad_norm": 1.4621103243510853, "learning_rate": 9.464724140031866e-06, "loss": 0.7663, "step": 2171 }, { "epoch": 0.17, "grad_norm": 1.475005015398539, "learning_rate": 9.46413910982932e-06, "loss": 0.8533, "step": 2172 }, { "epoch": 0.17, "grad_norm": 1.3699921744937011, "learning_rate": 9.463553778197897e-06, "loss": 0.8637, "step": 2173 }, { "epoch": 0.17, "grad_norm": 1.8282297120994768, "learning_rate": 9.462968145177112e-06, "loss": 0.7457, "step": 2174 }, { "epoch": 0.17, "grad_norm": 1.518068126232067, "learning_rate": 9.462382210806514e-06, "loss": 0.8473, "step": 2175 }, { "epoch": 0.17, "grad_norm": 1.3962732002835698, "learning_rate": 9.461795975125665e-06, "loss": 0.7941, "step": 2176 }, { "epoch": 0.17, "grad_norm": 1.5919861851279307, "learning_rate": 9.461209438174148e-06, "loss": 0.8638, "step": 2177 }, { "epoch": 0.17, "grad_norm": 0.9683919122304938, "learning_rate": 9.46062259999157e-06, "loss": 1.1776, "step": 2178 }, { "epoch": 0.17, "grad_norm": 1.613195879580568, "learning_rate": 9.460035460617555e-06, "loss": 0.8854, "step": 2179 }, { "epoch": 0.17, "grad_norm": 0.8331130749215876, "learning_rate": 9.459448020091746e-06, "loss": 1.157, "step": 2180 }, { "epoch": 0.17, "grad_norm": 1.4717283642964227, "learning_rate": 9.45886027845381e-06, "loss": 0.7812, "step": 2181 }, { "epoch": 0.18, "grad_norm": 1.5023710772068761, "learning_rate": 9.458272235743434e-06, "loss": 0.7815, "step": 2182 }, { "epoch": 0.18, "grad_norm": 1.4486567946514675, "learning_rate": 9.457683892000318e-06, "loss": 0.8181, "step": 2183 }, { "epoch": 0.18, "grad_norm": 1.6591626242786128, "learning_rate": 9.457095247264197e-06, "loss": 0.9463, "step": 2184 }, { "epoch": 0.18, "grad_norm": 1.045601299762233, "learning_rate": 9.45650630157481e-06, "loss": 1.1608, "step": 2185 }, { "epoch": 0.18, "grad_norm": 1.5799418955524047, "learning_rate": 9.455917054971929e-06, "loss": 0.8853, "step": 2186 }, { "epoch": 0.18, "grad_norm": 1.6725159178427151, "learning_rate": 9.455327507495338e-06, "loss": 0.7993, "step": 2187 }, { "epoch": 0.18, "grad_norm": 1.6892802921167822, "learning_rate": 9.454737659184845e-06, "loss": 0.7906, "step": 2188 }, { "epoch": 0.18, "grad_norm": 1.5480470618414361, "learning_rate": 9.45414751008028e-06, "loss": 0.855, "step": 2189 }, { "epoch": 0.18, "grad_norm": 1.5052535639480646, "learning_rate": 9.45355706022149e-06, "loss": 0.7726, "step": 2190 }, { "epoch": 0.18, "grad_norm": 1.6103094544191743, "learning_rate": 9.452966309648347e-06, "loss": 0.8683, "step": 2191 }, { "epoch": 0.18, "grad_norm": 1.0160254135878146, "learning_rate": 9.452375258400732e-06, "loss": 1.1234, "step": 2192 }, { "epoch": 0.18, "grad_norm": 1.5585288104324018, "learning_rate": 9.451783906518558e-06, "loss": 0.7273, "step": 2193 }, { "epoch": 0.18, "grad_norm": 1.4594836811495433, "learning_rate": 9.451192254041759e-06, "loss": 0.7998, "step": 2194 }, { "epoch": 0.18, "grad_norm": 1.4877333153410297, "learning_rate": 9.450600301010279e-06, "loss": 0.8391, "step": 2195 }, { "epoch": 0.18, "grad_norm": 1.5242492907421654, "learning_rate": 9.45000804746409e-06, "loss": 0.8078, "step": 2196 }, { "epoch": 0.18, "grad_norm": 1.514317118877034, "learning_rate": 9.449415493443181e-06, "loss": 0.8065, "step": 2197 }, { "epoch": 0.18, "grad_norm": 0.8835591538195738, "learning_rate": 9.448822638987564e-06, "loss": 1.1597, "step": 2198 }, { "epoch": 0.18, "grad_norm": 1.3962471594885688, "learning_rate": 9.44822948413727e-06, "loss": 0.8879, "step": 2199 }, { "epoch": 0.18, "grad_norm": 1.489889452005428, "learning_rate": 9.44763602893235e-06, "loss": 0.8837, "step": 2200 }, { "epoch": 0.18, "grad_norm": 1.4156134482077132, "learning_rate": 9.447042273412873e-06, "loss": 0.7768, "step": 2201 }, { "epoch": 0.18, "grad_norm": 1.462114683557008, "learning_rate": 9.446448217618935e-06, "loss": 0.8187, "step": 2202 }, { "epoch": 0.18, "grad_norm": 1.693005438792916, "learning_rate": 9.445853861590647e-06, "loss": 0.8404, "step": 2203 }, { "epoch": 0.18, "grad_norm": 1.5564941247757622, "learning_rate": 9.445259205368138e-06, "loss": 0.7549, "step": 2204 }, { "epoch": 0.18, "grad_norm": 0.8587664991133772, "learning_rate": 9.444664248991563e-06, "loss": 1.1196, "step": 2205 }, { "epoch": 0.18, "grad_norm": 1.523464610621261, "learning_rate": 9.444068992501097e-06, "loss": 0.7609, "step": 2206 }, { "epoch": 0.18, "grad_norm": 0.8041221973951086, "learning_rate": 9.44347343593693e-06, "loss": 1.148, "step": 2207 }, { "epoch": 0.18, "grad_norm": 1.5137151896952397, "learning_rate": 9.442877579339273e-06, "loss": 0.771, "step": 2208 }, { "epoch": 0.18, "grad_norm": 1.5062186662026789, "learning_rate": 9.442281422748365e-06, "loss": 0.8607, "step": 2209 }, { "epoch": 0.18, "grad_norm": 1.5244668676406488, "learning_rate": 9.441684966204456e-06, "loss": 0.7913, "step": 2210 }, { "epoch": 0.18, "grad_norm": 1.4977607566378373, "learning_rate": 9.441088209747823e-06, "loss": 0.8155, "step": 2211 }, { "epoch": 0.18, "grad_norm": 1.521428901776055, "learning_rate": 9.440491153418759e-06, "loss": 0.8538, "step": 2212 }, { "epoch": 0.18, "grad_norm": 1.589509863167939, "learning_rate": 9.439893797257578e-06, "loss": 0.8432, "step": 2213 }, { "epoch": 0.18, "grad_norm": 1.4668184795811325, "learning_rate": 9.439296141304615e-06, "loss": 0.7854, "step": 2214 }, { "epoch": 0.18, "grad_norm": 1.532414283691502, "learning_rate": 9.438698185600226e-06, "loss": 0.8819, "step": 2215 }, { "epoch": 0.18, "grad_norm": 1.5229161899686, "learning_rate": 9.438099930184783e-06, "loss": 0.9098, "step": 2216 }, { "epoch": 0.18, "grad_norm": 1.5428311306161095, "learning_rate": 9.437501375098688e-06, "loss": 0.7601, "step": 2217 }, { "epoch": 0.18, "grad_norm": 1.5097778559357995, "learning_rate": 9.436902520382352e-06, "loss": 0.8351, "step": 2218 }, { "epoch": 0.18, "grad_norm": 1.5314460971829593, "learning_rate": 9.436303366076213e-06, "loss": 0.8206, "step": 2219 }, { "epoch": 0.18, "grad_norm": 1.5923034782512633, "learning_rate": 9.435703912220727e-06, "loss": 0.9379, "step": 2220 }, { "epoch": 0.18, "grad_norm": 1.531339849717247, "learning_rate": 9.435104158856367e-06, "loss": 0.8075, "step": 2221 }, { "epoch": 0.18, "grad_norm": 1.5220375979160192, "learning_rate": 9.434504106023634e-06, "loss": 0.8673, "step": 2222 }, { "epoch": 0.18, "grad_norm": 1.6157550333426582, "learning_rate": 9.433903753763045e-06, "loss": 0.7937, "step": 2223 }, { "epoch": 0.18, "grad_norm": 1.528030876968063, "learning_rate": 9.433303102115136e-06, "loss": 0.7806, "step": 2224 }, { "epoch": 0.18, "grad_norm": 1.5008505563884982, "learning_rate": 9.432702151120464e-06, "loss": 0.8813, "step": 2225 }, { "epoch": 0.18, "grad_norm": 1.5978982993623998, "learning_rate": 9.432100900819604e-06, "loss": 0.8872, "step": 2226 }, { "epoch": 0.18, "grad_norm": 1.4225618312662536, "learning_rate": 9.431499351253159e-06, "loss": 0.8822, "step": 2227 }, { "epoch": 0.18, "grad_norm": 1.5752846964676948, "learning_rate": 9.430897502461745e-06, "loss": 0.8144, "step": 2228 }, { "epoch": 0.18, "grad_norm": 0.9712858789887392, "learning_rate": 9.430295354485999e-06, "loss": 1.1575, "step": 2229 }, { "epoch": 0.18, "grad_norm": 1.5510150067171729, "learning_rate": 9.42969290736658e-06, "loss": 0.8738, "step": 2230 }, { "epoch": 0.18, "grad_norm": 1.4580628567400207, "learning_rate": 9.429090161144166e-06, "loss": 0.8383, "step": 2231 }, { "epoch": 0.18, "grad_norm": 1.4257216096123133, "learning_rate": 9.428487115859458e-06, "loss": 0.7807, "step": 2232 }, { "epoch": 0.18, "grad_norm": 1.492338976508945, "learning_rate": 9.427883771553172e-06, "loss": 0.8373, "step": 2233 }, { "epoch": 0.18, "grad_norm": 0.9378096728603565, "learning_rate": 9.427280128266049e-06, "loss": 1.1601, "step": 2234 }, { "epoch": 0.18, "grad_norm": 1.50116159122152, "learning_rate": 9.42667618603885e-06, "loss": 0.8255, "step": 2235 }, { "epoch": 0.18, "grad_norm": 1.5522154223309075, "learning_rate": 9.426071944912351e-06, "loss": 0.8468, "step": 2236 }, { "epoch": 0.18, "grad_norm": 0.8231893312153317, "learning_rate": 9.425467404927356e-06, "loss": 1.1747, "step": 2237 }, { "epoch": 0.18, "grad_norm": 1.496687113134294, "learning_rate": 9.42486256612468e-06, "loss": 0.8029, "step": 2238 }, { "epoch": 0.18, "grad_norm": 1.4609103786613773, "learning_rate": 9.424257428545166e-06, "loss": 0.7729, "step": 2239 }, { "epoch": 0.18, "grad_norm": 1.5120034068362511, "learning_rate": 9.423651992229673e-06, "loss": 0.8157, "step": 2240 }, { "epoch": 0.18, "grad_norm": 0.9048209561668592, "learning_rate": 9.423046257219083e-06, "loss": 1.1542, "step": 2241 }, { "epoch": 0.18, "grad_norm": 0.8710660185885328, "learning_rate": 9.422440223554296e-06, "loss": 1.1207, "step": 2242 }, { "epoch": 0.18, "grad_norm": 1.5456547325887937, "learning_rate": 9.421833891276233e-06, "loss": 0.8568, "step": 2243 }, { "epoch": 0.18, "grad_norm": 0.8183570069267132, "learning_rate": 9.421227260425834e-06, "loss": 1.1735, "step": 2244 }, { "epoch": 0.18, "grad_norm": 1.4798376334199836, "learning_rate": 9.42062033104406e-06, "loss": 0.8513, "step": 2245 }, { "epoch": 0.18, "grad_norm": 1.5676482686965014, "learning_rate": 9.420013103171893e-06, "loss": 0.8467, "step": 2246 }, { "epoch": 0.18, "grad_norm": 1.6402454840549179, "learning_rate": 9.419405576850334e-06, "loss": 0.8313, "step": 2247 }, { "epoch": 0.18, "grad_norm": 1.6494450064767665, "learning_rate": 9.418797752120406e-06, "loss": 0.7398, "step": 2248 }, { "epoch": 0.18, "grad_norm": 1.504794867567575, "learning_rate": 9.418189629023149e-06, "loss": 0.8776, "step": 2249 }, { "epoch": 0.18, "grad_norm": 1.504432324814278, "learning_rate": 9.417581207599626e-06, "loss": 0.7632, "step": 2250 }, { "epoch": 0.18, "grad_norm": 1.4885288995658281, "learning_rate": 9.416972487890918e-06, "loss": 0.826, "step": 2251 }, { "epoch": 0.18, "grad_norm": 1.6147520129410498, "learning_rate": 9.416363469938128e-06, "loss": 0.8256, "step": 2252 }, { "epoch": 0.18, "grad_norm": 1.054392646446437, "learning_rate": 9.415754153782377e-06, "loss": 1.1384, "step": 2253 }, { "epoch": 0.18, "grad_norm": 1.591742564460529, "learning_rate": 9.415144539464809e-06, "loss": 0.8422, "step": 2254 }, { "epoch": 0.18, "grad_norm": 1.4047579033837214, "learning_rate": 9.414534627026586e-06, "loss": 0.8815, "step": 2255 }, { "epoch": 0.18, "grad_norm": 0.7994289258791244, "learning_rate": 9.413924416508891e-06, "loss": 1.1246, "step": 2256 }, { "epoch": 0.18, "grad_norm": 1.6376112406253958, "learning_rate": 9.413313907952925e-06, "loss": 0.8004, "step": 2257 }, { "epoch": 0.18, "grad_norm": 0.7872891826227746, "learning_rate": 9.412703101399912e-06, "loss": 1.174, "step": 2258 }, { "epoch": 0.18, "grad_norm": 1.4623619136243506, "learning_rate": 9.412091996891097e-06, "loss": 0.8262, "step": 2259 }, { "epoch": 0.18, "grad_norm": 0.8332610993397026, "learning_rate": 9.41148059446774e-06, "loss": 1.1363, "step": 2260 }, { "epoch": 0.18, "grad_norm": 0.7931089506990561, "learning_rate": 9.410868894171126e-06, "loss": 1.1443, "step": 2261 }, { "epoch": 0.18, "grad_norm": 1.5058739310972282, "learning_rate": 9.410256896042558e-06, "loss": 0.8147, "step": 2262 }, { "epoch": 0.18, "grad_norm": 1.5064991891967874, "learning_rate": 9.409644600123362e-06, "loss": 0.8847, "step": 2263 }, { "epoch": 0.18, "grad_norm": 1.8471921859416185, "learning_rate": 9.409032006454877e-06, "loss": 0.8282, "step": 2264 }, { "epoch": 0.18, "grad_norm": 1.5030017921694547, "learning_rate": 9.40841911507847e-06, "loss": 0.8305, "step": 2265 }, { "epoch": 0.18, "grad_norm": 1.513720679097505, "learning_rate": 9.407805926035524e-06, "loss": 0.8168, "step": 2266 }, { "epoch": 0.18, "grad_norm": 1.5353803914990405, "learning_rate": 9.407192439367443e-06, "loss": 0.8694, "step": 2267 }, { "epoch": 0.18, "grad_norm": 1.609652548386237, "learning_rate": 9.40657865511565e-06, "loss": 0.8194, "step": 2268 }, { "epoch": 0.18, "grad_norm": 1.3829395790122831, "learning_rate": 9.40596457332159e-06, "loss": 0.7911, "step": 2269 }, { "epoch": 0.18, "grad_norm": 1.5197321795379435, "learning_rate": 9.405350194026728e-06, "loss": 0.9147, "step": 2270 }, { "epoch": 0.18, "grad_norm": 1.5624644065103857, "learning_rate": 9.404735517272547e-06, "loss": 0.9, "step": 2271 }, { "epoch": 0.18, "grad_norm": 1.468519599234418, "learning_rate": 9.404120543100553e-06, "loss": 0.7932, "step": 2272 }, { "epoch": 0.18, "grad_norm": 1.0451410344294818, "learning_rate": 9.403505271552267e-06, "loss": 1.159, "step": 2273 }, { "epoch": 0.18, "grad_norm": 1.5336142217449362, "learning_rate": 9.402889702669235e-06, "loss": 0.9369, "step": 2274 }, { "epoch": 0.18, "grad_norm": 1.5617754734150564, "learning_rate": 9.402273836493026e-06, "loss": 0.7928, "step": 2275 }, { "epoch": 0.18, "grad_norm": 1.651424888578387, "learning_rate": 9.401657673065218e-06, "loss": 0.7777, "step": 2276 }, { "epoch": 0.18, "grad_norm": 1.4146233181646815, "learning_rate": 9.401041212427422e-06, "loss": 0.86, "step": 2277 }, { "epoch": 0.18, "grad_norm": 1.5130278283894125, "learning_rate": 9.400424454621258e-06, "loss": 0.8358, "step": 2278 }, { "epoch": 0.18, "grad_norm": 1.589797501295723, "learning_rate": 9.399807399688371e-06, "loss": 0.9157, "step": 2279 }, { "epoch": 0.18, "grad_norm": 1.5172480715960457, "learning_rate": 9.39919004767043e-06, "loss": 0.8279, "step": 2280 }, { "epoch": 0.18, "grad_norm": 1.436918572201185, "learning_rate": 9.398572398609118e-06, "loss": 0.8488, "step": 2281 }, { "epoch": 0.18, "grad_norm": 1.9002967569532419, "learning_rate": 9.397954452546139e-06, "loss": 0.9042, "step": 2282 }, { "epoch": 0.18, "grad_norm": 1.0579833345622311, "learning_rate": 9.397336209523218e-06, "loss": 1.1243, "step": 2283 }, { "epoch": 0.18, "grad_norm": 0.940718754472445, "learning_rate": 9.396717669582102e-06, "loss": 1.1522, "step": 2284 }, { "epoch": 0.18, "grad_norm": 1.4047347674541666, "learning_rate": 9.396098832764555e-06, "loss": 0.8598, "step": 2285 }, { "epoch": 0.18, "grad_norm": 1.525875454032734, "learning_rate": 9.395479699112363e-06, "loss": 0.7459, "step": 2286 }, { "epoch": 0.18, "grad_norm": 1.4609041845155042, "learning_rate": 9.394860268667329e-06, "loss": 0.7814, "step": 2287 }, { "epoch": 0.18, "grad_norm": 1.6864513463658968, "learning_rate": 9.394240541471282e-06, "loss": 0.8137, "step": 2288 }, { "epoch": 0.18, "grad_norm": 1.3718549209765878, "learning_rate": 9.393620517566066e-06, "loss": 0.7469, "step": 2289 }, { "epoch": 0.18, "grad_norm": 1.4887363232867643, "learning_rate": 9.393000196993544e-06, "loss": 0.7297, "step": 2290 }, { "epoch": 0.18, "grad_norm": 1.493873494780687, "learning_rate": 9.392379579795605e-06, "loss": 1.1464, "step": 2291 }, { "epoch": 0.18, "grad_norm": 1.6129748487171538, "learning_rate": 9.391758666014152e-06, "loss": 0.9141, "step": 2292 }, { "epoch": 0.18, "grad_norm": 1.5571412809063239, "learning_rate": 9.391137455691113e-06, "loss": 0.8315, "step": 2293 }, { "epoch": 0.18, "grad_norm": 1.4928072914762776, "learning_rate": 9.39051594886843e-06, "loss": 0.8963, "step": 2294 }, { "epoch": 0.18, "grad_norm": 1.505424809303369, "learning_rate": 9.389894145588072e-06, "loss": 0.8277, "step": 2295 }, { "epoch": 0.18, "grad_norm": 1.4809810596085007, "learning_rate": 9.389272045892023e-06, "loss": 0.8328, "step": 2296 }, { "epoch": 0.18, "grad_norm": 1.4832405102914936, "learning_rate": 9.388649649822289e-06, "loss": 0.8408, "step": 2297 }, { "epoch": 0.18, "grad_norm": 1.49164874286212, "learning_rate": 9.388026957420895e-06, "loss": 0.8173, "step": 2298 }, { "epoch": 0.18, "grad_norm": 1.6237531092560462, "learning_rate": 9.387403968729887e-06, "loss": 0.7891, "step": 2299 }, { "epoch": 0.18, "grad_norm": 1.5936813063153645, "learning_rate": 9.386780683791331e-06, "loss": 0.81, "step": 2300 }, { "epoch": 0.18, "grad_norm": 1.763657237633359, "learning_rate": 9.386157102647312e-06, "loss": 0.8504, "step": 2301 }, { "epoch": 0.18, "grad_norm": 1.6730909591583305, "learning_rate": 9.385533225339936e-06, "loss": 0.8159, "step": 2302 }, { "epoch": 0.18, "grad_norm": 1.6353347214690444, "learning_rate": 9.384909051911329e-06, "loss": 0.914, "step": 2303 }, { "epoch": 0.18, "grad_norm": 1.4862383479040204, "learning_rate": 9.384284582403636e-06, "loss": 0.7922, "step": 2304 }, { "epoch": 0.18, "grad_norm": 1.4605269964234635, "learning_rate": 9.38365981685902e-06, "loss": 0.8063, "step": 2305 }, { "epoch": 0.19, "grad_norm": 1.5238915804119315, "learning_rate": 9.383034755319673e-06, "loss": 0.7339, "step": 2306 }, { "epoch": 0.19, "grad_norm": 1.470695900606466, "learning_rate": 9.382409397827794e-06, "loss": 0.7251, "step": 2307 }, { "epoch": 0.19, "grad_norm": 1.0386695209242822, "learning_rate": 9.381783744425615e-06, "loss": 1.1441, "step": 2308 }, { "epoch": 0.19, "grad_norm": 1.7039258012960135, "learning_rate": 9.381157795155374e-06, "loss": 0.8552, "step": 2309 }, { "epoch": 0.19, "grad_norm": 0.8229847245522032, "learning_rate": 9.380531550059345e-06, "loss": 1.1296, "step": 2310 }, { "epoch": 0.19, "grad_norm": 0.8054768888921484, "learning_rate": 9.379905009179804e-06, "loss": 1.135, "step": 2311 }, { "epoch": 0.19, "grad_norm": 1.4479348511614942, "learning_rate": 9.379278172559065e-06, "loss": 0.8206, "step": 2312 }, { "epoch": 0.19, "grad_norm": 1.604122916752245, "learning_rate": 9.378651040239449e-06, "loss": 0.7975, "step": 2313 }, { "epoch": 0.19, "grad_norm": 1.5896132157466434, "learning_rate": 9.378023612263302e-06, "loss": 0.822, "step": 2314 }, { "epoch": 0.19, "grad_norm": 1.5120229839977333, "learning_rate": 9.37739588867299e-06, "loss": 0.857, "step": 2315 }, { "epoch": 0.19, "grad_norm": 1.245497066603633, "learning_rate": 9.376767869510899e-06, "loss": 1.1649, "step": 2316 }, { "epoch": 0.19, "grad_norm": 1.688712130060152, "learning_rate": 9.376139554819432e-06, "loss": 0.7953, "step": 2317 }, { "epoch": 0.19, "grad_norm": 1.5435441696900718, "learning_rate": 9.375510944641017e-06, "loss": 0.8395, "step": 2318 }, { "epoch": 0.19, "grad_norm": 1.5082908244345896, "learning_rate": 9.374882039018096e-06, "loss": 0.7597, "step": 2319 }, { "epoch": 0.19, "grad_norm": 1.5175740095678978, "learning_rate": 9.374252837993137e-06, "loss": 0.8859, "step": 2320 }, { "epoch": 0.19, "grad_norm": 1.5428567372431767, "learning_rate": 9.373623341608624e-06, "loss": 0.974, "step": 2321 }, { "epoch": 0.19, "grad_norm": 1.5227664673621937, "learning_rate": 9.372993549907063e-06, "loss": 0.8223, "step": 2322 }, { "epoch": 0.19, "grad_norm": 1.5509384902668868, "learning_rate": 9.372363462930976e-06, "loss": 0.7721, "step": 2323 }, { "epoch": 0.19, "grad_norm": 1.4596386820978262, "learning_rate": 9.371733080722911e-06, "loss": 0.8031, "step": 2324 }, { "epoch": 0.19, "grad_norm": 1.5212226482078324, "learning_rate": 9.371102403325432e-06, "loss": 0.8203, "step": 2325 }, { "epoch": 0.19, "grad_norm": 1.6525396996155555, "learning_rate": 9.370471430781123e-06, "loss": 0.8624, "step": 2326 }, { "epoch": 0.19, "grad_norm": 1.6535655101983044, "learning_rate": 9.36984016313259e-06, "loss": 0.8495, "step": 2327 }, { "epoch": 0.19, "grad_norm": 1.6378609275448528, "learning_rate": 9.369208600422458e-06, "loss": 0.852, "step": 2328 }, { "epoch": 0.19, "grad_norm": 1.0669061825894104, "learning_rate": 9.368576742693369e-06, "loss": 1.1588, "step": 2329 }, { "epoch": 0.19, "grad_norm": 1.6436487702130977, "learning_rate": 9.36794458998799e-06, "loss": 0.8615, "step": 2330 }, { "epoch": 0.19, "grad_norm": 1.658725649119393, "learning_rate": 9.367312142349003e-06, "loss": 0.8523, "step": 2331 }, { "epoch": 0.19, "grad_norm": 1.4229937765685026, "learning_rate": 9.366679399819115e-06, "loss": 0.8424, "step": 2332 }, { "epoch": 0.19, "grad_norm": 1.451585656628338, "learning_rate": 9.366046362441047e-06, "loss": 0.8944, "step": 2333 }, { "epoch": 0.19, "grad_norm": 0.8821858459976993, "learning_rate": 9.365413030257546e-06, "loss": 1.1295, "step": 2334 }, { "epoch": 0.19, "grad_norm": 1.4959518534594562, "learning_rate": 9.364779403311375e-06, "loss": 0.8526, "step": 2335 }, { "epoch": 0.19, "grad_norm": 1.5124655267061424, "learning_rate": 9.36414548164532e-06, "loss": 0.7985, "step": 2336 }, { "epoch": 0.19, "grad_norm": 1.5384832908460226, "learning_rate": 9.36351126530218e-06, "loss": 0.7325, "step": 2337 }, { "epoch": 0.19, "grad_norm": 0.8086932569663726, "learning_rate": 9.362876754324784e-06, "loss": 1.17, "step": 2338 }, { "epoch": 0.19, "grad_norm": 1.6517227970793549, "learning_rate": 9.36224194875597e-06, "loss": 0.818, "step": 2339 }, { "epoch": 0.19, "grad_norm": 1.7307418044484764, "learning_rate": 9.361606848638607e-06, "loss": 0.7954, "step": 2340 }, { "epoch": 0.19, "grad_norm": 1.4615989301131884, "learning_rate": 9.360971454015577e-06, "loss": 0.875, "step": 2341 }, { "epoch": 0.19, "grad_norm": 1.3607220998225291, "learning_rate": 9.360335764929781e-06, "loss": 0.7927, "step": 2342 }, { "epoch": 0.19, "grad_norm": 1.4229704985421556, "learning_rate": 9.359699781424144e-06, "loss": 0.8321, "step": 2343 }, { "epoch": 0.19, "grad_norm": 1.533270345653547, "learning_rate": 9.359063503541609e-06, "loss": 0.7553, "step": 2344 }, { "epoch": 0.19, "grad_norm": 1.5394674834929325, "learning_rate": 9.358426931325137e-06, "loss": 0.9304, "step": 2345 }, { "epoch": 0.19, "grad_norm": 1.5560860093133215, "learning_rate": 9.357790064817715e-06, "loss": 0.8497, "step": 2346 }, { "epoch": 0.19, "grad_norm": 1.4672742368659368, "learning_rate": 9.357152904062342e-06, "loss": 0.7511, "step": 2347 }, { "epoch": 0.19, "grad_norm": 1.5284672390833025, "learning_rate": 9.356515449102041e-06, "loss": 0.8416, "step": 2348 }, { "epoch": 0.19, "grad_norm": 1.423839998102108, "learning_rate": 9.355877699979856e-06, "loss": 0.8061, "step": 2349 }, { "epoch": 0.19, "grad_norm": 1.5454660096995154, "learning_rate": 9.355239656738849e-06, "loss": 0.8088, "step": 2350 }, { "epoch": 0.19, "grad_norm": 1.4643021111025343, "learning_rate": 9.354601319422099e-06, "loss": 0.7818, "step": 2351 }, { "epoch": 0.19, "grad_norm": 1.5083261911109935, "learning_rate": 9.353962688072713e-06, "loss": 0.8328, "step": 2352 }, { "epoch": 0.19, "grad_norm": 1.4342587858732563, "learning_rate": 9.35332376273381e-06, "loss": 0.7935, "step": 2353 }, { "epoch": 0.19, "grad_norm": 1.5032997842952949, "learning_rate": 9.352684543448532e-06, "loss": 0.7813, "step": 2354 }, { "epoch": 0.19, "grad_norm": 1.5906871926804294, "learning_rate": 9.35204503026004e-06, "loss": 0.8072, "step": 2355 }, { "epoch": 0.19, "grad_norm": 1.3883537644422235, "learning_rate": 9.351405223211517e-06, "loss": 0.741, "step": 2356 }, { "epoch": 0.19, "grad_norm": 1.6477209592182354, "learning_rate": 9.350765122346162e-06, "loss": 0.8621, "step": 2357 }, { "epoch": 0.19, "grad_norm": 0.9100279066147571, "learning_rate": 9.350124727707197e-06, "loss": 1.1618, "step": 2358 }, { "epoch": 0.19, "grad_norm": 1.5167882830320787, "learning_rate": 9.349484039337864e-06, "loss": 0.8254, "step": 2359 }, { "epoch": 0.19, "grad_norm": 0.8438362803754844, "learning_rate": 9.348843057281423e-06, "loss": 1.1377, "step": 2360 }, { "epoch": 0.19, "grad_norm": 1.5818584461513054, "learning_rate": 9.348201781581154e-06, "loss": 0.817, "step": 2361 }, { "epoch": 0.19, "grad_norm": 1.524839122679668, "learning_rate": 9.347560212280359e-06, "loss": 0.8146, "step": 2362 }, { "epoch": 0.19, "grad_norm": 1.480583008177718, "learning_rate": 9.346918349422356e-06, "loss": 0.8449, "step": 2363 }, { "epoch": 0.19, "grad_norm": 1.5257778025003537, "learning_rate": 9.346276193050488e-06, "loss": 0.7951, "step": 2364 }, { "epoch": 0.19, "grad_norm": 1.505932257298453, "learning_rate": 9.345633743208112e-06, "loss": 0.8322, "step": 2365 }, { "epoch": 0.19, "grad_norm": 1.5057411829192455, "learning_rate": 9.344990999938609e-06, "loss": 0.7622, "step": 2366 }, { "epoch": 0.19, "grad_norm": 1.4843200979777433, "learning_rate": 9.344347963285376e-06, "loss": 0.8292, "step": 2367 }, { "epoch": 0.19, "grad_norm": 1.5398148645388707, "learning_rate": 9.343704633291836e-06, "loss": 0.8079, "step": 2368 }, { "epoch": 0.19, "grad_norm": 1.4276558757653097, "learning_rate": 9.343061010001428e-06, "loss": 0.8099, "step": 2369 }, { "epoch": 0.19, "grad_norm": 1.0748682044265536, "learning_rate": 9.34241709345761e-06, "loss": 1.1512, "step": 2370 }, { "epoch": 0.19, "grad_norm": 1.4111492860890478, "learning_rate": 9.341772883703859e-06, "loss": 0.6905, "step": 2371 }, { "epoch": 0.19, "grad_norm": 1.57202516713628, "learning_rate": 9.341128380783674e-06, "loss": 0.8622, "step": 2372 }, { "epoch": 0.19, "grad_norm": 1.5025490337411262, "learning_rate": 9.340483584740576e-06, "loss": 0.7899, "step": 2373 }, { "epoch": 0.19, "grad_norm": 1.590149078433891, "learning_rate": 9.3398384956181e-06, "loss": 0.8923, "step": 2374 }, { "epoch": 0.19, "grad_norm": 1.5599930091379783, "learning_rate": 9.339193113459805e-06, "loss": 0.8719, "step": 2375 }, { "epoch": 0.19, "grad_norm": 1.5609913830512618, "learning_rate": 9.33854743830927e-06, "loss": 0.8209, "step": 2376 }, { "epoch": 0.19, "grad_norm": 1.5585473280546345, "learning_rate": 9.33790147021009e-06, "loss": 0.8199, "step": 2377 }, { "epoch": 0.19, "grad_norm": 1.506499313653081, "learning_rate": 9.337255209205884e-06, "loss": 0.7942, "step": 2378 }, { "epoch": 0.19, "grad_norm": 0.9669577121380751, "learning_rate": 9.336608655340289e-06, "loss": 1.163, "step": 2379 }, { "epoch": 0.19, "grad_norm": 1.516845257474133, "learning_rate": 9.335961808656961e-06, "loss": 0.8396, "step": 2380 }, { "epoch": 0.19, "grad_norm": 1.5110682536259774, "learning_rate": 9.335314669199576e-06, "loss": 0.736, "step": 2381 }, { "epoch": 0.19, "grad_norm": 0.8013545142156087, "learning_rate": 9.334667237011832e-06, "loss": 1.1651, "step": 2382 }, { "epoch": 0.19, "grad_norm": 0.8016399957230045, "learning_rate": 9.334019512137444e-06, "loss": 1.1514, "step": 2383 }, { "epoch": 0.19, "grad_norm": 1.6806599893604186, "learning_rate": 9.33337149462015e-06, "loss": 0.8402, "step": 2384 }, { "epoch": 0.19, "grad_norm": 1.6376706679120825, "learning_rate": 9.332723184503702e-06, "loss": 0.857, "step": 2385 }, { "epoch": 0.19, "grad_norm": 1.4435748357656826, "learning_rate": 9.332074581831879e-06, "loss": 0.8358, "step": 2386 }, { "epoch": 0.19, "grad_norm": 1.6541935567071493, "learning_rate": 9.331425686648472e-06, "loss": 0.7909, "step": 2387 }, { "epoch": 0.19, "grad_norm": 1.4425880030565568, "learning_rate": 9.330776498997299e-06, "loss": 0.8275, "step": 2388 }, { "epoch": 0.19, "grad_norm": 1.455407351325235, "learning_rate": 9.330127018922195e-06, "loss": 0.8746, "step": 2389 }, { "epoch": 0.19, "grad_norm": 1.3703728211259008, "learning_rate": 9.32947724646701e-06, "loss": 0.8046, "step": 2390 }, { "epoch": 0.19, "grad_norm": 1.5227572964435723, "learning_rate": 9.328827181675626e-06, "loss": 0.791, "step": 2391 }, { "epoch": 0.19, "grad_norm": 1.5154751568080274, "learning_rate": 9.328176824591928e-06, "loss": 0.846, "step": 2392 }, { "epoch": 0.19, "grad_norm": 1.559916459077352, "learning_rate": 9.327526175259837e-06, "loss": 0.8182, "step": 2393 }, { "epoch": 0.19, "grad_norm": 1.6595893841742924, "learning_rate": 9.326875233723282e-06, "loss": 0.8672, "step": 2394 }, { "epoch": 0.19, "grad_norm": 1.433388852378212, "learning_rate": 9.326224000026217e-06, "loss": 0.8516, "step": 2395 }, { "epoch": 0.19, "grad_norm": 1.5409636474799226, "learning_rate": 9.325572474212615e-06, "loss": 0.8178, "step": 2396 }, { "epoch": 0.19, "grad_norm": 1.4928274800591503, "learning_rate": 9.324920656326468e-06, "loss": 0.8737, "step": 2397 }, { "epoch": 0.19, "grad_norm": 1.532936774849093, "learning_rate": 9.32426854641179e-06, "loss": 0.9262, "step": 2398 }, { "epoch": 0.19, "grad_norm": 1.4693532939019567, "learning_rate": 9.323616144512612e-06, "loss": 0.8259, "step": 2399 }, { "epoch": 0.19, "grad_norm": 1.4717795455295621, "learning_rate": 9.322963450672984e-06, "loss": 0.7841, "step": 2400 }, { "epoch": 0.19, "grad_norm": 1.4355582691699873, "learning_rate": 9.322310464936979e-06, "loss": 0.7734, "step": 2401 }, { "epoch": 0.19, "grad_norm": 1.5155085427752697, "learning_rate": 9.321657187348689e-06, "loss": 0.8711, "step": 2402 }, { "epoch": 0.19, "grad_norm": 1.389644166843602, "learning_rate": 9.321003617952222e-06, "loss": 0.7172, "step": 2403 }, { "epoch": 0.19, "grad_norm": 1.0418753545268298, "learning_rate": 9.32034975679171e-06, "loss": 1.1317, "step": 2404 }, { "epoch": 0.19, "grad_norm": 1.4722193976993903, "learning_rate": 9.319695603911306e-06, "loss": 0.8532, "step": 2405 }, { "epoch": 0.19, "grad_norm": 1.4569090508411682, "learning_rate": 9.319041159355175e-06, "loss": 0.8036, "step": 2406 }, { "epoch": 0.19, "grad_norm": 0.8756320403946444, "learning_rate": 9.318386423167508e-06, "loss": 1.1455, "step": 2407 }, { "epoch": 0.19, "grad_norm": 0.8003882782038089, "learning_rate": 9.317731395392517e-06, "loss": 1.1308, "step": 2408 }, { "epoch": 0.19, "grad_norm": 1.5484745029500007, "learning_rate": 9.31707607607443e-06, "loss": 0.8801, "step": 2409 }, { "epoch": 0.19, "grad_norm": 1.5662333647510236, "learning_rate": 9.316420465257492e-06, "loss": 0.7722, "step": 2410 }, { "epoch": 0.19, "grad_norm": 0.9511981292383505, "learning_rate": 9.315764562985976e-06, "loss": 1.0938, "step": 2411 }, { "epoch": 0.19, "grad_norm": 1.8798321803541076, "learning_rate": 9.315108369304168e-06, "loss": 0.8748, "step": 2412 }, { "epoch": 0.19, "grad_norm": 1.6020128456646812, "learning_rate": 9.314451884256376e-06, "loss": 0.8416, "step": 2413 }, { "epoch": 0.19, "grad_norm": 1.5082439629937063, "learning_rate": 9.313795107886925e-06, "loss": 0.8357, "step": 2414 }, { "epoch": 0.19, "grad_norm": 1.4855274676809238, "learning_rate": 9.313138040240167e-06, "loss": 0.7872, "step": 2415 }, { "epoch": 0.19, "grad_norm": 1.494226939904471, "learning_rate": 9.312480681360465e-06, "loss": 0.8576, "step": 2416 }, { "epoch": 0.19, "grad_norm": 1.444996065038447, "learning_rate": 9.311823031292205e-06, "loss": 0.872, "step": 2417 }, { "epoch": 0.19, "grad_norm": 1.4986834951295838, "learning_rate": 9.311165090079795e-06, "loss": 0.7465, "step": 2418 }, { "epoch": 0.19, "grad_norm": 1.5255595701778242, "learning_rate": 9.310506857767662e-06, "loss": 0.7477, "step": 2419 }, { "epoch": 0.19, "grad_norm": 0.9497616453770169, "learning_rate": 9.309848334400247e-06, "loss": 1.1239, "step": 2420 }, { "epoch": 0.19, "grad_norm": 0.941344763717746, "learning_rate": 9.309189520022018e-06, "loss": 1.1396, "step": 2421 }, { "epoch": 0.19, "grad_norm": 1.5229017025372245, "learning_rate": 9.308530414677459e-06, "loss": 0.8217, "step": 2422 }, { "epoch": 0.19, "grad_norm": 1.65285160590843, "learning_rate": 9.307871018411074e-06, "loss": 0.7857, "step": 2423 }, { "epoch": 0.19, "grad_norm": 1.525867278169634, "learning_rate": 9.307211331267389e-06, "loss": 0.7443, "step": 2424 }, { "epoch": 0.19, "grad_norm": 1.6742388987345733, "learning_rate": 9.306551353290945e-06, "loss": 0.7903, "step": 2425 }, { "epoch": 0.19, "grad_norm": 1.4293922445708995, "learning_rate": 9.305891084526306e-06, "loss": 0.791, "step": 2426 }, { "epoch": 0.19, "grad_norm": 1.5527002548530668, "learning_rate": 9.305230525018054e-06, "loss": 0.7775, "step": 2427 }, { "epoch": 0.19, "grad_norm": 1.5547744785593949, "learning_rate": 9.304569674810794e-06, "loss": 0.8584, "step": 2428 }, { "epoch": 0.19, "grad_norm": 1.3939679049128477, "learning_rate": 9.303908533949146e-06, "loss": 1.1587, "step": 2429 }, { "epoch": 0.19, "grad_norm": 1.5065333760076325, "learning_rate": 9.303247102477752e-06, "loss": 0.7313, "step": 2430 }, { "epoch": 0.2, "grad_norm": 1.507308843062549, "learning_rate": 9.302585380441274e-06, "loss": 0.8194, "step": 2431 }, { "epoch": 0.2, "grad_norm": 1.4244533122215144, "learning_rate": 9.301923367884393e-06, "loss": 0.8452, "step": 2432 }, { "epoch": 0.2, "grad_norm": 1.4971976434654353, "learning_rate": 9.301261064851807e-06, "loss": 0.8444, "step": 2433 }, { "epoch": 0.2, "grad_norm": 1.5536367082200695, "learning_rate": 9.30059847138824e-06, "loss": 0.765, "step": 2434 }, { "epoch": 0.2, "grad_norm": 1.4015697339177768, "learning_rate": 9.299935587538432e-06, "loss": 0.7678, "step": 2435 }, { "epoch": 0.2, "grad_norm": 1.6265026704126035, "learning_rate": 9.29927241334714e-06, "loss": 0.878, "step": 2436 }, { "epoch": 0.2, "grad_norm": 1.5580150395581103, "learning_rate": 9.298608948859141e-06, "loss": 0.7204, "step": 2437 }, { "epoch": 0.2, "grad_norm": 1.6060996141275004, "learning_rate": 9.29794519411924e-06, "loss": 0.8664, "step": 2438 }, { "epoch": 0.2, "grad_norm": 1.4138747150557152, "learning_rate": 9.29728114917225e-06, "loss": 0.7184, "step": 2439 }, { "epoch": 0.2, "grad_norm": 1.549837754651771, "learning_rate": 9.29661681406301e-06, "loss": 0.8208, "step": 2440 }, { "epoch": 0.2, "grad_norm": 1.4570257426225541, "learning_rate": 9.29595218883638e-06, "loss": 0.9175, "step": 2441 }, { "epoch": 0.2, "grad_norm": 1.7359700828978222, "learning_rate": 9.295287273537232e-06, "loss": 0.7988, "step": 2442 }, { "epoch": 0.2, "grad_norm": 1.569880101367236, "learning_rate": 9.294622068210466e-06, "loss": 0.7769, "step": 2443 }, { "epoch": 0.2, "grad_norm": 0.972130423103631, "learning_rate": 9.293956572900999e-06, "loss": 1.1301, "step": 2444 }, { "epoch": 0.2, "grad_norm": 1.3656588943902683, "learning_rate": 9.293290787653766e-06, "loss": 0.8039, "step": 2445 }, { "epoch": 0.2, "grad_norm": 1.604079322581345, "learning_rate": 9.292624712513721e-06, "loss": 0.8844, "step": 2446 }, { "epoch": 0.2, "grad_norm": 1.4362205432912685, "learning_rate": 9.29195834752584e-06, "loss": 0.8596, "step": 2447 }, { "epoch": 0.2, "grad_norm": 2.8702274536498518, "learning_rate": 9.291291692735116e-06, "loss": 0.8681, "step": 2448 }, { "epoch": 0.2, "grad_norm": 1.5005170281505649, "learning_rate": 9.290624748186565e-06, "loss": 0.8316, "step": 2449 }, { "epoch": 0.2, "grad_norm": 1.5889078648588482, "learning_rate": 9.28995751392522e-06, "loss": 0.8171, "step": 2450 }, { "epoch": 0.2, "grad_norm": 1.491605272585685, "learning_rate": 9.289289989996133e-06, "loss": 0.8172, "step": 2451 }, { "epoch": 0.2, "grad_norm": 0.9057442138624477, "learning_rate": 9.28862217644438e-06, "loss": 1.1538, "step": 2452 }, { "epoch": 0.2, "grad_norm": 1.4946484508778342, "learning_rate": 9.28795407331505e-06, "loss": 0.8334, "step": 2453 }, { "epoch": 0.2, "grad_norm": 1.4814272815155147, "learning_rate": 9.287285680653254e-06, "loss": 0.8159, "step": 2454 }, { "epoch": 0.2, "grad_norm": 2.081182927799621, "learning_rate": 9.28661699850413e-06, "loss": 0.8244, "step": 2455 }, { "epoch": 0.2, "grad_norm": 1.3895171644865245, "learning_rate": 9.285948026912822e-06, "loss": 0.6916, "step": 2456 }, { "epoch": 0.2, "grad_norm": 1.4543243338088105, "learning_rate": 9.285278765924502e-06, "loss": 0.7757, "step": 2457 }, { "epoch": 0.2, "grad_norm": 1.696546952911893, "learning_rate": 9.284609215584361e-06, "loss": 0.8494, "step": 2458 }, { "epoch": 0.2, "grad_norm": 1.4302752099200533, "learning_rate": 9.283939375937609e-06, "loss": 0.7587, "step": 2459 }, { "epoch": 0.2, "grad_norm": 1.5597042744311742, "learning_rate": 9.283269247029475e-06, "loss": 0.8437, "step": 2460 }, { "epoch": 0.2, "grad_norm": 1.4637461480282516, "learning_rate": 9.282598828905205e-06, "loss": 0.8102, "step": 2461 }, { "epoch": 0.2, "grad_norm": 1.0546368114545133, "learning_rate": 9.28192812161007e-06, "loss": 1.163, "step": 2462 }, { "epoch": 0.2, "grad_norm": 0.9489094753040603, "learning_rate": 9.281257125189358e-06, "loss": 1.1457, "step": 2463 }, { "epoch": 0.2, "grad_norm": 1.5191850039413246, "learning_rate": 9.280585839688377e-06, "loss": 0.7515, "step": 2464 }, { "epoch": 0.2, "grad_norm": 1.5345111822003539, "learning_rate": 9.279914265152448e-06, "loss": 0.8311, "step": 2465 }, { "epoch": 0.2, "grad_norm": 1.7245765289943977, "learning_rate": 9.279242401626924e-06, "loss": 0.8596, "step": 2466 }, { "epoch": 0.2, "grad_norm": 1.0403186654912064, "learning_rate": 9.278570249157166e-06, "loss": 1.1062, "step": 2467 }, { "epoch": 0.2, "grad_norm": 1.7356703020813822, "learning_rate": 9.277897807788562e-06, "loss": 0.7534, "step": 2468 }, { "epoch": 0.2, "grad_norm": 1.6331958969814673, "learning_rate": 9.277225077566519e-06, "loss": 0.7942, "step": 2469 }, { "epoch": 0.2, "grad_norm": 1.5899155566869414, "learning_rate": 9.276552058536454e-06, "loss": 0.8181, "step": 2470 }, { "epoch": 0.2, "grad_norm": 1.5586433126987576, "learning_rate": 9.275878750743818e-06, "loss": 0.7934, "step": 2471 }, { "epoch": 0.2, "grad_norm": 1.3533408180703748, "learning_rate": 9.275205154234069e-06, "loss": 0.6761, "step": 2472 }, { "epoch": 0.2, "grad_norm": 1.4255730194063503, "learning_rate": 9.274531269052693e-06, "loss": 0.7641, "step": 2473 }, { "epoch": 0.2, "grad_norm": 1.5481276272250761, "learning_rate": 9.273857095245192e-06, "loss": 0.8269, "step": 2474 }, { "epoch": 0.2, "grad_norm": 1.4969669216469579, "learning_rate": 9.273182632857087e-06, "loss": 0.8026, "step": 2475 }, { "epoch": 0.2, "grad_norm": 1.5024078287475118, "learning_rate": 9.272507881933919e-06, "loss": 0.823, "step": 2476 }, { "epoch": 0.2, "grad_norm": 0.9481250189586695, "learning_rate": 9.271832842521249e-06, "loss": 1.1387, "step": 2477 }, { "epoch": 0.2, "grad_norm": 1.5587062328058077, "learning_rate": 9.271157514664658e-06, "loss": 0.7834, "step": 2478 }, { "epoch": 0.2, "grad_norm": 1.5398695977260521, "learning_rate": 9.270481898409744e-06, "loss": 0.8485, "step": 2479 }, { "epoch": 0.2, "grad_norm": 1.6116459211264786, "learning_rate": 9.26980599380213e-06, "loss": 0.7642, "step": 2480 }, { "epoch": 0.2, "grad_norm": 1.4334102439634464, "learning_rate": 9.269129800887448e-06, "loss": 0.8102, "step": 2481 }, { "epoch": 0.2, "grad_norm": 1.4429847173717756, "learning_rate": 9.268453319711362e-06, "loss": 0.758, "step": 2482 }, { "epoch": 0.2, "grad_norm": 1.5549458653127683, "learning_rate": 9.267776550319548e-06, "loss": 0.8215, "step": 2483 }, { "epoch": 0.2, "grad_norm": 1.5858123314893178, "learning_rate": 9.2670994927577e-06, "loss": 0.8247, "step": 2484 }, { "epoch": 0.2, "grad_norm": 1.5471318979962474, "learning_rate": 9.266422147071539e-06, "loss": 0.8356, "step": 2485 }, { "epoch": 0.2, "grad_norm": 0.9450752237004824, "learning_rate": 9.265744513306798e-06, "loss": 1.1276, "step": 2486 }, { "epoch": 0.2, "grad_norm": 1.47768196589061, "learning_rate": 9.265066591509234e-06, "loss": 0.7348, "step": 2487 }, { "epoch": 0.2, "grad_norm": 1.4795328445863316, "learning_rate": 9.264388381724621e-06, "loss": 0.8471, "step": 2488 }, { "epoch": 0.2, "grad_norm": 1.4816607367332844, "learning_rate": 9.263709883998753e-06, "loss": 0.8351, "step": 2489 }, { "epoch": 0.2, "grad_norm": 0.778375577697499, "learning_rate": 9.263031098377445e-06, "loss": 1.1364, "step": 2490 }, { "epoch": 0.2, "grad_norm": 0.7721528459276297, "learning_rate": 9.262352024906526e-06, "loss": 1.1687, "step": 2491 }, { "epoch": 0.2, "grad_norm": 1.477379878079317, "learning_rate": 9.261672663631854e-06, "loss": 0.8056, "step": 2492 }, { "epoch": 0.2, "grad_norm": 1.5180091211707516, "learning_rate": 9.2609930145993e-06, "loss": 0.7896, "step": 2493 }, { "epoch": 0.2, "grad_norm": 1.4564436144952326, "learning_rate": 9.260313077854753e-06, "loss": 0.8115, "step": 2494 }, { "epoch": 0.2, "grad_norm": 0.8089669398112698, "learning_rate": 9.259632853444126e-06, "loss": 1.1157, "step": 2495 }, { "epoch": 0.2, "grad_norm": 1.3642970029582884, "learning_rate": 9.258952341413347e-06, "loss": 0.8287, "step": 2496 }, { "epoch": 0.2, "grad_norm": 1.3972094397470123, "learning_rate": 9.258271541808368e-06, "loss": 0.8342, "step": 2497 }, { "epoch": 0.2, "grad_norm": 1.4330029809673153, "learning_rate": 9.257590454675159e-06, "loss": 0.7664, "step": 2498 }, { "epoch": 0.2, "grad_norm": 1.600136785486778, "learning_rate": 9.256909080059703e-06, "loss": 0.8799, "step": 2499 }, { "epoch": 0.2, "grad_norm": 1.7180306898502098, "learning_rate": 9.256227418008015e-06, "loss": 0.9033, "step": 2500 }, { "epoch": 0.2, "grad_norm": 1.54831355082679, "learning_rate": 9.255545468566119e-06, "loss": 0.7862, "step": 2501 }, { "epoch": 0.2, "grad_norm": 0.895986414167438, "learning_rate": 9.254863231780062e-06, "loss": 1.1261, "step": 2502 }, { "epoch": 0.2, "grad_norm": 1.6182679046285227, "learning_rate": 9.25418070769591e-06, "loss": 0.8103, "step": 2503 }, { "epoch": 0.2, "grad_norm": 1.4988999942857322, "learning_rate": 9.253497896359749e-06, "loss": 0.7803, "step": 2504 }, { "epoch": 0.2, "grad_norm": 1.3400201909480869, "learning_rate": 9.252814797817682e-06, "loss": 0.7023, "step": 2505 }, { "epoch": 0.2, "grad_norm": 0.8258353887860432, "learning_rate": 9.252131412115838e-06, "loss": 1.1591, "step": 2506 }, { "epoch": 0.2, "grad_norm": 1.6029732018166505, "learning_rate": 9.251447739300356e-06, "loss": 0.8763, "step": 2507 }, { "epoch": 0.2, "grad_norm": 1.5666731684174307, "learning_rate": 9.250763779417402e-06, "loss": 0.8323, "step": 2508 }, { "epoch": 0.2, "grad_norm": 1.5369663265896087, "learning_rate": 9.250079532513158e-06, "loss": 0.8142, "step": 2509 }, { "epoch": 0.2, "grad_norm": 1.4847669923366615, "learning_rate": 9.249394998633825e-06, "loss": 0.8802, "step": 2510 }, { "epoch": 0.2, "grad_norm": 1.436761575791087, "learning_rate": 9.248710177825627e-06, "loss": 0.7944, "step": 2511 }, { "epoch": 0.2, "grad_norm": 1.6367701116655093, "learning_rate": 9.2480250701348e-06, "loss": 0.8334, "step": 2512 }, { "epoch": 0.2, "grad_norm": 1.6306418273280565, "learning_rate": 9.247339675607606e-06, "loss": 0.7679, "step": 2513 }, { "epoch": 0.2, "grad_norm": 1.5620956372105916, "learning_rate": 9.246653994290327e-06, "loss": 0.8324, "step": 2514 }, { "epoch": 0.2, "grad_norm": 1.5526424229480875, "learning_rate": 9.245968026229258e-06, "loss": 0.7437, "step": 2515 }, { "epoch": 0.2, "grad_norm": 1.7658279961660834, "learning_rate": 9.24528177147072e-06, "loss": 0.7855, "step": 2516 }, { "epoch": 0.2, "grad_norm": 1.5150501861297674, "learning_rate": 9.24459523006105e-06, "loss": 0.7918, "step": 2517 }, { "epoch": 0.2, "grad_norm": 1.556825071416571, "learning_rate": 9.243908402046602e-06, "loss": 0.819, "step": 2518 }, { "epoch": 0.2, "grad_norm": 1.4696200469352885, "learning_rate": 9.243221287473755e-06, "loss": 0.7696, "step": 2519 }, { "epoch": 0.2, "grad_norm": 1.479881402281857, "learning_rate": 9.242533886388905e-06, "loss": 0.8913, "step": 2520 }, { "epoch": 0.2, "grad_norm": 1.5494408116995453, "learning_rate": 9.241846198838466e-06, "loss": 0.9769, "step": 2521 }, { "epoch": 0.2, "grad_norm": 0.9717623171969201, "learning_rate": 9.241158224868871e-06, "loss": 1.1252, "step": 2522 }, { "epoch": 0.2, "grad_norm": 1.6354236682458738, "learning_rate": 9.240469964526576e-06, "loss": 0.9319, "step": 2523 }, { "epoch": 0.2, "grad_norm": 0.8371376312444543, "learning_rate": 9.239781417858052e-06, "loss": 1.1401, "step": 2524 }, { "epoch": 0.2, "grad_norm": 1.42056073824133, "learning_rate": 9.23909258490979e-06, "loss": 0.8026, "step": 2525 }, { "epoch": 0.2, "grad_norm": 1.5642446989624226, "learning_rate": 9.238403465728306e-06, "loss": 0.7566, "step": 2526 }, { "epoch": 0.2, "grad_norm": 1.5364295275921966, "learning_rate": 9.237714060360128e-06, "loss": 0.7784, "step": 2527 }, { "epoch": 0.2, "grad_norm": 1.5327505734388687, "learning_rate": 9.237024368851805e-06, "loss": 0.8669, "step": 2528 }, { "epoch": 0.2, "grad_norm": 1.6389460059777625, "learning_rate": 9.236334391249909e-06, "loss": 0.8271, "step": 2529 }, { "epoch": 0.2, "grad_norm": 1.561251447729545, "learning_rate": 9.235644127601028e-06, "loss": 0.8893, "step": 2530 }, { "epoch": 0.2, "grad_norm": 1.4484755766900925, "learning_rate": 9.23495357795177e-06, "loss": 0.888, "step": 2531 }, { "epoch": 0.2, "grad_norm": 1.5256584642615125, "learning_rate": 9.234262742348764e-06, "loss": 0.8083, "step": 2532 }, { "epoch": 0.2, "grad_norm": 1.4356220132502138, "learning_rate": 9.233571620838653e-06, "loss": 0.7838, "step": 2533 }, { "epoch": 0.2, "grad_norm": 1.5272841763595624, "learning_rate": 9.232880213468106e-06, "loss": 0.7579, "step": 2534 }, { "epoch": 0.2, "grad_norm": 1.5261167968787952, "learning_rate": 9.23218852028381e-06, "loss": 0.7463, "step": 2535 }, { "epoch": 0.2, "grad_norm": 1.4413707977197021, "learning_rate": 9.231496541332465e-06, "loss": 0.8054, "step": 2536 }, { "epoch": 0.2, "grad_norm": 1.247575903128527, "learning_rate": 9.230804276660799e-06, "loss": 1.166, "step": 2537 }, { "epoch": 0.2, "grad_norm": 1.5321147167756957, "learning_rate": 9.230111726315553e-06, "loss": 0.8438, "step": 2538 }, { "epoch": 0.2, "grad_norm": 0.9412893935866841, "learning_rate": 9.229418890343491e-06, "loss": 1.1398, "step": 2539 }, { "epoch": 0.2, "grad_norm": 1.482744276685692, "learning_rate": 9.228725768791394e-06, "loss": 0.833, "step": 2540 }, { "epoch": 0.2, "grad_norm": 1.5658094804955902, "learning_rate": 9.228032361706065e-06, "loss": 0.8442, "step": 2541 }, { "epoch": 0.2, "grad_norm": 1.5069577774286516, "learning_rate": 9.227338669134322e-06, "loss": 0.8715, "step": 2542 }, { "epoch": 0.2, "grad_norm": 1.5247796765269208, "learning_rate": 9.226644691123006e-06, "loss": 0.7684, "step": 2543 }, { "epoch": 0.2, "grad_norm": 1.2572329616769877, "learning_rate": 9.225950427718974e-06, "loss": 1.1534, "step": 2544 }, { "epoch": 0.2, "grad_norm": 1.63323559261434, "learning_rate": 9.225255878969108e-06, "loss": 0.8586, "step": 2545 }, { "epoch": 0.2, "grad_norm": 1.5332105776010034, "learning_rate": 9.224561044920303e-06, "loss": 0.8846, "step": 2546 }, { "epoch": 0.2, "grad_norm": 1.5755331550683576, "learning_rate": 9.223865925619476e-06, "loss": 0.8072, "step": 2547 }, { "epoch": 0.2, "grad_norm": 0.9185212800110313, "learning_rate": 9.223170521113563e-06, "loss": 1.1449, "step": 2548 }, { "epoch": 0.2, "grad_norm": 1.615931671617186, "learning_rate": 9.222474831449519e-06, "loss": 0.8364, "step": 2549 }, { "epoch": 0.2, "grad_norm": 0.8208561158773751, "learning_rate": 9.221778856674319e-06, "loss": 1.1287, "step": 2550 }, { "epoch": 0.2, "grad_norm": 1.4641742592212923, "learning_rate": 9.221082596834959e-06, "loss": 0.8177, "step": 2551 }, { "epoch": 0.2, "grad_norm": 1.458968604948614, "learning_rate": 9.220386051978449e-06, "loss": 0.8455, "step": 2552 }, { "epoch": 0.2, "grad_norm": 0.9015409137708176, "learning_rate": 9.219689222151821e-06, "loss": 1.1376, "step": 2553 }, { "epoch": 0.2, "grad_norm": 1.5384815928545281, "learning_rate": 9.21899210740213e-06, "loss": 0.8761, "step": 2554 }, { "epoch": 0.2, "grad_norm": 1.4867110105159658, "learning_rate": 9.218294707776441e-06, "loss": 0.868, "step": 2555 }, { "epoch": 0.21, "grad_norm": 1.5274993247221518, "learning_rate": 9.217597023321851e-06, "loss": 0.8214, "step": 2556 }, { "epoch": 0.21, "grad_norm": 1.5432830782903058, "learning_rate": 9.216899054085465e-06, "loss": 0.8044, "step": 2557 }, { "epoch": 0.21, "grad_norm": 0.87359682339141, "learning_rate": 9.216200800114412e-06, "loss": 1.1544, "step": 2558 }, { "epoch": 0.21, "grad_norm": 1.5828680272876856, "learning_rate": 9.215502261455839e-06, "loss": 0.7806, "step": 2559 }, { "epoch": 0.21, "grad_norm": 1.4330020550342948, "learning_rate": 9.214803438156916e-06, "loss": 0.8955, "step": 2560 }, { "epoch": 0.21, "grad_norm": 0.837724005233504, "learning_rate": 9.214104330264826e-06, "loss": 1.12, "step": 2561 }, { "epoch": 0.21, "grad_norm": 0.8050881256311607, "learning_rate": 9.213404937826775e-06, "loss": 1.1214, "step": 2562 }, { "epoch": 0.21, "grad_norm": 1.5824611672829865, "learning_rate": 9.212705260889991e-06, "loss": 0.8794, "step": 2563 }, { "epoch": 0.21, "grad_norm": 1.664006373466688, "learning_rate": 9.212005299501712e-06, "loss": 0.8225, "step": 2564 }, { "epoch": 0.21, "grad_norm": 1.4814192913195985, "learning_rate": 9.211305053709204e-06, "loss": 0.8337, "step": 2565 }, { "epoch": 0.21, "grad_norm": 1.5152630589211757, "learning_rate": 9.210604523559749e-06, "loss": 0.7804, "step": 2566 }, { "epoch": 0.21, "grad_norm": 1.5000665793720065, "learning_rate": 9.20990370910065e-06, "loss": 0.8156, "step": 2567 }, { "epoch": 0.21, "grad_norm": 1.606592282992881, "learning_rate": 9.209202610379225e-06, "loss": 0.7436, "step": 2568 }, { "epoch": 0.21, "grad_norm": 1.5115778571313527, "learning_rate": 9.208501227442815e-06, "loss": 0.8822, "step": 2569 }, { "epoch": 0.21, "grad_norm": 1.6505058735566853, "learning_rate": 9.207799560338779e-06, "loss": 0.8475, "step": 2570 }, { "epoch": 0.21, "grad_norm": 1.5266554406780122, "learning_rate": 9.207097609114495e-06, "loss": 0.7133, "step": 2571 }, { "epoch": 0.21, "grad_norm": 1.3930525103010445, "learning_rate": 9.206395373817358e-06, "loss": 0.8466, "step": 2572 }, { "epoch": 0.21, "grad_norm": 1.049417228015875, "learning_rate": 9.20569285449479e-06, "loss": 1.1714, "step": 2573 }, { "epoch": 0.21, "grad_norm": 1.5605174121424572, "learning_rate": 9.204990051194223e-06, "loss": 0.8447, "step": 2574 }, { "epoch": 0.21, "grad_norm": 1.4839626542236661, "learning_rate": 9.204286963963112e-06, "loss": 0.7874, "step": 2575 }, { "epoch": 0.21, "grad_norm": 1.5589991407822532, "learning_rate": 9.20358359284893e-06, "loss": 0.8202, "step": 2576 }, { "epoch": 0.21, "grad_norm": 1.571370009741804, "learning_rate": 9.202879937899175e-06, "loss": 0.8047, "step": 2577 }, { "epoch": 0.21, "grad_norm": 1.61653770393215, "learning_rate": 9.202175999161353e-06, "loss": 0.7918, "step": 2578 }, { "epoch": 0.21, "grad_norm": 1.503546534742903, "learning_rate": 9.201471776682999e-06, "loss": 0.7798, "step": 2579 }, { "epoch": 0.21, "grad_norm": 1.4977353880812723, "learning_rate": 9.200767270511666e-06, "loss": 0.7605, "step": 2580 }, { "epoch": 0.21, "grad_norm": 0.9138957659664549, "learning_rate": 9.200062480694919e-06, "loss": 1.1572, "step": 2581 }, { "epoch": 0.21, "grad_norm": 1.6765468127964391, "learning_rate": 9.199357407280349e-06, "loss": 0.8251, "step": 2582 }, { "epoch": 0.21, "grad_norm": 1.47209785524806, "learning_rate": 9.198652050315566e-06, "loss": 0.8069, "step": 2583 }, { "epoch": 0.21, "grad_norm": 1.4681897455342794, "learning_rate": 9.197946409848196e-06, "loss": 0.838, "step": 2584 }, { "epoch": 0.21, "grad_norm": 1.6703315928522064, "learning_rate": 9.197240485925883e-06, "loss": 0.7673, "step": 2585 }, { "epoch": 0.21, "grad_norm": 0.8268171043618279, "learning_rate": 9.196534278596296e-06, "loss": 1.1283, "step": 2586 }, { "epoch": 0.21, "grad_norm": 1.473064043645414, "learning_rate": 9.195827787907118e-06, "loss": 0.8215, "step": 2587 }, { "epoch": 0.21, "grad_norm": 1.5165929848954347, "learning_rate": 9.195121013906055e-06, "loss": 0.8348, "step": 2588 }, { "epoch": 0.21, "grad_norm": 1.7521464642457938, "learning_rate": 9.194413956640827e-06, "loss": 0.8401, "step": 2589 }, { "epoch": 0.21, "grad_norm": 1.6759232551961154, "learning_rate": 9.193706616159179e-06, "loss": 0.815, "step": 2590 }, { "epoch": 0.21, "grad_norm": 1.5025848188094852, "learning_rate": 9.19299899250887e-06, "loss": 0.8319, "step": 2591 }, { "epoch": 0.21, "grad_norm": 1.5434808291464792, "learning_rate": 9.192291085737682e-06, "loss": 0.8714, "step": 2592 }, { "epoch": 0.21, "grad_norm": 1.4012653954295182, "learning_rate": 9.191582895893412e-06, "loss": 0.7851, "step": 2593 }, { "epoch": 0.21, "grad_norm": 1.4532579440452962, "learning_rate": 9.19087442302388e-06, "loss": 0.7893, "step": 2594 }, { "epoch": 0.21, "grad_norm": 1.4869734084702937, "learning_rate": 9.190165667176924e-06, "loss": 0.8197, "step": 2595 }, { "epoch": 0.21, "grad_norm": 1.553291644752851, "learning_rate": 9.189456628400403e-06, "loss": 0.8394, "step": 2596 }, { "epoch": 0.21, "grad_norm": 1.5304013902106708, "learning_rate": 9.18874730674219e-06, "loss": 0.806, "step": 2597 }, { "epoch": 0.21, "grad_norm": 1.7561513496702543, "learning_rate": 9.188037702250179e-06, "loss": 0.7906, "step": 2598 }, { "epoch": 0.21, "grad_norm": 1.4673145029558168, "learning_rate": 9.187327814972286e-06, "loss": 0.8431, "step": 2599 }, { "epoch": 0.21, "grad_norm": 1.4463770074354507, "learning_rate": 9.186617644956445e-06, "loss": 0.7968, "step": 2600 }, { "epoch": 0.21, "grad_norm": 0.8709882522778485, "learning_rate": 9.185907192250608e-06, "loss": 1.1098, "step": 2601 }, { "epoch": 0.21, "grad_norm": 1.524867052480939, "learning_rate": 9.185196456902744e-06, "loss": 0.8664, "step": 2602 }, { "epoch": 0.21, "grad_norm": 1.6742710632392814, "learning_rate": 9.184485438960846e-06, "loss": 0.8441, "step": 2603 }, { "epoch": 0.21, "grad_norm": 1.4722461553669444, "learning_rate": 9.183774138472923e-06, "loss": 0.7387, "step": 2604 }, { "epoch": 0.21, "grad_norm": 1.5621579332120756, "learning_rate": 9.183062555487003e-06, "loss": 0.7752, "step": 2605 }, { "epoch": 0.21, "grad_norm": 1.4166504171625893, "learning_rate": 9.182350690051134e-06, "loss": 0.7216, "step": 2606 }, { "epoch": 0.21, "grad_norm": 1.447639711506031, "learning_rate": 9.181638542213383e-06, "loss": 0.7652, "step": 2607 }, { "epoch": 0.21, "grad_norm": 1.7370137799100938, "learning_rate": 9.180926112021837e-06, "loss": 0.8311, "step": 2608 }, { "epoch": 0.21, "grad_norm": 1.6534942037393068, "learning_rate": 9.180213399524599e-06, "loss": 0.7587, "step": 2609 }, { "epoch": 0.21, "grad_norm": 1.5315085597071736, "learning_rate": 9.179500404769792e-06, "loss": 0.9, "step": 2610 }, { "epoch": 0.21, "grad_norm": 1.4532660850305146, "learning_rate": 9.178787127805561e-06, "loss": 0.7569, "step": 2611 }, { "epoch": 0.21, "grad_norm": 1.5743370989835592, "learning_rate": 9.178073568680071e-06, "loss": 0.8462, "step": 2612 }, { "epoch": 0.21, "grad_norm": 1.4789297279405655, "learning_rate": 9.177359727441498e-06, "loss": 0.7965, "step": 2613 }, { "epoch": 0.21, "grad_norm": 1.720299041971694, "learning_rate": 9.176645604138041e-06, "loss": 0.8639, "step": 2614 }, { "epoch": 0.21, "grad_norm": 1.5265942872703935, "learning_rate": 9.175931198817926e-06, "loss": 0.8965, "step": 2615 }, { "epoch": 0.21, "grad_norm": 1.5045024903079554, "learning_rate": 9.175216511529387e-06, "loss": 0.8825, "step": 2616 }, { "epoch": 0.21, "grad_norm": 0.8526720037831905, "learning_rate": 9.17450154232068e-06, "loss": 1.1497, "step": 2617 }, { "epoch": 0.21, "grad_norm": 1.6601239101603003, "learning_rate": 9.173786291240085e-06, "loss": 0.7962, "step": 2618 }, { "epoch": 0.21, "grad_norm": 1.6026089673236867, "learning_rate": 9.173070758335895e-06, "loss": 0.7874, "step": 2619 }, { "epoch": 0.21, "grad_norm": 1.4312355288495462, "learning_rate": 9.172354943656428e-06, "loss": 0.7483, "step": 2620 }, { "epoch": 0.21, "grad_norm": 1.5114226882317068, "learning_rate": 9.17163884725001e-06, "loss": 0.7307, "step": 2621 }, { "epoch": 0.21, "grad_norm": 1.3240011011299948, "learning_rate": 9.170922469165001e-06, "loss": 0.769, "step": 2622 }, { "epoch": 0.21, "grad_norm": 1.5657328032770272, "learning_rate": 9.170205809449768e-06, "loss": 0.9282, "step": 2623 }, { "epoch": 0.21, "grad_norm": 0.8525883100972355, "learning_rate": 9.169488868152704e-06, "loss": 1.172, "step": 2624 }, { "epoch": 0.21, "grad_norm": 0.9080880147917914, "learning_rate": 9.168771645322217e-06, "loss": 1.1842, "step": 2625 }, { "epoch": 0.21, "grad_norm": 1.5139054882992735, "learning_rate": 9.168054141006737e-06, "loss": 0.7665, "step": 2626 }, { "epoch": 0.21, "grad_norm": 1.5023827998544188, "learning_rate": 9.16733635525471e-06, "loss": 0.7744, "step": 2627 }, { "epoch": 0.21, "grad_norm": 1.6251510502624804, "learning_rate": 9.166618288114602e-06, "loss": 0.7657, "step": 2628 }, { "epoch": 0.21, "grad_norm": 1.5021508625957891, "learning_rate": 9.1658999396349e-06, "loss": 0.7771, "step": 2629 }, { "epoch": 0.21, "grad_norm": 1.1106787003922318, "learning_rate": 9.165181309864108e-06, "loss": 1.1394, "step": 2630 }, { "epoch": 0.21, "grad_norm": 1.4899257011392206, "learning_rate": 9.16446239885075e-06, "loss": 0.797, "step": 2631 }, { "epoch": 0.21, "grad_norm": 0.8618978411886932, "learning_rate": 9.16374320664337e-06, "loss": 1.1425, "step": 2632 }, { "epoch": 0.21, "grad_norm": 0.8128061374458699, "learning_rate": 9.163023733290525e-06, "loss": 1.1801, "step": 2633 }, { "epoch": 0.21, "grad_norm": 1.5031824838996164, "learning_rate": 9.162303978840801e-06, "loss": 0.8299, "step": 2634 }, { "epoch": 0.21, "grad_norm": 1.50529889858848, "learning_rate": 9.161583943342793e-06, "loss": 0.7856, "step": 2635 }, { "epoch": 0.21, "grad_norm": 1.1406115887477155, "learning_rate": 9.16086362684512e-06, "loss": 1.1417, "step": 2636 }, { "epoch": 0.21, "grad_norm": 1.4999525213190121, "learning_rate": 9.160143029396422e-06, "loss": 0.738, "step": 2637 }, { "epoch": 0.21, "grad_norm": 1.536523850494388, "learning_rate": 9.159422151045354e-06, "loss": 0.7652, "step": 2638 }, { "epoch": 0.21, "grad_norm": 1.5588538970599959, "learning_rate": 9.15870099184059e-06, "loss": 0.8002, "step": 2639 }, { "epoch": 0.21, "grad_norm": 1.387876477153082, "learning_rate": 9.157979551830827e-06, "loss": 0.8043, "step": 2640 }, { "epoch": 0.21, "grad_norm": 0.9106835827118681, "learning_rate": 9.157257831064776e-06, "loss": 1.1672, "step": 2641 }, { "epoch": 0.21, "grad_norm": 1.6131456691463888, "learning_rate": 9.15653582959117e-06, "loss": 0.8273, "step": 2642 }, { "epoch": 0.21, "grad_norm": 1.5174240954541203, "learning_rate": 9.155813547458761e-06, "loss": 0.8258, "step": 2643 }, { "epoch": 0.21, "grad_norm": 1.5010847854201181, "learning_rate": 9.155090984716319e-06, "loss": 0.7515, "step": 2644 }, { "epoch": 0.21, "grad_norm": 1.005303931720966, "learning_rate": 9.154368141412632e-06, "loss": 1.1716, "step": 2645 }, { "epoch": 0.21, "grad_norm": 1.4092357534148454, "learning_rate": 9.153645017596508e-06, "loss": 0.8775, "step": 2646 }, { "epoch": 0.21, "grad_norm": 1.572211652166216, "learning_rate": 9.152921613316775e-06, "loss": 0.8204, "step": 2647 }, { "epoch": 0.21, "grad_norm": 1.609865365479956, "learning_rate": 9.152197928622278e-06, "loss": 0.8481, "step": 2648 }, { "epoch": 0.21, "grad_norm": 0.8507665366491778, "learning_rate": 9.151473963561884e-06, "loss": 1.1741, "step": 2649 }, { "epoch": 0.21, "grad_norm": 1.4476063491119602, "learning_rate": 9.150749718184473e-06, "loss": 0.7695, "step": 2650 }, { "epoch": 0.21, "grad_norm": 1.5812445317457728, "learning_rate": 9.15002519253895e-06, "loss": 0.8531, "step": 2651 }, { "epoch": 0.21, "grad_norm": 1.61084006384248, "learning_rate": 9.149300386674236e-06, "loss": 0.7527, "step": 2652 }, { "epoch": 0.21, "grad_norm": 1.4069163506952242, "learning_rate": 9.148575300639273e-06, "loss": 0.8479, "step": 2653 }, { "epoch": 0.21, "grad_norm": 1.5519375410644023, "learning_rate": 9.147849934483019e-06, "loss": 0.8238, "step": 2654 }, { "epoch": 0.21, "grad_norm": 1.5411847790140933, "learning_rate": 9.147124288254453e-06, "loss": 0.8032, "step": 2655 }, { "epoch": 0.21, "grad_norm": 0.9304813232897968, "learning_rate": 9.146398362002572e-06, "loss": 1.1472, "step": 2656 }, { "epoch": 0.21, "grad_norm": 1.3883197491904644, "learning_rate": 9.145672155776392e-06, "loss": 0.8326, "step": 2657 }, { "epoch": 0.21, "grad_norm": 0.8874458543774099, "learning_rate": 9.144945669624948e-06, "loss": 1.1624, "step": 2658 }, { "epoch": 0.21, "grad_norm": 1.4258527114866246, "learning_rate": 9.144218903597295e-06, "loss": 0.8421, "step": 2659 }, { "epoch": 0.21, "grad_norm": 1.534751876827594, "learning_rate": 9.143491857742505e-06, "loss": 0.8222, "step": 2660 }, { "epoch": 0.21, "grad_norm": 0.9072853566008295, "learning_rate": 9.142764532109672e-06, "loss": 1.1205, "step": 2661 }, { "epoch": 0.21, "grad_norm": 0.9354698862017317, "learning_rate": 9.142036926747904e-06, "loss": 1.1468, "step": 2662 }, { "epoch": 0.21, "grad_norm": 1.556081593839327, "learning_rate": 9.14130904170633e-06, "loss": 0.781, "step": 2663 }, { "epoch": 0.21, "grad_norm": 1.4851954122712183, "learning_rate": 9.1405808770341e-06, "loss": 0.8597, "step": 2664 }, { "epoch": 0.21, "grad_norm": 0.7756899977019704, "learning_rate": 9.13985243278038e-06, "loss": 1.1323, "step": 2665 }, { "epoch": 0.21, "grad_norm": 1.5854120986171591, "learning_rate": 9.13912370899436e-06, "loss": 0.7843, "step": 2666 }, { "epoch": 0.21, "grad_norm": 1.5975845807215767, "learning_rate": 9.138394705725242e-06, "loss": 0.8409, "step": 2667 }, { "epoch": 0.21, "grad_norm": 1.691545023556888, "learning_rate": 9.13766542302225e-06, "loss": 0.9271, "step": 2668 }, { "epoch": 0.21, "grad_norm": 0.9748404320540639, "learning_rate": 9.136935860934628e-06, "loss": 1.1442, "step": 2669 }, { "epoch": 0.21, "grad_norm": 1.6656328330941927, "learning_rate": 9.136206019511635e-06, "loss": 0.8624, "step": 2670 }, { "epoch": 0.21, "grad_norm": 1.4950492401787432, "learning_rate": 9.135475898802555e-06, "loss": 0.7505, "step": 2671 }, { "epoch": 0.21, "grad_norm": 1.5281006017056877, "learning_rate": 9.134745498856685e-06, "loss": 0.8405, "step": 2672 }, { "epoch": 0.21, "grad_norm": 0.7885699592985145, "learning_rate": 9.134014819723346e-06, "loss": 1.1411, "step": 2673 }, { "epoch": 0.21, "grad_norm": 1.6455930353893764, "learning_rate": 9.133283861451872e-06, "loss": 0.7689, "step": 2674 }, { "epoch": 0.21, "grad_norm": 1.5538032286478747, "learning_rate": 9.13255262409162e-06, "loss": 0.8422, "step": 2675 }, { "epoch": 0.21, "grad_norm": 1.5466189222323465, "learning_rate": 9.131821107691967e-06, "loss": 0.8052, "step": 2676 }, { "epoch": 0.21, "grad_norm": 1.5068723598874263, "learning_rate": 9.131089312302304e-06, "loss": 0.8001, "step": 2677 }, { "epoch": 0.21, "grad_norm": 1.6343859394886489, "learning_rate": 9.130357237972044e-06, "loss": 0.7871, "step": 2678 }, { "epoch": 0.21, "grad_norm": 1.545503926580848, "learning_rate": 9.129624884750618e-06, "loss": 0.7395, "step": 2679 }, { "epoch": 0.22, "grad_norm": 1.5212960381638376, "learning_rate": 9.128892252687478e-06, "loss": 0.879, "step": 2680 }, { "epoch": 0.22, "grad_norm": 1.604478981136443, "learning_rate": 9.128159341832092e-06, "loss": 0.86, "step": 2681 }, { "epoch": 0.22, "grad_norm": 1.5792169595204804, "learning_rate": 9.127426152233946e-06, "loss": 0.7914, "step": 2682 }, { "epoch": 0.22, "grad_norm": 1.4344271960325858, "learning_rate": 9.126692683942549e-06, "loss": 0.7376, "step": 2683 }, { "epoch": 0.22, "grad_norm": 1.418394646871754, "learning_rate": 9.125958937007427e-06, "loss": 0.7976, "step": 2684 }, { "epoch": 0.22, "grad_norm": 1.5240792451892742, "learning_rate": 9.125224911478122e-06, "loss": 0.8043, "step": 2685 }, { "epoch": 0.22, "grad_norm": 2.184244534521629, "learning_rate": 9.124490607404197e-06, "loss": 0.8942, "step": 2686 }, { "epoch": 0.22, "grad_norm": 1.6554362341733437, "learning_rate": 9.123756024835237e-06, "loss": 0.8902, "step": 2687 }, { "epoch": 0.22, "grad_norm": 1.526072381036271, "learning_rate": 9.123021163820839e-06, "loss": 0.8028, "step": 2688 }, { "epoch": 0.22, "grad_norm": 1.4732700580607097, "learning_rate": 9.122286024410625e-06, "loss": 0.7848, "step": 2689 }, { "epoch": 0.22, "grad_norm": 0.8843371575063754, "learning_rate": 9.121550606654232e-06, "loss": 1.1591, "step": 2690 }, { "epoch": 0.22, "grad_norm": 1.499571768179961, "learning_rate": 9.120814910601319e-06, "loss": 0.7329, "step": 2691 }, { "epoch": 0.22, "grad_norm": 0.8395161108973446, "learning_rate": 9.120078936301559e-06, "loss": 1.1559, "step": 2692 }, { "epoch": 0.22, "grad_norm": 1.5377599892200695, "learning_rate": 9.119342683804649e-06, "loss": 0.88, "step": 2693 }, { "epoch": 0.22, "grad_norm": 1.5202991547056606, "learning_rate": 9.1186061531603e-06, "loss": 0.8434, "step": 2694 }, { "epoch": 0.22, "grad_norm": 1.412368540945013, "learning_rate": 9.117869344418246e-06, "loss": 0.8059, "step": 2695 }, { "epoch": 0.22, "grad_norm": 1.4858519166017816, "learning_rate": 9.11713225762824e-06, "loss": 0.7971, "step": 2696 }, { "epoch": 0.22, "grad_norm": 1.5428875257765227, "learning_rate": 9.116394892840047e-06, "loss": 0.8496, "step": 2697 }, { "epoch": 0.22, "grad_norm": 1.5891228521929235, "learning_rate": 9.11565725010346e-06, "loss": 0.807, "step": 2698 }, { "epoch": 0.22, "grad_norm": 1.536870788394737, "learning_rate": 9.114919329468283e-06, "loss": 0.8729, "step": 2699 }, { "epoch": 0.22, "grad_norm": 1.4233762481997725, "learning_rate": 9.114181130984343e-06, "loss": 0.7604, "step": 2700 }, { "epoch": 0.22, "grad_norm": 1.7011575231470912, "learning_rate": 9.113442654701487e-06, "loss": 0.8198, "step": 2701 }, { "epoch": 0.22, "grad_norm": 1.0504204986841759, "learning_rate": 9.112703900669577e-06, "loss": 1.123, "step": 2702 }, { "epoch": 0.22, "grad_norm": 0.9705936042465975, "learning_rate": 9.111964868938495e-06, "loss": 1.1377, "step": 2703 }, { "epoch": 0.22, "grad_norm": 1.6697688602186724, "learning_rate": 9.111225559558144e-06, "loss": 0.762, "step": 2704 }, { "epoch": 0.22, "grad_norm": 1.6243873198550438, "learning_rate": 9.110485972578439e-06, "loss": 0.8388, "step": 2705 }, { "epoch": 0.22, "grad_norm": 1.610735627888926, "learning_rate": 9.109746108049325e-06, "loss": 0.771, "step": 2706 }, { "epoch": 0.22, "grad_norm": 1.6186053775134144, "learning_rate": 9.109005966020753e-06, "loss": 0.7712, "step": 2707 }, { "epoch": 0.22, "grad_norm": 1.55696185568793, "learning_rate": 9.108265546542705e-06, "loss": 0.9213, "step": 2708 }, { "epoch": 0.22, "grad_norm": 1.2605773016443498, "learning_rate": 9.107524849665173e-06, "loss": 1.134, "step": 2709 }, { "epoch": 0.22, "grad_norm": 1.4937501577886223, "learning_rate": 9.106783875438169e-06, "loss": 0.7583, "step": 2710 }, { "epoch": 0.22, "grad_norm": 1.5898569633880812, "learning_rate": 9.106042623911728e-06, "loss": 0.8185, "step": 2711 }, { "epoch": 0.22, "grad_norm": 1.4949295501407474, "learning_rate": 9.105301095135902e-06, "loss": 0.7081, "step": 2712 }, { "epoch": 0.22, "grad_norm": 1.4667325696705478, "learning_rate": 9.104559289160757e-06, "loss": 0.7485, "step": 2713 }, { "epoch": 0.22, "grad_norm": 1.6501943417609812, "learning_rate": 9.103817206036383e-06, "loss": 0.8839, "step": 2714 }, { "epoch": 0.22, "grad_norm": 1.5072453247770954, "learning_rate": 9.103074845812888e-06, "loss": 0.8237, "step": 2715 }, { "epoch": 0.22, "grad_norm": 1.567478283694221, "learning_rate": 9.102332208540396e-06, "loss": 0.8534, "step": 2716 }, { "epoch": 0.22, "grad_norm": 1.5703982114168584, "learning_rate": 9.101589294269054e-06, "loss": 0.8295, "step": 2717 }, { "epoch": 0.22, "grad_norm": 1.4073780037559471, "learning_rate": 9.10084610304902e-06, "loss": 0.7395, "step": 2718 }, { "epoch": 0.22, "grad_norm": 1.5791200969886008, "learning_rate": 9.100102634930485e-06, "loss": 0.8351, "step": 2719 }, { "epoch": 0.22, "grad_norm": 0.8477809643021568, "learning_rate": 9.099358889963643e-06, "loss": 1.1753, "step": 2720 }, { "epoch": 0.22, "grad_norm": 1.577844627014302, "learning_rate": 9.098614868198715e-06, "loss": 0.856, "step": 2721 }, { "epoch": 0.22, "grad_norm": 1.5109881445815694, "learning_rate": 9.097870569685938e-06, "loss": 0.8493, "step": 2722 }, { "epoch": 0.22, "grad_norm": 1.5545392213081684, "learning_rate": 9.097125994475572e-06, "loss": 0.8081, "step": 2723 }, { "epoch": 0.22, "grad_norm": 1.4936266788204047, "learning_rate": 9.09638114261789e-06, "loss": 0.7001, "step": 2724 }, { "epoch": 0.22, "grad_norm": 1.5772067670533552, "learning_rate": 9.095636014163184e-06, "loss": 0.853, "step": 2725 }, { "epoch": 0.22, "grad_norm": 1.4780604983786856, "learning_rate": 9.09489060916177e-06, "loss": 0.7789, "step": 2726 }, { "epoch": 0.22, "grad_norm": 0.8261035263870105, "learning_rate": 9.094144927663979e-06, "loss": 1.1818, "step": 2727 }, { "epoch": 0.22, "grad_norm": 1.5523003697500033, "learning_rate": 9.09339896972016e-06, "loss": 0.7064, "step": 2728 }, { "epoch": 0.22, "grad_norm": 1.5800918336608898, "learning_rate": 9.092652735380683e-06, "loss": 0.8111, "step": 2729 }, { "epoch": 0.22, "grad_norm": 1.5029000690236245, "learning_rate": 9.091906224695935e-06, "loss": 0.8594, "step": 2730 }, { "epoch": 0.22, "grad_norm": 0.8518533135066468, "learning_rate": 9.091159437716322e-06, "loss": 1.185, "step": 2731 }, { "epoch": 0.22, "grad_norm": 1.523137945364994, "learning_rate": 9.09041237449227e-06, "loss": 0.8605, "step": 2732 }, { "epoch": 0.22, "grad_norm": 1.5742739742087455, "learning_rate": 9.08966503507422e-06, "loss": 0.8582, "step": 2733 }, { "epoch": 0.22, "grad_norm": 1.5193055484808926, "learning_rate": 9.088917419512634e-06, "loss": 0.8924, "step": 2734 }, { "epoch": 0.22, "grad_norm": 1.5503271668595082, "learning_rate": 9.088169527857996e-06, "loss": 0.8458, "step": 2735 }, { "epoch": 0.22, "grad_norm": 1.6985238254937525, "learning_rate": 9.087421360160802e-06, "loss": 0.9147, "step": 2736 }, { "epoch": 0.22, "grad_norm": 1.4508042973157518, "learning_rate": 9.08667291647157e-06, "loss": 0.7582, "step": 2737 }, { "epoch": 0.22, "grad_norm": 1.5508237751729161, "learning_rate": 9.085924196840841e-06, "loss": 0.7826, "step": 2738 }, { "epoch": 0.22, "grad_norm": 1.4920365984626915, "learning_rate": 9.085175201319165e-06, "loss": 0.9011, "step": 2739 }, { "epoch": 0.22, "grad_norm": 0.9637838530382876, "learning_rate": 9.08442592995712e-06, "loss": 1.1117, "step": 2740 }, { "epoch": 0.22, "grad_norm": 1.68719857680009, "learning_rate": 9.083676382805295e-06, "loss": 0.8822, "step": 2741 }, { "epoch": 0.22, "grad_norm": 1.9266589414355086, "learning_rate": 9.082926559914303e-06, "loss": 0.8671, "step": 2742 }, { "epoch": 0.22, "grad_norm": 1.5819406201864188, "learning_rate": 9.082176461334774e-06, "loss": 0.7545, "step": 2743 }, { "epoch": 0.22, "grad_norm": 1.6768617501146295, "learning_rate": 9.081426087117356e-06, "loss": 0.8052, "step": 2744 }, { "epoch": 0.22, "grad_norm": 1.5216843153131245, "learning_rate": 9.080675437312715e-06, "loss": 0.8654, "step": 2745 }, { "epoch": 0.22, "grad_norm": 1.5506642948520322, "learning_rate": 9.079924511971538e-06, "loss": 0.8448, "step": 2746 }, { "epoch": 0.22, "grad_norm": 1.6671674533515648, "learning_rate": 9.07917331114453e-06, "loss": 0.809, "step": 2747 }, { "epoch": 0.22, "grad_norm": 1.5004197565331499, "learning_rate": 9.078421834882409e-06, "loss": 0.7968, "step": 2748 }, { "epoch": 0.22, "grad_norm": 1.6253511820513293, "learning_rate": 9.077670083235923e-06, "loss": 0.7796, "step": 2749 }, { "epoch": 0.22, "grad_norm": 1.4706554560400404, "learning_rate": 9.076918056255827e-06, "loss": 0.8917, "step": 2750 }, { "epoch": 0.22, "grad_norm": 1.5238577876431205, "learning_rate": 9.076165753992902e-06, "loss": 0.8016, "step": 2751 }, { "epoch": 0.22, "grad_norm": 1.4734811399930205, "learning_rate": 9.075413176497944e-06, "loss": 0.8063, "step": 2752 }, { "epoch": 0.22, "grad_norm": 1.4929912296668069, "learning_rate": 9.074660323821772e-06, "loss": 0.8342, "step": 2753 }, { "epoch": 0.22, "grad_norm": 1.786005563672545, "learning_rate": 9.073907196015215e-06, "loss": 0.9254, "step": 2754 }, { "epoch": 0.22, "grad_norm": 1.4251010957232777, "learning_rate": 9.073153793129129e-06, "loss": 0.8005, "step": 2755 }, { "epoch": 0.22, "grad_norm": 0.9833946655020197, "learning_rate": 9.072400115214385e-06, "loss": 1.1498, "step": 2756 }, { "epoch": 0.22, "grad_norm": 1.5834666741707812, "learning_rate": 9.071646162321873e-06, "loss": 0.8074, "step": 2757 }, { "epoch": 0.22, "grad_norm": 1.641944664236657, "learning_rate": 9.070891934502501e-06, "loss": 0.8183, "step": 2758 }, { "epoch": 0.22, "grad_norm": 1.5916665907363245, "learning_rate": 9.0701374318072e-06, "loss": 0.8237, "step": 2759 }, { "epoch": 0.22, "grad_norm": 0.8862095453467964, "learning_rate": 9.06938265428691e-06, "loss": 1.1363, "step": 2760 }, { "epoch": 0.22, "grad_norm": 1.5361331791082247, "learning_rate": 9.0686276019926e-06, "loss": 0.7357, "step": 2761 }, { "epoch": 0.22, "grad_norm": 0.8594920604811607, "learning_rate": 9.067872274975248e-06, "loss": 1.1305, "step": 2762 }, { "epoch": 0.22, "grad_norm": 0.8455582108439683, "learning_rate": 9.06711667328586e-06, "loss": 1.1043, "step": 2763 }, { "epoch": 0.22, "grad_norm": 0.8006841629645659, "learning_rate": 9.066360796975452e-06, "loss": 1.1573, "step": 2764 }, { "epoch": 0.22, "grad_norm": 1.523255231724165, "learning_rate": 9.065604646095068e-06, "loss": 0.7987, "step": 2765 }, { "epoch": 0.22, "grad_norm": 0.9106503123825963, "learning_rate": 9.06484822069576e-06, "loss": 1.1462, "step": 2766 }, { "epoch": 0.22, "grad_norm": 1.503029594319182, "learning_rate": 9.064091520828606e-06, "loss": 0.7939, "step": 2767 }, { "epoch": 0.22, "grad_norm": 1.5342127976171993, "learning_rate": 9.0633345465447e-06, "loss": 0.7906, "step": 2768 }, { "epoch": 0.22, "grad_norm": 1.424522801702873, "learning_rate": 9.062577297895152e-06, "loss": 0.7775, "step": 2769 }, { "epoch": 0.22, "grad_norm": 1.60410655153158, "learning_rate": 9.061819774931096e-06, "loss": 0.8274, "step": 2770 }, { "epoch": 0.22, "grad_norm": 1.5034169546769118, "learning_rate": 9.061061977703682e-06, "loss": 0.7303, "step": 2771 }, { "epoch": 0.22, "grad_norm": 1.6483470073625308, "learning_rate": 9.060303906264076e-06, "loss": 0.8671, "step": 2772 }, { "epoch": 0.22, "grad_norm": 1.0371819446776256, "learning_rate": 9.059545560663466e-06, "loss": 1.1219, "step": 2773 }, { "epoch": 0.22, "grad_norm": 1.5397995538869336, "learning_rate": 9.058786940953057e-06, "loss": 0.8353, "step": 2774 }, { "epoch": 0.22, "grad_norm": 1.4777902407079404, "learning_rate": 9.058028047184074e-06, "loss": 0.7805, "step": 2775 }, { "epoch": 0.22, "grad_norm": 1.4873016376334518, "learning_rate": 9.057268879407757e-06, "loss": 0.7861, "step": 2776 }, { "epoch": 0.22, "grad_norm": 0.9273665144315468, "learning_rate": 9.056509437675366e-06, "loss": 1.1367, "step": 2777 }, { "epoch": 0.22, "grad_norm": 1.4166990250748213, "learning_rate": 9.055749722038185e-06, "loss": 0.8189, "step": 2778 }, { "epoch": 0.22, "grad_norm": 1.6563975138090987, "learning_rate": 9.054989732547507e-06, "loss": 0.7594, "step": 2779 }, { "epoch": 0.22, "grad_norm": 1.5453638986035552, "learning_rate": 9.05422946925465e-06, "loss": 0.83, "step": 2780 }, { "epoch": 0.22, "grad_norm": 1.6354881853133996, "learning_rate": 9.05346893221095e-06, "loss": 0.9247, "step": 2781 }, { "epoch": 0.22, "grad_norm": 1.5792386151586097, "learning_rate": 9.052708121467758e-06, "loss": 0.817, "step": 2782 }, { "epoch": 0.22, "grad_norm": 1.4924725733558986, "learning_rate": 9.051947037076446e-06, "loss": 0.8549, "step": 2783 }, { "epoch": 0.22, "grad_norm": 1.434354880167259, "learning_rate": 9.051185679088406e-06, "loss": 0.8304, "step": 2784 }, { "epoch": 0.22, "grad_norm": 1.4347623792977258, "learning_rate": 9.050424047555043e-06, "loss": 0.7923, "step": 2785 }, { "epoch": 0.22, "grad_norm": 1.4491918514006743, "learning_rate": 9.049662142527788e-06, "loss": 0.7308, "step": 2786 }, { "epoch": 0.22, "grad_norm": 1.5805661674267073, "learning_rate": 9.048899964058085e-06, "loss": 0.824, "step": 2787 }, { "epoch": 0.22, "grad_norm": 1.0079574171061683, "learning_rate": 9.048137512197398e-06, "loss": 1.1206, "step": 2788 }, { "epoch": 0.22, "grad_norm": 0.8503934112479651, "learning_rate": 9.04737478699721e-06, "loss": 1.1266, "step": 2789 }, { "epoch": 0.22, "grad_norm": 1.5003092339728685, "learning_rate": 9.046611788509021e-06, "loss": 0.7199, "step": 2790 }, { "epoch": 0.22, "grad_norm": 1.5503689750202785, "learning_rate": 9.045848516784351e-06, "loss": 0.8783, "step": 2791 }, { "epoch": 0.22, "grad_norm": 1.5725833674792489, "learning_rate": 9.045084971874738e-06, "loss": 0.7636, "step": 2792 }, { "epoch": 0.22, "grad_norm": 1.4924882390102854, "learning_rate": 9.044321153831737e-06, "loss": 0.8774, "step": 2793 }, { "epoch": 0.22, "grad_norm": 1.5222432347149883, "learning_rate": 9.043557062706925e-06, "loss": 0.7498, "step": 2794 }, { "epoch": 0.22, "grad_norm": 1.5099066324551467, "learning_rate": 9.042792698551894e-06, "loss": 0.7429, "step": 2795 }, { "epoch": 0.22, "grad_norm": 1.8401561015311043, "learning_rate": 9.042028061418255e-06, "loss": 0.8719, "step": 2796 }, { "epoch": 0.22, "grad_norm": 1.393227328475668, "learning_rate": 9.04126315135764e-06, "loss": 1.1803, "step": 2797 }, { "epoch": 0.22, "grad_norm": 1.4886164324953508, "learning_rate": 9.040497968421694e-06, "loss": 0.7996, "step": 2798 }, { "epoch": 0.22, "grad_norm": 1.5694665022741647, "learning_rate": 9.039732512662087e-06, "loss": 0.7398, "step": 2799 }, { "epoch": 0.22, "grad_norm": 1.5709534175348423, "learning_rate": 9.038966784130503e-06, "loss": 0.7835, "step": 2800 }, { "epoch": 0.22, "grad_norm": 1.5761764161876064, "learning_rate": 9.038200782878647e-06, "loss": 0.8545, "step": 2801 }, { "epoch": 0.22, "grad_norm": 1.6559938013876296, "learning_rate": 9.03743450895824e-06, "loss": 0.7597, "step": 2802 }, { "epoch": 0.22, "grad_norm": 1.4082235982383455, "learning_rate": 9.036667962421023e-06, "loss": 0.8656, "step": 2803 }, { "epoch": 0.22, "grad_norm": 0.8770938118209625, "learning_rate": 9.035901143318753e-06, "loss": 1.1542, "step": 2804 }, { "epoch": 0.23, "grad_norm": 0.8438130286138171, "learning_rate": 9.03513405170321e-06, "loss": 1.1356, "step": 2805 }, { "epoch": 0.23, "grad_norm": 1.421949868076069, "learning_rate": 9.03436668762619e-06, "loss": 0.8452, "step": 2806 }, { "epoch": 0.23, "grad_norm": 1.4138358421767636, "learning_rate": 9.033599051139506e-06, "loss": 0.7841, "step": 2807 }, { "epoch": 0.23, "grad_norm": 1.509266047202581, "learning_rate": 9.032831142294988e-06, "loss": 0.8081, "step": 2808 }, { "epoch": 0.23, "grad_norm": 1.4680689089062113, "learning_rate": 9.032062961144493e-06, "loss": 0.8288, "step": 2809 }, { "epoch": 0.23, "grad_norm": 1.557240307702731, "learning_rate": 9.031294507739885e-06, "loss": 0.772, "step": 2810 }, { "epoch": 0.23, "grad_norm": 1.569641847168124, "learning_rate": 9.030525782133055e-06, "loss": 0.7387, "step": 2811 }, { "epoch": 0.23, "grad_norm": 1.4286286595317244, "learning_rate": 9.029756784375907e-06, "loss": 0.7392, "step": 2812 }, { "epoch": 0.23, "grad_norm": 1.5568313993386487, "learning_rate": 9.028987514520366e-06, "loss": 0.7944, "step": 2813 }, { "epoch": 0.23, "grad_norm": 1.5391399248488427, "learning_rate": 9.028217972618376e-06, "loss": 0.86, "step": 2814 }, { "epoch": 0.23, "grad_norm": 1.5781342163944243, "learning_rate": 9.027448158721898e-06, "loss": 0.8631, "step": 2815 }, { "epoch": 0.23, "grad_norm": 1.3964123084789426, "learning_rate": 9.02667807288291e-06, "loss": 0.7256, "step": 2816 }, { "epoch": 0.23, "grad_norm": 1.5049282969885376, "learning_rate": 9.02590771515341e-06, "loss": 0.8073, "step": 2817 }, { "epoch": 0.23, "grad_norm": 1.587664784886817, "learning_rate": 9.025137085585417e-06, "loss": 0.7709, "step": 2818 }, { "epoch": 0.23, "grad_norm": 1.7011584202925614, "learning_rate": 9.024366184230964e-06, "loss": 0.8018, "step": 2819 }, { "epoch": 0.23, "grad_norm": 1.6061428859183073, "learning_rate": 9.023595011142103e-06, "loss": 0.7664, "step": 2820 }, { "epoch": 0.23, "grad_norm": 1.4977607515454034, "learning_rate": 9.022823566370907e-06, "loss": 0.766, "step": 2821 }, { "epoch": 0.23, "grad_norm": 1.1587008142601938, "learning_rate": 9.022051849969465e-06, "loss": 1.1481, "step": 2822 }, { "epoch": 0.23, "grad_norm": 1.6819304422761396, "learning_rate": 9.021279861989884e-06, "loss": 0.7757, "step": 2823 }, { "epoch": 0.23, "grad_norm": 1.5162379417284817, "learning_rate": 9.020507602484293e-06, "loss": 0.8464, "step": 2824 }, { "epoch": 0.23, "grad_norm": 1.5127299946980448, "learning_rate": 9.019735071504836e-06, "loss": 0.7935, "step": 2825 }, { "epoch": 0.23, "grad_norm": 1.573910478049113, "learning_rate": 9.018962269103672e-06, "loss": 0.8129, "step": 2826 }, { "epoch": 0.23, "grad_norm": 1.670076779943516, "learning_rate": 9.01818919533299e-06, "loss": 0.8751, "step": 2827 }, { "epoch": 0.23, "grad_norm": 0.9085267294951515, "learning_rate": 9.017415850244981e-06, "loss": 1.1164, "step": 2828 }, { "epoch": 0.23, "grad_norm": 1.5336147489826537, "learning_rate": 9.016642233891871e-06, "loss": 0.7139, "step": 2829 }, { "epoch": 0.23, "grad_norm": 1.4593788940364583, "learning_rate": 9.01586834632589e-06, "loss": 0.78, "step": 2830 }, { "epoch": 0.23, "grad_norm": 1.4471877723709619, "learning_rate": 9.015094187599297e-06, "loss": 0.7492, "step": 2831 }, { "epoch": 0.23, "grad_norm": 1.5116541442908384, "learning_rate": 9.014319757764363e-06, "loss": 0.7715, "step": 2832 }, { "epoch": 0.23, "grad_norm": 1.5104326982508283, "learning_rate": 9.01354505687338e-06, "loss": 0.8167, "step": 2833 }, { "epoch": 0.23, "grad_norm": 1.42495266300129, "learning_rate": 9.012770084978657e-06, "loss": 0.8549, "step": 2834 }, { "epoch": 0.23, "grad_norm": 1.502935100063998, "learning_rate": 9.01199484213252e-06, "loss": 0.8248, "step": 2835 }, { "epoch": 0.23, "grad_norm": 1.5144688366466426, "learning_rate": 9.011219328387321e-06, "loss": 0.8722, "step": 2836 }, { "epoch": 0.23, "grad_norm": 1.5362925103899885, "learning_rate": 9.01044354379542e-06, "loss": 0.8159, "step": 2837 }, { "epoch": 0.23, "grad_norm": 1.4732109085517446, "learning_rate": 9.009667488409197e-06, "loss": 0.8432, "step": 2838 }, { "epoch": 0.23, "grad_norm": 1.4754796042898999, "learning_rate": 9.00889116228106e-06, "loss": 0.7628, "step": 2839 }, { "epoch": 0.23, "grad_norm": 1.526808344550726, "learning_rate": 9.008114565463423e-06, "loss": 0.7684, "step": 2840 }, { "epoch": 0.23, "grad_norm": 1.522599896003508, "learning_rate": 9.007337698008727e-06, "loss": 0.823, "step": 2841 }, { "epoch": 0.23, "grad_norm": 1.5056311162585918, "learning_rate": 9.006560559969426e-06, "loss": 0.816, "step": 2842 }, { "epoch": 0.23, "grad_norm": 1.6964257928803927, "learning_rate": 9.005783151397994e-06, "loss": 0.8421, "step": 2843 }, { "epoch": 0.23, "grad_norm": 1.4981445357809546, "learning_rate": 9.005005472346923e-06, "loss": 0.8344, "step": 2844 }, { "epoch": 0.23, "grad_norm": 1.3884495515980424, "learning_rate": 9.004227522868727e-06, "loss": 0.8147, "step": 2845 }, { "epoch": 0.23, "grad_norm": 1.8843444148268373, "learning_rate": 9.00344930301593e-06, "loss": 0.8407, "step": 2846 }, { "epoch": 0.23, "grad_norm": 1.5678728420608044, "learning_rate": 9.002670812841082e-06, "loss": 0.7449, "step": 2847 }, { "epoch": 0.23, "grad_norm": 0.9752374374918128, "learning_rate": 9.001892052396749e-06, "loss": 1.1303, "step": 2848 }, { "epoch": 0.23, "grad_norm": 1.5124312773089488, "learning_rate": 9.001113021735512e-06, "loss": 0.7948, "step": 2849 }, { "epoch": 0.23, "grad_norm": 1.4996656947373783, "learning_rate": 9.000333720909978e-06, "loss": 0.8367, "step": 2850 }, { "epoch": 0.23, "grad_norm": 1.4339501536365045, "learning_rate": 8.99955414997276e-06, "loss": 0.8263, "step": 2851 }, { "epoch": 0.23, "grad_norm": 1.532967561910912, "learning_rate": 8.998774308976504e-06, "loss": 0.7928, "step": 2852 }, { "epoch": 0.23, "grad_norm": 1.5456648842661505, "learning_rate": 8.997994197973861e-06, "loss": 0.7563, "step": 2853 }, { "epoch": 0.23, "grad_norm": 1.4952120070202044, "learning_rate": 8.997213817017508e-06, "loss": 0.8512, "step": 2854 }, { "epoch": 0.23, "grad_norm": 1.6375715260436081, "learning_rate": 8.996433166160137e-06, "loss": 0.739, "step": 2855 }, { "epoch": 0.23, "grad_norm": 1.0284091228677428, "learning_rate": 8.99565224545446e-06, "loss": 1.1398, "step": 2856 }, { "epoch": 0.23, "grad_norm": 1.5105661536045198, "learning_rate": 8.994871054953207e-06, "loss": 0.7857, "step": 2857 }, { "epoch": 0.23, "grad_norm": 0.8167332722287997, "learning_rate": 8.994089594709126e-06, "loss": 1.1583, "step": 2858 }, { "epoch": 0.23, "grad_norm": 2.43041516565146, "learning_rate": 8.993307864774982e-06, "loss": 0.8466, "step": 2859 }, { "epoch": 0.23, "grad_norm": 1.4617727041295017, "learning_rate": 8.992525865203561e-06, "loss": 0.7674, "step": 2860 }, { "epoch": 0.23, "grad_norm": 1.5319426321814078, "learning_rate": 8.991743596047664e-06, "loss": 0.8592, "step": 2861 }, { "epoch": 0.23, "grad_norm": 1.5188445137738937, "learning_rate": 8.990961057360111e-06, "loss": 0.8245, "step": 2862 }, { "epoch": 0.23, "grad_norm": 1.4475697338145361, "learning_rate": 8.990178249193741e-06, "loss": 0.7811, "step": 2863 }, { "epoch": 0.23, "grad_norm": 1.6282364554483362, "learning_rate": 8.989395171601413e-06, "loss": 0.8335, "step": 2864 }, { "epoch": 0.23, "grad_norm": 1.47295817454312, "learning_rate": 8.988611824636e-06, "loss": 0.7977, "step": 2865 }, { "epoch": 0.23, "grad_norm": 1.6249577332321916, "learning_rate": 8.987828208350397e-06, "loss": 0.7008, "step": 2866 }, { "epoch": 0.23, "grad_norm": 1.7083680048264143, "learning_rate": 8.987044322797513e-06, "loss": 0.8047, "step": 2867 }, { "epoch": 0.23, "grad_norm": 1.4503355637356607, "learning_rate": 8.98626016803028e-06, "loss": 0.7975, "step": 2868 }, { "epoch": 0.23, "grad_norm": 1.4796865573458986, "learning_rate": 8.985475744101646e-06, "loss": 0.7856, "step": 2869 }, { "epoch": 0.23, "grad_norm": 1.5363781622816894, "learning_rate": 8.984691051064576e-06, "loss": 0.8611, "step": 2870 }, { "epoch": 0.23, "grad_norm": 1.4535221825374731, "learning_rate": 8.983906088972055e-06, "loss": 0.8144, "step": 2871 }, { "epoch": 0.23, "grad_norm": 1.4936031781988626, "learning_rate": 8.983120857877083e-06, "loss": 0.9549, "step": 2872 }, { "epoch": 0.23, "grad_norm": 1.5637116771563628, "learning_rate": 8.982335357832685e-06, "loss": 0.7642, "step": 2873 }, { "epoch": 0.23, "grad_norm": 1.5081338973871137, "learning_rate": 8.981549588891899e-06, "loss": 0.8396, "step": 2874 }, { "epoch": 0.23, "grad_norm": 1.244200230628726, "learning_rate": 8.980763551107777e-06, "loss": 1.1364, "step": 2875 }, { "epoch": 0.23, "grad_norm": 1.4450153120398683, "learning_rate": 8.979977244533398e-06, "loss": 0.825, "step": 2876 }, { "epoch": 0.23, "grad_norm": 1.5323367957733631, "learning_rate": 8.979190669221856e-06, "loss": 0.7297, "step": 2877 }, { "epoch": 0.23, "grad_norm": 0.8206695022137738, "learning_rate": 8.97840382522626e-06, "loss": 1.1206, "step": 2878 }, { "epoch": 0.23, "grad_norm": 1.5183910460479892, "learning_rate": 8.97761671259974e-06, "loss": 0.8279, "step": 2879 }, { "epoch": 0.23, "grad_norm": 1.5451305866588778, "learning_rate": 8.976829331395447e-06, "loss": 0.7513, "step": 2880 }, { "epoch": 0.23, "grad_norm": 1.5366094745026992, "learning_rate": 8.976041681666541e-06, "loss": 0.8564, "step": 2881 }, { "epoch": 0.23, "grad_norm": 1.1146734014226087, "learning_rate": 8.97525376346621e-06, "loss": 1.1141, "step": 2882 }, { "epoch": 0.23, "grad_norm": 1.460531143910605, "learning_rate": 8.974465576847655e-06, "loss": 0.8093, "step": 2883 }, { "epoch": 0.23, "grad_norm": 1.4695241875788463, "learning_rate": 8.973677121864098e-06, "loss": 0.8166, "step": 2884 }, { "epoch": 0.23, "grad_norm": 1.4542630723282235, "learning_rate": 8.972888398568772e-06, "loss": 0.7475, "step": 2885 }, { "epoch": 0.23, "grad_norm": 1.5662744054442388, "learning_rate": 8.972099407014938e-06, "loss": 0.7669, "step": 2886 }, { "epoch": 0.23, "grad_norm": 1.4332049763724766, "learning_rate": 8.971310147255869e-06, "loss": 0.7681, "step": 2887 }, { "epoch": 0.23, "grad_norm": 0.8729195579071725, "learning_rate": 8.97052061934486e-06, "loss": 1.131, "step": 2888 }, { "epoch": 0.23, "grad_norm": 1.6763431100550428, "learning_rate": 8.969730823335217e-06, "loss": 0.8844, "step": 2889 }, { "epoch": 0.23, "grad_norm": 1.5336994145934675, "learning_rate": 8.968940759280274e-06, "loss": 0.8486, "step": 2890 }, { "epoch": 0.23, "grad_norm": 1.552722258259549, "learning_rate": 8.968150427233373e-06, "loss": 0.8232, "step": 2891 }, { "epoch": 0.23, "grad_norm": 1.4514139487568405, "learning_rate": 8.967359827247882e-06, "loss": 0.7118, "step": 2892 }, { "epoch": 0.23, "grad_norm": 1.5612137065154816, "learning_rate": 8.966568959377184e-06, "loss": 0.7848, "step": 2893 }, { "epoch": 0.23, "grad_norm": 1.5741098855247713, "learning_rate": 8.965777823674679e-06, "loss": 0.743, "step": 2894 }, { "epoch": 0.23, "grad_norm": 0.8646999289568675, "learning_rate": 8.964986420193788e-06, "loss": 1.1224, "step": 2895 }, { "epoch": 0.23, "grad_norm": 1.519257630771146, "learning_rate": 8.964194748987948e-06, "loss": 0.7588, "step": 2896 }, { "epoch": 0.23, "grad_norm": 1.5431681216140927, "learning_rate": 8.963402810110613e-06, "loss": 0.7913, "step": 2897 }, { "epoch": 0.23, "grad_norm": 1.4554978127172866, "learning_rate": 8.962610603615257e-06, "loss": 0.8418, "step": 2898 }, { "epoch": 0.23, "grad_norm": 1.4950723359975566, "learning_rate": 8.961818129555372e-06, "loss": 0.823, "step": 2899 }, { "epoch": 0.23, "grad_norm": 0.8241285507316574, "learning_rate": 8.961025387984468e-06, "loss": 1.1289, "step": 2900 }, { "epoch": 0.23, "grad_norm": 1.5030712649545925, "learning_rate": 8.960232378956073e-06, "loss": 0.783, "step": 2901 }, { "epoch": 0.23, "grad_norm": 1.5134209638333844, "learning_rate": 8.95943910252373e-06, "loss": 0.7661, "step": 2902 }, { "epoch": 0.23, "grad_norm": 1.5249034090377314, "learning_rate": 8.958645558741009e-06, "loss": 0.8948, "step": 2903 }, { "epoch": 0.23, "grad_norm": 0.8364807925788422, "learning_rate": 8.957851747661483e-06, "loss": 1.1207, "step": 2904 }, { "epoch": 0.23, "grad_norm": 1.522809924844445, "learning_rate": 8.95705766933876e-06, "loss": 0.8443, "step": 2905 }, { "epoch": 0.23, "grad_norm": 1.6197373339341499, "learning_rate": 8.956263323826455e-06, "loss": 0.8336, "step": 2906 }, { "epoch": 0.23, "grad_norm": 1.5360875766041366, "learning_rate": 8.955468711178202e-06, "loss": 0.839, "step": 2907 }, { "epoch": 0.23, "grad_norm": 1.5846915170075357, "learning_rate": 8.954673831447658e-06, "loss": 0.7979, "step": 2908 }, { "epoch": 0.23, "grad_norm": 1.8302922267436699, "learning_rate": 8.953878684688492e-06, "loss": 0.7767, "step": 2909 }, { "epoch": 0.23, "grad_norm": 0.8526377718667729, "learning_rate": 8.953083270954399e-06, "loss": 1.1601, "step": 2910 }, { "epoch": 0.23, "grad_norm": 1.5285748971247237, "learning_rate": 8.952287590299084e-06, "loss": 0.8046, "step": 2911 }, { "epoch": 0.23, "grad_norm": 1.5640481003979552, "learning_rate": 8.951491642776274e-06, "loss": 0.6982, "step": 2912 }, { "epoch": 0.23, "grad_norm": 1.4980919380346966, "learning_rate": 8.950695428439709e-06, "loss": 0.8115, "step": 2913 }, { "epoch": 0.23, "grad_norm": 1.4303460991428953, "learning_rate": 8.949898947343158e-06, "loss": 0.7528, "step": 2914 }, { "epoch": 0.23, "grad_norm": 1.4198484606347106, "learning_rate": 8.949102199540397e-06, "loss": 0.7828, "step": 2915 }, { "epoch": 0.23, "grad_norm": 1.5273463727456664, "learning_rate": 8.948305185085226e-06, "loss": 0.8576, "step": 2916 }, { "epoch": 0.23, "grad_norm": 1.420574115458076, "learning_rate": 8.947507904031459e-06, "loss": 0.7548, "step": 2917 }, { "epoch": 0.23, "grad_norm": 1.5549575061802778, "learning_rate": 8.946710356432932e-06, "loss": 0.8176, "step": 2918 }, { "epoch": 0.23, "grad_norm": 1.528149485400961, "learning_rate": 8.9459125423435e-06, "loss": 0.7949, "step": 2919 }, { "epoch": 0.23, "grad_norm": 1.3593274795279662, "learning_rate": 8.945114461817028e-06, "loss": 0.7559, "step": 2920 }, { "epoch": 0.23, "grad_norm": 1.647780961918954, "learning_rate": 8.944316114907406e-06, "loss": 0.7971, "step": 2921 }, { "epoch": 0.23, "grad_norm": 1.4032123810216546, "learning_rate": 8.943517501668541e-06, "loss": 0.7622, "step": 2922 }, { "epoch": 0.23, "grad_norm": 0.9326260243073812, "learning_rate": 8.942718622154359e-06, "loss": 1.12, "step": 2923 }, { "epoch": 0.23, "grad_norm": 1.483494740193894, "learning_rate": 8.941919476418797e-06, "loss": 0.7865, "step": 2924 }, { "epoch": 0.23, "grad_norm": 1.5146962673431117, "learning_rate": 8.94112006451582e-06, "loss": 0.7533, "step": 2925 }, { "epoch": 0.23, "grad_norm": 1.608767387774408, "learning_rate": 8.940320386499404e-06, "loss": 0.7943, "step": 2926 }, { "epoch": 0.23, "grad_norm": 1.4248648006933904, "learning_rate": 8.939520442423544e-06, "loss": 0.7743, "step": 2927 }, { "epoch": 0.23, "grad_norm": 1.7610036982612236, "learning_rate": 8.938720232342257e-06, "loss": 0.837, "step": 2928 }, { "epoch": 0.23, "grad_norm": 1.5322794494100196, "learning_rate": 8.937919756309574e-06, "loss": 0.8103, "step": 2929 }, { "epoch": 0.24, "grad_norm": 1.4591870420357749, "learning_rate": 8.937119014379543e-06, "loss": 0.7395, "step": 2930 }, { "epoch": 0.24, "grad_norm": 1.5493358479552397, "learning_rate": 8.936318006606236e-06, "loss": 0.8654, "step": 2931 }, { "epoch": 0.24, "grad_norm": 1.4813590181065424, "learning_rate": 8.935516733043732e-06, "loss": 0.8018, "step": 2932 }, { "epoch": 0.24, "grad_norm": 1.5402623223914294, "learning_rate": 8.934715193746143e-06, "loss": 0.8155, "step": 2933 }, { "epoch": 0.24, "grad_norm": 1.4625065491451386, "learning_rate": 8.933913388767584e-06, "loss": 0.8122, "step": 2934 }, { "epoch": 0.24, "grad_norm": 0.9390063800795907, "learning_rate": 8.9331113181622e-06, "loss": 1.1334, "step": 2935 }, { "epoch": 0.24, "grad_norm": 1.4362266395687735, "learning_rate": 8.932308981984145e-06, "loss": 0.7872, "step": 2936 }, { "epoch": 0.24, "grad_norm": 1.4077486756476278, "learning_rate": 8.931506380287595e-06, "loss": 0.8364, "step": 2937 }, { "epoch": 0.24, "grad_norm": 1.5533559626382858, "learning_rate": 8.930703513126744e-06, "loss": 0.823, "step": 2938 }, { "epoch": 0.24, "grad_norm": 1.3683578251864288, "learning_rate": 8.929900380555805e-06, "loss": 0.7642, "step": 2939 }, { "epoch": 0.24, "grad_norm": 1.5279528084297416, "learning_rate": 8.929096982629007e-06, "loss": 0.9554, "step": 2940 }, { "epoch": 0.24, "grad_norm": 0.8643991625879426, "learning_rate": 8.928293319400595e-06, "loss": 1.1272, "step": 2941 }, { "epoch": 0.24, "grad_norm": 1.5566570355205518, "learning_rate": 8.927489390924835e-06, "loss": 0.7354, "step": 2942 }, { "epoch": 0.24, "grad_norm": 1.5306002908298693, "learning_rate": 8.92668519725601e-06, "loss": 0.8362, "step": 2943 }, { "epoch": 0.24, "grad_norm": 1.6670288179510317, "learning_rate": 8.925880738448425e-06, "loss": 0.8175, "step": 2944 }, { "epoch": 0.24, "grad_norm": 1.572457259434509, "learning_rate": 8.925076014556393e-06, "loss": 0.758, "step": 2945 }, { "epoch": 0.24, "grad_norm": 1.4719993362319428, "learning_rate": 8.924271025634252e-06, "loss": 0.7984, "step": 2946 }, { "epoch": 0.24, "grad_norm": 1.506690809600484, "learning_rate": 8.923465771736361e-06, "loss": 0.7719, "step": 2947 }, { "epoch": 0.24, "grad_norm": 0.9433627297380383, "learning_rate": 8.922660252917088e-06, "loss": 1.1364, "step": 2948 }, { "epoch": 0.24, "grad_norm": 1.4496464620569396, "learning_rate": 8.921854469230824e-06, "loss": 0.906, "step": 2949 }, { "epoch": 0.24, "grad_norm": 1.4920173013338027, "learning_rate": 8.92104842073198e-06, "loss": 0.8444, "step": 2950 }, { "epoch": 0.24, "grad_norm": 1.5143440268447341, "learning_rate": 8.920242107474979e-06, "loss": 0.7679, "step": 2951 }, { "epoch": 0.24, "grad_norm": 1.6081072525971154, "learning_rate": 8.919435529514269e-06, "loss": 0.9018, "step": 2952 }, { "epoch": 0.24, "grad_norm": 1.524108512813696, "learning_rate": 8.918628686904307e-06, "loss": 0.7228, "step": 2953 }, { "epoch": 0.24, "grad_norm": 1.4601149071338422, "learning_rate": 8.917821579699578e-06, "loss": 0.7925, "step": 2954 }, { "epoch": 0.24, "grad_norm": 1.4541345272351975, "learning_rate": 8.917014207954576e-06, "loss": 0.8047, "step": 2955 }, { "epoch": 0.24, "grad_norm": 1.5613969044588754, "learning_rate": 8.916206571723818e-06, "loss": 0.9001, "step": 2956 }, { "epoch": 0.24, "grad_norm": 1.6230390029193713, "learning_rate": 8.915398671061838e-06, "loss": 0.8646, "step": 2957 }, { "epoch": 0.24, "grad_norm": 1.6051765761335404, "learning_rate": 8.914590506023187e-06, "loss": 0.9032, "step": 2958 }, { "epoch": 0.24, "grad_norm": 1.5071683258832949, "learning_rate": 8.913782076662431e-06, "loss": 0.8155, "step": 2959 }, { "epoch": 0.24, "grad_norm": 0.9246142011389854, "learning_rate": 8.912973383034162e-06, "loss": 1.1546, "step": 2960 }, { "epoch": 0.24, "grad_norm": 1.538619690773534, "learning_rate": 8.912164425192983e-06, "loss": 0.9053, "step": 2961 }, { "epoch": 0.24, "grad_norm": 0.8183437175185185, "learning_rate": 8.911355203193515e-06, "loss": 1.1144, "step": 2962 }, { "epoch": 0.24, "grad_norm": 1.56053114487121, "learning_rate": 8.9105457170904e-06, "loss": 0.8362, "step": 2963 }, { "epoch": 0.24, "grad_norm": 1.50517743658222, "learning_rate": 8.909735966938297e-06, "loss": 0.7812, "step": 2964 }, { "epoch": 0.24, "grad_norm": 1.7399138316328289, "learning_rate": 8.908925952791882e-06, "loss": 0.806, "step": 2965 }, { "epoch": 0.24, "grad_norm": 1.590748582848573, "learning_rate": 8.908115674705847e-06, "loss": 0.7389, "step": 2966 }, { "epoch": 0.24, "grad_norm": 1.4896297912536705, "learning_rate": 8.907305132734905e-06, "loss": 0.7729, "step": 2967 }, { "epoch": 0.24, "grad_norm": 1.6133944724696818, "learning_rate": 8.906494326933788e-06, "loss": 0.8317, "step": 2968 }, { "epoch": 0.24, "grad_norm": 1.6749848041542281, "learning_rate": 8.905683257357238e-06, "loss": 0.756, "step": 2969 }, { "epoch": 0.24, "grad_norm": 1.5372234769772013, "learning_rate": 8.904871924060025e-06, "loss": 0.8431, "step": 2970 }, { "epoch": 0.24, "grad_norm": 1.058042023031189, "learning_rate": 8.904060327096933e-06, "loss": 1.1399, "step": 2971 }, { "epoch": 0.24, "grad_norm": 1.483329686337272, "learning_rate": 8.903248466522758e-06, "loss": 0.7649, "step": 2972 }, { "epoch": 0.24, "grad_norm": 1.4516533020690223, "learning_rate": 8.902436342392322e-06, "loss": 0.8427, "step": 2973 }, { "epoch": 0.24, "grad_norm": 1.513261485303323, "learning_rate": 8.90162395476046e-06, "loss": 0.8222, "step": 2974 }, { "epoch": 0.24, "grad_norm": 1.5857353270809185, "learning_rate": 8.900811303682028e-06, "loss": 0.8325, "step": 2975 }, { "epoch": 0.24, "grad_norm": 0.8968545753413047, "learning_rate": 8.899998389211896e-06, "loss": 1.1292, "step": 2976 }, { "epoch": 0.24, "grad_norm": 1.6459249712875532, "learning_rate": 8.899185211404955e-06, "loss": 0.8091, "step": 2977 }, { "epoch": 0.24, "grad_norm": 0.8422790087258277, "learning_rate": 8.898371770316113e-06, "loss": 1.0999, "step": 2978 }, { "epoch": 0.24, "grad_norm": 1.464973303570124, "learning_rate": 8.897558066000293e-06, "loss": 0.7728, "step": 2979 }, { "epoch": 0.24, "grad_norm": 1.4976982914844355, "learning_rate": 8.896744098512443e-06, "loss": 0.8615, "step": 2980 }, { "epoch": 0.24, "grad_norm": 0.889006453209654, "learning_rate": 8.89592986790752e-06, "loss": 1.1135, "step": 2981 }, { "epoch": 0.24, "grad_norm": 1.5488523715809996, "learning_rate": 8.895115374240505e-06, "loss": 0.856, "step": 2982 }, { "epoch": 0.24, "grad_norm": 1.438919596609975, "learning_rate": 8.894300617566391e-06, "loss": 0.7499, "step": 2983 }, { "epoch": 0.24, "grad_norm": 0.8228071477693027, "learning_rate": 8.893485597940195e-06, "loss": 1.1247, "step": 2984 }, { "epoch": 0.24, "grad_norm": 0.8403746411770103, "learning_rate": 8.892670315416948e-06, "loss": 1.1381, "step": 2985 }, { "epoch": 0.24, "grad_norm": 1.4334007749909583, "learning_rate": 8.8918547700517e-06, "loss": 0.7347, "step": 2986 }, { "epoch": 0.24, "grad_norm": 1.5359216626745498, "learning_rate": 8.891038961899521e-06, "loss": 0.8595, "step": 2987 }, { "epoch": 0.24, "grad_norm": 1.55755050109655, "learning_rate": 8.890222891015492e-06, "loss": 0.7341, "step": 2988 }, { "epoch": 0.24, "grad_norm": 1.4372568486525628, "learning_rate": 8.889406557454719e-06, "loss": 0.7678, "step": 2989 }, { "epoch": 0.24, "grad_norm": 1.4425042080151422, "learning_rate": 8.88858996127232e-06, "loss": 0.8285, "step": 2990 }, { "epoch": 0.24, "grad_norm": 1.6412357001142646, "learning_rate": 8.887773102523436e-06, "loss": 0.8403, "step": 2991 }, { "epoch": 0.24, "grad_norm": 1.5491729873391995, "learning_rate": 8.88695598126322e-06, "loss": 0.6921, "step": 2992 }, { "epoch": 0.24, "grad_norm": 1.4173814230040522, "learning_rate": 8.886138597546852e-06, "loss": 0.8, "step": 2993 }, { "epoch": 0.24, "grad_norm": 1.4708363390605785, "learning_rate": 8.885320951429518e-06, "loss": 0.7975, "step": 2994 }, { "epoch": 0.24, "grad_norm": 1.4427564359821285, "learning_rate": 8.884503042966428e-06, "loss": 0.8008, "step": 2995 }, { "epoch": 0.24, "grad_norm": 1.3005155957035575, "learning_rate": 8.883684872212811e-06, "loss": 1.1161, "step": 2996 }, { "epoch": 0.24, "grad_norm": 1.7032888367121148, "learning_rate": 8.882866439223911e-06, "loss": 0.904, "step": 2997 }, { "epoch": 0.24, "grad_norm": 1.5012818010571198, "learning_rate": 8.88204774405499e-06, "loss": 0.7822, "step": 2998 }, { "epoch": 0.24, "grad_norm": 1.5473490477483385, "learning_rate": 8.881228786761329e-06, "loss": 0.8287, "step": 2999 }, { "epoch": 0.24, "grad_norm": 1.5390447766650186, "learning_rate": 8.880409567398225e-06, "loss": 0.8502, "step": 3000 }, { "epoch": 0.24, "grad_norm": 1.5919106269453307, "learning_rate": 8.879590086020993e-06, "loss": 0.7651, "step": 3001 }, { "epoch": 0.24, "grad_norm": 1.489650344079886, "learning_rate": 8.878770342684968e-06, "loss": 0.7285, "step": 3002 }, { "epoch": 0.24, "grad_norm": 1.4802837618100906, "learning_rate": 8.8779503374455e-06, "loss": 0.8841, "step": 3003 }, { "epoch": 0.24, "grad_norm": 1.532261752868713, "learning_rate": 8.877130070357959e-06, "loss": 0.8358, "step": 3004 }, { "epoch": 0.24, "grad_norm": 1.4701679081488315, "learning_rate": 8.876309541477729e-06, "loss": 0.6946, "step": 3005 }, { "epoch": 0.24, "grad_norm": 1.4587358151462773, "learning_rate": 8.875488750860215e-06, "loss": 0.8322, "step": 3006 }, { "epoch": 0.24, "grad_norm": 1.5035401721848534, "learning_rate": 8.874667698560838e-06, "loss": 0.849, "step": 3007 }, { "epoch": 0.24, "grad_norm": 1.7040655798194564, "learning_rate": 8.87384638463504e-06, "loss": 0.8313, "step": 3008 }, { "epoch": 0.24, "grad_norm": 1.6148731969817678, "learning_rate": 8.873024809138272e-06, "loss": 0.7672, "step": 3009 }, { "epoch": 0.24, "grad_norm": 1.5825485614261268, "learning_rate": 8.872202972126017e-06, "loss": 0.7878, "step": 3010 }, { "epoch": 0.24, "grad_norm": 1.4177329442997801, "learning_rate": 8.871380873653761e-06, "loss": 0.7676, "step": 3011 }, { "epoch": 0.24, "grad_norm": 1.5506413058671813, "learning_rate": 8.870558513777017e-06, "loss": 0.7579, "step": 3012 }, { "epoch": 0.24, "grad_norm": 1.1747591140190825, "learning_rate": 8.869735892551312e-06, "loss": 1.1411, "step": 3013 }, { "epoch": 0.24, "grad_norm": 1.486904591611567, "learning_rate": 8.868913010032188e-06, "loss": 0.7397, "step": 3014 }, { "epoch": 0.24, "grad_norm": 0.922671595108368, "learning_rate": 8.868089866275214e-06, "loss": 1.1328, "step": 3015 }, { "epoch": 0.24, "grad_norm": 1.5190575304587202, "learning_rate": 8.867266461335965e-06, "loss": 0.7974, "step": 3016 }, { "epoch": 0.24, "grad_norm": 0.8318131077061892, "learning_rate": 8.866442795270042e-06, "loss": 1.1451, "step": 3017 }, { "epoch": 0.24, "grad_norm": 1.5193479836339012, "learning_rate": 8.865618868133061e-06, "loss": 0.8397, "step": 3018 }, { "epoch": 0.24, "grad_norm": 1.4805724627642425, "learning_rate": 8.864794679980654e-06, "loss": 0.7638, "step": 3019 }, { "epoch": 0.24, "grad_norm": 1.5588367616511347, "learning_rate": 8.863970230868474e-06, "loss": 0.7872, "step": 3020 }, { "epoch": 0.24, "grad_norm": 1.5561187498343676, "learning_rate": 8.863145520852187e-06, "loss": 0.879, "step": 3021 }, { "epoch": 0.24, "grad_norm": 1.453245090005874, "learning_rate": 8.862320549987483e-06, "loss": 0.8315, "step": 3022 }, { "epoch": 0.24, "grad_norm": 1.570835518006856, "learning_rate": 8.861495318330062e-06, "loss": 0.8479, "step": 3023 }, { "epoch": 0.24, "grad_norm": 1.5504410554323658, "learning_rate": 8.860669825935647e-06, "loss": 0.7705, "step": 3024 }, { "epoch": 0.24, "grad_norm": 1.5199753891489487, "learning_rate": 8.859844072859978e-06, "loss": 0.8559, "step": 3025 }, { "epoch": 0.24, "grad_norm": 1.467600833375324, "learning_rate": 8.85901805915881e-06, "loss": 0.7986, "step": 3026 }, { "epoch": 0.24, "grad_norm": 1.5560118993196896, "learning_rate": 8.85819178488792e-06, "loss": 0.8421, "step": 3027 }, { "epoch": 0.24, "grad_norm": 1.5937484811145721, "learning_rate": 8.857365250103098e-06, "loss": 0.7733, "step": 3028 }, { "epoch": 0.24, "grad_norm": 1.5892924545819431, "learning_rate": 8.856538454860155e-06, "loss": 0.8141, "step": 3029 }, { "epoch": 0.24, "grad_norm": 1.5374475815983144, "learning_rate": 8.855711399214914e-06, "loss": 0.76, "step": 3030 }, { "epoch": 0.24, "grad_norm": 1.3147612029278255, "learning_rate": 8.854884083223225e-06, "loss": 0.7527, "step": 3031 }, { "epoch": 0.24, "grad_norm": 1.2302772796785721, "learning_rate": 8.854056506940949e-06, "loss": 1.0772, "step": 3032 }, { "epoch": 0.24, "grad_norm": 1.4634001149428653, "learning_rate": 8.853228670423964e-06, "loss": 0.8004, "step": 3033 }, { "epoch": 0.24, "grad_norm": 1.5847290564817398, "learning_rate": 8.852400573728167e-06, "loss": 0.8523, "step": 3034 }, { "epoch": 0.24, "grad_norm": 1.6239657056058256, "learning_rate": 8.851572216909475e-06, "loss": 0.7989, "step": 3035 }, { "epoch": 0.24, "grad_norm": 1.5957565322089509, "learning_rate": 8.850743600023819e-06, "loss": 0.7908, "step": 3036 }, { "epoch": 0.24, "grad_norm": 1.5944874269421445, "learning_rate": 8.849914723127151e-06, "loss": 0.8908, "step": 3037 }, { "epoch": 0.24, "grad_norm": 1.5760806610457292, "learning_rate": 8.849085586275437e-06, "loss": 0.8105, "step": 3038 }, { "epoch": 0.24, "grad_norm": 1.0790843371896945, "learning_rate": 8.848256189524661e-06, "loss": 1.177, "step": 3039 }, { "epoch": 0.24, "grad_norm": 1.4726795384664406, "learning_rate": 8.84742653293083e-06, "loss": 0.7552, "step": 3040 }, { "epoch": 0.24, "grad_norm": 1.5225260124999893, "learning_rate": 8.846596616549961e-06, "loss": 0.7989, "step": 3041 }, { "epoch": 0.24, "grad_norm": 1.584347284364022, "learning_rate": 8.845766440438093e-06, "loss": 0.7916, "step": 3042 }, { "epoch": 0.24, "grad_norm": 1.5695851252189608, "learning_rate": 8.84493600465128e-06, "loss": 0.7719, "step": 3043 }, { "epoch": 0.24, "grad_norm": 1.6307835665556678, "learning_rate": 8.8441053092456e-06, "loss": 0.7825, "step": 3044 }, { "epoch": 0.24, "grad_norm": 1.5776015152889598, "learning_rate": 8.843274354277134e-06, "loss": 0.7599, "step": 3045 }, { "epoch": 0.24, "grad_norm": 1.0024409473035796, "learning_rate": 8.842443139801998e-06, "loss": 1.1341, "step": 3046 }, { "epoch": 0.24, "grad_norm": 1.6633893145129788, "learning_rate": 8.841611665876315e-06, "loss": 0.8601, "step": 3047 }, { "epoch": 0.24, "grad_norm": 1.388028630116776, "learning_rate": 8.840779932556227e-06, "loss": 0.795, "step": 3048 }, { "epoch": 0.24, "grad_norm": 2.1565168002877426, "learning_rate": 8.839947939897896e-06, "loss": 0.7257, "step": 3049 }, { "epoch": 0.24, "grad_norm": 1.501958649904521, "learning_rate": 8.839115687957501e-06, "loss": 0.8142, "step": 3050 }, { "epoch": 0.24, "grad_norm": 1.6605799456087253, "learning_rate": 8.838283176791234e-06, "loss": 0.8389, "step": 3051 }, { "epoch": 0.24, "grad_norm": 1.5035271508486334, "learning_rate": 8.83745040645531e-06, "loss": 0.8125, "step": 3052 }, { "epoch": 0.24, "grad_norm": 1.4363544307207956, "learning_rate": 8.83661737700596e-06, "loss": 0.8195, "step": 3053 }, { "epoch": 0.25, "grad_norm": 1.5461857329576576, "learning_rate": 8.835784088499433e-06, "loss": 0.8445, "step": 3054 }, { "epoch": 0.25, "grad_norm": 1.408979375978495, "learning_rate": 8.834950540991992e-06, "loss": 0.731, "step": 3055 }, { "epoch": 0.25, "grad_norm": 1.4980069818997153, "learning_rate": 8.834116734539922e-06, "loss": 0.8133, "step": 3056 }, { "epoch": 0.25, "grad_norm": 1.5538525547047286, "learning_rate": 8.833282669199523e-06, "loss": 0.8694, "step": 3057 }, { "epoch": 0.25, "grad_norm": 1.475315729472042, "learning_rate": 8.832448345027113e-06, "loss": 0.7662, "step": 3058 }, { "epoch": 0.25, "grad_norm": 1.6490662389481463, "learning_rate": 8.831613762079026e-06, "loss": 0.9098, "step": 3059 }, { "epoch": 0.25, "grad_norm": 1.5151682728918048, "learning_rate": 8.830778920411618e-06, "loss": 0.757, "step": 3060 }, { "epoch": 0.25, "grad_norm": 1.6828258983911344, "learning_rate": 8.829943820081258e-06, "loss": 0.8903, "step": 3061 }, { "epoch": 0.25, "grad_norm": 0.953367937432381, "learning_rate": 8.829108461144332e-06, "loss": 1.1628, "step": 3062 }, { "epoch": 0.25, "grad_norm": 1.388860181344939, "learning_rate": 8.828272843657246e-06, "loss": 0.784, "step": 3063 }, { "epoch": 0.25, "grad_norm": 1.4499347514253744, "learning_rate": 8.827436967676426e-06, "loss": 0.7633, "step": 3064 }, { "epoch": 0.25, "grad_norm": 1.9151956737167994, "learning_rate": 8.826600833258307e-06, "loss": 0.839, "step": 3065 }, { "epoch": 0.25, "grad_norm": 1.6438766916970986, "learning_rate": 8.825764440459353e-06, "loss": 0.852, "step": 3066 }, { "epoch": 0.25, "grad_norm": 1.4815984368640527, "learning_rate": 8.824927789336034e-06, "loss": 0.8329, "step": 3067 }, { "epoch": 0.25, "grad_norm": 1.4581093802469367, "learning_rate": 8.824090879944844e-06, "loss": 0.7263, "step": 3068 }, { "epoch": 0.25, "grad_norm": 1.483330024955782, "learning_rate": 8.823253712342295e-06, "loss": 0.8176, "step": 3069 }, { "epoch": 0.25, "grad_norm": 1.519211978312961, "learning_rate": 8.822416286584913e-06, "loss": 0.7323, "step": 3070 }, { "epoch": 0.25, "grad_norm": 1.4531148202258481, "learning_rate": 8.821578602729242e-06, "loss": 0.7102, "step": 3071 }, { "epoch": 0.25, "grad_norm": 1.9167369172003292, "learning_rate": 8.820740660831845e-06, "loss": 0.8102, "step": 3072 }, { "epoch": 0.25, "grad_norm": 1.577211218735741, "learning_rate": 8.819902460949303e-06, "loss": 0.7761, "step": 3073 }, { "epoch": 0.25, "grad_norm": 1.5224795671610412, "learning_rate": 8.819064003138211e-06, "loss": 0.8299, "step": 3074 }, { "epoch": 0.25, "grad_norm": 1.5645539076245956, "learning_rate": 8.818225287455186e-06, "loss": 0.8964, "step": 3075 }, { "epoch": 0.25, "grad_norm": 1.5339144843636363, "learning_rate": 8.817386313956858e-06, "loss": 0.7954, "step": 3076 }, { "epoch": 0.25, "grad_norm": 1.4418946714374445, "learning_rate": 8.816547082699877e-06, "loss": 0.8264, "step": 3077 }, { "epoch": 0.25, "grad_norm": 1.5746300795472294, "learning_rate": 8.815707593740909e-06, "loss": 0.8746, "step": 3078 }, { "epoch": 0.25, "grad_norm": 1.5165452582710521, "learning_rate": 8.81486784713664e-06, "loss": 0.8388, "step": 3079 }, { "epoch": 0.25, "grad_norm": 1.6990914402266366, "learning_rate": 8.814027842943772e-06, "loss": 0.7952, "step": 3080 }, { "epoch": 0.25, "grad_norm": 1.530252697877969, "learning_rate": 8.813187581219021e-06, "loss": 0.8778, "step": 3081 }, { "epoch": 0.25, "grad_norm": 1.5064189923856053, "learning_rate": 8.812347062019124e-06, "loss": 0.7338, "step": 3082 }, { "epoch": 0.25, "grad_norm": 1.5038453113582162, "learning_rate": 8.81150628540084e-06, "loss": 0.7733, "step": 3083 }, { "epoch": 0.25, "grad_norm": 1.4786528947152273, "learning_rate": 8.810665251420932e-06, "loss": 0.7563, "step": 3084 }, { "epoch": 0.25, "grad_norm": 1.456033965314379, "learning_rate": 8.809823960136194e-06, "loss": 0.7121, "step": 3085 }, { "epoch": 0.25, "grad_norm": 1.5447317411971586, "learning_rate": 8.80898241160343e-06, "loss": 0.861, "step": 3086 }, { "epoch": 0.25, "grad_norm": 1.5719431302333535, "learning_rate": 8.808140605879464e-06, "loss": 0.7639, "step": 3087 }, { "epoch": 0.25, "grad_norm": 1.5566190109433007, "learning_rate": 8.80729854302114e-06, "loss": 0.77, "step": 3088 }, { "epoch": 0.25, "grad_norm": 1.5078953340592898, "learning_rate": 8.806456223085308e-06, "loss": 0.8043, "step": 3089 }, { "epoch": 0.25, "grad_norm": 1.5398601806810954, "learning_rate": 8.80561364612885e-06, "loss": 0.7593, "step": 3090 }, { "epoch": 0.25, "grad_norm": 1.002166350760614, "learning_rate": 8.804770812208655e-06, "loss": 1.1402, "step": 3091 }, { "epoch": 0.25, "grad_norm": 1.5277240104488827, "learning_rate": 8.803927721381635e-06, "loss": 0.7284, "step": 3092 }, { "epoch": 0.25, "grad_norm": 1.7160364004749231, "learning_rate": 8.803084373704717e-06, "loss": 0.7661, "step": 3093 }, { "epoch": 0.25, "grad_norm": 1.5762730641103861, "learning_rate": 8.802240769234846e-06, "loss": 0.8729, "step": 3094 }, { "epoch": 0.25, "grad_norm": 0.8292016282678623, "learning_rate": 8.801396908028985e-06, "loss": 1.1334, "step": 3095 }, { "epoch": 0.25, "grad_norm": 1.580406384576486, "learning_rate": 8.800552790144113e-06, "loss": 0.8061, "step": 3096 }, { "epoch": 0.25, "grad_norm": 1.4374534526434397, "learning_rate": 8.799708415637224e-06, "loss": 0.7546, "step": 3097 }, { "epoch": 0.25, "grad_norm": 1.428930016606241, "learning_rate": 8.798863784565335e-06, "loss": 0.7551, "step": 3098 }, { "epoch": 0.25, "grad_norm": 1.5728350060254503, "learning_rate": 8.798018896985476e-06, "loss": 0.7663, "step": 3099 }, { "epoch": 0.25, "grad_norm": 1.466300973178292, "learning_rate": 8.797173752954698e-06, "loss": 0.7085, "step": 3100 }, { "epoch": 0.25, "grad_norm": 1.4194336338661122, "learning_rate": 8.796328352530063e-06, "loss": 0.7777, "step": 3101 }, { "epoch": 0.25, "grad_norm": 1.5208385993387847, "learning_rate": 8.795482695768658e-06, "loss": 0.7354, "step": 3102 }, { "epoch": 0.25, "grad_norm": 1.6558786623908743, "learning_rate": 8.794636782727584e-06, "loss": 0.7354, "step": 3103 }, { "epoch": 0.25, "grad_norm": 1.614059411276489, "learning_rate": 8.793790613463956e-06, "loss": 0.7078, "step": 3104 }, { "epoch": 0.25, "grad_norm": 1.5197712334389075, "learning_rate": 8.792944188034911e-06, "loss": 0.8748, "step": 3105 }, { "epoch": 0.25, "grad_norm": 0.9062791738171584, "learning_rate": 8.7920975064976e-06, "loss": 1.107, "step": 3106 }, { "epoch": 0.25, "grad_norm": 1.5613542689771573, "learning_rate": 8.791250568909196e-06, "loss": 0.7876, "step": 3107 }, { "epoch": 0.25, "grad_norm": 1.5332545434759974, "learning_rate": 8.790403375326883e-06, "loss": 0.7865, "step": 3108 }, { "epoch": 0.25, "grad_norm": 1.6248168790085138, "learning_rate": 8.789555925807868e-06, "loss": 0.7474, "step": 3109 }, { "epoch": 0.25, "grad_norm": 1.4877003513099791, "learning_rate": 8.78870822040937e-06, "loss": 0.7364, "step": 3110 }, { "epoch": 0.25, "grad_norm": 1.4262439894787198, "learning_rate": 8.787860259188633e-06, "loss": 0.8131, "step": 3111 }, { "epoch": 0.25, "grad_norm": 1.4220815932089819, "learning_rate": 8.787012042202907e-06, "loss": 0.8666, "step": 3112 }, { "epoch": 0.25, "grad_norm": 1.915290199089687, "learning_rate": 8.786163569509468e-06, "loss": 0.8077, "step": 3113 }, { "epoch": 0.25, "grad_norm": 1.5940290283863148, "learning_rate": 8.785314841165608e-06, "loss": 0.8163, "step": 3114 }, { "epoch": 0.25, "grad_norm": 1.6472773368774871, "learning_rate": 8.784465857228635e-06, "loss": 0.8714, "step": 3115 }, { "epoch": 0.25, "grad_norm": 1.598673219845349, "learning_rate": 8.783616617755872e-06, "loss": 0.7387, "step": 3116 }, { "epoch": 0.25, "grad_norm": 1.454492943891432, "learning_rate": 8.782767122804664e-06, "loss": 0.722, "step": 3117 }, { "epoch": 0.25, "grad_norm": 1.396045599365883, "learning_rate": 8.781917372432372e-06, "loss": 0.7535, "step": 3118 }, { "epoch": 0.25, "grad_norm": 1.605447466359176, "learning_rate": 8.781067366696368e-06, "loss": 0.8544, "step": 3119 }, { "epoch": 0.25, "grad_norm": 1.4289127922821865, "learning_rate": 8.780217105654053e-06, "loss": 0.773, "step": 3120 }, { "epoch": 0.25, "grad_norm": 0.9410023276919095, "learning_rate": 8.779366589362832e-06, "loss": 1.0767, "step": 3121 }, { "epoch": 0.25, "grad_norm": 1.4740690962377336, "learning_rate": 8.778515817880138e-06, "loss": 0.829, "step": 3122 }, { "epoch": 0.25, "grad_norm": 0.7967490513297111, "learning_rate": 8.777664791263414e-06, "loss": 1.1135, "step": 3123 }, { "epoch": 0.25, "grad_norm": 1.4140678156614672, "learning_rate": 8.776813509570128e-06, "loss": 0.7277, "step": 3124 }, { "epoch": 0.25, "grad_norm": 1.4730568218062938, "learning_rate": 8.775961972857756e-06, "loss": 0.7255, "step": 3125 }, { "epoch": 0.25, "grad_norm": 1.500131241714859, "learning_rate": 8.775110181183797e-06, "loss": 0.84, "step": 3126 }, { "epoch": 0.25, "grad_norm": 0.9183382393714884, "learning_rate": 8.774258134605768e-06, "loss": 1.14, "step": 3127 }, { "epoch": 0.25, "grad_norm": 1.3513378619113963, "learning_rate": 8.773405833181199e-06, "loss": 0.7812, "step": 3128 }, { "epoch": 0.25, "grad_norm": 0.8618208668025693, "learning_rate": 8.772553276967639e-06, "loss": 1.1474, "step": 3129 }, { "epoch": 0.25, "grad_norm": 1.5008291443943045, "learning_rate": 8.771700466022655e-06, "loss": 0.7719, "step": 3130 }, { "epoch": 0.25, "grad_norm": 1.627120206523081, "learning_rate": 8.77084740040383e-06, "loss": 0.8561, "step": 3131 }, { "epoch": 0.25, "grad_norm": 1.577153124859275, "learning_rate": 8.76999408016877e-06, "loss": 0.7629, "step": 3132 }, { "epoch": 0.25, "grad_norm": 1.5762420491818898, "learning_rate": 8.769140505375084e-06, "loss": 0.8378, "step": 3133 }, { "epoch": 0.25, "grad_norm": 1.52622794783532, "learning_rate": 8.768286676080415e-06, "loss": 0.8286, "step": 3134 }, { "epoch": 0.25, "grad_norm": 1.4620932149657226, "learning_rate": 8.767432592342413e-06, "loss": 0.8005, "step": 3135 }, { "epoch": 0.25, "grad_norm": 1.476515698824882, "learning_rate": 8.766578254218746e-06, "loss": 0.7857, "step": 3136 }, { "epoch": 0.25, "grad_norm": 1.582162614342305, "learning_rate": 8.765723661767105e-06, "loss": 0.7861, "step": 3137 }, { "epoch": 0.25, "grad_norm": 1.4356998058437276, "learning_rate": 8.76486881504519e-06, "loss": 0.7535, "step": 3138 }, { "epoch": 0.25, "grad_norm": 1.619427941183286, "learning_rate": 8.764013714110727e-06, "loss": 0.8608, "step": 3139 }, { "epoch": 0.25, "grad_norm": 1.4884048255448308, "learning_rate": 8.763158359021447e-06, "loss": 0.7166, "step": 3140 }, { "epoch": 0.25, "grad_norm": 1.4229080578026914, "learning_rate": 8.76230274983511e-06, "loss": 0.7775, "step": 3141 }, { "epoch": 0.25, "grad_norm": 1.6010770236954148, "learning_rate": 8.761446886609489e-06, "loss": 0.77, "step": 3142 }, { "epoch": 0.25, "grad_norm": 1.506324294220358, "learning_rate": 8.760590769402372e-06, "loss": 0.8111, "step": 3143 }, { "epoch": 0.25, "grad_norm": 1.662117393319966, "learning_rate": 8.759734398271571e-06, "loss": 0.8316, "step": 3144 }, { "epoch": 0.25, "grad_norm": 1.4724432755027153, "learning_rate": 8.758877773274903e-06, "loss": 0.7609, "step": 3145 }, { "epoch": 0.25, "grad_norm": 1.5795335256474001, "learning_rate": 8.758020894470214e-06, "loss": 0.7494, "step": 3146 }, { "epoch": 0.25, "grad_norm": 1.6171500027536925, "learning_rate": 8.757163761915359e-06, "loss": 0.8917, "step": 3147 }, { "epoch": 0.25, "grad_norm": 1.3039661982121495, "learning_rate": 8.756306375668217e-06, "loss": 1.1474, "step": 3148 }, { "epoch": 0.25, "grad_norm": 1.6125911142586806, "learning_rate": 8.75544873578668e-06, "loss": 0.829, "step": 3149 }, { "epoch": 0.25, "grad_norm": 1.4968126295687911, "learning_rate": 8.754590842328655e-06, "loss": 0.7336, "step": 3150 }, { "epoch": 0.25, "grad_norm": 1.4179705414085084, "learning_rate": 8.753732695352072e-06, "loss": 0.7244, "step": 3151 }, { "epoch": 0.25, "grad_norm": 1.5889144611713966, "learning_rate": 8.752874294914874e-06, "loss": 0.7575, "step": 3152 }, { "epoch": 0.25, "grad_norm": 1.5657062189493105, "learning_rate": 8.75201564107502e-06, "loss": 0.7586, "step": 3153 }, { "epoch": 0.25, "grad_norm": 1.4289395697778238, "learning_rate": 8.751156733890494e-06, "loss": 0.8244, "step": 3154 }, { "epoch": 0.25, "grad_norm": 1.4432469869815945, "learning_rate": 8.750297573419286e-06, "loss": 0.7347, "step": 3155 }, { "epoch": 0.25, "grad_norm": 1.5267339704425509, "learning_rate": 8.74943815971941e-06, "loss": 0.8223, "step": 3156 }, { "epoch": 0.25, "grad_norm": 1.4963257222084592, "learning_rate": 8.748578492848896e-06, "loss": 0.8414, "step": 3157 }, { "epoch": 0.25, "grad_norm": 1.5275194786985582, "learning_rate": 8.74771857286579e-06, "loss": 0.768, "step": 3158 }, { "epoch": 0.25, "grad_norm": 1.5558219075185953, "learning_rate": 8.746858399828158e-06, "loss": 0.804, "step": 3159 }, { "epoch": 0.25, "grad_norm": 1.6540195989395183, "learning_rate": 8.74599797379408e-06, "loss": 0.7821, "step": 3160 }, { "epoch": 0.25, "grad_norm": 1.4523496964118205, "learning_rate": 8.74513729482165e-06, "loss": 0.8151, "step": 3161 }, { "epoch": 0.25, "grad_norm": 1.7423097177071774, "learning_rate": 8.744276362968988e-06, "loss": 0.815, "step": 3162 }, { "epoch": 0.25, "grad_norm": 1.6984400558775399, "learning_rate": 8.743415178294223e-06, "loss": 0.7092, "step": 3163 }, { "epoch": 0.25, "grad_norm": 1.738712537058898, "learning_rate": 8.742553740855507e-06, "loss": 0.8743, "step": 3164 }, { "epoch": 0.25, "grad_norm": 1.4587834351917808, "learning_rate": 8.741692050711003e-06, "loss": 0.8256, "step": 3165 }, { "epoch": 0.25, "grad_norm": 1.4955772014900865, "learning_rate": 8.740830107918896e-06, "loss": 0.7875, "step": 3166 }, { "epoch": 0.25, "grad_norm": 1.2942240755673318, "learning_rate": 8.739967912537385e-06, "loss": 1.1428, "step": 3167 }, { "epoch": 0.25, "grad_norm": 1.5986785478802823, "learning_rate": 8.739105464624691e-06, "loss": 0.8616, "step": 3168 }, { "epoch": 0.25, "grad_norm": 1.4262295009261339, "learning_rate": 8.738242764239046e-06, "loss": 0.7875, "step": 3169 }, { "epoch": 0.25, "grad_norm": 1.4636930831957609, "learning_rate": 8.7373798114387e-06, "loss": 0.8415, "step": 3170 }, { "epoch": 0.25, "grad_norm": 1.511452584195006, "learning_rate": 8.736516606281923e-06, "loss": 0.7394, "step": 3171 }, { "epoch": 0.25, "grad_norm": 1.3697482038855815, "learning_rate": 8.735653148827002e-06, "loss": 0.812, "step": 3172 }, { "epoch": 0.25, "grad_norm": 0.9471873617273203, "learning_rate": 8.734789439132239e-06, "loss": 1.1085, "step": 3173 }, { "epoch": 0.25, "grad_norm": 1.5430876954324562, "learning_rate": 8.733925477255952e-06, "loss": 0.7055, "step": 3174 }, { "epoch": 0.25, "grad_norm": 0.8641466236667026, "learning_rate": 8.733061263256477e-06, "loss": 1.1472, "step": 3175 }, { "epoch": 0.25, "grad_norm": 1.4672447911631943, "learning_rate": 8.73219679719217e-06, "loss": 0.7309, "step": 3176 }, { "epoch": 0.25, "grad_norm": 1.6129592790063643, "learning_rate": 8.731332079121403e-06, "loss": 0.8509, "step": 3177 }, { "epoch": 0.25, "grad_norm": 1.4505144681869895, "learning_rate": 8.73046710910256e-06, "loss": 0.7993, "step": 3178 }, { "epoch": 0.26, "grad_norm": 1.3671435331109323, "learning_rate": 8.729601887194048e-06, "loss": 0.7271, "step": 3179 }, { "epoch": 0.26, "grad_norm": 1.4734354803717087, "learning_rate": 8.728736413454288e-06, "loss": 0.7661, "step": 3180 }, { "epoch": 0.26, "grad_norm": 1.7007357656330926, "learning_rate": 8.727870687941721e-06, "loss": 0.7661, "step": 3181 }, { "epoch": 0.26, "grad_norm": 1.1917305184937865, "learning_rate": 8.7270047107148e-06, "loss": 1.1249, "step": 3182 }, { "epoch": 0.26, "grad_norm": 1.5311581719983118, "learning_rate": 8.726138481831997e-06, "loss": 0.8312, "step": 3183 }, { "epoch": 0.26, "grad_norm": 1.4587146670958966, "learning_rate": 8.725272001351805e-06, "loss": 0.8079, "step": 3184 }, { "epoch": 0.26, "grad_norm": 1.5218678729933361, "learning_rate": 8.724405269332727e-06, "loss": 0.841, "step": 3185 }, { "epoch": 0.26, "grad_norm": 1.6471763189073796, "learning_rate": 8.72353828583329e-06, "loss": 0.8412, "step": 3186 }, { "epoch": 0.26, "grad_norm": 1.6378130392734012, "learning_rate": 8.722671050912034e-06, "loss": 0.7951, "step": 3187 }, { "epoch": 0.26, "grad_norm": 1.4748806114775252, "learning_rate": 8.721803564627517e-06, "loss": 0.7773, "step": 3188 }, { "epoch": 0.26, "grad_norm": 1.493333067803711, "learning_rate": 8.720935827038313e-06, "loss": 0.7301, "step": 3189 }, { "epoch": 0.26, "grad_norm": 1.6108300477060489, "learning_rate": 8.720067838203013e-06, "loss": 0.838, "step": 3190 }, { "epoch": 0.26, "grad_norm": 1.673565279852178, "learning_rate": 8.719199598180224e-06, "loss": 0.8306, "step": 3191 }, { "epoch": 0.26, "grad_norm": 1.6081104497356187, "learning_rate": 8.718331107028577e-06, "loss": 0.7437, "step": 3192 }, { "epoch": 0.26, "grad_norm": 1.431411754960613, "learning_rate": 8.717462364806708e-06, "loss": 0.8064, "step": 3193 }, { "epoch": 0.26, "grad_norm": 1.5241899220090658, "learning_rate": 8.71659337157328e-06, "loss": 0.8154, "step": 3194 }, { "epoch": 0.26, "grad_norm": 1.4933202231887361, "learning_rate": 8.715724127386971e-06, "loss": 0.8286, "step": 3195 }, { "epoch": 0.26, "grad_norm": 1.5393260131463324, "learning_rate": 8.714854632306473e-06, "loss": 0.7991, "step": 3196 }, { "epoch": 0.26, "grad_norm": 1.5095728143233038, "learning_rate": 8.713984886390494e-06, "loss": 0.7872, "step": 3197 }, { "epoch": 0.26, "grad_norm": 1.4702508794916604, "learning_rate": 8.713114889697764e-06, "loss": 0.7141, "step": 3198 }, { "epoch": 0.26, "grad_norm": 1.0300625371145722, "learning_rate": 8.712244642287025e-06, "loss": 1.17, "step": 3199 }, { "epoch": 0.26, "grad_norm": 1.558518663911173, "learning_rate": 8.711374144217039e-06, "loss": 0.8305, "step": 3200 }, { "epoch": 0.26, "grad_norm": 1.5617763900491402, "learning_rate": 8.710503395546584e-06, "loss": 0.8174, "step": 3201 }, { "epoch": 0.26, "grad_norm": 1.635960020140099, "learning_rate": 8.709632396334458e-06, "loss": 0.8229, "step": 3202 }, { "epoch": 0.26, "grad_norm": 1.5516639991039758, "learning_rate": 8.708761146639466e-06, "loss": 0.78, "step": 3203 }, { "epoch": 0.26, "grad_norm": 1.5653001465380068, "learning_rate": 8.707889646520443e-06, "loss": 0.7724, "step": 3204 }, { "epoch": 0.26, "grad_norm": 1.59117217440984, "learning_rate": 8.707017896036232e-06, "loss": 0.8172, "step": 3205 }, { "epoch": 0.26, "grad_norm": 1.5273871899522644, "learning_rate": 8.706145895245696e-06, "loss": 0.7794, "step": 3206 }, { "epoch": 0.26, "grad_norm": 1.4838534552581528, "learning_rate": 8.705273644207715e-06, "loss": 0.8388, "step": 3207 }, { "epoch": 0.26, "grad_norm": 1.498031921364573, "learning_rate": 8.704401142981184e-06, "loss": 0.8272, "step": 3208 }, { "epoch": 0.26, "grad_norm": 1.4741911663102372, "learning_rate": 8.70352839162502e-06, "loss": 0.8025, "step": 3209 }, { "epoch": 0.26, "grad_norm": 1.4906753127407333, "learning_rate": 8.702655390198149e-06, "loss": 0.7577, "step": 3210 }, { "epoch": 0.26, "grad_norm": 1.8248897409871006, "learning_rate": 8.701782138759517e-06, "loss": 0.8979, "step": 3211 }, { "epoch": 0.26, "grad_norm": 1.508039567055376, "learning_rate": 8.700908637368093e-06, "loss": 0.7871, "step": 3212 }, { "epoch": 0.26, "grad_norm": 1.6189980039812384, "learning_rate": 8.700034886082853e-06, "loss": 0.8996, "step": 3213 }, { "epoch": 0.26, "grad_norm": 1.5364015859862488, "learning_rate": 8.699160884962798e-06, "loss": 0.8821, "step": 3214 }, { "epoch": 0.26, "grad_norm": 1.037103335572283, "learning_rate": 8.69828663406694e-06, "loss": 1.1652, "step": 3215 }, { "epoch": 0.26, "grad_norm": 1.6090865892499293, "learning_rate": 8.697412133454315e-06, "loss": 0.6963, "step": 3216 }, { "epoch": 0.26, "grad_norm": 0.8441008215431169, "learning_rate": 8.696537383183967e-06, "loss": 1.1325, "step": 3217 }, { "epoch": 0.26, "grad_norm": 0.773011928556298, "learning_rate": 8.695662383314962e-06, "loss": 1.1507, "step": 3218 }, { "epoch": 0.26, "grad_norm": 1.6681593154390442, "learning_rate": 8.694787133906381e-06, "loss": 0.7869, "step": 3219 }, { "epoch": 0.26, "grad_norm": 1.493865781145631, "learning_rate": 8.693911635017324e-06, "loss": 0.798, "step": 3220 }, { "epoch": 0.26, "grad_norm": 1.6162064367577786, "learning_rate": 8.693035886706909e-06, "loss": 0.8476, "step": 3221 }, { "epoch": 0.26, "grad_norm": 1.5423695744509542, "learning_rate": 8.692159889034264e-06, "loss": 0.7316, "step": 3222 }, { "epoch": 0.26, "grad_norm": 1.4207159941020306, "learning_rate": 8.691283642058543e-06, "loss": 0.7706, "step": 3223 }, { "epoch": 0.26, "grad_norm": 1.6641163092071336, "learning_rate": 8.690407145838909e-06, "loss": 0.7993, "step": 3224 }, { "epoch": 0.26, "grad_norm": 1.6420973222144537, "learning_rate": 8.689530400434545e-06, "loss": 0.7752, "step": 3225 }, { "epoch": 0.26, "grad_norm": 1.508623944742499, "learning_rate": 8.688653405904653e-06, "loss": 0.7778, "step": 3226 }, { "epoch": 0.26, "grad_norm": 1.5120302182830403, "learning_rate": 8.687776162308446e-06, "loss": 0.7865, "step": 3227 }, { "epoch": 0.26, "grad_norm": 1.4806291251170662, "learning_rate": 8.68689866970516e-06, "loss": 0.7787, "step": 3228 }, { "epoch": 0.26, "grad_norm": 1.3786908080292355, "learning_rate": 8.686020928154049e-06, "loss": 0.6899, "step": 3229 }, { "epoch": 0.26, "grad_norm": 1.525832885340039, "learning_rate": 8.685142937714374e-06, "loss": 0.7991, "step": 3230 }, { "epoch": 0.26, "grad_norm": 1.432934754694863, "learning_rate": 8.68426469844542e-06, "loss": 0.8296, "step": 3231 }, { "epoch": 0.26, "grad_norm": 1.5074054848302954, "learning_rate": 8.683386210406491e-06, "loss": 0.7621, "step": 3232 }, { "epoch": 0.26, "grad_norm": 1.4955758186378982, "learning_rate": 8.682507473656902e-06, "loss": 0.8077, "step": 3233 }, { "epoch": 0.26, "grad_norm": 1.3934161470577267, "learning_rate": 8.681628488255986e-06, "loss": 0.8703, "step": 3234 }, { "epoch": 0.26, "grad_norm": 1.5902481856323938, "learning_rate": 8.6807492542631e-06, "loss": 0.8324, "step": 3235 }, { "epoch": 0.26, "grad_norm": 1.475185764466494, "learning_rate": 8.679869771737603e-06, "loss": 0.8177, "step": 3236 }, { "epoch": 0.26, "grad_norm": 1.6149222399852938, "learning_rate": 8.678990040738889e-06, "loss": 1.1315, "step": 3237 }, { "epoch": 0.26, "grad_norm": 1.4361954373475614, "learning_rate": 8.678110061326352e-06, "loss": 0.8192, "step": 3238 }, { "epoch": 0.26, "grad_norm": 1.4976554128923416, "learning_rate": 8.677229833559413e-06, "loss": 0.7844, "step": 3239 }, { "epoch": 0.26, "grad_norm": 1.386714041517101, "learning_rate": 8.676349357497509e-06, "loss": 0.7348, "step": 3240 }, { "epoch": 0.26, "grad_norm": 1.449819636447611, "learning_rate": 8.675468633200089e-06, "loss": 0.7878, "step": 3241 }, { "epoch": 0.26, "grad_norm": 1.579395293652694, "learning_rate": 8.674587660726622e-06, "loss": 0.8447, "step": 3242 }, { "epoch": 0.26, "grad_norm": 1.5419875854975602, "learning_rate": 8.673706440136594e-06, "loss": 0.8783, "step": 3243 }, { "epoch": 0.26, "grad_norm": 1.637591092284609, "learning_rate": 8.672824971489506e-06, "loss": 0.8235, "step": 3244 }, { "epoch": 0.26, "grad_norm": 0.9659991722211284, "learning_rate": 8.671943254844878e-06, "loss": 1.1071, "step": 3245 }, { "epoch": 0.26, "grad_norm": 1.5421289501177977, "learning_rate": 8.671061290262245e-06, "loss": 0.796, "step": 3246 }, { "epoch": 0.26, "grad_norm": 1.4912361274572705, "learning_rate": 8.67017907780116e-06, "loss": 0.8056, "step": 3247 }, { "epoch": 0.26, "grad_norm": 1.4846199526850645, "learning_rate": 8.669296617521192e-06, "loss": 0.8961, "step": 3248 }, { "epoch": 0.26, "grad_norm": 1.5866265827751722, "learning_rate": 8.668413909481926e-06, "loss": 0.7841, "step": 3249 }, { "epoch": 0.26, "grad_norm": 1.4252790469194703, "learning_rate": 8.667530953742963e-06, "loss": 0.8585, "step": 3250 }, { "epoch": 0.26, "grad_norm": 1.6286266203177167, "learning_rate": 8.666647750363924e-06, "loss": 0.7844, "step": 3251 }, { "epoch": 0.26, "grad_norm": 0.9166121246289527, "learning_rate": 8.665764299404445e-06, "loss": 1.0988, "step": 3252 }, { "epoch": 0.26, "grad_norm": 1.4103099433099304, "learning_rate": 8.664880600924176e-06, "loss": 0.6941, "step": 3253 }, { "epoch": 0.26, "grad_norm": 1.5743431752403882, "learning_rate": 8.663996654982791e-06, "loss": 0.8149, "step": 3254 }, { "epoch": 0.26, "grad_norm": 1.4967822484595512, "learning_rate": 8.663112461639973e-06, "loss": 0.7461, "step": 3255 }, { "epoch": 0.26, "grad_norm": 2.4234346143247456, "learning_rate": 8.662228020955425e-06, "loss": 0.8249, "step": 3256 }, { "epoch": 0.26, "grad_norm": 1.534514075312154, "learning_rate": 8.661343332988869e-06, "loss": 0.7728, "step": 3257 }, { "epoch": 0.26, "grad_norm": 1.4281755740677766, "learning_rate": 8.660458397800036e-06, "loss": 0.7924, "step": 3258 }, { "epoch": 0.26, "grad_norm": 1.6291331625770813, "learning_rate": 8.659573215448685e-06, "loss": 0.7514, "step": 3259 }, { "epoch": 0.26, "grad_norm": 1.4290980487148393, "learning_rate": 8.658687785994579e-06, "loss": 0.7435, "step": 3260 }, { "epoch": 0.26, "grad_norm": 1.4448832758614776, "learning_rate": 8.65780210949751e-06, "loss": 0.754, "step": 3261 }, { "epoch": 0.26, "grad_norm": 1.6130430765891297, "learning_rate": 8.656916186017277e-06, "loss": 0.8313, "step": 3262 }, { "epoch": 0.26, "grad_norm": 1.5186555830329866, "learning_rate": 8.6560300156137e-06, "loss": 0.7451, "step": 3263 }, { "epoch": 0.26, "grad_norm": 1.4975999552448578, "learning_rate": 8.65514359834662e-06, "loss": 0.8002, "step": 3264 }, { "epoch": 0.26, "grad_norm": 1.5020589058439096, "learning_rate": 8.654256934275885e-06, "loss": 0.8492, "step": 3265 }, { "epoch": 0.26, "grad_norm": 1.4192390282247898, "learning_rate": 8.653370023461365e-06, "loss": 0.7147, "step": 3266 }, { "epoch": 0.26, "grad_norm": 1.5980151963507347, "learning_rate": 8.652482865962947e-06, "loss": 0.7904, "step": 3267 }, { "epoch": 0.26, "grad_norm": 1.4830254593857364, "learning_rate": 8.651595461840533e-06, "loss": 0.7688, "step": 3268 }, { "epoch": 0.26, "grad_norm": 1.6090668563157975, "learning_rate": 8.650707811154045e-06, "loss": 0.7963, "step": 3269 }, { "epoch": 0.26, "grad_norm": 1.4473405594764457, "learning_rate": 8.649819913963417e-06, "loss": 0.8255, "step": 3270 }, { "epoch": 0.26, "grad_norm": 0.9740902067347231, "learning_rate": 8.648931770328604e-06, "loss": 1.1647, "step": 3271 }, { "epoch": 0.26, "grad_norm": 1.4740074637914191, "learning_rate": 8.648043380309574e-06, "loss": 0.7504, "step": 3272 }, { "epoch": 0.26, "grad_norm": 1.5596307573395274, "learning_rate": 8.64715474396631e-06, "loss": 0.8563, "step": 3273 }, { "epoch": 0.26, "grad_norm": 1.544592782802664, "learning_rate": 8.646265861358822e-06, "loss": 0.8511, "step": 3274 }, { "epoch": 0.26, "grad_norm": 1.5331966829051096, "learning_rate": 8.645376732547123e-06, "loss": 0.8366, "step": 3275 }, { "epoch": 0.26, "grad_norm": 1.5414292735602828, "learning_rate": 8.644487357591252e-06, "loss": 0.8417, "step": 3276 }, { "epoch": 0.26, "grad_norm": 1.5528264214801235, "learning_rate": 8.643597736551262e-06, "loss": 0.7625, "step": 3277 }, { "epoch": 0.26, "grad_norm": 1.638042925537915, "learning_rate": 8.642707869487218e-06, "loss": 0.8559, "step": 3278 }, { "epoch": 0.26, "grad_norm": 1.6497572837647958, "learning_rate": 8.641817756459212e-06, "loss": 0.8811, "step": 3279 }, { "epoch": 0.26, "grad_norm": 1.539790742782902, "learning_rate": 8.640927397527344e-06, "loss": 0.7133, "step": 3280 }, { "epoch": 0.26, "grad_norm": 1.470068350701348, "learning_rate": 8.64003679275173e-06, "loss": 0.7815, "step": 3281 }, { "epoch": 0.26, "grad_norm": 1.5574091335189058, "learning_rate": 8.639145942192511e-06, "loss": 0.8128, "step": 3282 }, { "epoch": 0.26, "grad_norm": 1.6386956816701577, "learning_rate": 8.638254845909837e-06, "loss": 0.8262, "step": 3283 }, { "epoch": 0.26, "grad_norm": 1.8422347761236193, "learning_rate": 8.637363503963873e-06, "loss": 0.7746, "step": 3284 }, { "epoch": 0.26, "grad_norm": 1.4562055914037269, "learning_rate": 8.63647191641481e-06, "loss": 0.7768, "step": 3285 }, { "epoch": 0.26, "grad_norm": 1.5456377326106159, "learning_rate": 8.635580083322847e-06, "loss": 0.7835, "step": 3286 }, { "epoch": 0.26, "grad_norm": 1.5623340144891305, "learning_rate": 8.634688004748205e-06, "loss": 0.815, "step": 3287 }, { "epoch": 0.26, "grad_norm": 1.4584696798781007, "learning_rate": 8.633795680751116e-06, "loss": 0.7796, "step": 3288 }, { "epoch": 0.26, "grad_norm": 1.7236563241736043, "learning_rate": 8.632903111391836e-06, "loss": 0.8918, "step": 3289 }, { "epoch": 0.26, "grad_norm": 1.532946154438956, "learning_rate": 8.63201029673063e-06, "loss": 0.8634, "step": 3290 }, { "epoch": 0.26, "grad_norm": 1.5557272501066854, "learning_rate": 8.631117236827782e-06, "loss": 0.7815, "step": 3291 }, { "epoch": 0.26, "grad_norm": 0.9830316941327243, "learning_rate": 8.630223931743595e-06, "loss": 1.1333, "step": 3292 }, { "epoch": 0.26, "grad_norm": 1.459604712604364, "learning_rate": 8.629330381538387e-06, "loss": 0.8041, "step": 3293 }, { "epoch": 0.26, "grad_norm": 1.5574552516033986, "learning_rate": 8.628436586272495e-06, "loss": 0.7798, "step": 3294 }, { "epoch": 0.26, "grad_norm": 1.6841527603693505, "learning_rate": 8.627542546006267e-06, "loss": 0.7965, "step": 3295 }, { "epoch": 0.26, "grad_norm": 1.7241445619099565, "learning_rate": 8.62664826080007e-06, "loss": 0.8206, "step": 3296 }, { "epoch": 0.26, "grad_norm": 1.510372082434077, "learning_rate": 8.62575373071429e-06, "loss": 0.7973, "step": 3297 }, { "epoch": 0.26, "grad_norm": 0.969191801869946, "learning_rate": 8.624858955809328e-06, "loss": 1.1461, "step": 3298 }, { "epoch": 0.26, "grad_norm": 1.418181611038574, "learning_rate": 8.6239639361456e-06, "loss": 0.7936, "step": 3299 }, { "epoch": 0.26, "grad_norm": 1.5776373093021492, "learning_rate": 8.623068671783541e-06, "loss": 0.784, "step": 3300 }, { "epoch": 0.26, "grad_norm": 1.8206280854290395, "learning_rate": 8.6221731627836e-06, "loss": 0.8269, "step": 3301 }, { "epoch": 0.26, "grad_norm": 1.4997731941423187, "learning_rate": 8.621277409206245e-06, "loss": 0.8351, "step": 3302 }, { "epoch": 0.27, "grad_norm": 1.7141788296684661, "learning_rate": 8.620381411111958e-06, "loss": 0.8194, "step": 3303 }, { "epoch": 0.27, "grad_norm": 1.466053773915491, "learning_rate": 8.619485168561242e-06, "loss": 0.8282, "step": 3304 }, { "epoch": 0.27, "grad_norm": 1.516533910548092, "learning_rate": 8.618588681614609e-06, "loss": 0.7827, "step": 3305 }, { "epoch": 0.27, "grad_norm": 1.67325491738838, "learning_rate": 8.617691950332592e-06, "loss": 0.8475, "step": 3306 }, { "epoch": 0.27, "grad_norm": 1.4403451844931572, "learning_rate": 8.616794974775747e-06, "loss": 0.7821, "step": 3307 }, { "epoch": 0.27, "grad_norm": 1.5001917106981668, "learning_rate": 8.61589775500463e-06, "loss": 0.8034, "step": 3308 }, { "epoch": 0.27, "grad_norm": 1.5463256348331216, "learning_rate": 8.615000291079831e-06, "loss": 0.8082, "step": 3309 }, { "epoch": 0.27, "grad_norm": 0.8959006218315122, "learning_rate": 8.614102583061944e-06, "loss": 1.135, "step": 3310 }, { "epoch": 0.27, "grad_norm": 1.4623630899341653, "learning_rate": 8.613204631011589e-06, "loss": 0.8329, "step": 3311 }, { "epoch": 0.27, "grad_norm": 1.559672996872607, "learning_rate": 8.612306434989395e-06, "loss": 0.7971, "step": 3312 }, { "epoch": 0.27, "grad_norm": 1.6591986758736, "learning_rate": 8.61140799505601e-06, "loss": 0.9211, "step": 3313 }, { "epoch": 0.27, "grad_norm": 1.5673557376589486, "learning_rate": 8.610509311272099e-06, "loss": 0.7973, "step": 3314 }, { "epoch": 0.27, "grad_norm": 0.8417875708278847, "learning_rate": 8.609610383698343e-06, "loss": 1.1247, "step": 3315 }, { "epoch": 0.27, "grad_norm": 0.8744058656477753, "learning_rate": 8.608711212395439e-06, "loss": 1.1002, "step": 3316 }, { "epoch": 0.27, "grad_norm": 1.487392471507692, "learning_rate": 8.607811797424104e-06, "loss": 0.7715, "step": 3317 }, { "epoch": 0.27, "grad_norm": 1.4668020218861684, "learning_rate": 8.606912138845066e-06, "loss": 0.7071, "step": 3318 }, { "epoch": 0.27, "grad_norm": 1.5033294157885893, "learning_rate": 8.606012236719073e-06, "loss": 0.8406, "step": 3319 }, { "epoch": 0.27, "grad_norm": 1.530815028663454, "learning_rate": 8.605112091106889e-06, "loss": 0.7378, "step": 3320 }, { "epoch": 0.27, "grad_norm": 1.4524292550628113, "learning_rate": 8.604211702069292e-06, "loss": 0.7956, "step": 3321 }, { "epoch": 0.27, "grad_norm": 1.529002899155621, "learning_rate": 8.603311069667079e-06, "loss": 0.8017, "step": 3322 }, { "epoch": 0.27, "grad_norm": 1.4287958175045858, "learning_rate": 8.602410193961063e-06, "loss": 0.7947, "step": 3323 }, { "epoch": 0.27, "grad_norm": 1.449101707758444, "learning_rate": 8.601509075012074e-06, "loss": 0.7566, "step": 3324 }, { "epoch": 0.27, "grad_norm": 1.557832933409542, "learning_rate": 8.600607712880956e-06, "loss": 0.7678, "step": 3325 }, { "epoch": 0.27, "grad_norm": 1.544886823648102, "learning_rate": 8.599706107628573e-06, "loss": 0.7955, "step": 3326 }, { "epoch": 0.27, "grad_norm": 1.524679372208119, "learning_rate": 8.598804259315802e-06, "loss": 0.7735, "step": 3327 }, { "epoch": 0.27, "grad_norm": 1.4340673842201674, "learning_rate": 8.597902168003539e-06, "loss": 0.8678, "step": 3328 }, { "epoch": 0.27, "grad_norm": 1.5045437336409346, "learning_rate": 8.596999833752694e-06, "loss": 0.8518, "step": 3329 }, { "epoch": 0.27, "grad_norm": 1.5387139189310655, "learning_rate": 8.596097256624194e-06, "loss": 0.6806, "step": 3330 }, { "epoch": 0.27, "grad_norm": 1.022464173225089, "learning_rate": 8.595194436678983e-06, "loss": 1.1212, "step": 3331 }, { "epoch": 0.27, "grad_norm": 1.4811281559434066, "learning_rate": 8.594291373978028e-06, "loss": 0.7287, "step": 3332 }, { "epoch": 0.27, "grad_norm": 1.6015363750156646, "learning_rate": 8.593388068582296e-06, "loss": 0.784, "step": 3333 }, { "epoch": 0.27, "grad_norm": 1.409733960272055, "learning_rate": 8.592484520552786e-06, "loss": 0.7992, "step": 3334 }, { "epoch": 0.27, "grad_norm": 1.6275956443650899, "learning_rate": 8.591580729950506e-06, "loss": 0.8182, "step": 3335 }, { "epoch": 0.27, "grad_norm": 1.6036253434041168, "learning_rate": 8.590676696836484e-06, "loss": 0.8481, "step": 3336 }, { "epoch": 0.27, "grad_norm": 1.489228781790211, "learning_rate": 8.58977242127176e-06, "loss": 0.8007, "step": 3337 }, { "epoch": 0.27, "grad_norm": 1.47290304889642, "learning_rate": 8.588867903317395e-06, "loss": 0.848, "step": 3338 }, { "epoch": 0.27, "grad_norm": 1.5907501429595872, "learning_rate": 8.587963143034461e-06, "loss": 0.789, "step": 3339 }, { "epoch": 0.27, "grad_norm": 1.532781583802185, "learning_rate": 8.587058140484051e-06, "loss": 0.7716, "step": 3340 }, { "epoch": 0.27, "grad_norm": 1.5574053591718162, "learning_rate": 8.586152895727273e-06, "loss": 0.787, "step": 3341 }, { "epoch": 0.27, "grad_norm": 0.9373462971721662, "learning_rate": 8.585247408825252e-06, "loss": 1.1224, "step": 3342 }, { "epoch": 0.27, "grad_norm": 1.4838439580441407, "learning_rate": 8.584341679839129e-06, "loss": 0.8445, "step": 3343 }, { "epoch": 0.27, "grad_norm": 1.5951994290541198, "learning_rate": 8.583435708830058e-06, "loss": 0.8087, "step": 3344 }, { "epoch": 0.27, "grad_norm": 0.820250645930716, "learning_rate": 8.582529495859214e-06, "loss": 1.1737, "step": 3345 }, { "epoch": 0.27, "grad_norm": 1.5576241353849183, "learning_rate": 8.581623040987788e-06, "loss": 0.7524, "step": 3346 }, { "epoch": 0.27, "grad_norm": 1.4917543966721278, "learning_rate": 8.580716344276983e-06, "loss": 0.7369, "step": 3347 }, { "epoch": 0.27, "grad_norm": 1.3848183813559427, "learning_rate": 8.579809405788022e-06, "loss": 0.7311, "step": 3348 }, { "epoch": 0.27, "grad_norm": 0.957568144174073, "learning_rate": 8.578902225582145e-06, "loss": 1.1309, "step": 3349 }, { "epoch": 0.27, "grad_norm": 1.5721491671945418, "learning_rate": 8.577994803720605e-06, "loss": 0.8221, "step": 3350 }, { "epoch": 0.27, "grad_norm": 1.645263961177014, "learning_rate": 8.577087140264677e-06, "loss": 0.7473, "step": 3351 }, { "epoch": 0.27, "grad_norm": 0.8124226806758676, "learning_rate": 8.576179235275643e-06, "loss": 1.164, "step": 3352 }, { "epoch": 0.27, "grad_norm": 1.4471767431748168, "learning_rate": 8.575271088814811e-06, "loss": 0.8124, "step": 3353 }, { "epoch": 0.27, "grad_norm": 1.5610849132602493, "learning_rate": 8.574362700943501e-06, "loss": 0.766, "step": 3354 }, { "epoch": 0.27, "grad_norm": 0.8707505313602819, "learning_rate": 8.573454071723046e-06, "loss": 1.1145, "step": 3355 }, { "epoch": 0.27, "grad_norm": 1.4848332186577673, "learning_rate": 8.572545201214802e-06, "loss": 0.7562, "step": 3356 }, { "epoch": 0.27, "grad_norm": 1.4578253786173254, "learning_rate": 8.571636089480135e-06, "loss": 0.7685, "step": 3357 }, { "epoch": 0.27, "grad_norm": 1.5857738878691185, "learning_rate": 8.570726736580434e-06, "loss": 0.8397, "step": 3358 }, { "epoch": 0.27, "grad_norm": 1.5672089498113106, "learning_rate": 8.569817142577099e-06, "loss": 0.8157, "step": 3359 }, { "epoch": 0.27, "grad_norm": 1.4951836297050192, "learning_rate": 8.568907307531547e-06, "loss": 0.7282, "step": 3360 }, { "epoch": 0.27, "grad_norm": 1.5300229562060224, "learning_rate": 8.567997231505213e-06, "loss": 0.84, "step": 3361 }, { "epoch": 0.27, "grad_norm": 1.5370789855626543, "learning_rate": 8.567086914559545e-06, "loss": 0.7204, "step": 3362 }, { "epoch": 0.27, "grad_norm": 1.6068788413868187, "learning_rate": 8.566176356756015e-06, "loss": 0.8576, "step": 3363 }, { "epoch": 0.27, "grad_norm": 1.3226262288558737, "learning_rate": 8.565265558156101e-06, "loss": 0.7503, "step": 3364 }, { "epoch": 0.27, "grad_norm": 1.7318498795543795, "learning_rate": 8.564354518821307e-06, "loss": 0.8925, "step": 3365 }, { "epoch": 0.27, "grad_norm": 1.458387703398481, "learning_rate": 8.563443238813143e-06, "loss": 0.8151, "step": 3366 }, { "epoch": 0.27, "grad_norm": 1.476384808159295, "learning_rate": 8.562531718193144e-06, "loss": 0.798, "step": 3367 }, { "epoch": 0.27, "grad_norm": 1.4200510801235173, "learning_rate": 8.561619957022855e-06, "loss": 0.8354, "step": 3368 }, { "epoch": 0.27, "grad_norm": 1.6055330033170263, "learning_rate": 8.560707955363845e-06, "loss": 0.6985, "step": 3369 }, { "epoch": 0.27, "grad_norm": 1.5712442771062076, "learning_rate": 8.55979571327769e-06, "loss": 0.8828, "step": 3370 }, { "epoch": 0.27, "grad_norm": 0.8773848608317171, "learning_rate": 8.55888323082599e-06, "loss": 1.153, "step": 3371 }, { "epoch": 0.27, "grad_norm": 1.4559647381473184, "learning_rate": 8.557970508070356e-06, "loss": 0.7741, "step": 3372 }, { "epoch": 0.27, "grad_norm": 1.540014556802582, "learning_rate": 8.557057545072417e-06, "loss": 0.7992, "step": 3373 }, { "epoch": 0.27, "grad_norm": 1.4765466430303391, "learning_rate": 8.556144341893819e-06, "loss": 0.7934, "step": 3374 }, { "epoch": 0.27, "grad_norm": 1.5634188876914148, "learning_rate": 8.555230898596223e-06, "loss": 0.8551, "step": 3375 }, { "epoch": 0.27, "grad_norm": 1.3976782931050098, "learning_rate": 8.554317215241308e-06, "loss": 0.8072, "step": 3376 }, { "epoch": 0.27, "grad_norm": 0.855948445387043, "learning_rate": 8.553403291890767e-06, "loss": 1.1528, "step": 3377 }, { "epoch": 0.27, "grad_norm": 1.4738211328070994, "learning_rate": 8.55248912860631e-06, "loss": 0.7172, "step": 3378 }, { "epoch": 0.27, "grad_norm": 1.5241101804538542, "learning_rate": 8.551574725449665e-06, "loss": 0.8492, "step": 3379 }, { "epoch": 0.27, "grad_norm": 0.7740462955708514, "learning_rate": 8.55066008248257e-06, "loss": 1.0972, "step": 3380 }, { "epoch": 0.27, "grad_norm": 1.525710534585981, "learning_rate": 8.549745199766792e-06, "loss": 0.8703, "step": 3381 }, { "epoch": 0.27, "grad_norm": 1.4703983261637346, "learning_rate": 8.548830077364099e-06, "loss": 0.7253, "step": 3382 }, { "epoch": 0.27, "grad_norm": 1.537241642957234, "learning_rate": 8.547914715336283e-06, "loss": 0.7437, "step": 3383 }, { "epoch": 0.27, "grad_norm": 1.6349451210094343, "learning_rate": 8.546999113745153e-06, "loss": 0.8519, "step": 3384 }, { "epoch": 0.27, "grad_norm": 1.4759516785145512, "learning_rate": 8.546083272652534e-06, "loss": 0.7501, "step": 3385 }, { "epoch": 0.27, "grad_norm": 1.39187232412472, "learning_rate": 8.545167192120263e-06, "loss": 0.8106, "step": 3386 }, { "epoch": 0.27, "grad_norm": 1.417444057460987, "learning_rate": 8.544250872210196e-06, "loss": 0.7704, "step": 3387 }, { "epoch": 0.27, "grad_norm": 1.4433882925185795, "learning_rate": 8.543334312984207e-06, "loss": 0.7445, "step": 3388 }, { "epoch": 0.27, "grad_norm": 1.5801291977494123, "learning_rate": 8.54241751450418e-06, "loss": 0.7383, "step": 3389 }, { "epoch": 0.27, "grad_norm": 1.7393058092144527, "learning_rate": 8.541500476832025e-06, "loss": 0.7884, "step": 3390 }, { "epoch": 0.27, "grad_norm": 1.505755027127155, "learning_rate": 8.540583200029657e-06, "loss": 0.7872, "step": 3391 }, { "epoch": 0.27, "grad_norm": 1.5296802366700433, "learning_rate": 8.539665684159018e-06, "loss": 0.8106, "step": 3392 }, { "epoch": 0.27, "grad_norm": 1.5593493525762068, "learning_rate": 8.538747929282058e-06, "loss": 0.8221, "step": 3393 }, { "epoch": 0.27, "grad_norm": 1.5196211507813802, "learning_rate": 8.537829935460745e-06, "loss": 0.8476, "step": 3394 }, { "epoch": 0.27, "grad_norm": 1.4180977852685681, "learning_rate": 8.536911702757064e-06, "loss": 0.7786, "step": 3395 }, { "epoch": 0.27, "grad_norm": 1.5622299541382703, "learning_rate": 8.53599323123302e-06, "loss": 0.8938, "step": 3396 }, { "epoch": 0.27, "grad_norm": 1.5124413984623433, "learning_rate": 8.535074520950624e-06, "loss": 0.7222, "step": 3397 }, { "epoch": 0.27, "grad_norm": 1.464792259427628, "learning_rate": 8.534155571971916e-06, "loss": 0.7424, "step": 3398 }, { "epoch": 0.27, "grad_norm": 1.4594214473155425, "learning_rate": 8.53323638435894e-06, "loss": 0.8133, "step": 3399 }, { "epoch": 0.27, "grad_norm": 1.4437549586336162, "learning_rate": 8.532316958173765e-06, "loss": 0.8153, "step": 3400 }, { "epoch": 0.27, "grad_norm": 1.4276620149813757, "learning_rate": 8.531397293478472e-06, "loss": 0.7581, "step": 3401 }, { "epoch": 0.27, "grad_norm": 1.3655580442562667, "learning_rate": 8.530477390335158e-06, "loss": 0.7369, "step": 3402 }, { "epoch": 0.27, "grad_norm": 1.587041806840878, "learning_rate": 8.52955724880594e-06, "loss": 0.7686, "step": 3403 }, { "epoch": 0.27, "grad_norm": 1.4730932730858157, "learning_rate": 8.528636868952944e-06, "loss": 0.7589, "step": 3404 }, { "epoch": 0.27, "grad_norm": 1.5002166227243023, "learning_rate": 8.527716250838318e-06, "loss": 0.807, "step": 3405 }, { "epoch": 0.27, "grad_norm": 1.631903669745231, "learning_rate": 8.526795394524224e-06, "loss": 0.8439, "step": 3406 }, { "epoch": 0.27, "grad_norm": 1.5062431862496068, "learning_rate": 8.525874300072841e-06, "loss": 0.8029, "step": 3407 }, { "epoch": 0.27, "grad_norm": 1.4256710367421443, "learning_rate": 8.524952967546363e-06, "loss": 0.8186, "step": 3408 }, { "epoch": 0.27, "grad_norm": 0.951457407786043, "learning_rate": 8.524031397007e-06, "loss": 1.1074, "step": 3409 }, { "epoch": 0.27, "grad_norm": 0.8843161066462978, "learning_rate": 8.523109588516978e-06, "loss": 1.1271, "step": 3410 }, { "epoch": 0.27, "grad_norm": 1.5088167767572076, "learning_rate": 8.522187542138541e-06, "loss": 0.667, "step": 3411 }, { "epoch": 0.27, "grad_norm": 1.380443180521383, "learning_rate": 8.521265257933948e-06, "loss": 0.7679, "step": 3412 }, { "epoch": 0.27, "grad_norm": 1.406970901721928, "learning_rate": 8.520342735965473e-06, "loss": 0.7952, "step": 3413 }, { "epoch": 0.27, "grad_norm": 1.5451141772027912, "learning_rate": 8.519419976295405e-06, "loss": 0.8545, "step": 3414 }, { "epoch": 0.27, "grad_norm": 1.5123450844757678, "learning_rate": 8.518496978986054e-06, "loss": 0.7831, "step": 3415 }, { "epoch": 0.27, "grad_norm": 1.4847687123430433, "learning_rate": 8.51757374409974e-06, "loss": 0.7988, "step": 3416 }, { "epoch": 0.27, "grad_norm": 1.4587887083993443, "learning_rate": 8.516650271698805e-06, "loss": 0.7418, "step": 3417 }, { "epoch": 0.27, "grad_norm": 1.4945521603228082, "learning_rate": 8.515726561845602e-06, "loss": 0.7315, "step": 3418 }, { "epoch": 0.27, "grad_norm": 1.8212989471174916, "learning_rate": 8.514802614602503e-06, "loss": 0.7959, "step": 3419 }, { "epoch": 0.27, "grad_norm": 1.4408393407702618, "learning_rate": 8.513878430031891e-06, "loss": 0.7948, "step": 3420 }, { "epoch": 0.27, "grad_norm": 1.531375938828622, "learning_rate": 8.512954008196178e-06, "loss": 0.7797, "step": 3421 }, { "epoch": 0.27, "grad_norm": 1.441120882442314, "learning_rate": 8.512029349157774e-06, "loss": 0.7907, "step": 3422 }, { "epoch": 0.27, "grad_norm": 1.5701273432760572, "learning_rate": 8.511104452979117e-06, "loss": 0.8191, "step": 3423 }, { "epoch": 0.27, "grad_norm": 1.476361827817332, "learning_rate": 8.51017931972266e-06, "loss": 0.798, "step": 3424 }, { "epoch": 0.27, "grad_norm": 1.48300995102003, "learning_rate": 8.509253949450869e-06, "loss": 0.7975, "step": 3425 }, { "epoch": 0.27, "grad_norm": 1.506361942406405, "learning_rate": 8.508328342226226e-06, "loss": 0.7637, "step": 3426 }, { "epoch": 0.27, "grad_norm": 1.5092718903679876, "learning_rate": 8.507402498111233e-06, "loss": 0.7495, "step": 3427 }, { "epoch": 0.28, "grad_norm": 1.5446469225809485, "learning_rate": 8.5064764171684e-06, "loss": 0.7864, "step": 3428 }, { "epoch": 0.28, "grad_norm": 1.2295115963659597, "learning_rate": 8.505550099460264e-06, "loss": 1.1197, "step": 3429 }, { "epoch": 0.28, "grad_norm": 1.500921275855427, "learning_rate": 8.504623545049369e-06, "loss": 0.7815, "step": 3430 }, { "epoch": 0.28, "grad_norm": 1.8162125818291612, "learning_rate": 8.503696753998277e-06, "loss": 0.7315, "step": 3431 }, { "epoch": 0.28, "grad_norm": 0.8681609389355085, "learning_rate": 8.50276972636957e-06, "loss": 1.1213, "step": 3432 }, { "epoch": 0.28, "grad_norm": 1.525897148028584, "learning_rate": 8.50184246222584e-06, "loss": 0.8357, "step": 3433 }, { "epoch": 0.28, "grad_norm": 1.4638296565314717, "learning_rate": 8.5009149616297e-06, "loss": 0.722, "step": 3434 }, { "epoch": 0.28, "grad_norm": 1.5685386288975545, "learning_rate": 8.499987224643777e-06, "loss": 0.7839, "step": 3435 }, { "epoch": 0.28, "grad_norm": 1.5725060553161119, "learning_rate": 8.499059251330714e-06, "loss": 0.7893, "step": 3436 }, { "epoch": 0.28, "grad_norm": 1.0302495374238567, "learning_rate": 8.498131041753168e-06, "loss": 1.111, "step": 3437 }, { "epoch": 0.28, "grad_norm": 1.5633207609663466, "learning_rate": 8.497202595973818e-06, "loss": 0.771, "step": 3438 }, { "epoch": 0.28, "grad_norm": 1.5138126748100817, "learning_rate": 8.496273914055347e-06, "loss": 0.7697, "step": 3439 }, { "epoch": 0.28, "grad_norm": 1.4737362054429548, "learning_rate": 8.495344996060471e-06, "loss": 0.7434, "step": 3440 }, { "epoch": 0.28, "grad_norm": 1.505879076918591, "learning_rate": 8.494415842051905e-06, "loss": 0.7607, "step": 3441 }, { "epoch": 0.28, "grad_norm": 1.5678052691514186, "learning_rate": 8.493486452092391e-06, "loss": 0.7781, "step": 3442 }, { "epoch": 0.28, "grad_norm": 1.39825468555444, "learning_rate": 8.492556826244687e-06, "loss": 0.7937, "step": 3443 }, { "epoch": 0.28, "grad_norm": 1.6111135686172926, "learning_rate": 8.491626964571555e-06, "loss": 0.8077, "step": 3444 }, { "epoch": 0.28, "grad_norm": 1.6753891428526013, "learning_rate": 8.490696867135791e-06, "loss": 0.8171, "step": 3445 }, { "epoch": 0.28, "grad_norm": 0.9493230391663534, "learning_rate": 8.48976653400019e-06, "loss": 1.1369, "step": 3446 }, { "epoch": 0.28, "grad_norm": 1.454294987375585, "learning_rate": 8.488835965227572e-06, "loss": 0.811, "step": 3447 }, { "epoch": 0.28, "grad_norm": 1.432085862134217, "learning_rate": 8.487905160880773e-06, "loss": 0.8602, "step": 3448 }, { "epoch": 0.28, "grad_norm": 1.56887584892502, "learning_rate": 8.486974121022642e-06, "loss": 0.8661, "step": 3449 }, { "epoch": 0.28, "grad_norm": 1.6628760286502084, "learning_rate": 8.486042845716046e-06, "loss": 0.8263, "step": 3450 }, { "epoch": 0.28, "grad_norm": 1.5241911127270555, "learning_rate": 8.485111335023865e-06, "loss": 0.8099, "step": 3451 }, { "epoch": 0.28, "grad_norm": 1.4524584119119777, "learning_rate": 8.484179589008997e-06, "loss": 0.7805, "step": 3452 }, { "epoch": 0.28, "grad_norm": 1.5605734684782737, "learning_rate": 8.483247607734355e-06, "loss": 0.7973, "step": 3453 }, { "epoch": 0.28, "grad_norm": 1.592705945556069, "learning_rate": 8.482315391262871e-06, "loss": 0.7875, "step": 3454 }, { "epoch": 0.28, "grad_norm": 1.710011480061702, "learning_rate": 8.48138293965749e-06, "loss": 0.814, "step": 3455 }, { "epoch": 0.28, "grad_norm": 1.4298347304993668, "learning_rate": 8.48045025298117e-06, "loss": 0.7317, "step": 3456 }, { "epoch": 0.28, "grad_norm": 1.6073738243183575, "learning_rate": 8.479517331296892e-06, "loss": 0.8571, "step": 3457 }, { "epoch": 0.28, "grad_norm": 1.6231324311064255, "learning_rate": 8.478584174667647e-06, "loss": 0.8098, "step": 3458 }, { "epoch": 0.28, "grad_norm": 1.6106405716226526, "learning_rate": 8.477650783156443e-06, "loss": 0.7767, "step": 3459 }, { "epoch": 0.28, "grad_norm": 1.546614468783573, "learning_rate": 8.476717156826308e-06, "loss": 0.8482, "step": 3460 }, { "epoch": 0.28, "grad_norm": 1.464839041891109, "learning_rate": 8.475783295740279e-06, "loss": 0.8585, "step": 3461 }, { "epoch": 0.28, "grad_norm": 1.5322583130892928, "learning_rate": 8.474849199961415e-06, "loss": 0.8599, "step": 3462 }, { "epoch": 0.28, "grad_norm": 1.633006437146197, "learning_rate": 8.473914869552787e-06, "loss": 0.8074, "step": 3463 }, { "epoch": 0.28, "grad_norm": 1.5182978885782994, "learning_rate": 8.472980304577483e-06, "loss": 0.7563, "step": 3464 }, { "epoch": 0.28, "grad_norm": 1.5402854249575322, "learning_rate": 8.472045505098609e-06, "loss": 0.8253, "step": 3465 }, { "epoch": 0.28, "grad_norm": 1.5906717967773234, "learning_rate": 8.471110471179282e-06, "loss": 0.8463, "step": 3466 }, { "epoch": 0.28, "grad_norm": 1.43541290733252, "learning_rate": 8.470175202882638e-06, "loss": 0.7544, "step": 3467 }, { "epoch": 0.28, "grad_norm": 1.4574526576487707, "learning_rate": 8.46923970027183e-06, "loss": 0.8165, "step": 3468 }, { "epoch": 0.28, "grad_norm": 1.4824488000759777, "learning_rate": 8.468303963410026e-06, "loss": 0.7668, "step": 3469 }, { "epoch": 0.28, "grad_norm": 1.5487400852361024, "learning_rate": 8.467367992360405e-06, "loss": 0.8247, "step": 3470 }, { "epoch": 0.28, "grad_norm": 1.4864660435796813, "learning_rate": 8.466431787186169e-06, "loss": 0.7931, "step": 3471 }, { "epoch": 0.28, "grad_norm": 1.633947319527433, "learning_rate": 8.465495347950533e-06, "loss": 0.7432, "step": 3472 }, { "epoch": 0.28, "grad_norm": 1.4657784071170146, "learning_rate": 8.464558674716727e-06, "loss": 0.7303, "step": 3473 }, { "epoch": 0.28, "grad_norm": 1.4198597163112592, "learning_rate": 8.463621767547998e-06, "loss": 0.8256, "step": 3474 }, { "epoch": 0.28, "grad_norm": 1.458079065485533, "learning_rate": 8.462684626507605e-06, "loss": 0.7275, "step": 3475 }, { "epoch": 0.28, "grad_norm": 0.8303178944336082, "learning_rate": 8.46174725165883e-06, "loss": 1.1589, "step": 3476 }, { "epoch": 0.28, "grad_norm": 1.4735962862355394, "learning_rate": 8.460809643064964e-06, "loss": 0.8573, "step": 3477 }, { "epoch": 0.28, "grad_norm": 1.4957794112675198, "learning_rate": 8.459871800789318e-06, "loss": 0.8366, "step": 3478 }, { "epoch": 0.28, "grad_norm": 1.4504628591376099, "learning_rate": 8.458933724895216e-06, "loss": 0.6875, "step": 3479 }, { "epoch": 0.28, "grad_norm": 1.6075967174580625, "learning_rate": 8.457995415445999e-06, "loss": 0.8055, "step": 3480 }, { "epoch": 0.28, "grad_norm": 1.513486256775826, "learning_rate": 8.457056872505024e-06, "loss": 0.7449, "step": 3481 }, { "epoch": 0.28, "grad_norm": 1.4758283156072165, "learning_rate": 8.456118096135666e-06, "loss": 0.724, "step": 3482 }, { "epoch": 0.28, "grad_norm": 1.5223218280837805, "learning_rate": 8.455179086401309e-06, "loss": 0.8106, "step": 3483 }, { "epoch": 0.28, "grad_norm": 1.485995796894997, "learning_rate": 8.45423984336536e-06, "loss": 0.778, "step": 3484 }, { "epoch": 0.28, "grad_norm": 0.8419018710485615, "learning_rate": 8.45330036709124e-06, "loss": 1.1431, "step": 3485 }, { "epoch": 0.28, "grad_norm": 1.6026072859450107, "learning_rate": 8.45236065764238e-06, "loss": 0.8896, "step": 3486 }, { "epoch": 0.28, "grad_norm": 1.5634998589283264, "learning_rate": 8.451420715082236e-06, "loss": 0.7729, "step": 3487 }, { "epoch": 0.28, "grad_norm": 1.4947106466170899, "learning_rate": 8.450480539474271e-06, "loss": 0.8232, "step": 3488 }, { "epoch": 0.28, "grad_norm": 0.8979939694605681, "learning_rate": 8.449540130881973e-06, "loss": 1.1267, "step": 3489 }, { "epoch": 0.28, "grad_norm": 0.8296454263871952, "learning_rate": 8.448599489368836e-06, "loss": 1.0815, "step": 3490 }, { "epoch": 0.28, "grad_norm": 1.4215036315216873, "learning_rate": 8.447658614998375e-06, "loss": 0.7776, "step": 3491 }, { "epoch": 0.28, "grad_norm": 1.6191229442275317, "learning_rate": 8.44671750783412e-06, "loss": 0.7631, "step": 3492 }, { "epoch": 0.28, "grad_norm": 1.506951805456231, "learning_rate": 8.44577616793962e-06, "loss": 0.8294, "step": 3493 }, { "epoch": 0.28, "grad_norm": 1.3079219756502056, "learning_rate": 8.444834595378434e-06, "loss": 0.8114, "step": 3494 }, { "epoch": 0.28, "grad_norm": 1.6064680685914556, "learning_rate": 8.443892790214138e-06, "loss": 0.8644, "step": 3495 }, { "epoch": 0.28, "grad_norm": 1.506293576796927, "learning_rate": 8.442950752510327e-06, "loss": 0.7577, "step": 3496 }, { "epoch": 0.28, "grad_norm": 1.580818464088887, "learning_rate": 8.442008482330606e-06, "loss": 0.7479, "step": 3497 }, { "epoch": 0.28, "grad_norm": 1.540480897160856, "learning_rate": 8.441065979738602e-06, "loss": 0.7803, "step": 3498 }, { "epoch": 0.28, "grad_norm": 1.131553039788583, "learning_rate": 8.440123244797955e-06, "loss": 1.15, "step": 3499 }, { "epoch": 0.28, "grad_norm": 1.4701165735778428, "learning_rate": 8.439180277572321e-06, "loss": 0.9115, "step": 3500 }, { "epoch": 0.28, "grad_norm": 1.4628975921374012, "learning_rate": 8.43823707812537e-06, "loss": 0.8291, "step": 3501 }, { "epoch": 0.28, "grad_norm": 1.4760332594761176, "learning_rate": 8.43729364652079e-06, "loss": 0.813, "step": 3502 }, { "epoch": 0.28, "grad_norm": 1.4294463676007427, "learning_rate": 8.436349982822283e-06, "loss": 0.703, "step": 3503 }, { "epoch": 0.28, "grad_norm": 1.4729042395678342, "learning_rate": 8.435406087093568e-06, "loss": 0.8349, "step": 3504 }, { "epoch": 0.28, "grad_norm": 1.5006937057029421, "learning_rate": 8.434461959398377e-06, "loss": 0.718, "step": 3505 }, { "epoch": 0.28, "grad_norm": 1.581317936486802, "learning_rate": 8.433517599800462e-06, "loss": 0.7641, "step": 3506 }, { "epoch": 0.28, "grad_norm": 1.476405171792754, "learning_rate": 8.432573008363587e-06, "loss": 0.7607, "step": 3507 }, { "epoch": 0.28, "grad_norm": 1.4515141713591286, "learning_rate": 8.431628185151535e-06, "loss": 0.7731, "step": 3508 }, { "epoch": 0.28, "grad_norm": 0.9724372226062912, "learning_rate": 8.4306831302281e-06, "loss": 1.1081, "step": 3509 }, { "epoch": 0.28, "grad_norm": 1.643807464200815, "learning_rate": 8.429737843657094e-06, "loss": 0.8588, "step": 3510 }, { "epoch": 0.28, "grad_norm": 1.5014252190347916, "learning_rate": 8.428792325502347e-06, "loss": 0.7444, "step": 3511 }, { "epoch": 0.28, "grad_norm": 1.5397738032041766, "learning_rate": 8.427846575827702e-06, "loss": 0.881, "step": 3512 }, { "epoch": 0.28, "grad_norm": 1.596683411830558, "learning_rate": 8.426900594697018e-06, "loss": 0.8654, "step": 3513 }, { "epoch": 0.28, "grad_norm": 1.4053044391216072, "learning_rate": 8.425954382174169e-06, "loss": 0.7386, "step": 3514 }, { "epoch": 0.28, "grad_norm": 0.904155826425117, "learning_rate": 8.425007938323049e-06, "loss": 1.1322, "step": 3515 }, { "epoch": 0.28, "grad_norm": 1.479788420301487, "learning_rate": 8.424061263207558e-06, "loss": 0.7539, "step": 3516 }, { "epoch": 0.28, "grad_norm": 1.6236486592876873, "learning_rate": 8.423114356891622e-06, "loss": 0.8188, "step": 3517 }, { "epoch": 0.28, "grad_norm": 1.50736812975999, "learning_rate": 8.422167219439177e-06, "loss": 0.8024, "step": 3518 }, { "epoch": 0.28, "grad_norm": 0.7812072948088002, "learning_rate": 8.421219850914176e-06, "loss": 1.1122, "step": 3519 }, { "epoch": 0.28, "grad_norm": 1.4550556706801905, "learning_rate": 8.42027225138059e-06, "loss": 0.808, "step": 3520 }, { "epoch": 0.28, "grad_norm": 1.5363364084480264, "learning_rate": 8.419324420902398e-06, "loss": 0.7876, "step": 3521 }, { "epoch": 0.28, "grad_norm": 1.5596946058702839, "learning_rate": 8.418376359543604e-06, "loss": 0.8099, "step": 3522 }, { "epoch": 0.28, "grad_norm": 0.8901033578691886, "learning_rate": 8.417428067368218e-06, "loss": 1.167, "step": 3523 }, { "epoch": 0.28, "grad_norm": 0.8687796713851912, "learning_rate": 8.416479544440279e-06, "loss": 1.1057, "step": 3524 }, { "epoch": 0.28, "grad_norm": 1.4373183884333895, "learning_rate": 8.415530790823825e-06, "loss": 0.8549, "step": 3525 }, { "epoch": 0.28, "grad_norm": 1.5706648415540552, "learning_rate": 8.414581806582925e-06, "loss": 0.7867, "step": 3526 }, { "epoch": 0.28, "grad_norm": 1.4303687074474138, "learning_rate": 8.413632591781653e-06, "loss": 0.704, "step": 3527 }, { "epoch": 0.28, "grad_norm": 1.4347332774150747, "learning_rate": 8.412683146484103e-06, "loss": 0.8072, "step": 3528 }, { "epoch": 0.28, "grad_norm": 1.5063191964151044, "learning_rate": 8.411733470754381e-06, "loss": 0.7593, "step": 3529 }, { "epoch": 0.28, "grad_norm": 1.4980236286929907, "learning_rate": 8.410783564656614e-06, "loss": 0.7748, "step": 3530 }, { "epoch": 0.28, "grad_norm": 1.853791949028391, "learning_rate": 8.409833428254943e-06, "loss": 0.8032, "step": 3531 }, { "epoch": 0.28, "grad_norm": 1.564060017551178, "learning_rate": 8.408883061613522e-06, "loss": 0.7603, "step": 3532 }, { "epoch": 0.28, "grad_norm": 1.5522010164054605, "learning_rate": 8.407932464796521e-06, "loss": 0.7276, "step": 3533 }, { "epoch": 0.28, "grad_norm": 1.4426414634124267, "learning_rate": 8.406981637868128e-06, "loss": 0.7877, "step": 3534 }, { "epoch": 0.28, "grad_norm": 1.1141531685475987, "learning_rate": 8.406030580892543e-06, "loss": 1.1123, "step": 3535 }, { "epoch": 0.28, "grad_norm": 1.0179349867686365, "learning_rate": 8.405079293933986e-06, "loss": 1.1394, "step": 3536 }, { "epoch": 0.28, "grad_norm": 1.4106271735580664, "learning_rate": 8.40412777705669e-06, "loss": 0.8019, "step": 3537 }, { "epoch": 0.28, "grad_norm": 1.5175659904839938, "learning_rate": 8.4031760303249e-06, "loss": 0.8614, "step": 3538 }, { "epoch": 0.28, "grad_norm": 1.5886226852418635, "learning_rate": 8.402224053802884e-06, "loss": 0.931, "step": 3539 }, { "epoch": 0.28, "grad_norm": 1.643329232088793, "learning_rate": 8.401271847554919e-06, "loss": 0.7948, "step": 3540 }, { "epoch": 0.28, "grad_norm": 1.5140155631933534, "learning_rate": 8.400319411645302e-06, "loss": 0.7446, "step": 3541 }, { "epoch": 0.28, "grad_norm": 1.4345181748704043, "learning_rate": 8.399366746138345e-06, "loss": 0.7616, "step": 3542 }, { "epoch": 0.28, "grad_norm": 1.2308407134282107, "learning_rate": 8.39841385109837e-06, "loss": 1.1507, "step": 3543 }, { "epoch": 0.28, "grad_norm": 1.4310635574820163, "learning_rate": 8.397460726589722e-06, "loss": 0.8172, "step": 3544 }, { "epoch": 0.28, "grad_norm": 1.4181192013006847, "learning_rate": 8.396507372676754e-06, "loss": 0.8172, "step": 3545 }, { "epoch": 0.28, "grad_norm": 1.7316412510703956, "learning_rate": 8.395553789423844e-06, "loss": 0.8404, "step": 3546 }, { "epoch": 0.28, "grad_norm": 1.5309765868261809, "learning_rate": 8.394599976895378e-06, "loss": 0.7667, "step": 3547 }, { "epoch": 0.28, "grad_norm": 1.5015501199279628, "learning_rate": 8.393645935155758e-06, "loss": 0.8442, "step": 3548 }, { "epoch": 0.28, "grad_norm": 1.559935933169414, "learning_rate": 8.392691664269406e-06, "loss": 0.7742, "step": 3549 }, { "epoch": 0.28, "grad_norm": 1.3508052585719088, "learning_rate": 8.391737164300755e-06, "loss": 0.8243, "step": 3550 }, { "epoch": 0.28, "grad_norm": 1.365643594713128, "learning_rate": 8.390782435314254e-06, "loss": 0.7736, "step": 3551 }, { "epoch": 0.28, "grad_norm": 1.5088400479716837, "learning_rate": 8.38982747737437e-06, "loss": 0.8819, "step": 3552 }, { "epoch": 0.29, "grad_norm": 1.5669348942204682, "learning_rate": 8.388872290545583e-06, "loss": 0.816, "step": 3553 }, { "epoch": 0.29, "grad_norm": 1.4119942179202158, "learning_rate": 8.38791687489239e-06, "loss": 0.7617, "step": 3554 }, { "epoch": 0.29, "grad_norm": 1.4970504931134134, "learning_rate": 8.386961230479303e-06, "loss": 0.8679, "step": 3555 }, { "epoch": 0.29, "grad_norm": 1.4954023228736286, "learning_rate": 8.386005357370848e-06, "loss": 0.8353, "step": 3556 }, { "epoch": 0.29, "grad_norm": 1.4851758261980823, "learning_rate": 8.38504925563157e-06, "loss": 0.7723, "step": 3557 }, { "epoch": 0.29, "grad_norm": 1.5461433668204432, "learning_rate": 8.384092925326025e-06, "loss": 0.8318, "step": 3558 }, { "epoch": 0.29, "grad_norm": 0.9426589337693875, "learning_rate": 8.383136366518788e-06, "loss": 1.1237, "step": 3559 }, { "epoch": 0.29, "grad_norm": 1.572001906941129, "learning_rate": 8.382179579274447e-06, "loss": 0.8276, "step": 3560 }, { "epoch": 0.29, "grad_norm": 1.4903960315900653, "learning_rate": 8.381222563657608e-06, "loss": 0.8106, "step": 3561 }, { "epoch": 0.29, "grad_norm": 1.599690232210152, "learning_rate": 8.380265319732887e-06, "loss": 0.8789, "step": 3562 }, { "epoch": 0.29, "grad_norm": 0.8603297711654769, "learning_rate": 8.379307847564925e-06, "loss": 1.123, "step": 3563 }, { "epoch": 0.29, "grad_norm": 0.8328562856294598, "learning_rate": 8.378350147218369e-06, "loss": 1.1373, "step": 3564 }, { "epoch": 0.29, "grad_norm": 1.6198609011793015, "learning_rate": 8.377392218757887e-06, "loss": 0.9007, "step": 3565 }, { "epoch": 0.29, "grad_norm": 1.3831740250682023, "learning_rate": 8.376434062248158e-06, "loss": 0.7445, "step": 3566 }, { "epoch": 0.29, "grad_norm": 1.4644786949229776, "learning_rate": 8.375475677753882e-06, "loss": 0.8395, "step": 3567 }, { "epoch": 0.29, "grad_norm": 1.6201882089361996, "learning_rate": 8.374517065339768e-06, "loss": 0.8287, "step": 3568 }, { "epoch": 0.29, "grad_norm": 0.8616476228510223, "learning_rate": 8.373558225070546e-06, "loss": 1.1476, "step": 3569 }, { "epoch": 0.29, "grad_norm": 1.5459815502416623, "learning_rate": 8.37259915701096e-06, "loss": 0.7435, "step": 3570 }, { "epoch": 0.29, "grad_norm": 1.514167588862741, "learning_rate": 8.371639861225765e-06, "loss": 0.7614, "step": 3571 }, { "epoch": 0.29, "grad_norm": 1.4851243519043094, "learning_rate": 8.370680337779737e-06, "loss": 0.7432, "step": 3572 }, { "epoch": 0.29, "grad_norm": 1.4961695646764002, "learning_rate": 8.369720586737666e-06, "loss": 0.8334, "step": 3573 }, { "epoch": 0.29, "grad_norm": 0.8450236183428267, "learning_rate": 8.368760608164356e-06, "loss": 1.1196, "step": 3574 }, { "epoch": 0.29, "grad_norm": 0.8461803096920018, "learning_rate": 8.367800402124626e-06, "loss": 1.1004, "step": 3575 }, { "epoch": 0.29, "grad_norm": 1.4602588952633153, "learning_rate": 8.366839968683312e-06, "loss": 0.7647, "step": 3576 }, { "epoch": 0.29, "grad_norm": 1.596052072987529, "learning_rate": 8.365879307905263e-06, "loss": 0.8549, "step": 3577 }, { "epoch": 0.29, "grad_norm": 1.5816345499693734, "learning_rate": 8.36491841985535e-06, "loss": 0.8124, "step": 3578 }, { "epoch": 0.29, "grad_norm": 1.4416977365339427, "learning_rate": 8.363957304598447e-06, "loss": 0.7397, "step": 3579 }, { "epoch": 0.29, "grad_norm": 1.68055063469111, "learning_rate": 8.362995962199459e-06, "loss": 0.8005, "step": 3580 }, { "epoch": 0.29, "grad_norm": 1.5109318470112802, "learning_rate": 8.36203439272329e-06, "loss": 0.8067, "step": 3581 }, { "epoch": 0.29, "grad_norm": 1.511182446942989, "learning_rate": 8.36107259623487e-06, "loss": 0.8212, "step": 3582 }, { "epoch": 0.29, "grad_norm": 1.5953739171847179, "learning_rate": 8.360110572799146e-06, "loss": 0.7268, "step": 3583 }, { "epoch": 0.29, "grad_norm": 1.4971741027242267, "learning_rate": 8.359148322481073e-06, "loss": 0.7145, "step": 3584 }, { "epoch": 0.29, "grad_norm": 1.5464124386523794, "learning_rate": 8.358185845345623e-06, "loss": 0.8064, "step": 3585 }, { "epoch": 0.29, "grad_norm": 1.4304522878117933, "learning_rate": 8.357223141457787e-06, "loss": 0.8149, "step": 3586 }, { "epoch": 0.29, "grad_norm": 1.5299836144057333, "learning_rate": 8.356260210882565e-06, "loss": 0.8036, "step": 3587 }, { "epoch": 0.29, "grad_norm": 1.5917946362073667, "learning_rate": 8.355297053684982e-06, "loss": 0.7834, "step": 3588 }, { "epoch": 0.29, "grad_norm": 1.6361821340320137, "learning_rate": 8.354333669930067e-06, "loss": 0.8206, "step": 3589 }, { "epoch": 0.29, "grad_norm": 1.5627545794231181, "learning_rate": 8.353370059682873e-06, "loss": 0.8306, "step": 3590 }, { "epoch": 0.29, "grad_norm": 1.6681872277547753, "learning_rate": 8.352406223008465e-06, "loss": 0.8069, "step": 3591 }, { "epoch": 0.29, "grad_norm": 1.5213964168428444, "learning_rate": 8.351442159971922e-06, "loss": 0.888, "step": 3592 }, { "epoch": 0.29, "grad_norm": 1.3666305647624115, "learning_rate": 8.350477870638346e-06, "loss": 0.6951, "step": 3593 }, { "epoch": 0.29, "grad_norm": 1.9113522660832283, "learning_rate": 8.349513355072836e-06, "loss": 0.7474, "step": 3594 }, { "epoch": 0.29, "grad_norm": 1.428919935725367, "learning_rate": 8.348548613340529e-06, "loss": 0.7646, "step": 3595 }, { "epoch": 0.29, "grad_norm": 1.5463819740131208, "learning_rate": 8.347583645506561e-06, "loss": 0.8222, "step": 3596 }, { "epoch": 0.29, "grad_norm": 0.9635301694117145, "learning_rate": 8.346618451636092e-06, "loss": 1.1399, "step": 3597 }, { "epoch": 0.29, "grad_norm": 0.8888381221245981, "learning_rate": 8.345653031794292e-06, "loss": 1.1152, "step": 3598 }, { "epoch": 0.29, "grad_norm": 0.8072268162936169, "learning_rate": 8.344687386046348e-06, "loss": 1.0938, "step": 3599 }, { "epoch": 0.29, "grad_norm": 0.8359251190535291, "learning_rate": 8.343721514457465e-06, "loss": 1.0949, "step": 3600 }, { "epoch": 0.29, "grad_norm": 0.9315328323164076, "learning_rate": 8.34275541709286e-06, "loss": 1.116, "step": 3601 }, { "epoch": 0.29, "grad_norm": 0.837743510381232, "learning_rate": 8.341789094017766e-06, "loss": 1.1151, "step": 3602 }, { "epoch": 0.29, "grad_norm": 1.5196189521007437, "learning_rate": 8.340822545297426e-06, "loss": 0.8833, "step": 3603 }, { "epoch": 0.29, "grad_norm": 1.5240922651798163, "learning_rate": 8.339855770997113e-06, "loss": 0.8213, "step": 3604 }, { "epoch": 0.29, "grad_norm": 1.6095805330489794, "learning_rate": 8.3388887711821e-06, "loss": 0.8299, "step": 3605 }, { "epoch": 0.29, "grad_norm": 0.9931913176022844, "learning_rate": 8.337921545917684e-06, "loss": 1.1295, "step": 3606 }, { "epoch": 0.29, "grad_norm": 1.5530389141451728, "learning_rate": 8.336954095269171e-06, "loss": 0.862, "step": 3607 }, { "epoch": 0.29, "grad_norm": 1.521089332276859, "learning_rate": 8.335986419301886e-06, "loss": 0.7409, "step": 3608 }, { "epoch": 0.29, "grad_norm": 0.9593272193954987, "learning_rate": 8.335018518081171e-06, "loss": 1.1291, "step": 3609 }, { "epoch": 0.29, "grad_norm": 1.4686296168525876, "learning_rate": 8.33405039167238e-06, "loss": 0.8159, "step": 3610 }, { "epoch": 0.29, "grad_norm": 1.4283410628535451, "learning_rate": 8.333082040140884e-06, "loss": 0.8233, "step": 3611 }, { "epoch": 0.29, "grad_norm": 1.4805013166565821, "learning_rate": 8.332113463552065e-06, "loss": 0.7855, "step": 3612 }, { "epoch": 0.29, "grad_norm": 0.8659744583502094, "learning_rate": 8.331144661971325e-06, "loss": 1.0775, "step": 3613 }, { "epoch": 0.29, "grad_norm": 1.4191932013812378, "learning_rate": 8.330175635464082e-06, "loss": 0.7432, "step": 3614 }, { "epoch": 0.29, "grad_norm": 1.5539851337791017, "learning_rate": 8.329206384095765e-06, "loss": 0.7615, "step": 3615 }, { "epoch": 0.29, "grad_norm": 1.5836001916084503, "learning_rate": 8.328236907931819e-06, "loss": 0.7891, "step": 3616 }, { "epoch": 0.29, "grad_norm": 1.5472185156655707, "learning_rate": 8.327267207037707e-06, "loss": 0.7804, "step": 3617 }, { "epoch": 0.29, "grad_norm": 1.529773598300593, "learning_rate": 8.326297281478906e-06, "loss": 0.8066, "step": 3618 }, { "epoch": 0.29, "grad_norm": 1.4851421910833338, "learning_rate": 8.325327131320907e-06, "loss": 0.8167, "step": 3619 }, { "epoch": 0.29, "grad_norm": 1.572705418517364, "learning_rate": 8.324356756629215e-06, "loss": 0.7637, "step": 3620 }, { "epoch": 0.29, "grad_norm": 1.6081858377060583, "learning_rate": 8.323386157469353e-06, "loss": 0.7926, "step": 3621 }, { "epoch": 0.29, "grad_norm": 1.4000884633593909, "learning_rate": 8.322415333906859e-06, "loss": 0.6416, "step": 3622 }, { "epoch": 0.29, "grad_norm": 1.5117813133396107, "learning_rate": 8.321444286007283e-06, "loss": 0.7818, "step": 3623 }, { "epoch": 0.29, "grad_norm": 1.9386306354677698, "learning_rate": 8.320473013836197e-06, "loss": 0.7324, "step": 3624 }, { "epoch": 0.29, "grad_norm": 1.503345526855877, "learning_rate": 8.319501517459178e-06, "loss": 0.8259, "step": 3625 }, { "epoch": 0.29, "grad_norm": 1.4238231424554582, "learning_rate": 8.318529796941825e-06, "loss": 0.8363, "step": 3626 }, { "epoch": 0.29, "grad_norm": 1.4710546563521905, "learning_rate": 8.317557852349753e-06, "loss": 0.7564, "step": 3627 }, { "epoch": 0.29, "grad_norm": 1.4765150245971121, "learning_rate": 8.31658568374859e-06, "loss": 0.8249, "step": 3628 }, { "epoch": 0.29, "grad_norm": 1.523716677530175, "learning_rate": 8.315613291203977e-06, "loss": 0.8568, "step": 3629 }, { "epoch": 0.29, "grad_norm": 1.5630988721985417, "learning_rate": 8.314640674781572e-06, "loss": 0.8259, "step": 3630 }, { "epoch": 0.29, "grad_norm": 1.4591595108802864, "learning_rate": 8.31366783454705e-06, "loss": 0.785, "step": 3631 }, { "epoch": 0.29, "grad_norm": 1.5597784194332103, "learning_rate": 8.312694770566099e-06, "loss": 0.758, "step": 3632 }, { "epoch": 0.29, "grad_norm": 1.527395684505565, "learning_rate": 8.311721482904423e-06, "loss": 0.7684, "step": 3633 }, { "epoch": 0.29, "grad_norm": 1.5321497390807093, "learning_rate": 8.310747971627736e-06, "loss": 0.8293, "step": 3634 }, { "epoch": 0.29, "grad_norm": 1.4336090120498481, "learning_rate": 8.309774236801779e-06, "loss": 0.8172, "step": 3635 }, { "epoch": 0.29, "grad_norm": 1.5941264517496108, "learning_rate": 8.308800278492298e-06, "loss": 0.8842, "step": 3636 }, { "epoch": 0.29, "grad_norm": 1.4262656887077885, "learning_rate": 8.307826096765054e-06, "loss": 0.7364, "step": 3637 }, { "epoch": 0.29, "grad_norm": 4.22061669374943, "learning_rate": 8.306851691685828e-06, "loss": 0.7089, "step": 3638 }, { "epoch": 0.29, "grad_norm": 1.3484767790273897, "learning_rate": 8.305877063320415e-06, "loss": 0.7653, "step": 3639 }, { "epoch": 0.29, "grad_norm": 1.3980156399540455, "learning_rate": 8.304902211734623e-06, "loss": 0.7436, "step": 3640 }, { "epoch": 0.29, "grad_norm": 1.466159099878649, "learning_rate": 8.303927136994278e-06, "loss": 0.7404, "step": 3641 }, { "epoch": 0.29, "grad_norm": 1.5701449236394625, "learning_rate": 8.302951839165217e-06, "loss": 0.7577, "step": 3642 }, { "epoch": 0.29, "grad_norm": 1.4661738535056066, "learning_rate": 8.301976318313295e-06, "loss": 0.7635, "step": 3643 }, { "epoch": 0.29, "grad_norm": 1.4637220261097987, "learning_rate": 8.30100057450438e-06, "loss": 0.8165, "step": 3644 }, { "epoch": 0.29, "grad_norm": 0.9610922162589698, "learning_rate": 8.300024607804359e-06, "loss": 1.1358, "step": 3645 }, { "epoch": 0.29, "grad_norm": 1.4723588716795462, "learning_rate": 8.299048418279133e-06, "loss": 0.7368, "step": 3646 }, { "epoch": 0.29, "grad_norm": 0.8486091620765244, "learning_rate": 8.298072005994611e-06, "loss": 1.075, "step": 3647 }, { "epoch": 0.29, "grad_norm": 1.4832652627077432, "learning_rate": 8.297095371016726e-06, "loss": 0.7338, "step": 3648 }, { "epoch": 0.29, "grad_norm": 1.5847340772523881, "learning_rate": 8.296118513411422e-06, "loss": 0.7499, "step": 3649 }, { "epoch": 0.29, "grad_norm": 1.6176907450218756, "learning_rate": 8.29514143324466e-06, "loss": 0.7289, "step": 3650 }, { "epoch": 0.29, "grad_norm": 1.5553312838498012, "learning_rate": 8.294164130582413e-06, "loss": 0.818, "step": 3651 }, { "epoch": 0.29, "grad_norm": 0.9192958158564914, "learning_rate": 8.293186605490673e-06, "loss": 1.1329, "step": 3652 }, { "epoch": 0.29, "grad_norm": 1.578034747205365, "learning_rate": 8.292208858035441e-06, "loss": 0.8171, "step": 3653 }, { "epoch": 0.29, "grad_norm": 1.5654318499895448, "learning_rate": 8.29123088828274e-06, "loss": 0.7741, "step": 3654 }, { "epoch": 0.29, "grad_norm": 1.5754295223670007, "learning_rate": 8.290252696298604e-06, "loss": 0.8479, "step": 3655 }, { "epoch": 0.29, "grad_norm": 1.4656544874062276, "learning_rate": 8.28927428214908e-06, "loss": 0.8207, "step": 3656 }, { "epoch": 0.29, "grad_norm": 1.4598342522150534, "learning_rate": 8.288295645900237e-06, "loss": 0.7514, "step": 3657 }, { "epoch": 0.29, "grad_norm": 1.5477924208552247, "learning_rate": 8.287316787618153e-06, "loss": 0.8253, "step": 3658 }, { "epoch": 0.29, "grad_norm": 1.52410443810361, "learning_rate": 8.286337707368922e-06, "loss": 0.8346, "step": 3659 }, { "epoch": 0.29, "grad_norm": 0.9675623714475131, "learning_rate": 8.285358405218655e-06, "loss": 1.1069, "step": 3660 }, { "epoch": 0.29, "grad_norm": 1.7818552332769975, "learning_rate": 8.284378881233474e-06, "loss": 0.7733, "step": 3661 }, { "epoch": 0.29, "grad_norm": 0.8264771739143096, "learning_rate": 8.283399135479523e-06, "loss": 1.1344, "step": 3662 }, { "epoch": 0.29, "grad_norm": 0.8297807961899168, "learning_rate": 8.282419168022953e-06, "loss": 1.1201, "step": 3663 }, { "epoch": 0.29, "grad_norm": 1.491049669976652, "learning_rate": 8.281438978929937e-06, "loss": 0.8348, "step": 3664 }, { "epoch": 0.29, "grad_norm": 1.6267413764207723, "learning_rate": 8.280458568266656e-06, "loss": 0.761, "step": 3665 }, { "epoch": 0.29, "grad_norm": 1.4919180644454502, "learning_rate": 8.279477936099312e-06, "loss": 0.8624, "step": 3666 }, { "epoch": 0.29, "grad_norm": 1.46996093908195, "learning_rate": 8.27849708249412e-06, "loss": 0.8523, "step": 3667 }, { "epoch": 0.29, "grad_norm": 1.7409776208033274, "learning_rate": 8.277516007517306e-06, "loss": 0.7843, "step": 3668 }, { "epoch": 0.29, "grad_norm": 1.5220752575857301, "learning_rate": 8.276534711235117e-06, "loss": 0.8422, "step": 3669 }, { "epoch": 0.29, "grad_norm": 1.1337312671261135, "learning_rate": 8.275553193713812e-06, "loss": 1.1251, "step": 3670 }, { "epoch": 0.29, "grad_norm": 1.6814279936876195, "learning_rate": 8.274571455019665e-06, "loss": 0.784, "step": 3671 }, { "epoch": 0.29, "grad_norm": 1.6516102712233145, "learning_rate": 8.273589495218966e-06, "loss": 0.7107, "step": 3672 }, { "epoch": 0.29, "grad_norm": 1.6525951258644387, "learning_rate": 8.27260731437802e-06, "loss": 0.8266, "step": 3673 }, { "epoch": 0.29, "grad_norm": 1.516670064968148, "learning_rate": 8.271624912563143e-06, "loss": 0.8129, "step": 3674 }, { "epoch": 0.29, "grad_norm": 1.5649738579532149, "learning_rate": 8.270642289840673e-06, "loss": 0.8436, "step": 3675 }, { "epoch": 0.29, "grad_norm": 1.459511381277704, "learning_rate": 8.269659446276955e-06, "loss": 0.8439, "step": 3676 }, { "epoch": 0.3, "grad_norm": 1.3666627441030532, "learning_rate": 8.268676381938356e-06, "loss": 0.7474, "step": 3677 }, { "epoch": 0.3, "grad_norm": 0.9620365104151546, "learning_rate": 8.267693096891253e-06, "loss": 1.1109, "step": 3678 }, { "epoch": 0.3, "grad_norm": 1.7794060770429951, "learning_rate": 8.266709591202039e-06, "loss": 0.7939, "step": 3679 }, { "epoch": 0.3, "grad_norm": 0.8232326273459333, "learning_rate": 8.265725864937124e-06, "loss": 1.092, "step": 3680 }, { "epoch": 0.3, "grad_norm": 1.5847028958484406, "learning_rate": 8.264741918162933e-06, "loss": 0.7788, "step": 3681 }, { "epoch": 0.3, "grad_norm": 1.9885877524555804, "learning_rate": 8.2637577509459e-06, "loss": 0.8181, "step": 3682 }, { "epoch": 0.3, "grad_norm": 1.5636140232589018, "learning_rate": 8.262773363352482e-06, "loss": 0.8773, "step": 3683 }, { "epoch": 0.3, "grad_norm": 1.5322197240413016, "learning_rate": 8.261788755449145e-06, "loss": 0.8159, "step": 3684 }, { "epoch": 0.3, "grad_norm": 1.6039068759489672, "learning_rate": 8.260803927302372e-06, "loss": 0.8286, "step": 3685 }, { "epoch": 0.3, "grad_norm": 1.007325652423658, "learning_rate": 8.259818878978662e-06, "loss": 1.1182, "step": 3686 }, { "epoch": 0.3, "grad_norm": 1.4162184983187196, "learning_rate": 8.25883361054453e-06, "loss": 0.7065, "step": 3687 }, { "epoch": 0.3, "grad_norm": 1.5289544948721676, "learning_rate": 8.257848122066498e-06, "loss": 0.9228, "step": 3688 }, { "epoch": 0.3, "grad_norm": 1.5822096067741045, "learning_rate": 8.256862413611113e-06, "loss": 0.8038, "step": 3689 }, { "epoch": 0.3, "grad_norm": 1.4420114914349513, "learning_rate": 8.255876485244927e-06, "loss": 0.8016, "step": 3690 }, { "epoch": 0.3, "grad_norm": 1.538905630628463, "learning_rate": 8.25489033703452e-06, "loss": 0.7534, "step": 3691 }, { "epoch": 0.3, "grad_norm": 1.4937399020306241, "learning_rate": 8.253903969046473e-06, "loss": 0.7573, "step": 3692 }, { "epoch": 0.3, "grad_norm": 1.5781882040795854, "learning_rate": 8.252917381347389e-06, "loss": 0.7993, "step": 3693 }, { "epoch": 0.3, "grad_norm": 1.6569071117480108, "learning_rate": 8.251930574003886e-06, "loss": 0.764, "step": 3694 }, { "epoch": 0.3, "grad_norm": 1.5205082328085566, "learning_rate": 8.250943547082592e-06, "loss": 0.8047, "step": 3695 }, { "epoch": 0.3, "grad_norm": 1.5461981662483404, "learning_rate": 8.249956300650159e-06, "loss": 0.8179, "step": 3696 }, { "epoch": 0.3, "grad_norm": 0.8421898963856037, "learning_rate": 8.248968834773246e-06, "loss": 1.1171, "step": 3697 }, { "epoch": 0.3, "grad_norm": 1.6692837290794227, "learning_rate": 8.247981149518525e-06, "loss": 0.8149, "step": 3698 }, { "epoch": 0.3, "grad_norm": 1.4887124029496397, "learning_rate": 8.24699324495269e-06, "loss": 0.8349, "step": 3699 }, { "epoch": 0.3, "grad_norm": 1.5220247267260225, "learning_rate": 8.246005121142448e-06, "loss": 0.7925, "step": 3700 }, { "epoch": 0.3, "grad_norm": 1.4747789859773595, "learning_rate": 8.245016778154519e-06, "loss": 0.7028, "step": 3701 }, { "epoch": 0.3, "grad_norm": 1.4576761039548471, "learning_rate": 8.244028216055636e-06, "loss": 0.7689, "step": 3702 }, { "epoch": 0.3, "grad_norm": 0.8419338015585344, "learning_rate": 8.243039434912547e-06, "loss": 1.152, "step": 3703 }, { "epoch": 0.3, "grad_norm": 0.8240030479286782, "learning_rate": 8.242050434792022e-06, "loss": 1.1003, "step": 3704 }, { "epoch": 0.3, "grad_norm": 1.4602738618256044, "learning_rate": 8.24106121576084e-06, "loss": 0.7115, "step": 3705 }, { "epoch": 0.3, "grad_norm": 1.6235031110181317, "learning_rate": 8.24007177788579e-06, "loss": 0.7587, "step": 3706 }, { "epoch": 0.3, "grad_norm": 1.4304300010602238, "learning_rate": 8.239082121233687e-06, "loss": 0.8004, "step": 3707 }, { "epoch": 0.3, "grad_norm": 1.5433043268103428, "learning_rate": 8.238092245871352e-06, "loss": 0.7667, "step": 3708 }, { "epoch": 0.3, "grad_norm": 1.5402911024148342, "learning_rate": 8.237102151865625e-06, "loss": 0.8272, "step": 3709 }, { "epoch": 0.3, "grad_norm": 1.4748910148196346, "learning_rate": 8.236111839283355e-06, "loss": 0.8931, "step": 3710 }, { "epoch": 0.3, "grad_norm": 1.3816695488617081, "learning_rate": 8.23512130819142e-06, "loss": 0.691, "step": 3711 }, { "epoch": 0.3, "grad_norm": 1.3535917767919141, "learning_rate": 8.234130558656693e-06, "loss": 0.7338, "step": 3712 }, { "epoch": 0.3, "grad_norm": 1.4823528517561166, "learning_rate": 8.233139590746076e-06, "loss": 0.7646, "step": 3713 }, { "epoch": 0.3, "grad_norm": 1.53471570466257, "learning_rate": 8.23214840452648e-06, "loss": 0.7852, "step": 3714 }, { "epoch": 0.3, "grad_norm": 1.441458710572957, "learning_rate": 8.231157000064833e-06, "loss": 0.7895, "step": 3715 }, { "epoch": 0.3, "grad_norm": 1.3935945549390185, "learning_rate": 8.230165377428078e-06, "loss": 0.6701, "step": 3716 }, { "epoch": 0.3, "grad_norm": 1.454242128022412, "learning_rate": 8.229173536683169e-06, "loss": 0.7493, "step": 3717 }, { "epoch": 0.3, "grad_norm": 1.504515241434594, "learning_rate": 8.22818147789708e-06, "loss": 0.8199, "step": 3718 }, { "epoch": 0.3, "grad_norm": 1.5473342784488457, "learning_rate": 8.227189201136796e-06, "loss": 0.7917, "step": 3719 }, { "epoch": 0.3, "grad_norm": 1.4447779263507392, "learning_rate": 8.226196706469315e-06, "loss": 0.7246, "step": 3720 }, { "epoch": 0.3, "grad_norm": 1.4685502558086694, "learning_rate": 8.22520399396166e-06, "loss": 0.8244, "step": 3721 }, { "epoch": 0.3, "grad_norm": 1.497781444278884, "learning_rate": 8.224211063680854e-06, "loss": 0.8015, "step": 3722 }, { "epoch": 0.3, "grad_norm": 1.7830922293341724, "learning_rate": 8.223217915693944e-06, "loss": 0.8017, "step": 3723 }, { "epoch": 0.3, "grad_norm": 1.6598816043274716, "learning_rate": 8.22222455006799e-06, "loss": 0.7467, "step": 3724 }, { "epoch": 0.3, "grad_norm": 1.5134208142926044, "learning_rate": 8.221230966870068e-06, "loss": 0.7741, "step": 3725 }, { "epoch": 0.3, "grad_norm": 1.5235333212244835, "learning_rate": 8.220237166167264e-06, "loss": 0.8242, "step": 3726 }, { "epoch": 0.3, "grad_norm": 1.5098455282775667, "learning_rate": 8.219243148026683e-06, "loss": 0.7085, "step": 3727 }, { "epoch": 0.3, "grad_norm": 1.440153981879797, "learning_rate": 8.218248912515443e-06, "loss": 0.7333, "step": 3728 }, { "epoch": 0.3, "grad_norm": 1.6002551382802694, "learning_rate": 8.217254459700679e-06, "loss": 0.8191, "step": 3729 }, { "epoch": 0.3, "grad_norm": 1.4508402114343613, "learning_rate": 8.216259789649536e-06, "loss": 0.8478, "step": 3730 }, { "epoch": 0.3, "grad_norm": 1.815799532687122, "learning_rate": 8.215264902429177e-06, "loss": 0.8376, "step": 3731 }, { "epoch": 0.3, "grad_norm": 1.6342800399226038, "learning_rate": 8.21426979810678e-06, "loss": 0.8772, "step": 3732 }, { "epoch": 0.3, "grad_norm": 1.5016387010413945, "learning_rate": 8.213274476749537e-06, "loss": 0.786, "step": 3733 }, { "epoch": 0.3, "grad_norm": 1.579277310982556, "learning_rate": 8.212278938424654e-06, "loss": 0.8516, "step": 3734 }, { "epoch": 0.3, "grad_norm": 1.502594269467104, "learning_rate": 8.211283183199353e-06, "loss": 0.779, "step": 3735 }, { "epoch": 0.3, "grad_norm": 1.5880978154192384, "learning_rate": 8.210287211140864e-06, "loss": 0.7874, "step": 3736 }, { "epoch": 0.3, "grad_norm": 1.5255841949050248, "learning_rate": 8.209291022316445e-06, "loss": 0.786, "step": 3737 }, { "epoch": 0.3, "grad_norm": 1.535895465453568, "learning_rate": 8.208294616793357e-06, "loss": 0.8235, "step": 3738 }, { "epoch": 0.3, "grad_norm": 1.6296658802408148, "learning_rate": 8.20729799463888e-06, "loss": 0.8158, "step": 3739 }, { "epoch": 0.3, "grad_norm": 1.560846904942272, "learning_rate": 8.20630115592031e-06, "loss": 0.785, "step": 3740 }, { "epoch": 0.3, "grad_norm": 1.650451617029216, "learning_rate": 8.205304100704953e-06, "loss": 0.8163, "step": 3741 }, { "epoch": 0.3, "grad_norm": 1.5020728105883276, "learning_rate": 8.204306829060133e-06, "loss": 0.756, "step": 3742 }, { "epoch": 0.3, "grad_norm": 1.590602830309445, "learning_rate": 8.203309341053191e-06, "loss": 0.7697, "step": 3743 }, { "epoch": 0.3, "grad_norm": 1.5011316153175087, "learning_rate": 8.202311636751476e-06, "loss": 0.8496, "step": 3744 }, { "epoch": 0.3, "grad_norm": 1.3903652483703992, "learning_rate": 8.201313716222357e-06, "loss": 0.7736, "step": 3745 }, { "epoch": 0.3, "grad_norm": 1.5917948373487967, "learning_rate": 8.200315579533217e-06, "loss": 0.7876, "step": 3746 }, { "epoch": 0.3, "grad_norm": 1.646637601885048, "learning_rate": 8.19931722675145e-06, "loss": 0.8394, "step": 3747 }, { "epoch": 0.3, "grad_norm": 1.554126868557104, "learning_rate": 8.198318657944466e-06, "loss": 0.8789, "step": 3748 }, { "epoch": 0.3, "grad_norm": 1.4218246823484486, "learning_rate": 8.197319873179694e-06, "loss": 0.737, "step": 3749 }, { "epoch": 0.3, "grad_norm": 0.9418594207579307, "learning_rate": 8.196320872524574e-06, "loss": 1.1392, "step": 3750 }, { "epoch": 0.3, "grad_norm": 1.6975148357684215, "learning_rate": 8.19532165604656e-06, "loss": 0.7794, "step": 3751 }, { "epoch": 0.3, "grad_norm": 1.4792893053976437, "learning_rate": 8.19432222381312e-06, "loss": 0.7572, "step": 3752 }, { "epoch": 0.3, "grad_norm": 1.5300165095405902, "learning_rate": 8.19332257589174e-06, "loss": 0.8972, "step": 3753 }, { "epoch": 0.3, "grad_norm": 1.5351479279027318, "learning_rate": 8.192322712349917e-06, "loss": 0.8186, "step": 3754 }, { "epoch": 0.3, "grad_norm": 1.4286072679340858, "learning_rate": 8.191322633255166e-06, "loss": 0.8809, "step": 3755 }, { "epoch": 0.3, "grad_norm": 1.6030101223458852, "learning_rate": 8.190322338675015e-06, "loss": 0.8155, "step": 3756 }, { "epoch": 0.3, "grad_norm": 0.9690597426693164, "learning_rate": 8.189321828677002e-06, "loss": 1.1307, "step": 3757 }, { "epoch": 0.3, "grad_norm": 1.4574411251427775, "learning_rate": 8.188321103328685e-06, "loss": 0.7583, "step": 3758 }, { "epoch": 0.3, "grad_norm": 1.6644053400273908, "learning_rate": 8.18732016269764e-06, "loss": 0.8636, "step": 3759 }, { "epoch": 0.3, "grad_norm": 1.5665260766105897, "learning_rate": 8.186319006851446e-06, "loss": 0.7698, "step": 3760 }, { "epoch": 0.3, "grad_norm": 1.4535727250840884, "learning_rate": 8.185317635857709e-06, "loss": 0.7784, "step": 3761 }, { "epoch": 0.3, "grad_norm": 1.5175547824455082, "learning_rate": 8.18431604978404e-06, "loss": 0.8994, "step": 3762 }, { "epoch": 0.3, "grad_norm": 1.5803185786194645, "learning_rate": 8.183314248698072e-06, "loss": 0.8206, "step": 3763 }, { "epoch": 0.3, "grad_norm": 1.5454968380607725, "learning_rate": 8.182312232667446e-06, "loss": 0.7982, "step": 3764 }, { "epoch": 0.3, "grad_norm": 1.5352119106569502, "learning_rate": 8.18131000175982e-06, "loss": 0.8117, "step": 3765 }, { "epoch": 0.3, "grad_norm": 1.656401416344771, "learning_rate": 8.18030755604287e-06, "loss": 0.7844, "step": 3766 }, { "epoch": 0.3, "grad_norm": 1.1652032986188399, "learning_rate": 8.179304895584282e-06, "loss": 1.1349, "step": 3767 }, { "epoch": 0.3, "grad_norm": 0.9698988631470631, "learning_rate": 8.178302020451754e-06, "loss": 1.1165, "step": 3768 }, { "epoch": 0.3, "grad_norm": 1.6976265807926911, "learning_rate": 8.17729893071301e-06, "loss": 0.8226, "step": 3769 }, { "epoch": 0.3, "grad_norm": 1.8271160645959916, "learning_rate": 8.176295626435776e-06, "loss": 0.7614, "step": 3770 }, { "epoch": 0.3, "grad_norm": 0.9143021059372802, "learning_rate": 8.175292107687796e-06, "loss": 1.1392, "step": 3771 }, { "epoch": 0.3, "grad_norm": 1.6852365904727538, "learning_rate": 8.174288374536834e-06, "loss": 0.8647, "step": 3772 }, { "epoch": 0.3, "grad_norm": 1.6884990672839613, "learning_rate": 8.17328442705066e-06, "loss": 0.7427, "step": 3773 }, { "epoch": 0.3, "grad_norm": 1.5634508092897763, "learning_rate": 8.172280265297068e-06, "loss": 0.8249, "step": 3774 }, { "epoch": 0.3, "grad_norm": 1.3938030404699415, "learning_rate": 8.17127588934386e-06, "loss": 0.8709, "step": 3775 }, { "epoch": 0.3, "grad_norm": 1.3944505307849302, "learning_rate": 8.170271299258849e-06, "loss": 0.7559, "step": 3776 }, { "epoch": 0.3, "grad_norm": 1.5072856740948022, "learning_rate": 8.169266495109872e-06, "loss": 0.7591, "step": 3777 }, { "epoch": 0.3, "grad_norm": 1.5302284626618314, "learning_rate": 8.168261476964774e-06, "loss": 0.8332, "step": 3778 }, { "epoch": 0.3, "grad_norm": 1.646421707866915, "learning_rate": 8.167256244891416e-06, "loss": 0.8894, "step": 3779 }, { "epoch": 0.3, "grad_norm": 1.3908120607088021, "learning_rate": 8.166250798957676e-06, "loss": 0.8027, "step": 3780 }, { "epoch": 0.3, "grad_norm": 1.5748914418017717, "learning_rate": 8.165245139231441e-06, "loss": 0.8037, "step": 3781 }, { "epoch": 0.3, "grad_norm": 1.5668879701571503, "learning_rate": 8.164239265780616e-06, "loss": 0.8598, "step": 3782 }, { "epoch": 0.3, "grad_norm": 1.4808885196152388, "learning_rate": 8.16323317867312e-06, "loss": 0.7874, "step": 3783 }, { "epoch": 0.3, "grad_norm": 1.3681648258540557, "learning_rate": 8.162226877976886e-06, "loss": 0.7975, "step": 3784 }, { "epoch": 0.3, "grad_norm": 0.9543092178766449, "learning_rate": 8.161220363759865e-06, "loss": 1.1422, "step": 3785 }, { "epoch": 0.3, "grad_norm": 1.4976535676063727, "learning_rate": 8.160213636090014e-06, "loss": 0.8143, "step": 3786 }, { "epoch": 0.3, "grad_norm": 1.5342618771005647, "learning_rate": 8.159206695035314e-06, "loss": 0.8479, "step": 3787 }, { "epoch": 0.3, "grad_norm": 1.505531143908182, "learning_rate": 8.15819954066375e-06, "loss": 0.8263, "step": 3788 }, { "epoch": 0.3, "grad_norm": 1.6120737348004686, "learning_rate": 8.157192173043336e-06, "loss": 0.7893, "step": 3789 }, { "epoch": 0.3, "grad_norm": 1.5321389236967746, "learning_rate": 8.156184592242085e-06, "loss": 0.7724, "step": 3790 }, { "epoch": 0.3, "grad_norm": 1.3897670368135453, "learning_rate": 8.155176798328033e-06, "loss": 0.8432, "step": 3791 }, { "epoch": 0.3, "grad_norm": 1.4196144256306165, "learning_rate": 8.15416879136923e-06, "loss": 0.7268, "step": 3792 }, { "epoch": 0.3, "grad_norm": 1.563232251447117, "learning_rate": 8.153160571433738e-06, "loss": 0.7697, "step": 3793 }, { "epoch": 0.3, "grad_norm": 1.746483957083267, "learning_rate": 8.152152138589633e-06, "loss": 0.7688, "step": 3794 }, { "epoch": 0.3, "grad_norm": 1.50875533742132, "learning_rate": 8.151143492905008e-06, "loss": 0.7751, "step": 3795 }, { "epoch": 0.3, "grad_norm": 1.4557661417280336, "learning_rate": 8.150134634447969e-06, "loss": 0.7651, "step": 3796 }, { "epoch": 0.3, "grad_norm": 1.0659002442138317, "learning_rate": 8.149125563286635e-06, "loss": 1.1427, "step": 3797 }, { "epoch": 0.3, "grad_norm": 1.6735708120160868, "learning_rate": 8.148116279489144e-06, "loss": 0.7828, "step": 3798 }, { "epoch": 0.3, "grad_norm": 1.4317118555511164, "learning_rate": 8.147106783123642e-06, "loss": 0.7984, "step": 3799 }, { "epoch": 0.3, "grad_norm": 1.4369565546771987, "learning_rate": 8.146097074258294e-06, "loss": 0.7761, "step": 3800 }, { "epoch": 0.3, "grad_norm": 0.8249258462412429, "learning_rate": 8.145087152961278e-06, "loss": 1.0918, "step": 3801 }, { "epoch": 0.31, "grad_norm": 1.5325023316493578, "learning_rate": 8.144077019300785e-06, "loss": 0.7056, "step": 3802 }, { "epoch": 0.31, "grad_norm": 1.453736520667085, "learning_rate": 8.143066673345023e-06, "loss": 0.779, "step": 3803 }, { "epoch": 0.31, "grad_norm": 1.5625239653088754, "learning_rate": 8.14205611516221e-06, "loss": 0.7194, "step": 3804 }, { "epoch": 0.31, "grad_norm": 1.527708630441371, "learning_rate": 8.141045344820586e-06, "loss": 0.7945, "step": 3805 }, { "epoch": 0.31, "grad_norm": 1.5543797517872724, "learning_rate": 8.140034362388398e-06, "loss": 0.7201, "step": 3806 }, { "epoch": 0.31, "grad_norm": 0.8646173606919456, "learning_rate": 8.139023167933908e-06, "loss": 1.0832, "step": 3807 }, { "epoch": 0.31, "grad_norm": 1.555878182691804, "learning_rate": 8.138011761525397e-06, "loss": 0.7481, "step": 3808 }, { "epoch": 0.31, "grad_norm": 1.4369293845774085, "learning_rate": 8.137000143231156e-06, "loss": 0.8275, "step": 3809 }, { "epoch": 0.31, "grad_norm": 1.470640667305319, "learning_rate": 8.135988313119493e-06, "loss": 0.8074, "step": 3810 }, { "epoch": 0.31, "grad_norm": 1.495055429142874, "learning_rate": 8.134976271258727e-06, "loss": 0.742, "step": 3811 }, { "epoch": 0.31, "grad_norm": 1.5747092597799497, "learning_rate": 8.133964017717195e-06, "loss": 0.8375, "step": 3812 }, { "epoch": 0.31, "grad_norm": 1.5031204172443617, "learning_rate": 8.132951552563247e-06, "loss": 0.8658, "step": 3813 }, { "epoch": 0.31, "grad_norm": 1.517225300082931, "learning_rate": 8.131938875865246e-06, "loss": 0.8629, "step": 3814 }, { "epoch": 0.31, "grad_norm": 1.4782154497001123, "learning_rate": 8.13092598769157e-06, "loss": 0.8337, "step": 3815 }, { "epoch": 0.31, "grad_norm": 1.4590981810605412, "learning_rate": 8.12991288811061e-06, "loss": 0.7192, "step": 3816 }, { "epoch": 0.31, "grad_norm": 1.5711796235150852, "learning_rate": 8.128899577190778e-06, "loss": 0.8182, "step": 3817 }, { "epoch": 0.31, "grad_norm": 1.417457526530165, "learning_rate": 8.127886055000491e-06, "loss": 0.7989, "step": 3818 }, { "epoch": 0.31, "grad_norm": 1.5197320250105462, "learning_rate": 8.126872321608185e-06, "loss": 0.8358, "step": 3819 }, { "epoch": 0.31, "grad_norm": 1.68673994512127, "learning_rate": 8.12585837708231e-06, "loss": 0.7517, "step": 3820 }, { "epoch": 0.31, "grad_norm": 1.6699721715673357, "learning_rate": 8.124844221491327e-06, "loss": 0.7934, "step": 3821 }, { "epoch": 0.31, "grad_norm": 1.4494430187694314, "learning_rate": 8.123829854903722e-06, "loss": 0.8534, "step": 3822 }, { "epoch": 0.31, "grad_norm": 1.3859474274381696, "learning_rate": 8.122815277387978e-06, "loss": 0.7909, "step": 3823 }, { "epoch": 0.31, "grad_norm": 0.9332965048509334, "learning_rate": 8.121800489012608e-06, "loss": 1.1154, "step": 3824 }, { "epoch": 0.31, "grad_norm": 1.409940431908979, "learning_rate": 8.12078548984613e-06, "loss": 0.7289, "step": 3825 }, { "epoch": 0.31, "grad_norm": 1.4304083912971546, "learning_rate": 8.119770279957079e-06, "loss": 0.7839, "step": 3826 }, { "epoch": 0.31, "grad_norm": 1.4677266108101017, "learning_rate": 8.118754859414006e-06, "loss": 0.7568, "step": 3827 }, { "epoch": 0.31, "grad_norm": 1.5530649546064237, "learning_rate": 8.117739228285471e-06, "loss": 0.7867, "step": 3828 }, { "epoch": 0.31, "grad_norm": 1.6659948830070217, "learning_rate": 8.116723386640057e-06, "loss": 0.7192, "step": 3829 }, { "epoch": 0.31, "grad_norm": 1.5122200071820289, "learning_rate": 8.115707334546352e-06, "loss": 0.7892, "step": 3830 }, { "epoch": 0.31, "grad_norm": 1.5288178933601648, "learning_rate": 8.114691072072962e-06, "loss": 0.8215, "step": 3831 }, { "epoch": 0.31, "grad_norm": 1.5026383673556092, "learning_rate": 8.11367459928851e-06, "loss": 0.7931, "step": 3832 }, { "epoch": 0.31, "grad_norm": 1.6046061121231503, "learning_rate": 8.112657916261631e-06, "loss": 0.7624, "step": 3833 }, { "epoch": 0.31, "grad_norm": 1.4888653767173121, "learning_rate": 8.11164102306097e-06, "loss": 0.768, "step": 3834 }, { "epoch": 0.31, "grad_norm": 1.5057079829488431, "learning_rate": 8.11062391975519e-06, "loss": 0.8051, "step": 3835 }, { "epoch": 0.31, "grad_norm": 1.4173125578615495, "learning_rate": 8.109606606412972e-06, "loss": 0.7585, "step": 3836 }, { "epoch": 0.31, "grad_norm": 1.515333250481367, "learning_rate": 8.108589083103006e-06, "loss": 0.8022, "step": 3837 }, { "epoch": 0.31, "grad_norm": 1.4595418745971636, "learning_rate": 8.107571349893997e-06, "loss": 0.8191, "step": 3838 }, { "epoch": 0.31, "grad_norm": 1.4235478911545925, "learning_rate": 8.106553406854664e-06, "loss": 0.7653, "step": 3839 }, { "epoch": 0.31, "grad_norm": 1.5900959071913308, "learning_rate": 8.10553525405374e-06, "loss": 0.83, "step": 3840 }, { "epoch": 0.31, "grad_norm": 1.4092762504099623, "learning_rate": 8.104516891559977e-06, "loss": 0.7133, "step": 3841 }, { "epoch": 0.31, "grad_norm": 1.5692655958992905, "learning_rate": 8.103498319442133e-06, "loss": 0.7976, "step": 3842 }, { "epoch": 0.31, "grad_norm": 1.5511298237813889, "learning_rate": 8.102479537768985e-06, "loss": 0.8779, "step": 3843 }, { "epoch": 0.31, "grad_norm": 1.4965989545924243, "learning_rate": 8.101460546609327e-06, "loss": 0.8089, "step": 3844 }, { "epoch": 0.31, "grad_norm": 1.607638871790108, "learning_rate": 8.100441346031958e-06, "loss": 0.8427, "step": 3845 }, { "epoch": 0.31, "grad_norm": 1.4797124340015562, "learning_rate": 8.099421936105702e-06, "loss": 0.9141, "step": 3846 }, { "epoch": 0.31, "grad_norm": 0.9760770383415995, "learning_rate": 8.098402316899389e-06, "loss": 1.1339, "step": 3847 }, { "epoch": 0.31, "grad_norm": 1.5526191328045458, "learning_rate": 8.097382488481867e-06, "loss": 0.742, "step": 3848 }, { "epoch": 0.31, "grad_norm": 1.5453979701493838, "learning_rate": 8.096362450921995e-06, "loss": 0.8594, "step": 3849 }, { "epoch": 0.31, "grad_norm": 1.4374558724845192, "learning_rate": 8.095342204288651e-06, "loss": 0.7258, "step": 3850 }, { "epoch": 0.31, "grad_norm": 1.4980734851951187, "learning_rate": 8.094321748650725e-06, "loss": 0.7945, "step": 3851 }, { "epoch": 0.31, "grad_norm": 1.5357532763995854, "learning_rate": 8.093301084077116e-06, "loss": 0.8638, "step": 3852 }, { "epoch": 0.31, "grad_norm": 1.4497691010249782, "learning_rate": 8.092280210636747e-06, "loss": 0.7821, "step": 3853 }, { "epoch": 0.31, "grad_norm": 1.4674682640507208, "learning_rate": 8.091259128398548e-06, "loss": 0.7743, "step": 3854 }, { "epoch": 0.31, "grad_norm": 1.6029868491514643, "learning_rate": 8.09023783743146e-06, "loss": 0.7881, "step": 3855 }, { "epoch": 0.31, "grad_norm": 1.4536523567780455, "learning_rate": 8.089216337804452e-06, "loss": 0.7498, "step": 3856 }, { "epoch": 0.31, "grad_norm": 1.5803589661548947, "learning_rate": 8.08819462958649e-06, "loss": 0.7167, "step": 3857 }, { "epoch": 0.31, "grad_norm": 1.427776863249755, "learning_rate": 8.087172712846565e-06, "loss": 0.7956, "step": 3858 }, { "epoch": 0.31, "grad_norm": 1.4406870027362026, "learning_rate": 8.08615058765368e-06, "loss": 0.8, "step": 3859 }, { "epoch": 0.31, "grad_norm": 1.0592910744920072, "learning_rate": 8.08512825407685e-06, "loss": 1.1002, "step": 3860 }, { "epoch": 0.31, "grad_norm": 0.959155808902995, "learning_rate": 8.084105712185105e-06, "loss": 1.082, "step": 3861 }, { "epoch": 0.31, "grad_norm": 1.4944425388728546, "learning_rate": 8.08308296204749e-06, "loss": 0.7748, "step": 3862 }, { "epoch": 0.31, "grad_norm": 1.4769353439466872, "learning_rate": 8.082060003733065e-06, "loss": 0.798, "step": 3863 }, { "epoch": 0.31, "grad_norm": 1.568008258479058, "learning_rate": 8.0810368373109e-06, "loss": 0.8098, "step": 3864 }, { "epoch": 0.31, "grad_norm": 1.5682484320596064, "learning_rate": 8.080013462850083e-06, "loss": 0.8465, "step": 3865 }, { "epoch": 0.31, "grad_norm": 1.5531170905391214, "learning_rate": 8.078989880419715e-06, "loss": 0.7951, "step": 3866 }, { "epoch": 0.31, "grad_norm": 1.4437436400358405, "learning_rate": 8.07796609008891e-06, "loss": 0.7958, "step": 3867 }, { "epoch": 0.31, "grad_norm": 1.851946981784591, "learning_rate": 8.076942091926794e-06, "loss": 1.1192, "step": 3868 }, { "epoch": 0.31, "grad_norm": 1.4902794002262763, "learning_rate": 8.075917886002514e-06, "loss": 0.7888, "step": 3869 }, { "epoch": 0.31, "grad_norm": 1.5142626053545107, "learning_rate": 8.074893472385223e-06, "loss": 0.7871, "step": 3870 }, { "epoch": 0.31, "grad_norm": 1.624297612978066, "learning_rate": 8.073868851144094e-06, "loss": 0.8043, "step": 3871 }, { "epoch": 0.31, "grad_norm": 1.5482468964235694, "learning_rate": 8.072844022348312e-06, "loss": 0.7358, "step": 3872 }, { "epoch": 0.31, "grad_norm": 1.0558270817923883, "learning_rate": 8.071818986067075e-06, "loss": 1.11, "step": 3873 }, { "epoch": 0.31, "grad_norm": 1.5961546176017365, "learning_rate": 8.070793742369595e-06, "loss": 0.7534, "step": 3874 }, { "epoch": 0.31, "grad_norm": 1.5190197568140746, "learning_rate": 8.069768291325103e-06, "loss": 0.7875, "step": 3875 }, { "epoch": 0.31, "grad_norm": 1.6049338444511745, "learning_rate": 8.068742633002834e-06, "loss": 0.7808, "step": 3876 }, { "epoch": 0.31, "grad_norm": 1.526258122809751, "learning_rate": 8.067716767472045e-06, "loss": 0.8175, "step": 3877 }, { "epoch": 0.31, "grad_norm": 1.4975571767294351, "learning_rate": 8.066690694802007e-06, "loss": 0.7865, "step": 3878 }, { "epoch": 0.31, "grad_norm": 1.6032305116303007, "learning_rate": 8.065664415061998e-06, "loss": 0.9079, "step": 3879 }, { "epoch": 0.31, "grad_norm": 1.4272513408256953, "learning_rate": 8.064637928321319e-06, "loss": 0.8322, "step": 3880 }, { "epoch": 0.31, "grad_norm": 1.507469362960976, "learning_rate": 8.06361123464928e-06, "loss": 0.7513, "step": 3881 }, { "epoch": 0.31, "grad_norm": 1.4606703926712012, "learning_rate": 8.062584334115205e-06, "loss": 0.8167, "step": 3882 }, { "epoch": 0.31, "grad_norm": 1.512377878868531, "learning_rate": 8.061557226788433e-06, "loss": 0.8154, "step": 3883 }, { "epoch": 0.31, "grad_norm": 1.274152062813424, "learning_rate": 8.060529912738316e-06, "loss": 1.1046, "step": 3884 }, { "epoch": 0.31, "grad_norm": 1.5353433255701492, "learning_rate": 8.059502392034219e-06, "loss": 0.8182, "step": 3885 }, { "epoch": 0.31, "grad_norm": 1.5325766863458323, "learning_rate": 8.058474664745527e-06, "loss": 0.797, "step": 3886 }, { "epoch": 0.31, "grad_norm": 1.5491816845647797, "learning_rate": 8.057446730941631e-06, "loss": 0.7352, "step": 3887 }, { "epoch": 0.31, "grad_norm": 1.6204196710056848, "learning_rate": 8.056418590691942e-06, "loss": 0.8068, "step": 3888 }, { "epoch": 0.31, "grad_norm": 1.4963253745689755, "learning_rate": 8.055390244065878e-06, "loss": 0.7683, "step": 3889 }, { "epoch": 0.31, "grad_norm": 0.8502328116301726, "learning_rate": 8.05436169113288e-06, "loss": 1.0924, "step": 3890 }, { "epoch": 0.31, "grad_norm": 1.5034118476661062, "learning_rate": 8.053332931962397e-06, "loss": 0.8689, "step": 3891 }, { "epoch": 0.31, "grad_norm": 1.6894781389035394, "learning_rate": 8.052303966623892e-06, "loss": 0.8307, "step": 3892 }, { "epoch": 0.31, "grad_norm": 1.615911901695192, "learning_rate": 8.051274795186842e-06, "loss": 0.7851, "step": 3893 }, { "epoch": 0.31, "grad_norm": 1.6016333391138815, "learning_rate": 8.050245417720742e-06, "loss": 0.7695, "step": 3894 }, { "epoch": 0.31, "grad_norm": 1.5421956835095123, "learning_rate": 8.049215834295097e-06, "loss": 0.7585, "step": 3895 }, { "epoch": 0.31, "grad_norm": 1.6039131085541198, "learning_rate": 8.048186044979425e-06, "loss": 0.7706, "step": 3896 }, { "epoch": 0.31, "grad_norm": 0.8645859848822454, "learning_rate": 8.047156049843264e-06, "loss": 1.1456, "step": 3897 }, { "epoch": 0.31, "grad_norm": 0.825894519966886, "learning_rate": 8.046125848956155e-06, "loss": 1.1067, "step": 3898 }, { "epoch": 0.31, "grad_norm": 1.7435986848376734, "learning_rate": 8.045095442387666e-06, "loss": 0.8175, "step": 3899 }, { "epoch": 0.31, "grad_norm": 1.442719297918469, "learning_rate": 8.04406483020737e-06, "loss": 0.7432, "step": 3900 }, { "epoch": 0.31, "grad_norm": 1.484868110570449, "learning_rate": 8.043034012484853e-06, "loss": 0.837, "step": 3901 }, { "epoch": 0.31, "grad_norm": 1.5040116327475295, "learning_rate": 8.042002989289723e-06, "loss": 0.6894, "step": 3902 }, { "epoch": 0.31, "grad_norm": 1.635191258745165, "learning_rate": 8.040971760691596e-06, "loss": 0.8409, "step": 3903 }, { "epoch": 0.31, "grad_norm": 1.4931201052380514, "learning_rate": 8.0399403267601e-06, "loss": 0.7305, "step": 3904 }, { "epoch": 0.31, "grad_norm": 1.6160006047228046, "learning_rate": 8.038908687564884e-06, "loss": 0.7625, "step": 3905 }, { "epoch": 0.31, "grad_norm": 1.5789898914179807, "learning_rate": 8.037876843175602e-06, "loss": 0.7334, "step": 3906 }, { "epoch": 0.31, "grad_norm": 1.5161423110216306, "learning_rate": 8.036844793661933e-06, "loss": 0.7481, "step": 3907 }, { "epoch": 0.31, "grad_norm": 1.5279660145993619, "learning_rate": 8.035812539093557e-06, "loss": 0.8463, "step": 3908 }, { "epoch": 0.31, "grad_norm": 1.5729013542724508, "learning_rate": 8.034780079540177e-06, "loss": 0.8958, "step": 3909 }, { "epoch": 0.31, "grad_norm": 1.4780183930755386, "learning_rate": 8.033747415071507e-06, "loss": 0.7521, "step": 3910 }, { "epoch": 0.31, "grad_norm": 1.595416949058809, "learning_rate": 8.032714545757274e-06, "loss": 0.7413, "step": 3911 }, { "epoch": 0.31, "grad_norm": 1.5768405698393624, "learning_rate": 8.031681471667218e-06, "loss": 0.7642, "step": 3912 }, { "epoch": 0.31, "grad_norm": 1.2855672214473255, "learning_rate": 8.030648192871098e-06, "loss": 1.1237, "step": 3913 }, { "epoch": 0.31, "grad_norm": 1.6288616836578869, "learning_rate": 8.029614709438684e-06, "loss": 0.7922, "step": 3914 }, { "epoch": 0.31, "grad_norm": 1.4917122284767028, "learning_rate": 8.028581021439755e-06, "loss": 0.8448, "step": 3915 }, { "epoch": 0.31, "grad_norm": 1.4978350686115647, "learning_rate": 8.027547128944111e-06, "loss": 0.756, "step": 3916 }, { "epoch": 0.31, "grad_norm": 1.600167008675974, "learning_rate": 8.026513032021563e-06, "loss": 0.807, "step": 3917 }, { "epoch": 0.31, "grad_norm": 1.533081932848928, "learning_rate": 8.025478730741932e-06, "loss": 0.8281, "step": 3918 }, { "epoch": 0.31, "grad_norm": 1.528962051245787, "learning_rate": 8.02444422517506e-06, "loss": 0.7881, "step": 3919 }, { "epoch": 0.31, "grad_norm": 1.5880010832412603, "learning_rate": 8.023409515390798e-06, "loss": 0.8686, "step": 3920 }, { "epoch": 0.31, "grad_norm": 1.5772079374608643, "learning_rate": 8.022374601459012e-06, "loss": 0.8091, "step": 3921 }, { "epoch": 0.31, "grad_norm": 1.4950076215100974, "learning_rate": 8.021339483449585e-06, "loss": 0.7054, "step": 3922 }, { "epoch": 0.31, "grad_norm": 1.5981906239735482, "learning_rate": 8.020304161432404e-06, "loss": 0.8258, "step": 3923 }, { "epoch": 0.31, "grad_norm": 1.582871466044518, "learning_rate": 8.01926863547738e-06, "loss": 0.7819, "step": 3924 }, { "epoch": 0.31, "grad_norm": 1.5111313561514277, "learning_rate": 8.018232905654435e-06, "loss": 0.7971, "step": 3925 }, { "epoch": 0.31, "grad_norm": 1.4606330208308904, "learning_rate": 8.017196972033502e-06, "loss": 0.7065, "step": 3926 }, { "epoch": 0.32, "grad_norm": 1.5981221335128113, "learning_rate": 8.01616083468453e-06, "loss": 0.8025, "step": 3927 }, { "epoch": 0.32, "grad_norm": 1.5010692513893884, "learning_rate": 8.015124493677483e-06, "loss": 0.7582, "step": 3928 }, { "epoch": 0.32, "grad_norm": 1.5093800921341247, "learning_rate": 8.014087949082333e-06, "loss": 0.8315, "step": 3929 }, { "epoch": 0.32, "grad_norm": 1.5224157662854603, "learning_rate": 8.013051200969074e-06, "loss": 0.7838, "step": 3930 }, { "epoch": 0.32, "grad_norm": 1.3965478340878315, "learning_rate": 8.012014249407707e-06, "loss": 0.7161, "step": 3931 }, { "epoch": 0.32, "grad_norm": 1.522353712739441, "learning_rate": 8.01097709446825e-06, "loss": 0.822, "step": 3932 }, { "epoch": 0.32, "grad_norm": 1.6572966323217033, "learning_rate": 8.009939736220737e-06, "loss": 0.8421, "step": 3933 }, { "epoch": 0.32, "grad_norm": 1.5463011077422244, "learning_rate": 8.008902174735209e-06, "loss": 0.9301, "step": 3934 }, { "epoch": 0.32, "grad_norm": 1.5258653522267716, "learning_rate": 8.007864410081726e-06, "loss": 0.8304, "step": 3935 }, { "epoch": 0.32, "grad_norm": 0.9508047777912927, "learning_rate": 8.006826442330362e-06, "loss": 1.1158, "step": 3936 }, { "epoch": 0.32, "grad_norm": 1.4941367142638013, "learning_rate": 8.005788271551198e-06, "loss": 0.8637, "step": 3937 }, { "epoch": 0.32, "grad_norm": 1.5717426933810503, "learning_rate": 8.004749897814338e-06, "loss": 0.7602, "step": 3938 }, { "epoch": 0.32, "grad_norm": 1.41273927472715, "learning_rate": 8.003711321189895e-06, "loss": 0.8006, "step": 3939 }, { "epoch": 0.32, "grad_norm": 0.8022454838430284, "learning_rate": 8.002672541747996e-06, "loss": 1.1078, "step": 3940 }, { "epoch": 0.32, "grad_norm": 1.397537296827498, "learning_rate": 8.00163355955878e-06, "loss": 0.7286, "step": 3941 }, { "epoch": 0.32, "grad_norm": 1.6035521833292534, "learning_rate": 8.0005943746924e-06, "loss": 0.8873, "step": 3942 }, { "epoch": 0.32, "grad_norm": 1.5815275763886332, "learning_rate": 7.999554987219029e-06, "loss": 0.7995, "step": 3943 }, { "epoch": 0.32, "grad_norm": 1.4728916686088978, "learning_rate": 7.998515397208846e-06, "loss": 0.7566, "step": 3944 }, { "epoch": 0.32, "grad_norm": 1.4588430282845442, "learning_rate": 7.997475604732047e-06, "loss": 0.7871, "step": 3945 }, { "epoch": 0.32, "grad_norm": 1.4199227971928787, "learning_rate": 7.99643560985884e-06, "loss": 0.7435, "step": 3946 }, { "epoch": 0.32, "grad_norm": 1.511175635275945, "learning_rate": 7.995395412659449e-06, "loss": 0.7961, "step": 3947 }, { "epoch": 0.32, "grad_norm": 1.5800139440966647, "learning_rate": 7.994355013204111e-06, "loss": 0.7533, "step": 3948 }, { "epoch": 0.32, "grad_norm": 1.497130382038133, "learning_rate": 7.993314411563075e-06, "loss": 0.7005, "step": 3949 }, { "epoch": 0.32, "grad_norm": 1.523062216750589, "learning_rate": 7.992273607806607e-06, "loss": 0.8628, "step": 3950 }, { "epoch": 0.32, "grad_norm": 1.0552400363427563, "learning_rate": 7.99123260200498e-06, "loss": 1.1456, "step": 3951 }, { "epoch": 0.32, "grad_norm": 1.4838964707276467, "learning_rate": 7.99019139422849e-06, "loss": 0.8016, "step": 3952 }, { "epoch": 0.32, "grad_norm": 1.4721447584061125, "learning_rate": 7.98914998454744e-06, "loss": 0.8448, "step": 3953 }, { "epoch": 0.32, "grad_norm": 1.5045001981265207, "learning_rate": 7.988108373032147e-06, "loss": 0.8349, "step": 3954 }, { "epoch": 0.32, "grad_norm": 1.4764427517327932, "learning_rate": 7.987066559752943e-06, "loss": 0.8238, "step": 3955 }, { "epoch": 0.32, "grad_norm": 0.8068766089902887, "learning_rate": 7.986024544780175e-06, "loss": 1.1134, "step": 3956 }, { "epoch": 0.32, "grad_norm": 1.4506128140720853, "learning_rate": 7.984982328184203e-06, "loss": 0.8638, "step": 3957 }, { "epoch": 0.32, "grad_norm": 1.5971101122615332, "learning_rate": 7.983939910035398e-06, "loss": 0.8183, "step": 3958 }, { "epoch": 0.32, "grad_norm": 1.430903795450547, "learning_rate": 7.982897290404146e-06, "loss": 0.8311, "step": 3959 }, { "epoch": 0.32, "grad_norm": 1.5335505344391536, "learning_rate": 7.981854469360851e-06, "loss": 0.7507, "step": 3960 }, { "epoch": 0.32, "grad_norm": 1.6263028626172162, "learning_rate": 7.98081144697592e-06, "loss": 0.8323, "step": 3961 }, { "epoch": 0.32, "grad_norm": 1.55845023430453, "learning_rate": 7.979768223319786e-06, "loss": 0.8229, "step": 3962 }, { "epoch": 0.32, "grad_norm": 1.482748764866512, "learning_rate": 7.978724798462886e-06, "loss": 0.8022, "step": 3963 }, { "epoch": 0.32, "grad_norm": 1.493735008454351, "learning_rate": 7.977681172475679e-06, "loss": 0.8549, "step": 3964 }, { "epoch": 0.32, "grad_norm": 1.482532742297793, "learning_rate": 7.97663734542863e-06, "loss": 0.8127, "step": 3965 }, { "epoch": 0.32, "grad_norm": 1.7513453208855454, "learning_rate": 7.97559331739222e-06, "loss": 0.841, "step": 3966 }, { "epoch": 0.32, "grad_norm": 1.4847733527778437, "learning_rate": 7.974549088436945e-06, "loss": 0.766, "step": 3967 }, { "epoch": 0.32, "grad_norm": 0.9435898956883498, "learning_rate": 7.973504658633316e-06, "loss": 1.0978, "step": 3968 }, { "epoch": 0.32, "grad_norm": 1.4863797702954973, "learning_rate": 7.972460028051852e-06, "loss": 0.8088, "step": 3969 }, { "epoch": 0.32, "grad_norm": 1.4353803053852876, "learning_rate": 7.971415196763088e-06, "loss": 0.7848, "step": 3970 }, { "epoch": 0.32, "grad_norm": 1.5241599716294403, "learning_rate": 7.970370164837577e-06, "loss": 0.8083, "step": 3971 }, { "epoch": 0.32, "grad_norm": 1.469412444802688, "learning_rate": 7.96932493234588e-06, "loss": 0.7511, "step": 3972 }, { "epoch": 0.32, "grad_norm": 1.520296326018375, "learning_rate": 7.968279499358573e-06, "loss": 0.7745, "step": 3973 }, { "epoch": 0.32, "grad_norm": 1.8838530225665533, "learning_rate": 7.967233865946249e-06, "loss": 0.7725, "step": 3974 }, { "epoch": 0.32, "grad_norm": 1.4907306864964074, "learning_rate": 7.966188032179507e-06, "loss": 0.7788, "step": 3975 }, { "epoch": 0.32, "grad_norm": 1.4041968878156443, "learning_rate": 7.965141998128968e-06, "loss": 0.7796, "step": 3976 }, { "epoch": 0.32, "grad_norm": 1.469675791506989, "learning_rate": 7.96409576386526e-06, "loss": 0.7511, "step": 3977 }, { "epoch": 0.32, "grad_norm": 1.4097788653134227, "learning_rate": 7.963049329459029e-06, "loss": 0.7424, "step": 3978 }, { "epoch": 0.32, "grad_norm": 1.4844135232211404, "learning_rate": 7.962002694980933e-06, "loss": 0.8286, "step": 3979 }, { "epoch": 0.32, "grad_norm": 1.4362714088611868, "learning_rate": 7.960955860501641e-06, "loss": 0.7923, "step": 3980 }, { "epoch": 0.32, "grad_norm": 1.6764891449864034, "learning_rate": 7.959908826091838e-06, "loss": 0.8033, "step": 3981 }, { "epoch": 0.32, "grad_norm": 0.922305893030689, "learning_rate": 7.958861591822223e-06, "loss": 1.1002, "step": 3982 }, { "epoch": 0.32, "grad_norm": 2.1487739174794696, "learning_rate": 7.957814157763505e-06, "loss": 0.808, "step": 3983 }, { "epoch": 0.32, "grad_norm": 1.5184797715214313, "learning_rate": 7.956766523986416e-06, "loss": 0.8008, "step": 3984 }, { "epoch": 0.32, "grad_norm": 1.538374290753456, "learning_rate": 7.95571869056169e-06, "loss": 0.8075, "step": 3985 }, { "epoch": 0.32, "grad_norm": 1.4921750956302897, "learning_rate": 7.954670657560078e-06, "loss": 0.7502, "step": 3986 }, { "epoch": 0.32, "grad_norm": 0.7767683722944754, "learning_rate": 7.953622425052346e-06, "loss": 1.0855, "step": 3987 }, { "epoch": 0.32, "grad_norm": 0.8113986052820087, "learning_rate": 7.952573993109273e-06, "loss": 1.1276, "step": 3988 }, { "epoch": 0.32, "grad_norm": 1.6856254695787238, "learning_rate": 7.951525361801655e-06, "loss": 0.7938, "step": 3989 }, { "epoch": 0.32, "grad_norm": 1.4442232547313745, "learning_rate": 7.950476531200295e-06, "loss": 0.7076, "step": 3990 }, { "epoch": 0.32, "grad_norm": 0.7882897148337747, "learning_rate": 7.949427501376014e-06, "loss": 1.1051, "step": 3991 }, { "epoch": 0.32, "grad_norm": 1.42989455433651, "learning_rate": 7.948378272399641e-06, "loss": 0.8489, "step": 3992 }, { "epoch": 0.32, "grad_norm": 1.4323906353812812, "learning_rate": 7.947328844342028e-06, "loss": 0.7633, "step": 3993 }, { "epoch": 0.32, "grad_norm": 1.423544031476372, "learning_rate": 7.94627921727403e-06, "loss": 0.7676, "step": 3994 }, { "epoch": 0.32, "grad_norm": 1.6779422803489272, "learning_rate": 7.945229391266522e-06, "loss": 0.7558, "step": 3995 }, { "epoch": 0.32, "grad_norm": 1.4034077794978006, "learning_rate": 7.944179366390392e-06, "loss": 0.7166, "step": 3996 }, { "epoch": 0.32, "grad_norm": 1.4439694649847845, "learning_rate": 7.943129142716538e-06, "loss": 0.7891, "step": 3997 }, { "epoch": 0.32, "grad_norm": 0.9293026565166922, "learning_rate": 7.942078720315876e-06, "loss": 1.1202, "step": 3998 }, { "epoch": 0.32, "grad_norm": 1.6122789818754983, "learning_rate": 7.941028099259331e-06, "loss": 0.8726, "step": 3999 }, { "epoch": 0.32, "grad_norm": 1.6053021430983, "learning_rate": 7.939977279617843e-06, "loss": 0.7149, "step": 4000 }, { "epoch": 0.32, "grad_norm": 1.4264230849097115, "learning_rate": 7.938926261462366e-06, "loss": 0.7008, "step": 4001 }, { "epoch": 0.32, "grad_norm": 1.3770758150469997, "learning_rate": 7.937875044863868e-06, "loss": 0.6878, "step": 4002 }, { "epoch": 0.32, "grad_norm": 1.5716848223825886, "learning_rate": 7.93682362989333e-06, "loss": 0.7607, "step": 4003 }, { "epoch": 0.32, "grad_norm": 1.4644607383001038, "learning_rate": 7.935772016621744e-06, "loss": 0.8052, "step": 4004 }, { "epoch": 0.32, "grad_norm": 1.4080722193835142, "learning_rate": 7.93472020512012e-06, "loss": 0.7979, "step": 4005 }, { "epoch": 0.32, "grad_norm": 1.5861018672425293, "learning_rate": 7.933668195459474e-06, "loss": 0.7805, "step": 4006 }, { "epoch": 0.32, "grad_norm": 1.5487384721554986, "learning_rate": 7.932615987710846e-06, "loss": 0.7512, "step": 4007 }, { "epoch": 0.32, "grad_norm": 1.6335414516069713, "learning_rate": 7.931563581945278e-06, "loss": 0.8429, "step": 4008 }, { "epoch": 0.32, "grad_norm": 1.4901232154264255, "learning_rate": 7.930510978233837e-06, "loss": 0.7712, "step": 4009 }, { "epoch": 0.32, "grad_norm": 1.6244171023945388, "learning_rate": 7.92945817664759e-06, "loss": 0.7726, "step": 4010 }, { "epoch": 0.32, "grad_norm": 1.0244116488773327, "learning_rate": 7.928405177257632e-06, "loss": 1.0874, "step": 4011 }, { "epoch": 0.32, "grad_norm": 1.589382661929631, "learning_rate": 7.927351980135056e-06, "loss": 0.8932, "step": 4012 }, { "epoch": 0.32, "grad_norm": 1.4551717560896589, "learning_rate": 7.926298585350985e-06, "loss": 0.7523, "step": 4013 }, { "epoch": 0.32, "grad_norm": 0.8086252023680871, "learning_rate": 7.925244992976538e-06, "loss": 1.137, "step": 4014 }, { "epoch": 0.32, "grad_norm": 1.4313569773399977, "learning_rate": 7.924191203082863e-06, "loss": 0.7442, "step": 4015 }, { "epoch": 0.32, "grad_norm": 1.542888586651162, "learning_rate": 7.92313721574111e-06, "loss": 0.866, "step": 4016 }, { "epoch": 0.32, "grad_norm": 1.5041287099370164, "learning_rate": 7.922083031022448e-06, "loss": 0.7606, "step": 4017 }, { "epoch": 0.32, "grad_norm": 1.5654488246795153, "learning_rate": 7.92102864899806e-06, "loss": 0.781, "step": 4018 }, { "epoch": 0.32, "grad_norm": 1.409758508701643, "learning_rate": 7.919974069739136e-06, "loss": 0.7864, "step": 4019 }, { "epoch": 0.32, "grad_norm": 0.9756787816220674, "learning_rate": 7.918919293316886e-06, "loss": 1.1427, "step": 4020 }, { "epoch": 0.32, "grad_norm": 1.5737861390962202, "learning_rate": 7.917864319802533e-06, "loss": 0.6556, "step": 4021 }, { "epoch": 0.32, "grad_norm": 1.5410880969005443, "learning_rate": 7.916809149267307e-06, "loss": 0.7605, "step": 4022 }, { "epoch": 0.32, "grad_norm": 1.5377615528700967, "learning_rate": 7.915753781782458e-06, "loss": 0.7204, "step": 4023 }, { "epoch": 0.32, "grad_norm": 1.5184454524875124, "learning_rate": 7.914698217419246e-06, "loss": 0.7666, "step": 4024 }, { "epoch": 0.32, "grad_norm": 1.4538770627168731, "learning_rate": 7.913642456248947e-06, "loss": 0.7906, "step": 4025 }, { "epoch": 0.32, "grad_norm": 1.6910433230368835, "learning_rate": 7.912586498342845e-06, "loss": 0.7678, "step": 4026 }, { "epoch": 0.32, "grad_norm": 1.4585120888757963, "learning_rate": 7.911530343772244e-06, "loss": 0.877, "step": 4027 }, { "epoch": 0.32, "grad_norm": 1.516307564575182, "learning_rate": 7.910473992608456e-06, "loss": 0.7885, "step": 4028 }, { "epoch": 0.32, "grad_norm": 1.614751566727202, "learning_rate": 7.90941744492281e-06, "loss": 0.7929, "step": 4029 }, { "epoch": 0.32, "grad_norm": 1.3386850160481112, "learning_rate": 7.908360700786643e-06, "loss": 0.7608, "step": 4030 }, { "epoch": 0.32, "grad_norm": 1.4692965236456101, "learning_rate": 7.907303760271313e-06, "loss": 0.7737, "step": 4031 }, { "epoch": 0.32, "grad_norm": 1.434564167102992, "learning_rate": 7.906246623448184e-06, "loss": 0.753, "step": 4032 }, { "epoch": 0.32, "grad_norm": 1.5189727344698256, "learning_rate": 7.905189290388637e-06, "loss": 0.7387, "step": 4033 }, { "epoch": 0.32, "grad_norm": 1.6775843680061555, "learning_rate": 7.904131761164068e-06, "loss": 0.8349, "step": 4034 }, { "epoch": 0.32, "grad_norm": 1.4518383636582608, "learning_rate": 7.903074035845882e-06, "loss": 0.7412, "step": 4035 }, { "epoch": 0.32, "grad_norm": 1.4645602391130947, "learning_rate": 7.902016114505495e-06, "loss": 0.7586, "step": 4036 }, { "epoch": 0.32, "grad_norm": 1.586925272838685, "learning_rate": 7.900957997214349e-06, "loss": 0.8, "step": 4037 }, { "epoch": 0.32, "grad_norm": 1.7772556984880807, "learning_rate": 7.899899684043882e-06, "loss": 0.7893, "step": 4038 }, { "epoch": 0.32, "grad_norm": 1.4200462696909866, "learning_rate": 7.898841175065559e-06, "loss": 0.7463, "step": 4039 }, { "epoch": 0.32, "grad_norm": 1.0361127140023738, "learning_rate": 7.89778247035085e-06, "loss": 1.1175, "step": 4040 }, { "epoch": 0.32, "grad_norm": 1.4970211209021385, "learning_rate": 7.896723569971243e-06, "loss": 0.758, "step": 4041 }, { "epoch": 0.32, "grad_norm": 1.3944338380812138, "learning_rate": 7.895664473998237e-06, "loss": 0.7544, "step": 4042 }, { "epoch": 0.32, "grad_norm": 1.4572424437143552, "learning_rate": 7.894605182503346e-06, "loss": 0.805, "step": 4043 }, { "epoch": 0.32, "grad_norm": 0.8340840137789861, "learning_rate": 7.89354569555809e-06, "loss": 1.1268, "step": 4044 }, { "epoch": 0.32, "grad_norm": 1.409674244037888, "learning_rate": 7.892486013234015e-06, "loss": 0.7817, "step": 4045 }, { "epoch": 0.32, "grad_norm": 1.5416391090270438, "learning_rate": 7.891426135602672e-06, "loss": 0.779, "step": 4046 }, { "epoch": 0.32, "grad_norm": 1.4389902976456188, "learning_rate": 7.89036606273562e-06, "loss": 0.7497, "step": 4047 }, { "epoch": 0.32, "grad_norm": 1.4342151363392264, "learning_rate": 7.889305794704446e-06, "loss": 0.7498, "step": 4048 }, { "epoch": 0.32, "grad_norm": 1.4615083143292265, "learning_rate": 7.888245331580737e-06, "loss": 0.7344, "step": 4049 }, { "epoch": 0.32, "grad_norm": 1.1191123232837668, "learning_rate": 7.887184673436099e-06, "loss": 1.1355, "step": 4050 }, { "epoch": 0.33, "grad_norm": 1.507052673709932, "learning_rate": 7.88612382034215e-06, "loss": 0.7615, "step": 4051 }, { "epoch": 0.33, "grad_norm": 1.7046166682342563, "learning_rate": 7.88506277237052e-06, "loss": 0.9015, "step": 4052 }, { "epoch": 0.33, "grad_norm": 1.4480657340788563, "learning_rate": 7.884001529592855e-06, "loss": 0.776, "step": 4053 }, { "epoch": 0.33, "grad_norm": 1.451168725773406, "learning_rate": 7.882940092080813e-06, "loss": 0.7548, "step": 4054 }, { "epoch": 0.33, "grad_norm": 1.4824560484265543, "learning_rate": 7.881878459906062e-06, "loss": 0.7171, "step": 4055 }, { "epoch": 0.33, "grad_norm": 0.8727166542680257, "learning_rate": 7.880816633140289e-06, "loss": 1.1119, "step": 4056 }, { "epoch": 0.33, "grad_norm": 1.4786151724210466, "learning_rate": 7.879754611855191e-06, "loss": 0.7979, "step": 4057 }, { "epoch": 0.33, "grad_norm": 1.43418046278535, "learning_rate": 7.878692396122474e-06, "loss": 0.8375, "step": 4058 }, { "epoch": 0.33, "grad_norm": 1.597385724479353, "learning_rate": 7.877629986013864e-06, "loss": 0.7918, "step": 4059 }, { "epoch": 0.33, "grad_norm": 1.580689772398066, "learning_rate": 7.876567381601097e-06, "loss": 0.6873, "step": 4060 }, { "epoch": 0.33, "grad_norm": 0.8228734625899167, "learning_rate": 7.875504582955925e-06, "loss": 1.146, "step": 4061 }, { "epoch": 0.33, "grad_norm": 1.5864897145889505, "learning_rate": 7.874441590150105e-06, "loss": 0.8292, "step": 4062 }, { "epoch": 0.33, "grad_norm": 1.6022631171520454, "learning_rate": 7.87337840325542e-06, "loss": 0.7522, "step": 4063 }, { "epoch": 0.33, "grad_norm": 1.5566406270484123, "learning_rate": 7.872315022343654e-06, "loss": 0.8353, "step": 4064 }, { "epoch": 0.33, "grad_norm": 1.516582647511497, "learning_rate": 7.871251447486608e-06, "loss": 0.7415, "step": 4065 }, { "epoch": 0.33, "grad_norm": 1.5479071464902212, "learning_rate": 7.870187678756099e-06, "loss": 0.7962, "step": 4066 }, { "epoch": 0.33, "grad_norm": 1.4371510181474694, "learning_rate": 7.869123716223954e-06, "loss": 0.8238, "step": 4067 }, { "epoch": 0.33, "grad_norm": 1.6477147155495877, "learning_rate": 7.868059559962017e-06, "loss": 0.7683, "step": 4068 }, { "epoch": 0.33, "grad_norm": 0.8643213532316336, "learning_rate": 7.866995210042139e-06, "loss": 1.0929, "step": 4069 }, { "epoch": 0.33, "grad_norm": 1.4538245747162126, "learning_rate": 7.865930666536188e-06, "loss": 0.797, "step": 4070 }, { "epoch": 0.33, "grad_norm": 1.4427271612613777, "learning_rate": 7.864865929516047e-06, "loss": 0.6874, "step": 4071 }, { "epoch": 0.33, "grad_norm": 0.7917307904056428, "learning_rate": 7.863800999053609e-06, "loss": 1.0872, "step": 4072 }, { "epoch": 0.33, "grad_norm": 1.5068676435797244, "learning_rate": 7.862735875220775e-06, "loss": 0.799, "step": 4073 }, { "epoch": 0.33, "grad_norm": 1.3920598739037855, "learning_rate": 7.861670558089471e-06, "loss": 0.7388, "step": 4074 }, { "epoch": 0.33, "grad_norm": 1.4556337555821652, "learning_rate": 7.860605047731627e-06, "loss": 0.6964, "step": 4075 }, { "epoch": 0.33, "grad_norm": 1.4783271325254737, "learning_rate": 7.859539344219189e-06, "loss": 0.722, "step": 4076 }, { "epoch": 0.33, "grad_norm": 1.601602194872592, "learning_rate": 7.858473447624116e-06, "loss": 0.7748, "step": 4077 }, { "epoch": 0.33, "grad_norm": 0.8806870330300263, "learning_rate": 7.857407358018378e-06, "loss": 1.1427, "step": 4078 }, { "epoch": 0.33, "grad_norm": 1.506532956427876, "learning_rate": 7.856341075473963e-06, "loss": 0.9009, "step": 4079 }, { "epoch": 0.33, "grad_norm": 1.4226336392883272, "learning_rate": 7.855274600062866e-06, "loss": 0.74, "step": 4080 }, { "epoch": 0.33, "grad_norm": 1.4438381699499918, "learning_rate": 7.8542079318571e-06, "loss": 0.7447, "step": 4081 }, { "epoch": 0.33, "grad_norm": 1.579882102746191, "learning_rate": 7.853141070928687e-06, "loss": 0.8575, "step": 4082 }, { "epoch": 0.33, "grad_norm": 1.4538322322652721, "learning_rate": 7.852074017349665e-06, "loss": 0.7674, "step": 4083 }, { "epoch": 0.33, "grad_norm": 1.402468000645661, "learning_rate": 7.851006771192083e-06, "loss": 0.7969, "step": 4084 }, { "epoch": 0.33, "grad_norm": 1.6529370566615584, "learning_rate": 7.849939332528007e-06, "loss": 0.7627, "step": 4085 }, { "epoch": 0.33, "grad_norm": 1.5695476636550638, "learning_rate": 7.848871701429508e-06, "loss": 0.8248, "step": 4086 }, { "epoch": 0.33, "grad_norm": 1.6294974980001364, "learning_rate": 7.847803877968679e-06, "loss": 0.7705, "step": 4087 }, { "epoch": 0.33, "grad_norm": 1.571814779154909, "learning_rate": 7.84673586221762e-06, "loss": 0.7596, "step": 4088 }, { "epoch": 0.33, "grad_norm": 0.9084715371598262, "learning_rate": 7.845667654248445e-06, "loss": 1.1068, "step": 4089 }, { "epoch": 0.33, "grad_norm": 1.8207476228538237, "learning_rate": 7.844599254133284e-06, "loss": 0.7604, "step": 4090 }, { "epoch": 0.33, "grad_norm": 1.4508272775647215, "learning_rate": 7.843530661944277e-06, "loss": 0.726, "step": 4091 }, { "epoch": 0.33, "grad_norm": 1.5907223890401145, "learning_rate": 7.842461877753575e-06, "loss": 0.8023, "step": 4092 }, { "epoch": 0.33, "grad_norm": 1.5669366383091048, "learning_rate": 7.84139290163335e-06, "loss": 0.7904, "step": 4093 }, { "epoch": 0.33, "grad_norm": 0.8564035950884357, "learning_rate": 7.84032373365578e-06, "loss": 1.1278, "step": 4094 }, { "epoch": 0.33, "grad_norm": 1.556462657097795, "learning_rate": 7.839254373893056e-06, "loss": 0.7652, "step": 4095 }, { "epoch": 0.33, "grad_norm": 1.5299307383643663, "learning_rate": 7.838184822417382e-06, "loss": 0.8047, "step": 4096 }, { "epoch": 0.33, "grad_norm": 1.3197477452136857, "learning_rate": 7.83711507930098e-06, "loss": 0.6328, "step": 4097 }, { "epoch": 0.33, "grad_norm": 1.441556170905288, "learning_rate": 7.836045144616082e-06, "loss": 0.8265, "step": 4098 }, { "epoch": 0.33, "grad_norm": 0.7949569619280575, "learning_rate": 7.834975018434929e-06, "loss": 1.0918, "step": 4099 }, { "epoch": 0.33, "grad_norm": 1.521381015762904, "learning_rate": 7.833904700829782e-06, "loss": 0.7295, "step": 4100 }, { "epoch": 0.33, "grad_norm": 0.8054600486016754, "learning_rate": 7.832834191872907e-06, "loss": 1.1056, "step": 4101 }, { "epoch": 0.33, "grad_norm": 1.5923153208522647, "learning_rate": 7.831763491636592e-06, "loss": 0.7895, "step": 4102 }, { "epoch": 0.33, "grad_norm": 1.6630896943771885, "learning_rate": 7.830692600193129e-06, "loss": 0.8648, "step": 4103 }, { "epoch": 0.33, "grad_norm": 0.8160574882308241, "learning_rate": 7.829621517614829e-06, "loss": 1.1085, "step": 4104 }, { "epoch": 0.33, "grad_norm": 1.54162733514036, "learning_rate": 7.828550243974015e-06, "loss": 0.8517, "step": 4105 }, { "epoch": 0.33, "grad_norm": 1.638883667788359, "learning_rate": 7.827478779343021e-06, "loss": 0.7748, "step": 4106 }, { "epoch": 0.33, "grad_norm": 1.6654975842578723, "learning_rate": 7.826407123794195e-06, "loss": 0.8303, "step": 4107 }, { "epoch": 0.33, "grad_norm": 1.6193716780900804, "learning_rate": 7.825335277399896e-06, "loss": 0.815, "step": 4108 }, { "epoch": 0.33, "grad_norm": 1.5303807132423113, "learning_rate": 7.824263240232497e-06, "loss": 0.6822, "step": 4109 }, { "epoch": 0.33, "grad_norm": 1.6187999543342606, "learning_rate": 7.823191012364386e-06, "loss": 0.8476, "step": 4110 }, { "epoch": 0.33, "grad_norm": 1.3816735424447701, "learning_rate": 7.822118593867964e-06, "loss": 0.8098, "step": 4111 }, { "epoch": 0.33, "grad_norm": 1.578213594403489, "learning_rate": 7.821045984815641e-06, "loss": 0.7366, "step": 4112 }, { "epoch": 0.33, "grad_norm": 1.526516972990694, "learning_rate": 7.81997318527984e-06, "loss": 0.8391, "step": 4113 }, { "epoch": 0.33, "grad_norm": 1.5594017964384999, "learning_rate": 7.818900195333007e-06, "loss": 0.8134, "step": 4114 }, { "epoch": 0.33, "grad_norm": 0.9490098654622884, "learning_rate": 7.817827015047581e-06, "loss": 1.1202, "step": 4115 }, { "epoch": 0.33, "grad_norm": 1.6347637286981944, "learning_rate": 7.816753644496034e-06, "loss": 0.7623, "step": 4116 }, { "epoch": 0.33, "grad_norm": 1.4179584170073853, "learning_rate": 7.81568008375084e-06, "loss": 0.776, "step": 4117 }, { "epoch": 0.33, "grad_norm": 2.015602844713071, "learning_rate": 7.81460633288449e-06, "loss": 0.8749, "step": 4118 }, { "epoch": 0.33, "grad_norm": 1.5811573162590287, "learning_rate": 7.813532391969482e-06, "loss": 0.7697, "step": 4119 }, { "epoch": 0.33, "grad_norm": 0.8242715783884929, "learning_rate": 7.812458261078333e-06, "loss": 1.1056, "step": 4120 }, { "epoch": 0.33, "grad_norm": 0.7907055640814653, "learning_rate": 7.811383940283571e-06, "loss": 1.1142, "step": 4121 }, { "epoch": 0.33, "grad_norm": 1.4068079353304708, "learning_rate": 7.81030942965774e-06, "loss": 0.8015, "step": 4122 }, { "epoch": 0.33, "grad_norm": 1.5446237816410053, "learning_rate": 7.809234729273386e-06, "loss": 0.7982, "step": 4123 }, { "epoch": 0.33, "grad_norm": 1.4930966998179445, "learning_rate": 7.808159839203085e-06, "loss": 0.753, "step": 4124 }, { "epoch": 0.33, "grad_norm": 0.8500370876162396, "learning_rate": 7.807084759519405e-06, "loss": 1.1089, "step": 4125 }, { "epoch": 0.33, "grad_norm": 1.5481342382308017, "learning_rate": 7.806009490294946e-06, "loss": 0.7734, "step": 4126 }, { "epoch": 0.33, "grad_norm": 1.6396289498717669, "learning_rate": 7.80493403160231e-06, "loss": 0.7621, "step": 4127 }, { "epoch": 0.33, "grad_norm": 1.4884550053728478, "learning_rate": 7.803858383514111e-06, "loss": 0.7916, "step": 4128 }, { "epoch": 0.33, "grad_norm": 1.6710335838552137, "learning_rate": 7.802782546102987e-06, "loss": 0.7253, "step": 4129 }, { "epoch": 0.33, "grad_norm": 1.51243507440311, "learning_rate": 7.801706519441572e-06, "loss": 0.8137, "step": 4130 }, { "epoch": 0.33, "grad_norm": 1.4713141545070838, "learning_rate": 7.800630303602529e-06, "loss": 0.7663, "step": 4131 }, { "epoch": 0.33, "grad_norm": 1.7024140399457812, "learning_rate": 7.799553898658525e-06, "loss": 0.8451, "step": 4132 }, { "epoch": 0.33, "grad_norm": 1.4937818726418948, "learning_rate": 7.798477304682237e-06, "loss": 0.774, "step": 4133 }, { "epoch": 0.33, "grad_norm": 1.392286372853869, "learning_rate": 7.797400521746365e-06, "loss": 0.7338, "step": 4134 }, { "epoch": 0.33, "grad_norm": 1.8315094396665093, "learning_rate": 7.796323549923611e-06, "loss": 0.7982, "step": 4135 }, { "epoch": 0.33, "grad_norm": 1.5703303950496914, "learning_rate": 7.7952463892867e-06, "loss": 0.7972, "step": 4136 }, { "epoch": 0.33, "grad_norm": 1.4370505945353111, "learning_rate": 7.79416903990836e-06, "loss": 0.8176, "step": 4137 }, { "epoch": 0.33, "grad_norm": 1.5315378188189168, "learning_rate": 7.793091501861336e-06, "loss": 0.8113, "step": 4138 }, { "epoch": 0.33, "grad_norm": 1.493297826204814, "learning_rate": 7.792013775218385e-06, "loss": 0.7883, "step": 4139 }, { "epoch": 0.33, "grad_norm": 1.5103652741010474, "learning_rate": 7.790935860052283e-06, "loss": 0.8237, "step": 4140 }, { "epoch": 0.33, "grad_norm": 1.60878437066434, "learning_rate": 7.78985775643581e-06, "loss": 0.7883, "step": 4141 }, { "epoch": 0.33, "grad_norm": 1.6303379230767572, "learning_rate": 7.78877946444176e-06, "loss": 0.7849, "step": 4142 }, { "epoch": 0.33, "grad_norm": 1.4521125867053326, "learning_rate": 7.787700984142945e-06, "loss": 0.7383, "step": 4143 }, { "epoch": 0.33, "grad_norm": 1.5215535166000425, "learning_rate": 7.786622315612182e-06, "loss": 0.8376, "step": 4144 }, { "epoch": 0.33, "grad_norm": 1.4834470221984573, "learning_rate": 7.785543458922311e-06, "loss": 0.7714, "step": 4145 }, { "epoch": 0.33, "grad_norm": 1.4113437731746563, "learning_rate": 7.784464414146176e-06, "loss": 0.831, "step": 4146 }, { "epoch": 0.33, "grad_norm": 1.4426812193711636, "learning_rate": 7.783385181356637e-06, "loss": 0.7565, "step": 4147 }, { "epoch": 0.33, "grad_norm": 1.5369042303573446, "learning_rate": 7.782305760626564e-06, "loss": 0.8149, "step": 4148 }, { "epoch": 0.33, "grad_norm": 1.3885004308701723, "learning_rate": 7.781226152028845e-06, "loss": 0.8012, "step": 4149 }, { "epoch": 0.33, "grad_norm": 1.4805343010848515, "learning_rate": 7.780146355636378e-06, "loss": 0.8468, "step": 4150 }, { "epoch": 0.33, "grad_norm": 1.461350739339513, "learning_rate": 7.779066371522071e-06, "loss": 0.819, "step": 4151 }, { "epoch": 0.33, "grad_norm": 1.5010365283091356, "learning_rate": 7.777986199758847e-06, "loss": 0.7936, "step": 4152 }, { "epoch": 0.33, "grad_norm": 1.6005476932629914, "learning_rate": 7.776905840419643e-06, "loss": 0.6725, "step": 4153 }, { "epoch": 0.33, "grad_norm": 1.5268487675121112, "learning_rate": 7.775825293577407e-06, "loss": 0.8111, "step": 4154 }, { "epoch": 0.33, "grad_norm": 1.7348004092040583, "learning_rate": 7.7747445593051e-06, "loss": 0.8825, "step": 4155 }, { "epoch": 0.33, "grad_norm": 1.521061782528016, "learning_rate": 7.773663637675695e-06, "loss": 0.7844, "step": 4156 }, { "epoch": 0.33, "grad_norm": 1.5080029549405463, "learning_rate": 7.772582528762179e-06, "loss": 0.7922, "step": 4157 }, { "epoch": 0.33, "grad_norm": 1.5267580668560983, "learning_rate": 7.771501232637551e-06, "loss": 0.7863, "step": 4158 }, { "epoch": 0.33, "grad_norm": 1.4504849781900764, "learning_rate": 7.770419749374822e-06, "loss": 0.7597, "step": 4159 }, { "epoch": 0.33, "grad_norm": 1.4215401641975258, "learning_rate": 7.769338079047018e-06, "loss": 0.7474, "step": 4160 }, { "epoch": 0.33, "grad_norm": 1.4740358823908783, "learning_rate": 7.768256221727174e-06, "loss": 0.7466, "step": 4161 }, { "epoch": 0.33, "grad_norm": 1.480892350314493, "learning_rate": 7.767174177488337e-06, "loss": 0.7742, "step": 4162 }, { "epoch": 0.33, "grad_norm": 1.5689751089711186, "learning_rate": 7.766091946403575e-06, "loss": 0.7378, "step": 4163 }, { "epoch": 0.33, "grad_norm": 1.4764941955455988, "learning_rate": 7.76500952854596e-06, "loss": 0.817, "step": 4164 }, { "epoch": 0.33, "grad_norm": 1.5208929577878219, "learning_rate": 7.763926923988577e-06, "loss": 0.7989, "step": 4165 }, { "epoch": 0.33, "grad_norm": 1.5255794407822512, "learning_rate": 7.762844132804527e-06, "loss": 0.7369, "step": 4166 }, { "epoch": 0.33, "grad_norm": 1.4059737456689585, "learning_rate": 7.761761155066927e-06, "loss": 0.817, "step": 4167 }, { "epoch": 0.33, "grad_norm": 1.3859612241865986, "learning_rate": 7.760677990848896e-06, "loss": 0.7259, "step": 4168 }, { "epoch": 0.33, "grad_norm": 1.573993097685248, "learning_rate": 7.759594640223576e-06, "loss": 0.7785, "step": 4169 }, { "epoch": 0.33, "grad_norm": 1.5092795509331314, "learning_rate": 7.758511103264116e-06, "loss": 0.888, "step": 4170 }, { "epoch": 0.33, "grad_norm": 1.5571101154144542, "learning_rate": 7.757427380043678e-06, "loss": 0.8347, "step": 4171 }, { "epoch": 0.33, "grad_norm": 1.4738906182238791, "learning_rate": 7.756343470635439e-06, "loss": 0.9369, "step": 4172 }, { "epoch": 0.33, "grad_norm": 1.5614808897314527, "learning_rate": 7.755259375112584e-06, "loss": 0.7991, "step": 4173 }, { "epoch": 0.33, "grad_norm": 1.344243084474159, "learning_rate": 7.754175093548317e-06, "loss": 0.7198, "step": 4174 }, { "epoch": 0.33, "grad_norm": 1.482746674485004, "learning_rate": 7.753090626015849e-06, "loss": 0.8319, "step": 4175 }, { "epoch": 0.34, "grad_norm": 1.6475920421551085, "learning_rate": 7.752005972588407e-06, "loss": 0.7721, "step": 4176 }, { "epoch": 0.34, "grad_norm": 1.5715240201339817, "learning_rate": 7.75092113333923e-06, "loss": 0.8921, "step": 4177 }, { "epoch": 0.34, "grad_norm": 1.579177309058003, "learning_rate": 7.749836108341567e-06, "loss": 0.7415, "step": 4178 }, { "epoch": 0.34, "grad_norm": 0.8657553044010098, "learning_rate": 7.748750897668683e-06, "loss": 1.0925, "step": 4179 }, { "epoch": 0.34, "grad_norm": 1.4854591447709586, "learning_rate": 7.747665501393851e-06, "loss": 0.79, "step": 4180 }, { "epoch": 0.34, "grad_norm": 1.6709120749312794, "learning_rate": 7.746579919590361e-06, "loss": 0.9027, "step": 4181 }, { "epoch": 0.34, "grad_norm": 1.5488594144211054, "learning_rate": 7.745494152331516e-06, "loss": 0.8503, "step": 4182 }, { "epoch": 0.34, "grad_norm": 1.4598032600445816, "learning_rate": 7.744408199690628e-06, "loss": 0.7489, "step": 4183 }, { "epoch": 0.34, "grad_norm": 1.4956423355160344, "learning_rate": 7.743322061741024e-06, "loss": 0.7445, "step": 4184 }, { "epoch": 0.34, "grad_norm": 1.6692359573244475, "learning_rate": 7.742235738556039e-06, "loss": 0.8656, "step": 4185 }, { "epoch": 0.34, "grad_norm": 1.4944136664385241, "learning_rate": 7.741149230209027e-06, "loss": 0.7515, "step": 4186 }, { "epoch": 0.34, "grad_norm": 1.4329411554988916, "learning_rate": 7.740062536773352e-06, "loss": 0.8521, "step": 4187 }, { "epoch": 0.34, "grad_norm": 0.9262220855397046, "learning_rate": 7.738975658322387e-06, "loss": 1.0981, "step": 4188 }, { "epoch": 0.34, "grad_norm": 0.8486505636447852, "learning_rate": 7.737888594929523e-06, "loss": 1.1131, "step": 4189 }, { "epoch": 0.34, "grad_norm": 1.5269352771980167, "learning_rate": 7.73680134666816e-06, "loss": 0.8515, "step": 4190 }, { "epoch": 0.34, "grad_norm": 1.6269869950257942, "learning_rate": 7.735713913611716e-06, "loss": 0.8207, "step": 4191 }, { "epoch": 0.34, "grad_norm": 1.5697695565775103, "learning_rate": 7.734626295833609e-06, "loss": 0.7819, "step": 4192 }, { "epoch": 0.34, "grad_norm": 0.8993608569996664, "learning_rate": 7.733538493407283e-06, "loss": 1.1325, "step": 4193 }, { "epoch": 0.34, "grad_norm": 1.5062565891862143, "learning_rate": 7.732450506406187e-06, "loss": 0.8299, "step": 4194 }, { "epoch": 0.34, "grad_norm": 1.560781833527426, "learning_rate": 7.731362334903784e-06, "loss": 0.7587, "step": 4195 }, { "epoch": 0.34, "grad_norm": 1.4762869223912052, "learning_rate": 7.730273978973552e-06, "loss": 0.7801, "step": 4196 }, { "epoch": 0.34, "grad_norm": 1.620528820247918, "learning_rate": 7.729185438688978e-06, "loss": 0.7637, "step": 4197 }, { "epoch": 0.34, "grad_norm": 1.5120754040670334, "learning_rate": 7.728096714123561e-06, "loss": 0.8028, "step": 4198 }, { "epoch": 0.34, "grad_norm": 1.47642578481397, "learning_rate": 7.727007805350815e-06, "loss": 0.7648, "step": 4199 }, { "epoch": 0.34, "grad_norm": 0.9337473362751902, "learning_rate": 7.725918712444266e-06, "loss": 1.1137, "step": 4200 }, { "epoch": 0.34, "grad_norm": 0.8491196234026714, "learning_rate": 7.724829435477455e-06, "loss": 1.0872, "step": 4201 }, { "epoch": 0.34, "grad_norm": 1.4885911911670133, "learning_rate": 7.723739974523929e-06, "loss": 0.7191, "step": 4202 }, { "epoch": 0.34, "grad_norm": 1.532010853104647, "learning_rate": 7.72265032965725e-06, "loss": 0.8507, "step": 4203 }, { "epoch": 0.34, "grad_norm": 1.5116661490788128, "learning_rate": 7.721560500950997e-06, "loss": 0.8456, "step": 4204 }, { "epoch": 0.34, "grad_norm": 1.5542730504734836, "learning_rate": 7.720470488478755e-06, "loss": 0.847, "step": 4205 }, { "epoch": 0.34, "grad_norm": 1.4754077371952712, "learning_rate": 7.719380292314126e-06, "loss": 0.797, "step": 4206 }, { "epoch": 0.34, "grad_norm": 1.4867416314680901, "learning_rate": 7.71828991253072e-06, "loss": 0.7553, "step": 4207 }, { "epoch": 0.34, "grad_norm": 1.5585481286202898, "learning_rate": 7.717199349202165e-06, "loss": 0.7899, "step": 4208 }, { "epoch": 0.34, "grad_norm": 1.2332870615249518, "learning_rate": 7.716108602402094e-06, "loss": 1.1336, "step": 4209 }, { "epoch": 0.34, "grad_norm": 1.384018840270826, "learning_rate": 7.71501767220416e-06, "loss": 0.7228, "step": 4210 }, { "epoch": 0.34, "grad_norm": 1.0001086061011353, "learning_rate": 7.713926558682027e-06, "loss": 1.1181, "step": 4211 }, { "epoch": 0.34, "grad_norm": 1.5205406437322297, "learning_rate": 7.712835261909366e-06, "loss": 0.7845, "step": 4212 }, { "epoch": 0.34, "grad_norm": 1.5061567898121624, "learning_rate": 7.711743781959863e-06, "loss": 0.8497, "step": 4213 }, { "epoch": 0.34, "grad_norm": 1.5229451963354952, "learning_rate": 7.71065211890722e-06, "loss": 0.7751, "step": 4214 }, { "epoch": 0.34, "grad_norm": 1.5503241927131728, "learning_rate": 7.709560272825149e-06, "loss": 0.774, "step": 4215 }, { "epoch": 0.34, "grad_norm": 1.4753823556613102, "learning_rate": 7.708468243787371e-06, "loss": 0.7805, "step": 4216 }, { "epoch": 0.34, "grad_norm": 1.573694264778314, "learning_rate": 7.707376031867625e-06, "loss": 0.8028, "step": 4217 }, { "epoch": 0.34, "grad_norm": 1.400754269374335, "learning_rate": 7.706283637139658e-06, "loss": 0.7149, "step": 4218 }, { "epoch": 0.34, "grad_norm": 1.4270743058664341, "learning_rate": 7.705191059677231e-06, "loss": 0.7731, "step": 4219 }, { "epoch": 0.34, "grad_norm": 1.534769115221182, "learning_rate": 7.704098299554119e-06, "loss": 0.7238, "step": 4220 }, { "epoch": 0.34, "grad_norm": 1.4886995187871348, "learning_rate": 7.703005356844106e-06, "loss": 0.7922, "step": 4221 }, { "epoch": 0.34, "grad_norm": 1.3610081203008522, "learning_rate": 7.70191223162099e-06, "loss": 0.7748, "step": 4222 }, { "epoch": 0.34, "grad_norm": 1.4548267989979342, "learning_rate": 7.700818923958582e-06, "loss": 0.8084, "step": 4223 }, { "epoch": 0.34, "grad_norm": 1.2400816528957908, "learning_rate": 7.699725433930705e-06, "loss": 1.1251, "step": 4224 }, { "epoch": 0.34, "grad_norm": 1.5358913792434428, "learning_rate": 7.698631761611193e-06, "loss": 0.8301, "step": 4225 }, { "epoch": 0.34, "grad_norm": 1.438210203840085, "learning_rate": 7.697537907073893e-06, "loss": 0.7406, "step": 4226 }, { "epoch": 0.34, "grad_norm": 0.9842504321736207, "learning_rate": 7.696443870392666e-06, "loss": 1.1299, "step": 4227 }, { "epoch": 0.34, "grad_norm": 1.6360067375423002, "learning_rate": 7.695349651641384e-06, "loss": 0.7787, "step": 4228 }, { "epoch": 0.34, "grad_norm": 1.4504943693263903, "learning_rate": 7.694255250893927e-06, "loss": 0.7882, "step": 4229 }, { "epoch": 0.34, "grad_norm": 1.6371987991567813, "learning_rate": 7.693160668224197e-06, "loss": 0.7731, "step": 4230 }, { "epoch": 0.34, "grad_norm": 1.544152245441889, "learning_rate": 7.692065903706104e-06, "loss": 0.7121, "step": 4231 }, { "epoch": 0.34, "grad_norm": 1.5505410199279106, "learning_rate": 7.690970957413559e-06, "loss": 0.8486, "step": 4232 }, { "epoch": 0.34, "grad_norm": 1.5337651113506334, "learning_rate": 7.689875829420505e-06, "loss": 0.7536, "step": 4233 }, { "epoch": 0.34, "grad_norm": 1.5138418905910365, "learning_rate": 7.688780519800882e-06, "loss": 0.6508, "step": 4234 }, { "epoch": 0.34, "grad_norm": 1.546396844943002, "learning_rate": 7.687685028628653e-06, "loss": 0.7727, "step": 4235 }, { "epoch": 0.34, "grad_norm": 1.4193403776120561, "learning_rate": 7.686589355977785e-06, "loss": 0.7582, "step": 4236 }, { "epoch": 0.34, "grad_norm": 1.5449389287458746, "learning_rate": 7.685493501922258e-06, "loss": 0.7575, "step": 4237 }, { "epoch": 0.34, "grad_norm": 1.5512966988031274, "learning_rate": 7.684397466536071e-06, "loss": 0.79, "step": 4238 }, { "epoch": 0.34, "grad_norm": 1.4362023933909442, "learning_rate": 7.683301249893226e-06, "loss": 0.8598, "step": 4239 }, { "epoch": 0.34, "grad_norm": 2.7004927446082503, "learning_rate": 7.682204852067748e-06, "loss": 0.8044, "step": 4240 }, { "epoch": 0.34, "grad_norm": 1.5156650422296631, "learning_rate": 7.681108273133665e-06, "loss": 0.8103, "step": 4241 }, { "epoch": 0.34, "grad_norm": 1.4996818030672985, "learning_rate": 7.68001151316502e-06, "loss": 0.8051, "step": 4242 }, { "epoch": 0.34, "grad_norm": 1.4823611199318365, "learning_rate": 7.678914572235868e-06, "loss": 1.125, "step": 4243 }, { "epoch": 0.34, "grad_norm": 1.4681732840674302, "learning_rate": 7.677817450420279e-06, "loss": 0.8094, "step": 4244 }, { "epoch": 0.34, "grad_norm": 1.643339784082716, "learning_rate": 7.676720147792333e-06, "loss": 0.8536, "step": 4245 }, { "epoch": 0.34, "grad_norm": 1.5190489252386372, "learning_rate": 7.675622664426124e-06, "loss": 0.7592, "step": 4246 }, { "epoch": 0.34, "grad_norm": 1.5887328276269221, "learning_rate": 7.674525000395752e-06, "loss": 0.7962, "step": 4247 }, { "epoch": 0.34, "grad_norm": 1.6014947241379347, "learning_rate": 7.673427155775336e-06, "loss": 0.8027, "step": 4248 }, { "epoch": 0.34, "grad_norm": 1.498226002584108, "learning_rate": 7.672329130639007e-06, "loss": 0.7619, "step": 4249 }, { "epoch": 0.34, "grad_norm": 1.7112751540062408, "learning_rate": 7.671230925060903e-06, "loss": 0.7711, "step": 4250 }, { "epoch": 0.34, "grad_norm": 1.4313764021937572, "learning_rate": 7.670132539115179e-06, "loss": 0.8063, "step": 4251 }, { "epoch": 0.34, "grad_norm": 1.5333841356848545, "learning_rate": 7.669033972876001e-06, "loss": 0.7923, "step": 4252 }, { "epoch": 0.34, "grad_norm": 1.5204686882144647, "learning_rate": 7.667935226417545e-06, "loss": 0.7532, "step": 4253 }, { "epoch": 0.34, "grad_norm": 1.4893916451312839, "learning_rate": 7.666836299814003e-06, "loss": 0.7112, "step": 4254 }, { "epoch": 0.34, "grad_norm": 1.4594904915078954, "learning_rate": 7.665737193139575e-06, "loss": 0.7341, "step": 4255 }, { "epoch": 0.34, "grad_norm": 1.5167015008115199, "learning_rate": 7.664637906468477e-06, "loss": 0.7587, "step": 4256 }, { "epoch": 0.34, "grad_norm": 1.5330451058012113, "learning_rate": 7.663538439874934e-06, "loss": 0.7473, "step": 4257 }, { "epoch": 0.34, "grad_norm": 1.5886322712991043, "learning_rate": 7.662438793433185e-06, "loss": 0.8275, "step": 4258 }, { "epoch": 0.34, "grad_norm": 1.7863566802981898, "learning_rate": 7.661338967217483e-06, "loss": 0.7989, "step": 4259 }, { "epoch": 0.34, "grad_norm": 1.5713131040663302, "learning_rate": 7.660238961302085e-06, "loss": 0.821, "step": 4260 }, { "epoch": 0.34, "grad_norm": 1.1108849386792157, "learning_rate": 7.65913877576127e-06, "loss": 1.0498, "step": 4261 }, { "epoch": 0.34, "grad_norm": 1.5963217384674315, "learning_rate": 7.658038410669326e-06, "loss": 0.8797, "step": 4262 }, { "epoch": 0.34, "grad_norm": 1.4907705443218633, "learning_rate": 7.656937866100549e-06, "loss": 0.7793, "step": 4263 }, { "epoch": 0.34, "grad_norm": 1.4749402754291656, "learning_rate": 7.655837142129252e-06, "loss": 0.7628, "step": 4264 }, { "epoch": 0.34, "grad_norm": 1.4881702695950063, "learning_rate": 7.654736238829759e-06, "loss": 0.7763, "step": 4265 }, { "epoch": 0.34, "grad_norm": 1.5398864243399293, "learning_rate": 7.653635156276405e-06, "loss": 0.7818, "step": 4266 }, { "epoch": 0.34, "grad_norm": 1.5385809698618045, "learning_rate": 7.652533894543534e-06, "loss": 0.8133, "step": 4267 }, { "epoch": 0.34, "grad_norm": 1.4626000468635845, "learning_rate": 7.651432453705514e-06, "loss": 0.7307, "step": 4268 }, { "epoch": 0.34, "grad_norm": 1.5276105220949034, "learning_rate": 7.65033083383671e-06, "loss": 0.7797, "step": 4269 }, { "epoch": 0.34, "grad_norm": 1.5237831994498543, "learning_rate": 7.649229035011504e-06, "loss": 0.7812, "step": 4270 }, { "epoch": 0.34, "grad_norm": 1.469165776406536, "learning_rate": 7.648127057304302e-06, "loss": 0.8344, "step": 4271 }, { "epoch": 0.34, "grad_norm": 1.4143305900997072, "learning_rate": 7.6470249007895e-06, "loss": 0.6502, "step": 4272 }, { "epoch": 0.34, "grad_norm": 1.5022938013445049, "learning_rate": 7.645922565541528e-06, "loss": 0.7883, "step": 4273 }, { "epoch": 0.34, "grad_norm": 1.5182796070949698, "learning_rate": 7.644820051634813e-06, "loss": 0.8199, "step": 4274 }, { "epoch": 0.34, "grad_norm": 1.7024858698920895, "learning_rate": 7.6437173591438e-06, "loss": 0.8742, "step": 4275 }, { "epoch": 0.34, "grad_norm": 2.0923416916447097, "learning_rate": 7.642614488142946e-06, "loss": 0.7372, "step": 4276 }, { "epoch": 0.34, "grad_norm": 1.455838645607585, "learning_rate": 7.641511438706718e-06, "loss": 0.7721, "step": 4277 }, { "epoch": 0.34, "grad_norm": 1.46508158027866, "learning_rate": 7.640408210909599e-06, "loss": 0.774, "step": 4278 }, { "epoch": 0.34, "grad_norm": 1.0715073211515973, "learning_rate": 7.63930480482608e-06, "loss": 1.1039, "step": 4279 }, { "epoch": 0.34, "grad_norm": 1.4824786695416268, "learning_rate": 7.638201220530664e-06, "loss": 0.793, "step": 4280 }, { "epoch": 0.34, "grad_norm": 1.5541555329890155, "learning_rate": 7.637097458097871e-06, "loss": 0.7648, "step": 4281 }, { "epoch": 0.34, "grad_norm": 1.4715581830376807, "learning_rate": 7.635993517602226e-06, "loss": 0.8416, "step": 4282 }, { "epoch": 0.34, "grad_norm": 1.4705097981144006, "learning_rate": 7.634889399118271e-06, "loss": 0.778, "step": 4283 }, { "epoch": 0.34, "grad_norm": 1.4463876868230718, "learning_rate": 7.633785102720558e-06, "loss": 0.7733, "step": 4284 }, { "epoch": 0.34, "grad_norm": 0.8639601504419201, "learning_rate": 7.632680628483655e-06, "loss": 1.103, "step": 4285 }, { "epoch": 0.34, "grad_norm": 1.4571909782070105, "learning_rate": 7.631575976482135e-06, "loss": 0.8408, "step": 4286 }, { "epoch": 0.34, "grad_norm": 1.517812879755212, "learning_rate": 7.630471146790586e-06, "loss": 0.789, "step": 4287 }, { "epoch": 0.34, "grad_norm": 1.5462482534105548, "learning_rate": 7.629366139483611e-06, "loss": 0.7502, "step": 4288 }, { "epoch": 0.34, "grad_norm": 1.5847112411135258, "learning_rate": 7.628260954635822e-06, "loss": 0.8321, "step": 4289 }, { "epoch": 0.34, "grad_norm": 2.626924496872272, "learning_rate": 7.627155592321844e-06, "loss": 0.7551, "step": 4290 }, { "epoch": 0.34, "grad_norm": 0.8772560374759438, "learning_rate": 7.626050052616314e-06, "loss": 1.087, "step": 4291 }, { "epoch": 0.34, "grad_norm": 1.5511450638842579, "learning_rate": 7.624944335593878e-06, "loss": 0.8056, "step": 4292 }, { "epoch": 0.34, "grad_norm": 1.4983994197298636, "learning_rate": 7.623838441329197e-06, "loss": 0.8079, "step": 4293 }, { "epoch": 0.34, "grad_norm": 1.4373888864610964, "learning_rate": 7.622732369896946e-06, "loss": 0.7368, "step": 4294 }, { "epoch": 0.34, "grad_norm": 1.684183011001249, "learning_rate": 7.621626121371809e-06, "loss": 0.864, "step": 4295 }, { "epoch": 0.34, "grad_norm": 1.440889101203594, "learning_rate": 7.620519695828481e-06, "loss": 0.7506, "step": 4296 }, { "epoch": 0.34, "grad_norm": 1.4845237042277035, "learning_rate": 7.61941309334167e-06, "loss": 0.7719, "step": 4297 }, { "epoch": 0.34, "grad_norm": 1.4923881144684834, "learning_rate": 7.618306313986099e-06, "loss": 0.8285, "step": 4298 }, { "epoch": 0.34, "grad_norm": 1.449345623249864, "learning_rate": 7.617199357836498e-06, "loss": 0.8164, "step": 4299 }, { "epoch": 0.34, "grad_norm": 0.895653352120223, "learning_rate": 7.61609222496761e-06, "loss": 1.1347, "step": 4300 }, { "epoch": 0.35, "grad_norm": 1.4499179531961914, "learning_rate": 7.6149849154541955e-06, "loss": 0.7333, "step": 4301 }, { "epoch": 0.35, "grad_norm": 1.5156602284064744, "learning_rate": 7.613877429371019e-06, "loss": 0.8273, "step": 4302 }, { "epoch": 0.35, "grad_norm": 1.5202399665063493, "learning_rate": 7.612769766792861e-06, "loss": 0.7777, "step": 4303 }, { "epoch": 0.35, "grad_norm": 1.4178538454410083, "learning_rate": 7.6116619277945135e-06, "loss": 0.7463, "step": 4304 }, { "epoch": 0.35, "grad_norm": 1.5226165345090021, "learning_rate": 7.610553912450782e-06, "loss": 0.693, "step": 4305 }, { "epoch": 0.35, "grad_norm": 1.5169762742940291, "learning_rate": 7.60944572083648e-06, "loss": 0.8253, "step": 4306 }, { "epoch": 0.35, "grad_norm": 1.4233474818748284, "learning_rate": 7.608337353026437e-06, "loss": 0.7646, "step": 4307 }, { "epoch": 0.35, "grad_norm": 1.6090349201743785, "learning_rate": 7.607228809095491e-06, "loss": 0.8129, "step": 4308 }, { "epoch": 0.35, "grad_norm": 1.382930286371338, "learning_rate": 7.606120089118494e-06, "loss": 0.6996, "step": 4309 }, { "epoch": 0.35, "grad_norm": 1.4954050559958356, "learning_rate": 7.60501119317031e-06, "loss": 0.814, "step": 4310 }, { "epoch": 0.35, "grad_norm": 1.6677357418047905, "learning_rate": 7.603902121325813e-06, "loss": 0.8031, "step": 4311 }, { "epoch": 0.35, "grad_norm": 1.5535510126596617, "learning_rate": 7.60279287365989e-06, "loss": 0.7414, "step": 4312 }, { "epoch": 0.35, "grad_norm": 1.498769976494804, "learning_rate": 7.6016834502474415e-06, "loss": 0.7587, "step": 4313 }, { "epoch": 0.35, "grad_norm": 0.978610783976176, "learning_rate": 7.6005738511633755e-06, "loss": 1.1219, "step": 4314 }, { "epoch": 0.35, "grad_norm": 1.7269511289022488, "learning_rate": 7.599464076482619e-06, "loss": 0.8417, "step": 4315 }, { "epoch": 0.35, "grad_norm": 1.7060346774858497, "learning_rate": 7.598354126280102e-06, "loss": 0.7448, "step": 4316 }, { "epoch": 0.35, "grad_norm": 1.4753743345545858, "learning_rate": 7.597244000630775e-06, "loss": 0.7531, "step": 4317 }, { "epoch": 0.35, "grad_norm": 1.4280563318368944, "learning_rate": 7.596133699609591e-06, "loss": 0.8283, "step": 4318 }, { "epoch": 0.35, "grad_norm": 1.4649503477157357, "learning_rate": 7.595023223291525e-06, "loss": 0.8026, "step": 4319 }, { "epoch": 0.35, "grad_norm": 1.4984128177447051, "learning_rate": 7.593912571751558e-06, "loss": 0.7637, "step": 4320 }, { "epoch": 0.35, "grad_norm": 1.5527304287500094, "learning_rate": 7.592801745064682e-06, "loss": 0.728, "step": 4321 }, { "epoch": 0.35, "grad_norm": 1.4780724706426427, "learning_rate": 7.591690743305904e-06, "loss": 0.7869, "step": 4322 }, { "epoch": 0.35, "grad_norm": 1.6462237263694135, "learning_rate": 7.590579566550238e-06, "loss": 0.8134, "step": 4323 }, { "epoch": 0.35, "grad_norm": 1.6449242963562214, "learning_rate": 7.589468214872719e-06, "loss": 0.7595, "step": 4324 }, { "epoch": 0.35, "grad_norm": 1.6011153254890655, "learning_rate": 7.588356688348384e-06, "loss": 0.8581, "step": 4325 }, { "epoch": 0.35, "grad_norm": 1.6177060397970395, "learning_rate": 7.587244987052287e-06, "loss": 0.8288, "step": 4326 }, { "epoch": 0.35, "grad_norm": 0.8949060822975573, "learning_rate": 7.586133111059493e-06, "loss": 1.1403, "step": 4327 }, { "epoch": 0.35, "grad_norm": 1.447444160786925, "learning_rate": 7.585021060445074e-06, "loss": 0.7068, "step": 4328 }, { "epoch": 0.35, "grad_norm": 1.57448977320994, "learning_rate": 7.5839088352841265e-06, "loss": 0.7661, "step": 4329 }, { "epoch": 0.35, "grad_norm": 1.499405515443895, "learning_rate": 7.582796435651745e-06, "loss": 0.8352, "step": 4330 }, { "epoch": 0.35, "grad_norm": 1.5916389890347067, "learning_rate": 7.581683861623041e-06, "loss": 0.8205, "step": 4331 }, { "epoch": 0.35, "grad_norm": 1.5176762065767715, "learning_rate": 7.580571113273141e-06, "loss": 0.8708, "step": 4332 }, { "epoch": 0.35, "grad_norm": 1.4280023457206201, "learning_rate": 7.579458190677176e-06, "loss": 0.6761, "step": 4333 }, { "epoch": 0.35, "grad_norm": 1.5270159083107353, "learning_rate": 7.578345093910298e-06, "loss": 0.7619, "step": 4334 }, { "epoch": 0.35, "grad_norm": 1.520632281119033, "learning_rate": 7.577231823047664e-06, "loss": 0.8307, "step": 4335 }, { "epoch": 0.35, "grad_norm": 1.4538646097903793, "learning_rate": 7.576118378164442e-06, "loss": 0.7529, "step": 4336 }, { "epoch": 0.35, "grad_norm": 1.4675163782524028, "learning_rate": 7.575004759335817e-06, "loss": 0.717, "step": 4337 }, { "epoch": 0.35, "grad_norm": 1.5084054387201304, "learning_rate": 7.5738909666369816e-06, "loss": 0.758, "step": 4338 }, { "epoch": 0.35, "grad_norm": 0.9296587741729297, "learning_rate": 7.572777000143145e-06, "loss": 1.0737, "step": 4339 }, { "epoch": 0.35, "grad_norm": 0.8391554317580222, "learning_rate": 7.57166285992952e-06, "loss": 1.1121, "step": 4340 }, { "epoch": 0.35, "grad_norm": 1.4933739961715538, "learning_rate": 7.570548546071342e-06, "loss": 0.7411, "step": 4341 }, { "epoch": 0.35, "grad_norm": 1.4487064852882023, "learning_rate": 7.5694340586438446e-06, "loss": 0.7399, "step": 4342 }, { "epoch": 0.35, "grad_norm": 1.5604343512892715, "learning_rate": 7.568319397722284e-06, "loss": 0.7918, "step": 4343 }, { "epoch": 0.35, "grad_norm": 1.5442962835629153, "learning_rate": 7.567204563381927e-06, "loss": 0.8073, "step": 4344 }, { "epoch": 0.35, "grad_norm": 1.6528658285051003, "learning_rate": 7.566089555698046e-06, "loss": 0.7736, "step": 4345 }, { "epoch": 0.35, "grad_norm": 1.6325014598952858, "learning_rate": 7.564974374745931e-06, "loss": 0.6921, "step": 4346 }, { "epoch": 0.35, "grad_norm": 1.4975678958744811, "learning_rate": 7.563859020600882e-06, "loss": 0.8246, "step": 4347 }, { "epoch": 0.35, "grad_norm": 1.4249088103674723, "learning_rate": 7.562743493338207e-06, "loss": 0.7158, "step": 4348 }, { "epoch": 0.35, "grad_norm": 1.402532592590064, "learning_rate": 7.561627793033233e-06, "loss": 0.7845, "step": 4349 }, { "epoch": 0.35, "grad_norm": 1.4681373641464144, "learning_rate": 7.560511919761293e-06, "loss": 0.7661, "step": 4350 }, { "epoch": 0.35, "grad_norm": 1.4998228344296594, "learning_rate": 7.559395873597733e-06, "loss": 0.7771, "step": 4351 }, { "epoch": 0.35, "grad_norm": 1.5046530568252192, "learning_rate": 7.5582796546179125e-06, "loss": 0.741, "step": 4352 }, { "epoch": 0.35, "grad_norm": 1.1864870786322053, "learning_rate": 7.557163262897198e-06, "loss": 1.1282, "step": 4353 }, { "epoch": 0.35, "grad_norm": 1.6146223862429492, "learning_rate": 7.5560466985109726e-06, "loss": 0.8195, "step": 4354 }, { "epoch": 0.35, "grad_norm": 1.4153723594844139, "learning_rate": 7.5549299615346294e-06, "loss": 0.7825, "step": 4355 }, { "epoch": 0.35, "grad_norm": 1.4395470460852111, "learning_rate": 7.553813052043575e-06, "loss": 0.7692, "step": 4356 }, { "epoch": 0.35, "grad_norm": 1.3705871238030503, "learning_rate": 7.552695970113222e-06, "loss": 0.8619, "step": 4357 }, { "epoch": 0.35, "grad_norm": 1.5993427687030761, "learning_rate": 7.551578715819e-06, "loss": 0.8234, "step": 4358 }, { "epoch": 0.35, "grad_norm": 0.8463752470139627, "learning_rate": 7.550461289236348e-06, "loss": 1.1407, "step": 4359 }, { "epoch": 0.35, "grad_norm": 1.4827854245845675, "learning_rate": 7.549343690440718e-06, "loss": 0.7472, "step": 4360 }, { "epoch": 0.35, "grad_norm": 1.6147954901108643, "learning_rate": 7.548225919507575e-06, "loss": 0.8017, "step": 4361 }, { "epoch": 0.35, "grad_norm": 1.5413710275147965, "learning_rate": 7.547107976512387e-06, "loss": 0.804, "step": 4362 }, { "epoch": 0.35, "grad_norm": 0.8223464145809496, "learning_rate": 7.545989861530648e-06, "loss": 1.0896, "step": 4363 }, { "epoch": 0.35, "grad_norm": 1.4971003494653998, "learning_rate": 7.54487157463785e-06, "loss": 0.8084, "step": 4364 }, { "epoch": 0.35, "grad_norm": 1.5355669182765073, "learning_rate": 7.543753115909504e-06, "loss": 0.8691, "step": 4365 }, { "epoch": 0.35, "grad_norm": 1.4444675090143833, "learning_rate": 7.5426344854211304e-06, "loss": 0.7142, "step": 4366 }, { "epoch": 0.35, "grad_norm": 1.529455095851079, "learning_rate": 7.541515683248263e-06, "loss": 0.8909, "step": 4367 }, { "epoch": 0.35, "grad_norm": 1.6087136288377117, "learning_rate": 7.5403967094664454e-06, "loss": 0.804, "step": 4368 }, { "epoch": 0.35, "grad_norm": 1.5501334457764158, "learning_rate": 7.539277564151233e-06, "loss": 0.7987, "step": 4369 }, { "epoch": 0.35, "grad_norm": 0.8446806320154354, "learning_rate": 7.538158247378193e-06, "loss": 1.108, "step": 4370 }, { "epoch": 0.35, "grad_norm": 1.450893278301483, "learning_rate": 7.537038759222903e-06, "loss": 0.7438, "step": 4371 }, { "epoch": 0.35, "grad_norm": 1.4835860747238647, "learning_rate": 7.535919099760956e-06, "loss": 0.8105, "step": 4372 }, { "epoch": 0.35, "grad_norm": 0.7976284157809693, "learning_rate": 7.534799269067952e-06, "loss": 1.1268, "step": 4373 }, { "epoch": 0.35, "grad_norm": 1.5178645785045264, "learning_rate": 7.533679267219507e-06, "loss": 0.8134, "step": 4374 }, { "epoch": 0.35, "grad_norm": 1.5339237007045197, "learning_rate": 7.532559094291243e-06, "loss": 0.836, "step": 4375 }, { "epoch": 0.35, "grad_norm": 1.4191639501819395, "learning_rate": 7.531438750358797e-06, "loss": 0.8009, "step": 4376 }, { "epoch": 0.35, "grad_norm": 2.073404249264496, "learning_rate": 7.530318235497818e-06, "loss": 0.8453, "step": 4377 }, { "epoch": 0.35, "grad_norm": 1.544258890705297, "learning_rate": 7.529197549783967e-06, "loss": 0.7596, "step": 4378 }, { "epoch": 0.35, "grad_norm": 1.5847716319897944, "learning_rate": 7.528076693292914e-06, "loss": 0.8175, "step": 4379 }, { "epoch": 0.35, "grad_norm": 1.5634322928059634, "learning_rate": 7.526955666100343e-06, "loss": 0.8157, "step": 4380 }, { "epoch": 0.35, "grad_norm": 1.5309875894993068, "learning_rate": 7.525834468281945e-06, "loss": 0.8265, "step": 4381 }, { "epoch": 0.35, "grad_norm": 1.4992099689723153, "learning_rate": 7.52471309991343e-06, "loss": 0.8632, "step": 4382 }, { "epoch": 0.35, "grad_norm": 1.5229507569762466, "learning_rate": 7.523591561070511e-06, "loss": 0.8605, "step": 4383 }, { "epoch": 0.35, "grad_norm": 1.4936596960148676, "learning_rate": 7.5224698518289196e-06, "loss": 0.7816, "step": 4384 }, { "epoch": 0.35, "grad_norm": 1.57904237450513, "learning_rate": 7.521347972264399e-06, "loss": 0.808, "step": 4385 }, { "epoch": 0.35, "grad_norm": 1.4224433317540848, "learning_rate": 7.5202259224526945e-06, "loss": 0.8491, "step": 4386 }, { "epoch": 0.35, "grad_norm": 1.5678934524067225, "learning_rate": 7.5191037024695745e-06, "loss": 0.7717, "step": 4387 }, { "epoch": 0.35, "grad_norm": 1.4955881464980407, "learning_rate": 7.517981312390811e-06, "loss": 0.7346, "step": 4388 }, { "epoch": 0.35, "grad_norm": 1.5287352712131965, "learning_rate": 7.516858752292191e-06, "loss": 0.8329, "step": 4389 }, { "epoch": 0.35, "grad_norm": 1.4760573917619335, "learning_rate": 7.515736022249516e-06, "loss": 0.7144, "step": 4390 }, { "epoch": 0.35, "grad_norm": 1.5908775484915763, "learning_rate": 7.5146131223385895e-06, "loss": 0.9125, "step": 4391 }, { "epoch": 0.35, "grad_norm": 1.4446357512279138, "learning_rate": 7.5134900526352375e-06, "loss": 0.7508, "step": 4392 }, { "epoch": 0.35, "grad_norm": 1.5482528554724346, "learning_rate": 7.512366813215286e-06, "loss": 0.7382, "step": 4393 }, { "epoch": 0.35, "grad_norm": 1.3876184210655609, "learning_rate": 7.511243404154586e-06, "loss": 0.7232, "step": 4394 }, { "epoch": 0.35, "grad_norm": 1.4442582284719454, "learning_rate": 7.510119825528986e-06, "loss": 0.7791, "step": 4395 }, { "epoch": 0.35, "grad_norm": 1.5393808941433358, "learning_rate": 7.508996077414359e-06, "loss": 0.857, "step": 4396 }, { "epoch": 0.35, "grad_norm": 1.3776948031403315, "learning_rate": 7.507872159886578e-06, "loss": 0.7386, "step": 4397 }, { "epoch": 0.35, "grad_norm": 1.6032398901900127, "learning_rate": 7.506748073021532e-06, "loss": 0.7618, "step": 4398 }, { "epoch": 0.35, "grad_norm": 1.5171875576112623, "learning_rate": 7.505623816895126e-06, "loss": 0.7374, "step": 4399 }, { "epoch": 0.35, "grad_norm": 1.5651690176117776, "learning_rate": 7.504499391583271e-06, "loss": 0.7324, "step": 4400 }, { "epoch": 0.35, "grad_norm": 1.4565345845617848, "learning_rate": 7.503374797161891e-06, "loss": 0.7012, "step": 4401 }, { "epoch": 0.35, "grad_norm": 1.4037754607174209, "learning_rate": 7.502250033706919e-06, "loss": 0.7759, "step": 4402 }, { "epoch": 0.35, "grad_norm": 1.4495364046868457, "learning_rate": 7.501125101294302e-06, "loss": 0.8297, "step": 4403 }, { "epoch": 0.35, "grad_norm": 1.5264249502714267, "learning_rate": 7.500000000000001e-06, "loss": 0.8069, "step": 4404 }, { "epoch": 0.35, "grad_norm": 1.5478318214426088, "learning_rate": 7.498874729899982e-06, "loss": 0.7798, "step": 4405 }, { "epoch": 0.35, "grad_norm": 1.4691252815554494, "learning_rate": 7.497749291070226e-06, "loss": 0.803, "step": 4406 }, { "epoch": 0.35, "grad_norm": 1.923281081026553, "learning_rate": 7.49662368358673e-06, "loss": 0.7212, "step": 4407 }, { "epoch": 0.35, "grad_norm": 0.9067664229865748, "learning_rate": 7.495497907525492e-06, "loss": 1.1349, "step": 4408 }, { "epoch": 0.35, "grad_norm": 1.4700144285102168, "learning_rate": 7.49437196296253e-06, "loss": 0.8083, "step": 4409 }, { "epoch": 0.35, "grad_norm": 1.4812391974820225, "learning_rate": 7.493245849973868e-06, "loss": 0.7616, "step": 4410 }, { "epoch": 0.35, "grad_norm": 0.8369749140059285, "learning_rate": 7.492119568635545e-06, "loss": 1.0596, "step": 4411 }, { "epoch": 0.35, "grad_norm": 0.7938021944908413, "learning_rate": 7.490993119023611e-06, "loss": 1.113, "step": 4412 }, { "epoch": 0.35, "grad_norm": 1.4209759499328414, "learning_rate": 7.489866501214124e-06, "loss": 0.8308, "step": 4413 }, { "epoch": 0.35, "grad_norm": 2.5860663149442193, "learning_rate": 7.48873971528316e-06, "loss": 0.7837, "step": 4414 }, { "epoch": 0.35, "grad_norm": 1.4184322378109762, "learning_rate": 7.487612761306798e-06, "loss": 0.6778, "step": 4415 }, { "epoch": 0.35, "grad_norm": 1.4808637069724075, "learning_rate": 7.486485639361135e-06, "loss": 0.7912, "step": 4416 }, { "epoch": 0.35, "grad_norm": 1.3302913483851104, "learning_rate": 7.4853583495222745e-06, "loss": 0.7175, "step": 4417 }, { "epoch": 0.35, "grad_norm": 1.534302163497541, "learning_rate": 7.484230891866337e-06, "loss": 0.775, "step": 4418 }, { "epoch": 0.35, "grad_norm": 1.3908339473546543, "learning_rate": 7.483103266469448e-06, "loss": 0.7386, "step": 4419 }, { "epoch": 0.35, "grad_norm": 1.6856892358510989, "learning_rate": 7.481975473407748e-06, "loss": 0.9158, "step": 4420 }, { "epoch": 0.35, "grad_norm": 1.5243205134949183, "learning_rate": 7.48084751275739e-06, "loss": 0.7314, "step": 4421 }, { "epoch": 0.35, "grad_norm": 1.6164006810785032, "learning_rate": 7.479719384594533e-06, "loss": 0.8176, "step": 4422 }, { "epoch": 0.35, "grad_norm": 0.9683032367136145, "learning_rate": 7.478591088995355e-06, "loss": 1.126, "step": 4423 }, { "epoch": 0.35, "grad_norm": 1.5916230685098063, "learning_rate": 7.4774626260360384e-06, "loss": 0.705, "step": 4424 }, { "epoch": 0.36, "grad_norm": 0.8542153996657541, "learning_rate": 7.476333995792779e-06, "loss": 1.1145, "step": 4425 }, { "epoch": 0.36, "grad_norm": 1.6812178844583427, "learning_rate": 7.475205198341788e-06, "loss": 0.8239, "step": 4426 }, { "epoch": 0.36, "grad_norm": 1.4743410205209384, "learning_rate": 7.47407623375928e-06, "loss": 0.752, "step": 4427 }, { "epoch": 0.36, "grad_norm": 1.479955406980183, "learning_rate": 7.472947102121489e-06, "loss": 0.7278, "step": 4428 }, { "epoch": 0.36, "grad_norm": 1.4907345608657643, "learning_rate": 7.471817803504655e-06, "loss": 0.8304, "step": 4429 }, { "epoch": 0.36, "grad_norm": 0.9486845595597211, "learning_rate": 7.470688337985029e-06, "loss": 1.1356, "step": 4430 }, { "epoch": 0.36, "grad_norm": 1.5589725171143547, "learning_rate": 7.469558705638879e-06, "loss": 0.8287, "step": 4431 }, { "epoch": 0.36, "grad_norm": 1.5094618436847094, "learning_rate": 7.468428906542476e-06, "loss": 0.8714, "step": 4432 }, { "epoch": 0.36, "grad_norm": 1.480691476626689, "learning_rate": 7.46729894077211e-06, "loss": 0.7521, "step": 4433 }, { "epoch": 0.36, "grad_norm": 0.9057490845002483, "learning_rate": 7.466168808404077e-06, "loss": 1.1213, "step": 4434 }, { "epoch": 0.36, "grad_norm": 1.5192021096777966, "learning_rate": 7.465038509514688e-06, "loss": 0.7599, "step": 4435 }, { "epoch": 0.36, "grad_norm": 1.452669260275029, "learning_rate": 7.463908044180263e-06, "loss": 0.7493, "step": 4436 }, { "epoch": 0.36, "grad_norm": 1.5791738425488053, "learning_rate": 7.46277741247713e-06, "loss": 0.7953, "step": 4437 }, { "epoch": 0.36, "grad_norm": 1.4802366438930668, "learning_rate": 7.461646614481637e-06, "loss": 0.8104, "step": 4438 }, { "epoch": 0.36, "grad_norm": 1.4529672225795658, "learning_rate": 7.460515650270134e-06, "loss": 0.7314, "step": 4439 }, { "epoch": 0.36, "grad_norm": 1.5513619906625926, "learning_rate": 7.459384519918989e-06, "loss": 0.856, "step": 4440 }, { "epoch": 0.36, "grad_norm": 1.5407113701238604, "learning_rate": 7.458253223504577e-06, "loss": 0.8, "step": 4441 }, { "epoch": 0.36, "grad_norm": 1.5912154029641719, "learning_rate": 7.457121761103286e-06, "loss": 0.7282, "step": 4442 }, { "epoch": 0.36, "grad_norm": 1.5751070182298923, "learning_rate": 7.455990132791516e-06, "loss": 0.8197, "step": 4443 }, { "epoch": 0.36, "grad_norm": 1.502449450366959, "learning_rate": 7.454858338645675e-06, "loss": 0.758, "step": 4444 }, { "epoch": 0.36, "grad_norm": 1.587884959320326, "learning_rate": 7.453726378742187e-06, "loss": 0.8548, "step": 4445 }, { "epoch": 0.36, "grad_norm": 1.270947997604239, "learning_rate": 7.4525942531574836e-06, "loss": 1.1157, "step": 4446 }, { "epoch": 0.36, "grad_norm": 0.9882953318827534, "learning_rate": 7.451461961968006e-06, "loss": 1.1053, "step": 4447 }, { "epoch": 0.36, "grad_norm": 0.8137494513368353, "learning_rate": 7.450329505250212e-06, "loss": 1.0708, "step": 4448 }, { "epoch": 0.36, "grad_norm": 0.835422930949959, "learning_rate": 7.449196883080567e-06, "loss": 1.0709, "step": 4449 }, { "epoch": 0.36, "grad_norm": 1.6190029994898856, "learning_rate": 7.448064095535547e-06, "loss": 0.7643, "step": 4450 }, { "epoch": 0.36, "grad_norm": 1.6579467268678785, "learning_rate": 7.446931142691644e-06, "loss": 0.8615, "step": 4451 }, { "epoch": 0.36, "grad_norm": 1.5093710239680909, "learning_rate": 7.445798024625354e-06, "loss": 0.7787, "step": 4452 }, { "epoch": 0.36, "grad_norm": 1.5379650287917215, "learning_rate": 7.444664741413188e-06, "loss": 0.7675, "step": 4453 }, { "epoch": 0.36, "grad_norm": 1.5848515483771741, "learning_rate": 7.443531293131667e-06, "loss": 0.7581, "step": 4454 }, { "epoch": 0.36, "grad_norm": 1.6704589357492818, "learning_rate": 7.4423976798573285e-06, "loss": 0.8263, "step": 4455 }, { "epoch": 0.36, "grad_norm": 1.5631170523382516, "learning_rate": 7.441263901666711e-06, "loss": 0.8442, "step": 4456 }, { "epoch": 0.36, "grad_norm": 1.6217389237251711, "learning_rate": 7.440129958636375e-06, "loss": 0.7995, "step": 4457 }, { "epoch": 0.36, "grad_norm": 1.5626161828355551, "learning_rate": 7.438995850842884e-06, "loss": 0.7935, "step": 4458 }, { "epoch": 0.36, "grad_norm": 1.6020995854827766, "learning_rate": 7.437861578362815e-06, "loss": 0.8315, "step": 4459 }, { "epoch": 0.36, "grad_norm": 1.6582491822006413, "learning_rate": 7.436727141272759e-06, "loss": 0.7549, "step": 4460 }, { "epoch": 0.36, "grad_norm": 1.435160035167625, "learning_rate": 7.435592539649313e-06, "loss": 0.8647, "step": 4461 }, { "epoch": 0.36, "grad_norm": 1.5905219312794014, "learning_rate": 7.4344577735690915e-06, "loss": 0.8327, "step": 4462 }, { "epoch": 0.36, "grad_norm": 1.5433677509765416, "learning_rate": 7.433322843108714e-06, "loss": 0.7848, "step": 4463 }, { "epoch": 0.36, "grad_norm": 1.6767344127329553, "learning_rate": 7.4321877483448125e-06, "loss": 1.106, "step": 4464 }, { "epoch": 0.36, "grad_norm": 1.533550614717265, "learning_rate": 7.431052489354033e-06, "loss": 0.8358, "step": 4465 }, { "epoch": 0.36, "grad_norm": 1.4621292186024115, "learning_rate": 7.42991706621303e-06, "loss": 0.7932, "step": 4466 }, { "epoch": 0.36, "grad_norm": 1.0760036401895108, "learning_rate": 7.428781478998472e-06, "loss": 1.1319, "step": 4467 }, { "epoch": 0.36, "grad_norm": 1.59575383688077, "learning_rate": 7.427645727787035e-06, "loss": 0.7335, "step": 4468 }, { "epoch": 0.36, "grad_norm": 0.8557642023623613, "learning_rate": 7.4265098126554065e-06, "loss": 1.1122, "step": 4469 }, { "epoch": 0.36, "grad_norm": 1.509300032844583, "learning_rate": 7.425373733680286e-06, "loss": 0.7834, "step": 4470 }, { "epoch": 0.36, "grad_norm": 1.561025565333014, "learning_rate": 7.424237490938385e-06, "loss": 0.7641, "step": 4471 }, { "epoch": 0.36, "grad_norm": 1.446727324134945, "learning_rate": 7.423101084506427e-06, "loss": 0.7707, "step": 4472 }, { "epoch": 0.36, "grad_norm": 1.518145623590487, "learning_rate": 7.421964514461142e-06, "loss": 0.874, "step": 4473 }, { "epoch": 0.36, "grad_norm": 1.4931113269508136, "learning_rate": 7.420827780879276e-06, "loss": 0.7993, "step": 4474 }, { "epoch": 0.36, "grad_norm": 1.6705298487902591, "learning_rate": 7.4196908838375804e-06, "loss": 0.8045, "step": 4475 }, { "epoch": 0.36, "grad_norm": 1.567935852419217, "learning_rate": 7.418553823412824e-06, "loss": 0.7696, "step": 4476 }, { "epoch": 0.36, "grad_norm": 1.4108153137643755, "learning_rate": 7.4174165996817845e-06, "loss": 0.7291, "step": 4477 }, { "epoch": 0.36, "grad_norm": 1.5441984109733717, "learning_rate": 7.416279212721247e-06, "loss": 0.7615, "step": 4478 }, { "epoch": 0.36, "grad_norm": 1.571409104433138, "learning_rate": 7.415141662608013e-06, "loss": 1.1001, "step": 4479 }, { "epoch": 0.36, "grad_norm": 1.4674510068689417, "learning_rate": 7.414003949418891e-06, "loss": 0.7972, "step": 4480 }, { "epoch": 0.36, "grad_norm": 1.559559161589207, "learning_rate": 7.412866073230702e-06, "loss": 0.7933, "step": 4481 }, { "epoch": 0.36, "grad_norm": 1.5124636325568186, "learning_rate": 7.411728034120279e-06, "loss": 0.8364, "step": 4482 }, { "epoch": 0.36, "grad_norm": 1.4764521174429057, "learning_rate": 7.4105898321644635e-06, "loss": 0.7999, "step": 4483 }, { "epoch": 0.36, "grad_norm": 0.9994380360928251, "learning_rate": 7.409451467440111e-06, "loss": 1.1061, "step": 4484 }, { "epoch": 0.36, "grad_norm": 1.4280754616379234, "learning_rate": 7.408312940024086e-06, "loss": 0.817, "step": 4485 }, { "epoch": 0.36, "grad_norm": 1.564637307707769, "learning_rate": 7.407174249993266e-06, "loss": 0.8172, "step": 4486 }, { "epoch": 0.36, "grad_norm": 0.862540568225697, "learning_rate": 7.406035397424532e-06, "loss": 1.1264, "step": 4487 }, { "epoch": 0.36, "grad_norm": 1.5694149438684317, "learning_rate": 7.404896382394788e-06, "loss": 0.7129, "step": 4488 }, { "epoch": 0.36, "grad_norm": 1.5989724019661358, "learning_rate": 7.403757204980943e-06, "loss": 0.8007, "step": 4489 }, { "epoch": 0.36, "grad_norm": 1.5846469964759329, "learning_rate": 7.4026178652599146e-06, "loss": 0.717, "step": 4490 }, { "epoch": 0.36, "grad_norm": 0.9394319606599356, "learning_rate": 7.401478363308633e-06, "loss": 1.0998, "step": 4491 }, { "epoch": 0.36, "grad_norm": 1.5236728555917405, "learning_rate": 7.400338699204042e-06, "loss": 0.8036, "step": 4492 }, { "epoch": 0.36, "grad_norm": 1.773832535934365, "learning_rate": 7.3991988730230925e-06, "loss": 0.7195, "step": 4493 }, { "epoch": 0.36, "grad_norm": 1.4775360301054485, "learning_rate": 7.3980588848427485e-06, "loss": 0.8552, "step": 4494 }, { "epoch": 0.36, "grad_norm": 1.5374459487790948, "learning_rate": 7.396918734739985e-06, "loss": 0.7039, "step": 4495 }, { "epoch": 0.36, "grad_norm": 1.5569974631329133, "learning_rate": 7.395778422791788e-06, "loss": 0.7892, "step": 4496 }, { "epoch": 0.36, "grad_norm": 1.4311045343933464, "learning_rate": 7.3946379490751545e-06, "loss": 0.7424, "step": 4497 }, { "epoch": 0.36, "grad_norm": 1.6136030205219534, "learning_rate": 7.3934973136670905e-06, "loss": 0.8124, "step": 4498 }, { "epoch": 0.36, "grad_norm": 0.9137278403717024, "learning_rate": 7.392356516644614e-06, "loss": 1.1334, "step": 4499 }, { "epoch": 0.36, "grad_norm": 1.5618897397664098, "learning_rate": 7.391215558084755e-06, "loss": 0.7721, "step": 4500 }, { "epoch": 0.36, "grad_norm": 0.8346859955591993, "learning_rate": 7.390074438064555e-06, "loss": 1.0942, "step": 4501 }, { "epoch": 0.36, "grad_norm": 1.4709962071679026, "learning_rate": 7.388933156661064e-06, "loss": 0.8021, "step": 4502 }, { "epoch": 0.36, "grad_norm": 1.5227932686653578, "learning_rate": 7.387791713951343e-06, "loss": 0.7822, "step": 4503 }, { "epoch": 0.36, "grad_norm": 1.511214386707306, "learning_rate": 7.386650110012465e-06, "loss": 0.7607, "step": 4504 }, { "epoch": 0.36, "grad_norm": 1.5717853346110455, "learning_rate": 7.385508344921514e-06, "loss": 0.8101, "step": 4505 }, { "epoch": 0.36, "grad_norm": 1.5527214377975584, "learning_rate": 7.3843664187555855e-06, "loss": 0.827, "step": 4506 }, { "epoch": 0.36, "grad_norm": 1.4372600497301877, "learning_rate": 7.383224331591784e-06, "loss": 0.7536, "step": 4507 }, { "epoch": 0.36, "grad_norm": 1.5170466113504606, "learning_rate": 7.382082083507226e-06, "loss": 0.8055, "step": 4508 }, { "epoch": 0.36, "grad_norm": 1.4703107889201252, "learning_rate": 7.380939674579038e-06, "loss": 0.7668, "step": 4509 }, { "epoch": 0.36, "grad_norm": 1.5217976796363797, "learning_rate": 7.3797971048843606e-06, "loss": 0.7973, "step": 4510 }, { "epoch": 0.36, "grad_norm": 1.5646557771711247, "learning_rate": 7.378654374500339e-06, "loss": 0.8751, "step": 4511 }, { "epoch": 0.36, "grad_norm": 1.648499216136936, "learning_rate": 7.3775114835041366e-06, "loss": 0.763, "step": 4512 }, { "epoch": 0.36, "grad_norm": 1.4468493278035663, "learning_rate": 7.376368431972921e-06, "loss": 0.7567, "step": 4513 }, { "epoch": 0.36, "grad_norm": 1.534880464602087, "learning_rate": 7.375225219983876e-06, "loss": 0.7824, "step": 4514 }, { "epoch": 0.36, "grad_norm": 1.417214334223045, "learning_rate": 7.374081847614191e-06, "loss": 0.8012, "step": 4515 }, { "epoch": 0.36, "grad_norm": 1.5169733049776772, "learning_rate": 7.372938314941073e-06, "loss": 0.856, "step": 4516 }, { "epoch": 0.36, "grad_norm": 1.471238646938426, "learning_rate": 7.371794622041734e-06, "loss": 0.7251, "step": 4517 }, { "epoch": 0.36, "grad_norm": 1.478420950869702, "learning_rate": 7.370650768993398e-06, "loss": 0.8114, "step": 4518 }, { "epoch": 0.36, "grad_norm": 1.56609965284195, "learning_rate": 7.3695067558733015e-06, "loss": 0.7349, "step": 4519 }, { "epoch": 0.36, "grad_norm": 1.501973499341524, "learning_rate": 7.36836258275869e-06, "loss": 0.7551, "step": 4520 }, { "epoch": 0.36, "grad_norm": 1.3593224998388382, "learning_rate": 7.367218249726821e-06, "loss": 0.7322, "step": 4521 }, { "epoch": 0.36, "grad_norm": 0.96299982494295, "learning_rate": 7.3660737568549635e-06, "loss": 1.1156, "step": 4522 }, { "epoch": 0.36, "grad_norm": 1.479178383605749, "learning_rate": 7.3649291042203955e-06, "loss": 0.861, "step": 4523 }, { "epoch": 0.36, "grad_norm": 1.6239217981423475, "learning_rate": 7.363784291900407e-06, "loss": 0.8625, "step": 4524 }, { "epoch": 0.36, "grad_norm": 1.5466267012072479, "learning_rate": 7.362639319972298e-06, "loss": 0.7372, "step": 4525 }, { "epoch": 0.36, "grad_norm": 0.8297573882565058, "learning_rate": 7.3614941885133785e-06, "loss": 1.125, "step": 4526 }, { "epoch": 0.36, "grad_norm": 1.5747516875943632, "learning_rate": 7.360348897600973e-06, "loss": 0.7438, "step": 4527 }, { "epoch": 0.36, "grad_norm": 1.5776705484578561, "learning_rate": 7.35920344731241e-06, "loss": 0.8328, "step": 4528 }, { "epoch": 0.36, "grad_norm": 1.5133240724694084, "learning_rate": 7.358057837725039e-06, "loss": 0.7571, "step": 4529 }, { "epoch": 0.36, "grad_norm": 1.9634985790222286, "learning_rate": 7.35691206891621e-06, "loss": 0.8292, "step": 4530 }, { "epoch": 0.36, "grad_norm": 1.6446186441207828, "learning_rate": 7.355766140963288e-06, "loss": 0.785, "step": 4531 }, { "epoch": 0.36, "grad_norm": 1.4869081828584967, "learning_rate": 7.354620053943649e-06, "loss": 0.7866, "step": 4532 }, { "epoch": 0.36, "grad_norm": 1.513116775560579, "learning_rate": 7.35347380793468e-06, "loss": 0.7672, "step": 4533 }, { "epoch": 0.36, "grad_norm": 1.4576860551614095, "learning_rate": 7.352327403013779e-06, "loss": 0.7815, "step": 4534 }, { "epoch": 0.36, "grad_norm": 0.8783875179570352, "learning_rate": 7.3511808392583515e-06, "loss": 1.1358, "step": 4535 }, { "epoch": 0.36, "grad_norm": 1.5373897843430362, "learning_rate": 7.350034116745818e-06, "loss": 0.7773, "step": 4536 }, { "epoch": 0.36, "grad_norm": 1.5238170824036747, "learning_rate": 7.348887235553608e-06, "loss": 0.7779, "step": 4537 }, { "epoch": 0.36, "grad_norm": 1.4576988423339814, "learning_rate": 7.3477401957591586e-06, "loss": 0.8563, "step": 4538 }, { "epoch": 0.36, "grad_norm": 1.4689268014676848, "learning_rate": 7.346592997439925e-06, "loss": 0.8447, "step": 4539 }, { "epoch": 0.36, "grad_norm": 0.797290334326206, "learning_rate": 7.345445640673365e-06, "loss": 1.1263, "step": 4540 }, { "epoch": 0.36, "grad_norm": 1.5412287354675323, "learning_rate": 7.344298125536953e-06, "loss": 0.7407, "step": 4541 }, { "epoch": 0.36, "grad_norm": 1.3765144291168856, "learning_rate": 7.343150452108171e-06, "loss": 0.7819, "step": 4542 }, { "epoch": 0.36, "grad_norm": 1.4170475858166487, "learning_rate": 7.342002620464512e-06, "loss": 0.7877, "step": 4543 }, { "epoch": 0.36, "grad_norm": 1.6153469470951902, "learning_rate": 7.340854630683481e-06, "loss": 0.8111, "step": 4544 }, { "epoch": 0.36, "grad_norm": 1.4853307034715892, "learning_rate": 7.339706482842593e-06, "loss": 0.6871, "step": 4545 }, { "epoch": 0.36, "grad_norm": 0.8018667633294743, "learning_rate": 7.338558177019372e-06, "loss": 1.078, "step": 4546 }, { "epoch": 0.36, "grad_norm": 1.6132677679633705, "learning_rate": 7.337409713291357e-06, "loss": 0.8763, "step": 4547 }, { "epoch": 0.36, "grad_norm": 1.5599499378976402, "learning_rate": 7.336261091736092e-06, "loss": 0.7218, "step": 4548 }, { "epoch": 0.36, "grad_norm": 1.5133871959801573, "learning_rate": 7.335112312431137e-06, "loss": 0.806, "step": 4549 }, { "epoch": 0.37, "grad_norm": 1.4359670567053873, "learning_rate": 7.333963375454058e-06, "loss": 0.729, "step": 4550 }, { "epoch": 0.37, "grad_norm": 1.7163139114865082, "learning_rate": 7.332814280882437e-06, "loss": 0.808, "step": 4551 }, { "epoch": 0.37, "grad_norm": 0.7813993052819308, "learning_rate": 7.33166502879386e-06, "loss": 1.102, "step": 4552 }, { "epoch": 0.37, "grad_norm": 1.610011836222946, "learning_rate": 7.330515619265928e-06, "loss": 0.7423, "step": 4553 }, { "epoch": 0.37, "grad_norm": 1.44057529650174, "learning_rate": 7.3293660523762535e-06, "loss": 0.7548, "step": 4554 }, { "epoch": 0.37, "grad_norm": 1.493230618739604, "learning_rate": 7.328216328202456e-06, "loss": 0.8319, "step": 4555 }, { "epoch": 0.37, "grad_norm": 1.7211274838524104, "learning_rate": 7.3270664468221685e-06, "loss": 0.8386, "step": 4556 }, { "epoch": 0.37, "grad_norm": 1.4734333494349543, "learning_rate": 7.325916408313034e-06, "loss": 0.7887, "step": 4557 }, { "epoch": 0.37, "grad_norm": 0.8706362076510563, "learning_rate": 7.324766212752703e-06, "loss": 1.1077, "step": 4558 }, { "epoch": 0.37, "grad_norm": 1.500314755079102, "learning_rate": 7.323615860218844e-06, "loss": 0.6582, "step": 4559 }, { "epoch": 0.37, "grad_norm": 1.4359050462423344, "learning_rate": 7.322465350789126e-06, "loss": 0.7437, "step": 4560 }, { "epoch": 0.37, "grad_norm": 1.452246924245335, "learning_rate": 7.32131468454124e-06, "loss": 0.6863, "step": 4561 }, { "epoch": 0.37, "grad_norm": 0.7425143771132683, "learning_rate": 7.320163861552877e-06, "loss": 1.1132, "step": 4562 }, { "epoch": 0.37, "grad_norm": 1.6528053666337676, "learning_rate": 7.3190128819017455e-06, "loss": 0.735, "step": 4563 }, { "epoch": 0.37, "grad_norm": 0.7838750572933093, "learning_rate": 7.31786174566556e-06, "loss": 1.0975, "step": 4564 }, { "epoch": 0.37, "grad_norm": 0.8006898540404943, "learning_rate": 7.316710452922049e-06, "loss": 1.1369, "step": 4565 }, { "epoch": 0.37, "grad_norm": 1.520959534455294, "learning_rate": 7.315559003748952e-06, "loss": 0.8038, "step": 4566 }, { "epoch": 0.37, "grad_norm": 0.7915753251655056, "learning_rate": 7.314407398224016e-06, "loss": 1.1097, "step": 4567 }, { "epoch": 0.37, "grad_norm": 1.5205197459164397, "learning_rate": 7.313255636425002e-06, "loss": 0.807, "step": 4568 }, { "epoch": 0.37, "grad_norm": 0.835679507813991, "learning_rate": 7.312103718429675e-06, "loss": 1.0988, "step": 4569 }, { "epoch": 0.37, "grad_norm": 1.4102329720132356, "learning_rate": 7.310951644315818e-06, "loss": 0.8494, "step": 4570 }, { "epoch": 0.37, "grad_norm": 1.4050092065510982, "learning_rate": 7.309799414161224e-06, "loss": 0.7956, "step": 4571 }, { "epoch": 0.37, "grad_norm": 1.5269907826275169, "learning_rate": 7.30864702804369e-06, "loss": 0.7739, "step": 4572 }, { "epoch": 0.37, "grad_norm": 1.4851631654013626, "learning_rate": 7.30749448604103e-06, "loss": 0.7154, "step": 4573 }, { "epoch": 0.37, "grad_norm": 1.554502823323659, "learning_rate": 7.306341788231067e-06, "loss": 0.7708, "step": 4574 }, { "epoch": 0.37, "grad_norm": 1.5306898368352797, "learning_rate": 7.305188934691632e-06, "loss": 0.8476, "step": 4575 }, { "epoch": 0.37, "grad_norm": 1.5058027216673626, "learning_rate": 7.304035925500567e-06, "loss": 0.8892, "step": 4576 }, { "epoch": 0.37, "grad_norm": 1.4261896175346152, "learning_rate": 7.30288276073573e-06, "loss": 0.745, "step": 4577 }, { "epoch": 0.37, "grad_norm": 1.5146869291857377, "learning_rate": 7.301729440474984e-06, "loss": 0.8121, "step": 4578 }, { "epoch": 0.37, "grad_norm": 1.4675755543713302, "learning_rate": 7.300575964796201e-06, "loss": 0.7123, "step": 4579 }, { "epoch": 0.37, "grad_norm": 1.5417926167405183, "learning_rate": 7.299422333777271e-06, "loss": 0.8558, "step": 4580 }, { "epoch": 0.37, "grad_norm": 1.5599664311581074, "learning_rate": 7.298268547496084e-06, "loss": 0.7736, "step": 4581 }, { "epoch": 0.37, "grad_norm": 1.6341064090453004, "learning_rate": 7.29711460603055e-06, "loss": 0.7506, "step": 4582 }, { "epoch": 0.37, "grad_norm": 1.558610488855587, "learning_rate": 7.295960509458586e-06, "loss": 0.7454, "step": 4583 }, { "epoch": 0.37, "grad_norm": 1.5630480440594088, "learning_rate": 7.294806257858118e-06, "loss": 0.8568, "step": 4584 }, { "epoch": 0.37, "grad_norm": 1.3882603179005861, "learning_rate": 7.293651851307084e-06, "loss": 0.7046, "step": 4585 }, { "epoch": 0.37, "grad_norm": 1.4869904026901715, "learning_rate": 7.292497289883432e-06, "loss": 0.8207, "step": 4586 }, { "epoch": 0.37, "grad_norm": 0.9222619709560712, "learning_rate": 7.291342573665121e-06, "loss": 1.0963, "step": 4587 }, { "epoch": 0.37, "grad_norm": 1.5635459795793658, "learning_rate": 7.29018770273012e-06, "loss": 0.8578, "step": 4588 }, { "epoch": 0.37, "grad_norm": 1.4499581092029958, "learning_rate": 7.289032677156408e-06, "loss": 0.7989, "step": 4589 }, { "epoch": 0.37, "grad_norm": 1.5739923097791575, "learning_rate": 7.287877497021978e-06, "loss": 0.8267, "step": 4590 }, { "epoch": 0.37, "grad_norm": 1.5774278575228435, "learning_rate": 7.286722162404825e-06, "loss": 0.7308, "step": 4591 }, { "epoch": 0.37, "grad_norm": 1.5367162063957502, "learning_rate": 7.285566673382965e-06, "loss": 0.7739, "step": 4592 }, { "epoch": 0.37, "grad_norm": 1.4919534070041502, "learning_rate": 7.284411030034414e-06, "loss": 0.781, "step": 4593 }, { "epoch": 0.37, "grad_norm": 1.4695548868980683, "learning_rate": 7.2832552324372075e-06, "loss": 0.8481, "step": 4594 }, { "epoch": 0.37, "grad_norm": 1.408539649559492, "learning_rate": 7.282099280669388e-06, "loss": 0.7927, "step": 4595 }, { "epoch": 0.37, "grad_norm": 1.4535218871447468, "learning_rate": 7.280943174809006e-06, "loss": 0.7999, "step": 4596 }, { "epoch": 0.37, "grad_norm": 1.4497711259524442, "learning_rate": 7.279786914934126e-06, "loss": 0.7823, "step": 4597 }, { "epoch": 0.37, "grad_norm": 1.4143340308817791, "learning_rate": 7.278630501122819e-06, "loss": 0.7972, "step": 4598 }, { "epoch": 0.37, "grad_norm": 1.49317668349312, "learning_rate": 7.27747393345317e-06, "loss": 0.8239, "step": 4599 }, { "epoch": 0.37, "grad_norm": 1.5089484522728334, "learning_rate": 7.276317212003274e-06, "loss": 0.7534, "step": 4600 }, { "epoch": 0.37, "grad_norm": 1.656626757135166, "learning_rate": 7.2751603368512354e-06, "loss": 0.8086, "step": 4601 }, { "epoch": 0.37, "grad_norm": 1.3684730138091772, "learning_rate": 7.274003308075169e-06, "loss": 0.8026, "step": 4602 }, { "epoch": 0.37, "grad_norm": 1.58249112439259, "learning_rate": 7.272846125753198e-06, "loss": 0.8248, "step": 4603 }, { "epoch": 0.37, "grad_norm": 1.5605885966638997, "learning_rate": 7.271688789963458e-06, "loss": 0.8327, "step": 4604 }, { "epoch": 0.37, "grad_norm": 1.5680393457938742, "learning_rate": 7.2705313007841e-06, "loss": 0.7864, "step": 4605 }, { "epoch": 0.37, "grad_norm": 1.554642227587047, "learning_rate": 7.269373658293275e-06, "loss": 0.6971, "step": 4606 }, { "epoch": 0.37, "grad_norm": 0.9079736589620343, "learning_rate": 7.268215862569151e-06, "loss": 1.124, "step": 4607 }, { "epoch": 0.37, "grad_norm": 1.5559281402441132, "learning_rate": 7.2670579136899045e-06, "loss": 0.8009, "step": 4608 }, { "epoch": 0.37, "grad_norm": 1.4522046219291458, "learning_rate": 7.265899811733726e-06, "loss": 0.7476, "step": 4609 }, { "epoch": 0.37, "grad_norm": 1.7147423107046722, "learning_rate": 7.264741556778808e-06, "loss": 0.7651, "step": 4610 }, { "epoch": 0.37, "grad_norm": 1.5432222771012145, "learning_rate": 7.263583148903363e-06, "loss": 0.8001, "step": 4611 }, { "epoch": 0.37, "grad_norm": 1.527247636635427, "learning_rate": 7.2624245881856094e-06, "loss": 0.8456, "step": 4612 }, { "epoch": 0.37, "grad_norm": 1.5882101945762195, "learning_rate": 7.261265874703771e-06, "loss": 0.7857, "step": 4613 }, { "epoch": 0.37, "grad_norm": 1.6618429721523837, "learning_rate": 7.260107008536092e-06, "loss": 0.8391, "step": 4614 }, { "epoch": 0.37, "grad_norm": 1.7271491426580903, "learning_rate": 7.25894798976082e-06, "loss": 0.7935, "step": 4615 }, { "epoch": 0.37, "grad_norm": 1.4909041368048042, "learning_rate": 7.257788818456213e-06, "loss": 0.7526, "step": 4616 }, { "epoch": 0.37, "grad_norm": 1.5760311477886415, "learning_rate": 7.2566294947005434e-06, "loss": 0.8035, "step": 4617 }, { "epoch": 0.37, "grad_norm": 1.5100025415169613, "learning_rate": 7.255470018572091e-06, "loss": 0.7892, "step": 4618 }, { "epoch": 0.37, "grad_norm": 1.5494231019424556, "learning_rate": 7.254310390149144e-06, "loss": 0.7834, "step": 4619 }, { "epoch": 0.37, "grad_norm": 1.6053484764909907, "learning_rate": 7.253150609510005e-06, "loss": 0.7834, "step": 4620 }, { "epoch": 0.37, "grad_norm": 1.5422690019823806, "learning_rate": 7.251990676732985e-06, "loss": 0.7564, "step": 4621 }, { "epoch": 0.37, "grad_norm": 1.4555659452523377, "learning_rate": 7.250830591896404e-06, "loss": 0.7828, "step": 4622 }, { "epoch": 0.37, "grad_norm": 1.356230663872062, "learning_rate": 7.249670355078595e-06, "loss": 0.7126, "step": 4623 }, { "epoch": 0.37, "grad_norm": 1.5054456934583529, "learning_rate": 7.2485099663579e-06, "loss": 0.7813, "step": 4624 }, { "epoch": 0.37, "grad_norm": 1.4944373577851044, "learning_rate": 7.247349425812671e-06, "loss": 0.8102, "step": 4625 }, { "epoch": 0.37, "grad_norm": 1.5655681630023026, "learning_rate": 7.246188733521269e-06, "loss": 0.7098, "step": 4626 }, { "epoch": 0.37, "grad_norm": 1.556136235873271, "learning_rate": 7.245027889562067e-06, "loss": 0.8211, "step": 4627 }, { "epoch": 0.37, "grad_norm": 1.5530729986278262, "learning_rate": 7.243866894013449e-06, "loss": 0.8293, "step": 4628 }, { "epoch": 0.37, "grad_norm": 1.4844219835216177, "learning_rate": 7.242705746953809e-06, "loss": 0.7922, "step": 4629 }, { "epoch": 0.37, "grad_norm": 1.5030128696748513, "learning_rate": 7.241544448461546e-06, "loss": 0.7481, "step": 4630 }, { "epoch": 0.37, "grad_norm": 1.4868687273969419, "learning_rate": 7.240382998615079e-06, "loss": 0.8063, "step": 4631 }, { "epoch": 0.37, "grad_norm": 1.5311249362731447, "learning_rate": 7.239221397492826e-06, "loss": 0.7758, "step": 4632 }, { "epoch": 0.37, "grad_norm": 1.581873105053724, "learning_rate": 7.238059645173225e-06, "loss": 0.8057, "step": 4633 }, { "epoch": 0.37, "grad_norm": 1.5001216846599679, "learning_rate": 7.236897741734721e-06, "loss": 0.8261, "step": 4634 }, { "epoch": 0.37, "grad_norm": 0.9017323113833672, "learning_rate": 7.2357356872557646e-06, "loss": 1.0761, "step": 4635 }, { "epoch": 0.37, "grad_norm": 1.4638189923872649, "learning_rate": 7.234573481814823e-06, "loss": 0.7376, "step": 4636 }, { "epoch": 0.37, "grad_norm": 0.8284602880687917, "learning_rate": 7.233411125490369e-06, "loss": 1.1378, "step": 4637 }, { "epoch": 0.37, "grad_norm": 1.5145340481943748, "learning_rate": 7.232248618360889e-06, "loss": 0.7478, "step": 4638 }, { "epoch": 0.37, "grad_norm": 1.4341885715769642, "learning_rate": 7.231085960504879e-06, "loss": 0.7209, "step": 4639 }, { "epoch": 0.37, "grad_norm": 1.5434596172304318, "learning_rate": 7.229923152000844e-06, "loss": 0.7461, "step": 4640 }, { "epoch": 0.37, "grad_norm": 1.5921242351850915, "learning_rate": 7.2287601929272975e-06, "loss": 0.7693, "step": 4641 }, { "epoch": 0.37, "grad_norm": 1.4552397050096042, "learning_rate": 7.227597083362766e-06, "loss": 0.7114, "step": 4642 }, { "epoch": 0.37, "grad_norm": 1.7778863229514374, "learning_rate": 7.226433823385787e-06, "loss": 0.7765, "step": 4643 }, { "epoch": 0.37, "grad_norm": 1.4298980811750102, "learning_rate": 7.225270413074904e-06, "loss": 0.6778, "step": 4644 }, { "epoch": 0.37, "grad_norm": 1.9191843047887203, "learning_rate": 7.2241068525086745e-06, "loss": 0.7763, "step": 4645 }, { "epoch": 0.37, "grad_norm": 1.4958884764427443, "learning_rate": 7.222943141765666e-06, "loss": 0.8022, "step": 4646 }, { "epoch": 0.37, "grad_norm": 1.5353935157918404, "learning_rate": 7.221779280924451e-06, "loss": 0.805, "step": 4647 }, { "epoch": 0.37, "grad_norm": 1.5672524438746964, "learning_rate": 7.220615270063621e-06, "loss": 0.8547, "step": 4648 }, { "epoch": 0.37, "grad_norm": 1.4791295649184775, "learning_rate": 7.219451109261768e-06, "loss": 0.729, "step": 4649 }, { "epoch": 0.37, "grad_norm": 1.5072207574863103, "learning_rate": 7.2182867985975036e-06, "loss": 0.7533, "step": 4650 }, { "epoch": 0.37, "grad_norm": 1.2563532099145733, "learning_rate": 7.217122338149441e-06, "loss": 1.111, "step": 4651 }, { "epoch": 0.37, "grad_norm": 1.4966573008385011, "learning_rate": 7.215957727996208e-06, "loss": 0.8433, "step": 4652 }, { "epoch": 0.37, "grad_norm": 1.4573497273656213, "learning_rate": 7.214792968216442e-06, "loss": 0.8106, "step": 4653 }, { "epoch": 0.37, "grad_norm": 1.5201068717350101, "learning_rate": 7.21362805888879e-06, "loss": 0.8007, "step": 4654 }, { "epoch": 0.37, "grad_norm": 1.4448079159780887, "learning_rate": 7.21246300009191e-06, "loss": 0.7464, "step": 4655 }, { "epoch": 0.37, "grad_norm": 1.4595710020956827, "learning_rate": 7.21129779190447e-06, "loss": 0.7993, "step": 4656 }, { "epoch": 0.37, "grad_norm": 1.7455999304862324, "learning_rate": 7.210132434405146e-06, "loss": 0.776, "step": 4657 }, { "epoch": 0.37, "grad_norm": 1.802064195901956, "learning_rate": 7.208966927672627e-06, "loss": 0.8088, "step": 4658 }, { "epoch": 0.37, "grad_norm": 0.9532426347209004, "learning_rate": 7.2078012717856086e-06, "loss": 1.1044, "step": 4659 }, { "epoch": 0.37, "grad_norm": 1.5330124560751197, "learning_rate": 7.2066354668228e-06, "loss": 0.8845, "step": 4660 }, { "epoch": 0.37, "grad_norm": 1.7920123859247055, "learning_rate": 7.205469512862919e-06, "loss": 0.7932, "step": 4661 }, { "epoch": 0.37, "grad_norm": 1.52795734465919, "learning_rate": 7.204303409984694e-06, "loss": 0.8356, "step": 4662 }, { "epoch": 0.37, "grad_norm": 0.7955861053176866, "learning_rate": 7.203137158266863e-06, "loss": 1.1176, "step": 4663 }, { "epoch": 0.37, "grad_norm": 1.5111570276163415, "learning_rate": 7.201970757788172e-06, "loss": 0.7861, "step": 4664 }, { "epoch": 0.37, "grad_norm": 0.7976404007993863, "learning_rate": 7.200804208627381e-06, "loss": 1.109, "step": 4665 }, { "epoch": 0.37, "grad_norm": 1.4971517058325816, "learning_rate": 7.1996375108632566e-06, "loss": 0.7925, "step": 4666 }, { "epoch": 0.37, "grad_norm": 0.819389475096111, "learning_rate": 7.19847066457458e-06, "loss": 1.1012, "step": 4667 }, { "epoch": 0.37, "grad_norm": 0.8180190806499755, "learning_rate": 7.197303669840134e-06, "loss": 1.1038, "step": 4668 }, { "epoch": 0.37, "grad_norm": 1.7267538332590993, "learning_rate": 7.1961365267387205e-06, "loss": 0.8255, "step": 4669 }, { "epoch": 0.37, "grad_norm": 1.4519627778288022, "learning_rate": 7.194969235349149e-06, "loss": 0.755, "step": 4670 }, { "epoch": 0.37, "grad_norm": 1.4739521326190854, "learning_rate": 7.193801795750233e-06, "loss": 0.7597, "step": 4671 }, { "epoch": 0.37, "grad_norm": 1.5952054460417515, "learning_rate": 7.192634208020805e-06, "loss": 0.807, "step": 4672 }, { "epoch": 0.37, "grad_norm": 1.5972105363609925, "learning_rate": 7.191466472239701e-06, "loss": 0.7917, "step": 4673 }, { "epoch": 0.38, "grad_norm": 1.5456909904471943, "learning_rate": 7.190298588485769e-06, "loss": 0.7943, "step": 4674 }, { "epoch": 0.38, "grad_norm": 0.9832806310442592, "learning_rate": 7.189130556837869e-06, "loss": 1.1255, "step": 4675 }, { "epoch": 0.38, "grad_norm": 0.8739278074580219, "learning_rate": 7.187962377374866e-06, "loss": 1.079, "step": 4676 }, { "epoch": 0.38, "grad_norm": 1.6167959229171456, "learning_rate": 7.186794050175643e-06, "loss": 0.7271, "step": 4677 }, { "epoch": 0.38, "grad_norm": 1.5380461353563224, "learning_rate": 7.185625575319085e-06, "loss": 0.8499, "step": 4678 }, { "epoch": 0.38, "grad_norm": 1.4375321687959584, "learning_rate": 7.184456952884089e-06, "loss": 0.6912, "step": 4679 }, { "epoch": 0.38, "grad_norm": 1.6065665810045027, "learning_rate": 7.183288182949565e-06, "loss": 0.7215, "step": 4680 }, { "epoch": 0.38, "grad_norm": 0.9295714590017, "learning_rate": 7.182119265594429e-06, "loss": 1.1174, "step": 4681 }, { "epoch": 0.38, "grad_norm": 1.584268276821332, "learning_rate": 7.180950200897614e-06, "loss": 0.8308, "step": 4682 }, { "epoch": 0.38, "grad_norm": 0.8701558071867386, "learning_rate": 7.179780988938051e-06, "loss": 1.0828, "step": 4683 }, { "epoch": 0.38, "grad_norm": 1.5861893550133337, "learning_rate": 7.178611629794693e-06, "loss": 0.7777, "step": 4684 }, { "epoch": 0.38, "grad_norm": 1.5547645048160008, "learning_rate": 7.177442123546496e-06, "loss": 0.777, "step": 4685 }, { "epoch": 0.38, "grad_norm": 1.6277743617012475, "learning_rate": 7.1762724702724275e-06, "loss": 0.7349, "step": 4686 }, { "epoch": 0.38, "grad_norm": 1.4786975759617262, "learning_rate": 7.175102670051466e-06, "loss": 0.7861, "step": 4687 }, { "epoch": 0.38, "grad_norm": 0.9087873213334678, "learning_rate": 7.173932722962597e-06, "loss": 1.124, "step": 4688 }, { "epoch": 0.38, "grad_norm": 1.5132928646127968, "learning_rate": 7.172762629084821e-06, "loss": 0.8025, "step": 4689 }, { "epoch": 0.38, "grad_norm": 1.5042329740151752, "learning_rate": 7.171592388497144e-06, "loss": 0.8413, "step": 4690 }, { "epoch": 0.38, "grad_norm": 1.5426032993217735, "learning_rate": 7.170422001278583e-06, "loss": 0.8084, "step": 4691 }, { "epoch": 0.38, "grad_norm": 1.5470401683051946, "learning_rate": 7.169251467508165e-06, "loss": 0.867, "step": 4692 }, { "epoch": 0.38, "grad_norm": 1.4718186752424467, "learning_rate": 7.168080787264927e-06, "loss": 0.7206, "step": 4693 }, { "epoch": 0.38, "grad_norm": 1.4615703133144988, "learning_rate": 7.166909960627918e-06, "loss": 0.7467, "step": 4694 }, { "epoch": 0.38, "grad_norm": 1.6107050589118383, "learning_rate": 7.165738987676193e-06, "loss": 0.8347, "step": 4695 }, { "epoch": 0.38, "grad_norm": 1.4203285280556759, "learning_rate": 7.16456786848882e-06, "loss": 0.7266, "step": 4696 }, { "epoch": 0.38, "grad_norm": 1.6780348232938431, "learning_rate": 7.163396603144874e-06, "loss": 0.8072, "step": 4697 }, { "epoch": 0.38, "grad_norm": 1.553866768747694, "learning_rate": 7.162225191723442e-06, "loss": 0.8635, "step": 4698 }, { "epoch": 0.38, "grad_norm": 1.4633860668805754, "learning_rate": 7.161053634303622e-06, "loss": 0.8355, "step": 4699 }, { "epoch": 0.38, "grad_norm": 1.5695228071891096, "learning_rate": 7.159881930964518e-06, "loss": 0.768, "step": 4700 }, { "epoch": 0.38, "grad_norm": 1.5521824893609644, "learning_rate": 7.15871008178525e-06, "loss": 0.791, "step": 4701 }, { "epoch": 0.38, "grad_norm": 1.5709941451027039, "learning_rate": 7.157538086844937e-06, "loss": 0.7757, "step": 4702 }, { "epoch": 0.38, "grad_norm": 0.9355185611724404, "learning_rate": 7.156365946222721e-06, "loss": 1.0996, "step": 4703 }, { "epoch": 0.38, "grad_norm": 0.8949272571348935, "learning_rate": 7.155193659997746e-06, "loss": 1.1632, "step": 4704 }, { "epoch": 0.38, "grad_norm": 1.4633614874554774, "learning_rate": 7.154021228249165e-06, "loss": 0.7305, "step": 4705 }, { "epoch": 0.38, "grad_norm": 1.4716449478530491, "learning_rate": 7.1528486510561455e-06, "loss": 0.8039, "step": 4706 }, { "epoch": 0.38, "grad_norm": 1.4075663371076896, "learning_rate": 7.151675928497864e-06, "loss": 0.7332, "step": 4707 }, { "epoch": 0.38, "grad_norm": 1.4367493760369496, "learning_rate": 7.150503060653504e-06, "loss": 0.6884, "step": 4708 }, { "epoch": 0.38, "grad_norm": 1.4724479010673097, "learning_rate": 7.149330047602258e-06, "loss": 0.8272, "step": 4709 }, { "epoch": 0.38, "grad_norm": 1.537058225943324, "learning_rate": 7.148156889423332e-06, "loss": 0.7532, "step": 4710 }, { "epoch": 0.38, "grad_norm": 1.5530247571541198, "learning_rate": 7.146983586195942e-06, "loss": 0.7302, "step": 4711 }, { "epoch": 0.38, "grad_norm": 1.3649103684693964, "learning_rate": 7.145810137999312e-06, "loss": 0.795, "step": 4712 }, { "epoch": 0.38, "grad_norm": 1.389736103084445, "learning_rate": 7.144636544912674e-06, "loss": 0.7768, "step": 4713 }, { "epoch": 0.38, "grad_norm": 1.5723482335448258, "learning_rate": 7.143462807015271e-06, "loss": 0.8262, "step": 4714 }, { "epoch": 0.38, "grad_norm": 1.446910255274307, "learning_rate": 7.142288924386359e-06, "loss": 0.6704, "step": 4715 }, { "epoch": 0.38, "grad_norm": 1.1355781753960117, "learning_rate": 7.141114897105202e-06, "loss": 1.0961, "step": 4716 }, { "epoch": 0.38, "grad_norm": 1.4805296464591282, "learning_rate": 7.1399407252510685e-06, "loss": 0.7922, "step": 4717 }, { "epoch": 0.38, "grad_norm": 1.4235596442475063, "learning_rate": 7.138766408903246e-06, "loss": 0.7017, "step": 4718 }, { "epoch": 0.38, "grad_norm": 1.4978706236664283, "learning_rate": 7.137591948141023e-06, "loss": 0.813, "step": 4719 }, { "epoch": 0.38, "grad_norm": 1.506171555570857, "learning_rate": 7.136417343043704e-06, "loss": 0.8295, "step": 4720 }, { "epoch": 0.38, "grad_norm": 1.434289035553792, "learning_rate": 7.135242593690601e-06, "loss": 0.7568, "step": 4721 }, { "epoch": 0.38, "grad_norm": 0.8670395928025799, "learning_rate": 7.134067700161037e-06, "loss": 1.1006, "step": 4722 }, { "epoch": 0.38, "grad_norm": 1.6809299928443586, "learning_rate": 7.132892662534342e-06, "loss": 0.8345, "step": 4723 }, { "epoch": 0.38, "grad_norm": 1.4685430165054532, "learning_rate": 7.131717480889854e-06, "loss": 0.7915, "step": 4724 }, { "epoch": 0.38, "grad_norm": 0.8272451609205067, "learning_rate": 7.130542155306931e-06, "loss": 1.1014, "step": 4725 }, { "epoch": 0.38, "grad_norm": 0.8212032911436201, "learning_rate": 7.129366685864928e-06, "loss": 1.0921, "step": 4726 }, { "epoch": 0.38, "grad_norm": 1.457361089919343, "learning_rate": 7.128191072643217e-06, "loss": 0.7161, "step": 4727 }, { "epoch": 0.38, "grad_norm": 1.3906867190451795, "learning_rate": 7.127015315721179e-06, "loss": 0.7546, "step": 4728 }, { "epoch": 0.38, "grad_norm": 1.5614937817137227, "learning_rate": 7.125839415178204e-06, "loss": 0.7939, "step": 4729 }, { "epoch": 0.38, "grad_norm": 1.6030349819770577, "learning_rate": 7.124663371093691e-06, "loss": 0.7045, "step": 4730 }, { "epoch": 0.38, "grad_norm": 1.5466153661628774, "learning_rate": 7.123487183547046e-06, "loss": 0.8282, "step": 4731 }, { "epoch": 0.38, "grad_norm": 1.1866171752776085, "learning_rate": 7.122310852617693e-06, "loss": 1.1273, "step": 4732 }, { "epoch": 0.38, "grad_norm": 1.5341601744034048, "learning_rate": 7.121134378385057e-06, "loss": 0.7352, "step": 4733 }, { "epoch": 0.38, "grad_norm": 1.4835260697088204, "learning_rate": 7.11995776092858e-06, "loss": 0.8545, "step": 4734 }, { "epoch": 0.38, "grad_norm": 1.525620594644031, "learning_rate": 7.118781000327706e-06, "loss": 0.8009, "step": 4735 }, { "epoch": 0.38, "grad_norm": 1.4810109557536222, "learning_rate": 7.1176040966618934e-06, "loss": 0.7055, "step": 4736 }, { "epoch": 0.38, "grad_norm": 0.8000234904671825, "learning_rate": 7.116427050010611e-06, "loss": 1.1064, "step": 4737 }, { "epoch": 0.38, "grad_norm": 1.470231486794984, "learning_rate": 7.115249860453333e-06, "loss": 0.8462, "step": 4738 }, { "epoch": 0.38, "grad_norm": 1.6912256651903552, "learning_rate": 7.114072528069549e-06, "loss": 0.8138, "step": 4739 }, { "epoch": 0.38, "grad_norm": 1.4814240492003237, "learning_rate": 7.1128950529387534e-06, "loss": 0.7689, "step": 4740 }, { "epoch": 0.38, "grad_norm": 1.3926488617658959, "learning_rate": 7.1117174351404515e-06, "loss": 0.6873, "step": 4741 }, { "epoch": 0.38, "grad_norm": 1.9041495388596472, "learning_rate": 7.11053967475416e-06, "loss": 0.8173, "step": 4742 }, { "epoch": 0.38, "grad_norm": 1.4946878942453439, "learning_rate": 7.109361771859404e-06, "loss": 0.7981, "step": 4743 }, { "epoch": 0.38, "grad_norm": 1.6257482112000994, "learning_rate": 7.1081837265357174e-06, "loss": 0.7647, "step": 4744 }, { "epoch": 0.38, "grad_norm": 1.4471693455954073, "learning_rate": 7.107005538862647e-06, "loss": 0.7424, "step": 4745 }, { "epoch": 0.38, "grad_norm": 0.8689533635635018, "learning_rate": 7.1058272089197425e-06, "loss": 1.0694, "step": 4746 }, { "epoch": 0.38, "grad_norm": 1.5399021731342382, "learning_rate": 7.10464873678657e-06, "loss": 0.7775, "step": 4747 }, { "epoch": 0.38, "grad_norm": 1.4894470323079931, "learning_rate": 7.103470122542701e-06, "loss": 0.7631, "step": 4748 }, { "epoch": 0.38, "grad_norm": 1.5982741505583355, "learning_rate": 7.1022913662677225e-06, "loss": 0.8138, "step": 4749 }, { "epoch": 0.38, "grad_norm": 1.4693654107056158, "learning_rate": 7.101112468041221e-06, "loss": 0.7953, "step": 4750 }, { "epoch": 0.38, "grad_norm": 1.5090567344523407, "learning_rate": 7.099933427942803e-06, "loss": 0.7381, "step": 4751 }, { "epoch": 0.38, "grad_norm": 1.4597634931933907, "learning_rate": 7.098754246052077e-06, "loss": 0.7365, "step": 4752 }, { "epoch": 0.38, "grad_norm": 1.3822243525695932, "learning_rate": 7.0975749224486665e-06, "loss": 0.7496, "step": 4753 }, { "epoch": 0.38, "grad_norm": 1.428731981452327, "learning_rate": 7.0963954572122e-06, "loss": 0.7087, "step": 4754 }, { "epoch": 0.38, "grad_norm": 1.6130815642702456, "learning_rate": 7.095215850422318e-06, "loss": 0.745, "step": 4755 }, { "epoch": 0.38, "grad_norm": 1.5174966208222809, "learning_rate": 7.094036102158672e-06, "loss": 0.8015, "step": 4756 }, { "epoch": 0.38, "grad_norm": 1.5382952469387174, "learning_rate": 7.0928562125009195e-06, "loss": 0.7654, "step": 4757 }, { "epoch": 0.38, "grad_norm": 1.5587395342791164, "learning_rate": 7.091676181528729e-06, "loss": 0.7727, "step": 4758 }, { "epoch": 0.38, "grad_norm": 1.5500282452413885, "learning_rate": 7.090496009321781e-06, "loss": 0.7891, "step": 4759 }, { "epoch": 0.38, "grad_norm": 1.5577171672752854, "learning_rate": 7.089315695959762e-06, "loss": 0.886, "step": 4760 }, { "epoch": 0.38, "grad_norm": 1.5881840640509954, "learning_rate": 7.088135241522369e-06, "loss": 0.8257, "step": 4761 }, { "epoch": 0.38, "grad_norm": 1.5868128760290876, "learning_rate": 7.086954646089311e-06, "loss": 0.8228, "step": 4762 }, { "epoch": 0.38, "grad_norm": 0.8968101850521635, "learning_rate": 7.085773909740302e-06, "loss": 1.1076, "step": 4763 }, { "epoch": 0.38, "grad_norm": 1.4900700589273637, "learning_rate": 7.084593032555071e-06, "loss": 0.6934, "step": 4764 }, { "epoch": 0.38, "grad_norm": 1.4506193549529383, "learning_rate": 7.083412014613349e-06, "loss": 0.7996, "step": 4765 }, { "epoch": 0.38, "grad_norm": 1.3377442677604336, "learning_rate": 7.082230855994885e-06, "loss": 0.7271, "step": 4766 }, { "epoch": 0.38, "grad_norm": 1.5374098790903197, "learning_rate": 7.081049556779433e-06, "loss": 0.8265, "step": 4767 }, { "epoch": 0.38, "grad_norm": 1.474164789341235, "learning_rate": 7.079868117046755e-06, "loss": 0.7387, "step": 4768 }, { "epoch": 0.38, "grad_norm": 0.8279594969063385, "learning_rate": 7.078686536876627e-06, "loss": 1.0865, "step": 4769 }, { "epoch": 0.38, "grad_norm": 1.512197022132002, "learning_rate": 7.07750481634883e-06, "loss": 0.7947, "step": 4770 }, { "epoch": 0.38, "grad_norm": 1.3514133208397847, "learning_rate": 7.076322955543158e-06, "loss": 0.7407, "step": 4771 }, { "epoch": 0.38, "grad_norm": 0.8299497638856991, "learning_rate": 7.075140954539412e-06, "loss": 1.1444, "step": 4772 }, { "epoch": 0.38, "grad_norm": 1.6560337512722119, "learning_rate": 7.073958813417404e-06, "loss": 0.8352, "step": 4773 }, { "epoch": 0.38, "grad_norm": 1.5703790226769188, "learning_rate": 7.0727765322569545e-06, "loss": 0.8296, "step": 4774 }, { "epoch": 0.38, "grad_norm": 1.5669459823343077, "learning_rate": 7.071594111137892e-06, "loss": 0.8121, "step": 4775 }, { "epoch": 0.38, "grad_norm": 0.8153017556867063, "learning_rate": 7.07041155014006e-06, "loss": 1.0916, "step": 4776 }, { "epoch": 0.38, "grad_norm": 1.4907896201975301, "learning_rate": 7.069228849343306e-06, "loss": 0.8128, "step": 4777 }, { "epoch": 0.38, "grad_norm": 1.6162074965184496, "learning_rate": 7.0680460088274885e-06, "loss": 0.7233, "step": 4778 }, { "epoch": 0.38, "grad_norm": 1.4605572775121975, "learning_rate": 7.066863028672475e-06, "loss": 0.7819, "step": 4779 }, { "epoch": 0.38, "grad_norm": 0.7987014830552988, "learning_rate": 7.065679908958143e-06, "loss": 1.1055, "step": 4780 }, { "epoch": 0.38, "grad_norm": 1.5975690420650885, "learning_rate": 7.064496649764381e-06, "loss": 0.7966, "step": 4781 }, { "epoch": 0.38, "grad_norm": 1.3845048007180782, "learning_rate": 7.063313251171084e-06, "loss": 0.7921, "step": 4782 }, { "epoch": 0.38, "grad_norm": 0.9081352704701624, "learning_rate": 7.062129713258159e-06, "loss": 1.1293, "step": 4783 }, { "epoch": 0.38, "grad_norm": 1.5430158381729853, "learning_rate": 7.06094603610552e-06, "loss": 0.764, "step": 4784 }, { "epoch": 0.38, "grad_norm": 1.4742317028522816, "learning_rate": 7.059762219793091e-06, "loss": 0.8065, "step": 4785 }, { "epoch": 0.38, "grad_norm": 1.471919419726245, "learning_rate": 7.05857826440081e-06, "loss": 0.7715, "step": 4786 }, { "epoch": 0.38, "grad_norm": 1.449370585713447, "learning_rate": 7.057394170008614e-06, "loss": 0.8474, "step": 4787 }, { "epoch": 0.38, "grad_norm": 1.4181539216523533, "learning_rate": 7.056209936696461e-06, "loss": 0.8036, "step": 4788 }, { "epoch": 0.38, "grad_norm": 0.963546690739823, "learning_rate": 7.055025564544311e-06, "loss": 1.1111, "step": 4789 }, { "epoch": 0.38, "grad_norm": 1.454988992424918, "learning_rate": 7.053841053632135e-06, "loss": 0.8206, "step": 4790 }, { "epoch": 0.38, "grad_norm": 1.4717532136633846, "learning_rate": 7.052656404039915e-06, "loss": 0.7579, "step": 4791 }, { "epoch": 0.38, "grad_norm": 1.5259938487117404, "learning_rate": 7.0514716158476405e-06, "loss": 0.8525, "step": 4792 }, { "epoch": 0.38, "grad_norm": 1.4337201588967021, "learning_rate": 7.050286689135313e-06, "loss": 0.8067, "step": 4793 }, { "epoch": 0.38, "grad_norm": 1.769858358906952, "learning_rate": 7.049101623982938e-06, "loss": 0.7919, "step": 4794 }, { "epoch": 0.38, "grad_norm": 1.8595700059558524, "learning_rate": 7.0479164204705376e-06, "loss": 0.7871, "step": 4795 }, { "epoch": 0.38, "grad_norm": 1.5925553079726997, "learning_rate": 7.046731078678137e-06, "loss": 0.8546, "step": 4796 }, { "epoch": 0.38, "grad_norm": 1.4912729092977195, "learning_rate": 7.0455455986857724e-06, "loss": 0.7855, "step": 4797 }, { "epoch": 0.38, "grad_norm": 1.478574203483191, "learning_rate": 7.044359980573494e-06, "loss": 0.739, "step": 4798 }, { "epoch": 0.39, "grad_norm": 1.5488038520271574, "learning_rate": 7.043174224421353e-06, "loss": 0.8579, "step": 4799 }, { "epoch": 0.39, "grad_norm": 1.4901876969334749, "learning_rate": 7.041988330309417e-06, "loss": 0.7675, "step": 4800 }, { "epoch": 0.39, "grad_norm": 1.3958974175486691, "learning_rate": 7.040802298317762e-06, "loss": 0.8259, "step": 4801 }, { "epoch": 0.39, "grad_norm": 1.5901434476883838, "learning_rate": 7.039616128526465e-06, "loss": 0.744, "step": 4802 }, { "epoch": 0.39, "grad_norm": 1.4864627747289088, "learning_rate": 7.038429821015627e-06, "loss": 0.7929, "step": 4803 }, { "epoch": 0.39, "grad_norm": 1.4634583622816204, "learning_rate": 7.037243375865344e-06, "loss": 0.7793, "step": 4804 }, { "epoch": 0.39, "grad_norm": 1.498476364278755, "learning_rate": 7.03605679315573e-06, "loss": 0.8505, "step": 4805 }, { "epoch": 0.39, "grad_norm": 1.5023445431357645, "learning_rate": 7.034870072966906e-06, "loss": 0.7958, "step": 4806 }, { "epoch": 0.39, "grad_norm": 1.5428843966554227, "learning_rate": 7.033683215379002e-06, "loss": 0.7326, "step": 4807 }, { "epoch": 0.39, "grad_norm": 0.846428854483477, "learning_rate": 7.032496220472157e-06, "loss": 1.0755, "step": 4808 }, { "epoch": 0.39, "grad_norm": 1.5647307369161931, "learning_rate": 7.031309088326519e-06, "loss": 0.8288, "step": 4809 }, { "epoch": 0.39, "grad_norm": 1.5753474452757965, "learning_rate": 7.030121819022247e-06, "loss": 0.8789, "step": 4810 }, { "epoch": 0.39, "grad_norm": 1.4888670742947328, "learning_rate": 7.028934412639508e-06, "loss": 0.7963, "step": 4811 }, { "epoch": 0.39, "grad_norm": 1.5211996431023533, "learning_rate": 7.027746869258477e-06, "loss": 0.8492, "step": 4812 }, { "epoch": 0.39, "grad_norm": 1.513409449787587, "learning_rate": 7.026559188959341e-06, "loss": 0.8311, "step": 4813 }, { "epoch": 0.39, "grad_norm": 1.9484344368646307, "learning_rate": 7.025371371822294e-06, "loss": 0.8402, "step": 4814 }, { "epoch": 0.39, "grad_norm": 1.463577453332081, "learning_rate": 7.024183417927542e-06, "loss": 0.7955, "step": 4815 }, { "epoch": 0.39, "grad_norm": 1.5854670313746038, "learning_rate": 7.022995327355296e-06, "loss": 0.7536, "step": 4816 }, { "epoch": 0.39, "grad_norm": 1.3542936232000855, "learning_rate": 7.02180710018578e-06, "loss": 0.7971, "step": 4817 }, { "epoch": 0.39, "grad_norm": 1.6415891605027393, "learning_rate": 7.0206187364992255e-06, "loss": 0.7249, "step": 4818 }, { "epoch": 0.39, "grad_norm": 1.494184725537653, "learning_rate": 7.0194302363758735e-06, "loss": 0.7803, "step": 4819 }, { "epoch": 0.39, "grad_norm": 0.8446516574572622, "learning_rate": 7.018241599895974e-06, "loss": 1.1093, "step": 4820 }, { "epoch": 0.39, "grad_norm": 1.5311126234558692, "learning_rate": 7.017052827139786e-06, "loss": 0.6977, "step": 4821 }, { "epoch": 0.39, "grad_norm": 1.5457996914169605, "learning_rate": 7.0158639181875795e-06, "loss": 0.7429, "step": 4822 }, { "epoch": 0.39, "grad_norm": 0.8032721028539408, "learning_rate": 7.014674873119634e-06, "loss": 1.1343, "step": 4823 }, { "epoch": 0.39, "grad_norm": 1.5619961832735656, "learning_rate": 7.013485692016232e-06, "loss": 0.8616, "step": 4824 }, { "epoch": 0.39, "grad_norm": 1.5480958162366456, "learning_rate": 7.012296374957671e-06, "loss": 0.7769, "step": 4825 }, { "epoch": 0.39, "grad_norm": 1.4724327746348307, "learning_rate": 7.011106922024258e-06, "loss": 0.6519, "step": 4826 }, { "epoch": 0.39, "grad_norm": 0.8745714462995694, "learning_rate": 7.009917333296308e-06, "loss": 1.132, "step": 4827 }, { "epoch": 0.39, "grad_norm": 1.4435004302095549, "learning_rate": 7.0087276088541435e-06, "loss": 0.8217, "step": 4828 }, { "epoch": 0.39, "grad_norm": 1.462227384167135, "learning_rate": 7.007537748778097e-06, "loss": 0.7958, "step": 4829 }, { "epoch": 0.39, "grad_norm": 1.4893170218551093, "learning_rate": 7.006347753148511e-06, "loss": 0.7683, "step": 4830 }, { "epoch": 0.39, "grad_norm": 2.1703738121094354, "learning_rate": 7.005157622045737e-06, "loss": 0.7549, "step": 4831 }, { "epoch": 0.39, "grad_norm": 1.6987051860533304, "learning_rate": 7.0039673555501365e-06, "loss": 0.6992, "step": 4832 }, { "epoch": 0.39, "grad_norm": 1.4696003345374853, "learning_rate": 7.002776953742078e-06, "loss": 0.7719, "step": 4833 }, { "epoch": 0.39, "grad_norm": 1.4905628923933092, "learning_rate": 7.001586416701939e-06, "loss": 0.7616, "step": 4834 }, { "epoch": 0.39, "grad_norm": 1.6590880696017936, "learning_rate": 7.000395744510107e-06, "loss": 0.718, "step": 4835 }, { "epoch": 0.39, "grad_norm": 1.5732705361025423, "learning_rate": 6.9992049372469815e-06, "loss": 0.8023, "step": 4836 }, { "epoch": 0.39, "grad_norm": 1.455824718157807, "learning_rate": 6.998013994992967e-06, "loss": 0.7086, "step": 4837 }, { "epoch": 0.39, "grad_norm": 0.8571724559128869, "learning_rate": 6.9968229178284775e-06, "loss": 1.1274, "step": 4838 }, { "epoch": 0.39, "grad_norm": 0.8136378075900033, "learning_rate": 6.995631705833942e-06, "loss": 1.113, "step": 4839 }, { "epoch": 0.39, "grad_norm": 1.5825325181630443, "learning_rate": 6.994440359089787e-06, "loss": 0.8577, "step": 4840 }, { "epoch": 0.39, "grad_norm": 1.8041988510822367, "learning_rate": 6.99324887767646e-06, "loss": 0.7573, "step": 4841 }, { "epoch": 0.39, "grad_norm": 0.7881952416451836, "learning_rate": 6.9920572616744096e-06, "loss": 1.0923, "step": 4842 }, { "epoch": 0.39, "grad_norm": 0.7929672627501072, "learning_rate": 6.9908655111640984e-06, "loss": 1.095, "step": 4843 }, { "epoch": 0.39, "grad_norm": 1.4642194256795575, "learning_rate": 6.989673626225997e-06, "loss": 0.8323, "step": 4844 }, { "epoch": 0.39, "grad_norm": 1.5139854054672122, "learning_rate": 6.988481606940582e-06, "loss": 0.7803, "step": 4845 }, { "epoch": 0.39, "grad_norm": 1.5765629689516845, "learning_rate": 6.9872894533883415e-06, "loss": 0.8097, "step": 4846 }, { "epoch": 0.39, "grad_norm": 1.5488066037830295, "learning_rate": 6.986097165649772e-06, "loss": 0.8177, "step": 4847 }, { "epoch": 0.39, "grad_norm": 1.4722389476083872, "learning_rate": 6.984904743805383e-06, "loss": 0.7389, "step": 4848 }, { "epoch": 0.39, "grad_norm": 1.4324636595180675, "learning_rate": 6.983712187935684e-06, "loss": 0.7645, "step": 4849 }, { "epoch": 0.39, "grad_norm": 1.478334349905141, "learning_rate": 6.982519498121204e-06, "loss": 0.7917, "step": 4850 }, { "epoch": 0.39, "grad_norm": 1.5258947025611473, "learning_rate": 6.981326674442474e-06, "loss": 0.8413, "step": 4851 }, { "epoch": 0.39, "grad_norm": 1.631648229609166, "learning_rate": 6.980133716980035e-06, "loss": 0.7732, "step": 4852 }, { "epoch": 0.39, "grad_norm": 1.5769998757385868, "learning_rate": 6.978940625814441e-06, "loss": 0.6912, "step": 4853 }, { "epoch": 0.39, "grad_norm": 1.5251288646524548, "learning_rate": 6.977747401026249e-06, "loss": 0.8208, "step": 4854 }, { "epoch": 0.39, "grad_norm": 1.5092084157227337, "learning_rate": 6.9765540426960334e-06, "loss": 0.8071, "step": 4855 }, { "epoch": 0.39, "grad_norm": 1.4912611488464829, "learning_rate": 6.9753605509043665e-06, "loss": 0.7391, "step": 4856 }, { "epoch": 0.39, "grad_norm": 1.6416057697280744, "learning_rate": 6.974166925731839e-06, "loss": 0.8117, "step": 4857 }, { "epoch": 0.39, "grad_norm": 1.5284424000546486, "learning_rate": 6.972973167259046e-06, "loss": 0.7248, "step": 4858 }, { "epoch": 0.39, "grad_norm": 1.4648419973114, "learning_rate": 6.971779275566593e-06, "loss": 0.7482, "step": 4859 }, { "epoch": 0.39, "grad_norm": 1.4496584683804032, "learning_rate": 6.9705852507350945e-06, "loss": 0.8385, "step": 4860 }, { "epoch": 0.39, "grad_norm": 1.5411292158803716, "learning_rate": 6.969391092845177e-06, "loss": 0.8319, "step": 4861 }, { "epoch": 0.39, "grad_norm": 0.9505968468371127, "learning_rate": 6.968196801977466e-06, "loss": 1.0884, "step": 4862 }, { "epoch": 0.39, "grad_norm": 1.6866597226454823, "learning_rate": 6.967002378212608e-06, "loss": 0.7212, "step": 4863 }, { "epoch": 0.39, "grad_norm": 1.432594992913711, "learning_rate": 6.965807821631251e-06, "loss": 0.7808, "step": 4864 }, { "epoch": 0.39, "grad_norm": 1.4827826555361951, "learning_rate": 6.964613132314055e-06, "loss": 0.774, "step": 4865 }, { "epoch": 0.39, "grad_norm": 1.532949788111831, "learning_rate": 6.963418310341688e-06, "loss": 0.837, "step": 4866 }, { "epoch": 0.39, "grad_norm": 1.4303334914568375, "learning_rate": 6.962223355794827e-06, "loss": 0.7445, "step": 4867 }, { "epoch": 0.39, "grad_norm": 1.6820797250385324, "learning_rate": 6.961028268754159e-06, "loss": 0.7739, "step": 4868 }, { "epoch": 0.39, "grad_norm": 0.8017476268988318, "learning_rate": 6.959833049300376e-06, "loss": 1.1255, "step": 4869 }, { "epoch": 0.39, "grad_norm": 1.420187307305575, "learning_rate": 6.958637697514186e-06, "loss": 0.7809, "step": 4870 }, { "epoch": 0.39, "grad_norm": 1.522283390003779, "learning_rate": 6.957442213476299e-06, "loss": 0.7384, "step": 4871 }, { "epoch": 0.39, "grad_norm": 1.5408893494416438, "learning_rate": 6.956246597267438e-06, "loss": 0.7645, "step": 4872 }, { "epoch": 0.39, "grad_norm": 0.7948083027528613, "learning_rate": 6.955050848968334e-06, "loss": 1.126, "step": 4873 }, { "epoch": 0.39, "grad_norm": 1.4391041997251297, "learning_rate": 6.953854968659726e-06, "loss": 0.7539, "step": 4874 }, { "epoch": 0.39, "grad_norm": 1.5595289857553398, "learning_rate": 6.952658956422362e-06, "loss": 0.7451, "step": 4875 }, { "epoch": 0.39, "grad_norm": 1.5370544228553753, "learning_rate": 6.951462812337e-06, "loss": 0.8079, "step": 4876 }, { "epoch": 0.39, "grad_norm": 1.5324439334635023, "learning_rate": 6.950266536484408e-06, "loss": 0.8161, "step": 4877 }, { "epoch": 0.39, "grad_norm": 0.8177139320477639, "learning_rate": 6.94907012894536e-06, "loss": 1.1288, "step": 4878 }, { "epoch": 0.39, "grad_norm": 1.5959775910861793, "learning_rate": 6.947873589800637e-06, "loss": 0.6745, "step": 4879 }, { "epoch": 0.39, "grad_norm": 1.6021370765210248, "learning_rate": 6.946676919131039e-06, "loss": 0.8935, "step": 4880 }, { "epoch": 0.39, "grad_norm": 1.4972463458560592, "learning_rate": 6.945480117017362e-06, "loss": 0.8267, "step": 4881 }, { "epoch": 0.39, "grad_norm": 0.7789943336075468, "learning_rate": 6.94428318354042e-06, "loss": 1.0786, "step": 4882 }, { "epoch": 0.39, "grad_norm": 1.4531248816760898, "learning_rate": 6.943086118781032e-06, "loss": 0.7368, "step": 4883 }, { "epoch": 0.39, "grad_norm": 0.7913345565827008, "learning_rate": 6.941888922820023e-06, "loss": 1.0832, "step": 4884 }, { "epoch": 0.39, "grad_norm": 1.5252456853733112, "learning_rate": 6.940691595738237e-06, "loss": 0.7606, "step": 4885 }, { "epoch": 0.39, "grad_norm": 1.414901232185457, "learning_rate": 6.939494137616515e-06, "loss": 0.8687, "step": 4886 }, { "epoch": 0.39, "grad_norm": 1.4060458739046064, "learning_rate": 6.938296548535714e-06, "loss": 0.7735, "step": 4887 }, { "epoch": 0.39, "grad_norm": 1.4155585347914468, "learning_rate": 6.937098828576699e-06, "loss": 0.7896, "step": 4888 }, { "epoch": 0.39, "grad_norm": 1.557774313037611, "learning_rate": 6.935900977820341e-06, "loss": 0.7633, "step": 4889 }, { "epoch": 0.39, "grad_norm": 1.3852216430455788, "learning_rate": 6.934702996347522e-06, "loss": 0.7178, "step": 4890 }, { "epoch": 0.39, "grad_norm": 0.8327437670664632, "learning_rate": 6.933504884239133e-06, "loss": 1.0711, "step": 4891 }, { "epoch": 0.39, "grad_norm": 1.4490893978345991, "learning_rate": 6.932306641576073e-06, "loss": 0.7439, "step": 4892 }, { "epoch": 0.39, "grad_norm": 0.8018367696285715, "learning_rate": 6.93110826843925e-06, "loss": 1.0943, "step": 4893 }, { "epoch": 0.39, "grad_norm": 0.7591132884052368, "learning_rate": 6.929909764909582e-06, "loss": 1.076, "step": 4894 }, { "epoch": 0.39, "grad_norm": 1.524573127851954, "learning_rate": 6.928711131067992e-06, "loss": 0.7505, "step": 4895 }, { "epoch": 0.39, "grad_norm": 0.7619258065974707, "learning_rate": 6.927512366995416e-06, "loss": 1.1199, "step": 4896 }, { "epoch": 0.39, "grad_norm": 1.5051605691761225, "learning_rate": 6.926313472772799e-06, "loss": 0.8038, "step": 4897 }, { "epoch": 0.39, "grad_norm": 1.6118386413258803, "learning_rate": 6.925114448481089e-06, "loss": 0.8371, "step": 4898 }, { "epoch": 0.39, "grad_norm": 1.5718790716829274, "learning_rate": 6.923915294201252e-06, "loss": 0.8059, "step": 4899 }, { "epoch": 0.39, "grad_norm": 1.5676217912142556, "learning_rate": 6.922716010014256e-06, "loss": 0.7753, "step": 4900 }, { "epoch": 0.39, "grad_norm": 1.5272250925703863, "learning_rate": 6.921516596001075e-06, "loss": 0.7547, "step": 4901 }, { "epoch": 0.39, "grad_norm": 1.7900271050921532, "learning_rate": 6.920317052242702e-06, "loss": 0.8723, "step": 4902 }, { "epoch": 0.39, "grad_norm": 1.5166296826673427, "learning_rate": 6.919117378820129e-06, "loss": 0.6612, "step": 4903 }, { "epoch": 0.39, "grad_norm": 1.5228404032072853, "learning_rate": 6.917917575814364e-06, "loss": 0.8532, "step": 4904 }, { "epoch": 0.39, "grad_norm": 1.553375936272835, "learning_rate": 6.9167176433064175e-06, "loss": 0.8056, "step": 4905 }, { "epoch": 0.39, "grad_norm": 1.4465161832955624, "learning_rate": 6.915517581377314e-06, "loss": 0.8077, "step": 4906 }, { "epoch": 0.39, "grad_norm": 1.6624152013665454, "learning_rate": 6.914317390108082e-06, "loss": 0.7941, "step": 4907 }, { "epoch": 0.39, "grad_norm": 1.477955459907076, "learning_rate": 6.913117069579763e-06, "loss": 0.8156, "step": 4908 }, { "epoch": 0.39, "grad_norm": 1.4649758545419018, "learning_rate": 6.9119166198734046e-06, "loss": 0.7713, "step": 4909 }, { "epoch": 0.39, "grad_norm": 1.5019665174134, "learning_rate": 6.910716041070064e-06, "loss": 0.6512, "step": 4910 }, { "epoch": 0.39, "grad_norm": 1.5352783441953315, "learning_rate": 6.909515333250809e-06, "loss": 0.8194, "step": 4911 }, { "epoch": 0.39, "grad_norm": 1.6799420995401015, "learning_rate": 6.90831449649671e-06, "loss": 0.7191, "step": 4912 }, { "epoch": 0.39, "grad_norm": 1.5334128424109752, "learning_rate": 6.907113530888853e-06, "loss": 0.7335, "step": 4913 }, { "epoch": 0.39, "grad_norm": 1.507613981875418, "learning_rate": 6.905912436508331e-06, "loss": 0.8172, "step": 4914 }, { "epoch": 0.39, "grad_norm": 0.9586626567819123, "learning_rate": 6.904711213436241e-06, "loss": 1.1071, "step": 4915 }, { "epoch": 0.39, "grad_norm": 1.532772455292424, "learning_rate": 6.903509861753695e-06, "loss": 0.8972, "step": 4916 }, { "epoch": 0.39, "grad_norm": 1.4785724245233158, "learning_rate": 6.902308381541812e-06, "loss": 0.7494, "step": 4917 }, { "epoch": 0.39, "grad_norm": 1.5755638357587924, "learning_rate": 6.901106772881716e-06, "loss": 0.9011, "step": 4918 }, { "epoch": 0.39, "grad_norm": 1.5502723445917514, "learning_rate": 6.899905035854544e-06, "loss": 0.8711, "step": 4919 }, { "epoch": 0.39, "grad_norm": 1.5695534979964207, "learning_rate": 6.898703170541439e-06, "loss": 0.8299, "step": 4920 }, { "epoch": 0.39, "grad_norm": 1.5049259045245127, "learning_rate": 6.897501177023556e-06, "loss": 0.6952, "step": 4921 }, { "epoch": 0.39, "grad_norm": 1.4392622483846549, "learning_rate": 6.896299055382053e-06, "loss": 0.7493, "step": 4922 }, { "epoch": 0.39, "grad_norm": 1.6892054460585515, "learning_rate": 6.895096805698103e-06, "loss": 0.7811, "step": 4923 }, { "epoch": 0.4, "grad_norm": 1.6538163559410586, "learning_rate": 6.893894428052881e-06, "loss": 0.7638, "step": 4924 }, { "epoch": 0.4, "grad_norm": 0.868621555216517, "learning_rate": 6.892691922527576e-06, "loss": 1.1282, "step": 4925 }, { "epoch": 0.4, "grad_norm": 1.460657709351587, "learning_rate": 6.891489289203388e-06, "loss": 0.7836, "step": 4926 }, { "epoch": 0.4, "grad_norm": 1.5780388159636856, "learning_rate": 6.890286528161516e-06, "loss": 0.7835, "step": 4927 }, { "epoch": 0.4, "grad_norm": 1.404558738521302, "learning_rate": 6.889083639483176e-06, "loss": 0.8381, "step": 4928 }, { "epoch": 0.4, "grad_norm": 0.8160851761888349, "learning_rate": 6.887880623249589e-06, "loss": 1.0954, "step": 4929 }, { "epoch": 0.4, "grad_norm": 1.5647038494330388, "learning_rate": 6.886677479541984e-06, "loss": 0.7225, "step": 4930 }, { "epoch": 0.4, "grad_norm": 0.7952371249376197, "learning_rate": 6.885474208441602e-06, "loss": 1.1073, "step": 4931 }, { "epoch": 0.4, "grad_norm": 0.7840895137168205, "learning_rate": 6.88427081002969e-06, "loss": 1.1003, "step": 4932 }, { "epoch": 0.4, "grad_norm": 1.4466921777254216, "learning_rate": 6.883067284387505e-06, "loss": 0.6985, "step": 4933 }, { "epoch": 0.4, "grad_norm": 0.7905802827730208, "learning_rate": 6.881863631596313e-06, "loss": 1.0844, "step": 4934 }, { "epoch": 0.4, "grad_norm": 1.6754842274060233, "learning_rate": 6.880659851737384e-06, "loss": 0.8007, "step": 4935 }, { "epoch": 0.4, "grad_norm": 1.5027688591855028, "learning_rate": 6.879455944892e-06, "loss": 0.7553, "step": 4936 }, { "epoch": 0.4, "grad_norm": 1.5257269544408887, "learning_rate": 6.8782519111414515e-06, "loss": 0.7812, "step": 4937 }, { "epoch": 0.4, "grad_norm": 1.512371522133477, "learning_rate": 6.877047750567042e-06, "loss": 0.8606, "step": 4938 }, { "epoch": 0.4, "grad_norm": 1.4979082648413515, "learning_rate": 6.8758434632500756e-06, "loss": 0.8904, "step": 4939 }, { "epoch": 0.4, "grad_norm": 1.4704267951599195, "learning_rate": 6.8746390492718695e-06, "loss": 0.689, "step": 4940 }, { "epoch": 0.4, "grad_norm": 1.4972714208887041, "learning_rate": 6.873434508713748e-06, "loss": 0.7085, "step": 4941 }, { "epoch": 0.4, "grad_norm": 0.8640609018953209, "learning_rate": 6.872229841657043e-06, "loss": 1.1167, "step": 4942 }, { "epoch": 0.4, "grad_norm": 1.4223380690795144, "learning_rate": 6.8710250481831e-06, "loss": 0.7326, "step": 4943 }, { "epoch": 0.4, "grad_norm": 1.4298978336561585, "learning_rate": 6.869820128373267e-06, "loss": 0.7181, "step": 4944 }, { "epoch": 0.4, "grad_norm": 1.4308609741654195, "learning_rate": 6.868615082308904e-06, "loss": 0.7658, "step": 4945 }, { "epoch": 0.4, "grad_norm": 1.4487943382723723, "learning_rate": 6.867409910071376e-06, "loss": 0.7761, "step": 4946 }, { "epoch": 0.4, "grad_norm": 1.4537968564964712, "learning_rate": 6.866204611742062e-06, "loss": 0.7442, "step": 4947 }, { "epoch": 0.4, "grad_norm": 1.4652358873007205, "learning_rate": 6.864999187402343e-06, "loss": 0.7698, "step": 4948 }, { "epoch": 0.4, "grad_norm": 1.5910199613327107, "learning_rate": 6.863793637133618e-06, "loss": 0.7808, "step": 4949 }, { "epoch": 0.4, "grad_norm": 1.4286914552163246, "learning_rate": 6.862587961017283e-06, "loss": 0.6706, "step": 4950 }, { "epoch": 0.4, "grad_norm": 1.569757434781543, "learning_rate": 6.86138215913475e-06, "loss": 0.7655, "step": 4951 }, { "epoch": 0.4, "grad_norm": 1.506536875878353, "learning_rate": 6.860176231567437e-06, "loss": 0.7916, "step": 4952 }, { "epoch": 0.4, "grad_norm": 1.5274987368730564, "learning_rate": 6.858970178396771e-06, "loss": 0.8464, "step": 4953 }, { "epoch": 0.4, "grad_norm": 1.5170822114946045, "learning_rate": 6.857763999704188e-06, "loss": 0.7187, "step": 4954 }, { "epoch": 0.4, "grad_norm": 1.4906290747063256, "learning_rate": 6.856557695571131e-06, "loss": 0.7635, "step": 4955 }, { "epoch": 0.4, "grad_norm": 1.4693273909686388, "learning_rate": 6.855351266079056e-06, "loss": 0.8361, "step": 4956 }, { "epoch": 0.4, "grad_norm": 1.4352476252446822, "learning_rate": 6.854144711309418e-06, "loss": 0.823, "step": 4957 }, { "epoch": 0.4, "grad_norm": 1.4138985772503154, "learning_rate": 6.85293803134369e-06, "loss": 0.7002, "step": 4958 }, { "epoch": 0.4, "grad_norm": 1.6037479745766512, "learning_rate": 6.851731226263348e-06, "loss": 0.8248, "step": 4959 }, { "epoch": 0.4, "grad_norm": 1.4569830268142927, "learning_rate": 6.8505242961498816e-06, "loss": 0.7894, "step": 4960 }, { "epoch": 0.4, "grad_norm": 1.5489351476247422, "learning_rate": 6.849317241084783e-06, "loss": 0.8321, "step": 4961 }, { "epoch": 0.4, "grad_norm": 1.5029234600317602, "learning_rate": 6.848110061149555e-06, "loss": 0.8559, "step": 4962 }, { "epoch": 0.4, "grad_norm": 1.3631777728324188, "learning_rate": 6.846902756425709e-06, "loss": 0.728, "step": 4963 }, { "epoch": 0.4, "grad_norm": 1.5879121380838839, "learning_rate": 6.845695326994768e-06, "loss": 0.8068, "step": 4964 }, { "epoch": 0.4, "grad_norm": 0.8552289082094887, "learning_rate": 6.844487772938255e-06, "loss": 1.0985, "step": 4965 }, { "epoch": 0.4, "grad_norm": 0.8034991595012907, "learning_rate": 6.843280094337712e-06, "loss": 1.1268, "step": 4966 }, { "epoch": 0.4, "grad_norm": 1.5755477126193738, "learning_rate": 6.842072291274681e-06, "loss": 0.8266, "step": 4967 }, { "epoch": 0.4, "grad_norm": 1.4959580774978334, "learning_rate": 6.840864363830718e-06, "loss": 0.8185, "step": 4968 }, { "epoch": 0.4, "grad_norm": 0.8336096238418472, "learning_rate": 6.839656312087384e-06, "loss": 1.0631, "step": 4969 }, { "epoch": 0.4, "grad_norm": 1.4493704299463315, "learning_rate": 6.838448136126247e-06, "loss": 0.8427, "step": 4970 }, { "epoch": 0.4, "grad_norm": 1.5561195223171072, "learning_rate": 6.837239836028889e-06, "loss": 0.7969, "step": 4971 }, { "epoch": 0.4, "grad_norm": 1.4970766903639408, "learning_rate": 6.836031411876898e-06, "loss": 0.7874, "step": 4972 }, { "epoch": 0.4, "grad_norm": 1.5607115528740678, "learning_rate": 6.834822863751864e-06, "loss": 0.8213, "step": 4973 }, { "epoch": 0.4, "grad_norm": 0.8745393421365627, "learning_rate": 6.833614191735398e-06, "loss": 1.0678, "step": 4974 }, { "epoch": 0.4, "grad_norm": 1.968332827726, "learning_rate": 6.832405395909107e-06, "loss": 0.8423, "step": 4975 }, { "epoch": 0.4, "grad_norm": 1.4197418592926685, "learning_rate": 6.831196476354615e-06, "loss": 0.7379, "step": 4976 }, { "epoch": 0.4, "grad_norm": 1.5020517325316227, "learning_rate": 6.829987433153549e-06, "loss": 0.728, "step": 4977 }, { "epoch": 0.4, "grad_norm": 1.466772375003132, "learning_rate": 6.828778266387547e-06, "loss": 0.7998, "step": 4978 }, { "epoch": 0.4, "grad_norm": 1.4056696695988935, "learning_rate": 6.827568976138255e-06, "loss": 0.7544, "step": 4979 }, { "epoch": 0.4, "grad_norm": 1.5391505366589904, "learning_rate": 6.826359562487326e-06, "loss": 0.7788, "step": 4980 }, { "epoch": 0.4, "grad_norm": 1.5405905775449409, "learning_rate": 6.825150025516423e-06, "loss": 0.8438, "step": 4981 }, { "epoch": 0.4, "grad_norm": 1.454214656151131, "learning_rate": 6.823940365307217e-06, "loss": 0.823, "step": 4982 }, { "epoch": 0.4, "grad_norm": 1.5229918925797552, "learning_rate": 6.822730581941388e-06, "loss": 0.8193, "step": 4983 }, { "epoch": 0.4, "grad_norm": 1.4828640557193797, "learning_rate": 6.8215206755006214e-06, "loss": 0.7979, "step": 4984 }, { "epoch": 0.4, "grad_norm": 1.006148628177163, "learning_rate": 6.820310646066613e-06, "loss": 1.0742, "step": 4985 }, { "epoch": 0.4, "grad_norm": 1.476194304258027, "learning_rate": 6.819100493721068e-06, "loss": 0.7482, "step": 4986 }, { "epoch": 0.4, "grad_norm": 1.554574162394729, "learning_rate": 6.817890218545697e-06, "loss": 0.8601, "step": 4987 }, { "epoch": 0.4, "grad_norm": 1.4343588507991312, "learning_rate": 6.816679820622223e-06, "loss": 0.7558, "step": 4988 }, { "epoch": 0.4, "grad_norm": 1.5545987578806304, "learning_rate": 6.815469300032374e-06, "loss": 0.7503, "step": 4989 }, { "epoch": 0.4, "grad_norm": 1.3877945325380683, "learning_rate": 6.814258656857885e-06, "loss": 0.8107, "step": 4990 }, { "epoch": 0.4, "grad_norm": 1.3741605303519229, "learning_rate": 6.8130478911805044e-06, "loss": 0.7256, "step": 4991 }, { "epoch": 0.4, "grad_norm": 0.8447756237278693, "learning_rate": 6.811837003081983e-06, "loss": 1.0847, "step": 4992 }, { "epoch": 0.4, "grad_norm": 0.8507802574544433, "learning_rate": 6.810625992644085e-06, "loss": 1.1196, "step": 4993 }, { "epoch": 0.4, "grad_norm": 1.3929417299987816, "learning_rate": 6.809414859948579e-06, "loss": 0.7948, "step": 4994 }, { "epoch": 0.4, "grad_norm": 1.5415217409864312, "learning_rate": 6.808203605077244e-06, "loss": 0.8411, "step": 4995 }, { "epoch": 0.4, "grad_norm": 0.8037984952101191, "learning_rate": 6.806992228111868e-06, "loss": 1.088, "step": 4996 }, { "epoch": 0.4, "grad_norm": 1.5826994220395743, "learning_rate": 6.805780729134244e-06, "loss": 0.7394, "step": 4997 }, { "epoch": 0.4, "grad_norm": 1.495436723031971, "learning_rate": 6.804569108226176e-06, "loss": 0.7921, "step": 4998 }, { "epoch": 0.4, "grad_norm": 1.4014224918813896, "learning_rate": 6.803357365469475e-06, "loss": 0.7566, "step": 4999 }, { "epoch": 0.4, "grad_norm": 1.491679971724398, "learning_rate": 6.802145500945962e-06, "loss": 0.7684, "step": 5000 }, { "epoch": 0.4, "grad_norm": 1.4480147662112097, "learning_rate": 6.800933514737465e-06, "loss": 0.7929, "step": 5001 }, { "epoch": 0.4, "grad_norm": 1.4591330477025146, "learning_rate": 6.7997214069258166e-06, "loss": 0.724, "step": 5002 }, { "epoch": 0.4, "grad_norm": 1.4047769858337935, "learning_rate": 6.7985091775928646e-06, "loss": 0.752, "step": 5003 }, { "epoch": 0.4, "grad_norm": 0.9396517858083783, "learning_rate": 6.79729682682046e-06, "loss": 1.11, "step": 5004 }, { "epoch": 0.4, "grad_norm": 1.5423142191357428, "learning_rate": 6.796084354690465e-06, "loss": 0.7244, "step": 5005 }, { "epoch": 0.4, "grad_norm": 1.5397871889947181, "learning_rate": 6.794871761284747e-06, "loss": 0.8116, "step": 5006 }, { "epoch": 0.4, "grad_norm": 1.6258564458620668, "learning_rate": 6.793659046685182e-06, "loss": 0.7877, "step": 5007 }, { "epoch": 0.4, "grad_norm": 1.5813999579260771, "learning_rate": 6.792446210973658e-06, "loss": 0.7602, "step": 5008 }, { "epoch": 0.4, "grad_norm": 1.4805218712136188, "learning_rate": 6.791233254232066e-06, "loss": 0.7801, "step": 5009 }, { "epoch": 0.4, "grad_norm": 1.4773935576623607, "learning_rate": 6.79002017654231e-06, "loss": 0.7622, "step": 5010 }, { "epoch": 0.4, "grad_norm": 1.7277716737894064, "learning_rate": 6.7888069779863e-06, "loss": 0.8064, "step": 5011 }, { "epoch": 0.4, "grad_norm": 1.4792453256601847, "learning_rate": 6.787593658645949e-06, "loss": 0.7778, "step": 5012 }, { "epoch": 0.4, "grad_norm": 1.5144915658159812, "learning_rate": 6.786380218603189e-06, "loss": 0.7453, "step": 5013 }, { "epoch": 0.4, "grad_norm": 0.8926871605833002, "learning_rate": 6.78516665793995e-06, "loss": 1.0838, "step": 5014 }, { "epoch": 0.4, "grad_norm": 1.407776356341224, "learning_rate": 6.7839529767381785e-06, "loss": 0.8013, "step": 5015 }, { "epoch": 0.4, "grad_norm": 1.4287730840964246, "learning_rate": 6.7827391750798225e-06, "loss": 0.8443, "step": 5016 }, { "epoch": 0.4, "grad_norm": 1.5261468235859288, "learning_rate": 6.781525253046839e-06, "loss": 0.8063, "step": 5017 }, { "epoch": 0.4, "grad_norm": 1.7038505933217547, "learning_rate": 6.780311210721198e-06, "loss": 0.8744, "step": 5018 }, { "epoch": 0.4, "grad_norm": 1.3902740918030598, "learning_rate": 6.779097048184873e-06, "loss": 0.7427, "step": 5019 }, { "epoch": 0.4, "grad_norm": 1.5540586308532773, "learning_rate": 6.777882765519846e-06, "loss": 0.7996, "step": 5020 }, { "epoch": 0.4, "grad_norm": 0.8541748684459625, "learning_rate": 6.776668362808111e-06, "loss": 1.1154, "step": 5021 }, { "epoch": 0.4, "grad_norm": 1.5063528573963116, "learning_rate": 6.775453840131666e-06, "loss": 0.7627, "step": 5022 }, { "epoch": 0.4, "grad_norm": 1.5020249872902989, "learning_rate": 6.774239197572516e-06, "loss": 0.7419, "step": 5023 }, { "epoch": 0.4, "grad_norm": 1.4502692488082431, "learning_rate": 6.773024435212678e-06, "loss": 0.6662, "step": 5024 }, { "epoch": 0.4, "grad_norm": 1.396223679869627, "learning_rate": 6.771809553134178e-06, "loss": 0.7312, "step": 5025 }, { "epoch": 0.4, "grad_norm": 1.4738823954155023, "learning_rate": 6.770594551419044e-06, "loss": 0.7917, "step": 5026 }, { "epoch": 0.4, "grad_norm": 1.5978441721322767, "learning_rate": 6.769379430149318e-06, "loss": 0.7563, "step": 5027 }, { "epoch": 0.4, "grad_norm": 1.4705464045679253, "learning_rate": 6.768164189407047e-06, "loss": 0.8256, "step": 5028 }, { "epoch": 0.4, "grad_norm": 1.6296459695063903, "learning_rate": 6.766948829274286e-06, "loss": 0.8454, "step": 5029 }, { "epoch": 0.4, "grad_norm": 1.5154801134495768, "learning_rate": 6.7657333498331e-06, "loss": 0.7218, "step": 5030 }, { "epoch": 0.4, "grad_norm": 1.4101027917915399, "learning_rate": 6.76451775116556e-06, "loss": 0.8047, "step": 5031 }, { "epoch": 0.4, "grad_norm": 1.5404248899563413, "learning_rate": 6.763302033353748e-06, "loss": 0.7869, "step": 5032 }, { "epoch": 0.4, "grad_norm": 1.5326165653874584, "learning_rate": 6.7620861964797505e-06, "loss": 0.8152, "step": 5033 }, { "epoch": 0.4, "grad_norm": 1.5470521736685614, "learning_rate": 6.760870240625663e-06, "loss": 0.7296, "step": 5034 }, { "epoch": 0.4, "grad_norm": 1.6429712866818738, "learning_rate": 6.75965416587359e-06, "loss": 0.8103, "step": 5035 }, { "epoch": 0.4, "grad_norm": 0.9001982270018848, "learning_rate": 6.758437972305645e-06, "loss": 1.0904, "step": 5036 }, { "epoch": 0.4, "grad_norm": 1.6081751851887238, "learning_rate": 6.757221660003947e-06, "loss": 0.8092, "step": 5037 }, { "epoch": 0.4, "grad_norm": 1.4627321351088385, "learning_rate": 6.756005229050624e-06, "loss": 0.8273, "step": 5038 }, { "epoch": 0.4, "grad_norm": 1.463084579768749, "learning_rate": 6.7547886795278136e-06, "loss": 0.7042, "step": 5039 }, { "epoch": 0.4, "grad_norm": 1.485218244602575, "learning_rate": 6.753572011517658e-06, "loss": 0.8188, "step": 5040 }, { "epoch": 0.4, "grad_norm": 0.79903068672677, "learning_rate": 6.752355225102309e-06, "loss": 1.1297, "step": 5041 }, { "epoch": 0.4, "grad_norm": 1.5331218414089514, "learning_rate": 6.75113832036393e-06, "loss": 0.7544, "step": 5042 }, { "epoch": 0.4, "grad_norm": 0.7945505539034713, "learning_rate": 6.749921297384688e-06, "loss": 1.0935, "step": 5043 }, { "epoch": 0.4, "grad_norm": 1.535504296883687, "learning_rate": 6.748704156246759e-06, "loss": 0.8205, "step": 5044 }, { "epoch": 0.4, "grad_norm": 1.4376724213713956, "learning_rate": 6.747486897032325e-06, "loss": 0.8611, "step": 5045 }, { "epoch": 0.4, "grad_norm": 1.374361158816089, "learning_rate": 6.74626951982358e-06, "loss": 0.745, "step": 5046 }, { "epoch": 0.4, "grad_norm": 1.4749142152124315, "learning_rate": 6.745052024702724e-06, "loss": 0.7402, "step": 5047 }, { "epoch": 0.41, "grad_norm": 0.8261629194669423, "learning_rate": 6.743834411751964e-06, "loss": 1.1295, "step": 5048 }, { "epoch": 0.41, "grad_norm": 1.567634925873744, "learning_rate": 6.742616681053518e-06, "loss": 0.6677, "step": 5049 }, { "epoch": 0.41, "grad_norm": 1.4493852941895933, "learning_rate": 6.7413988326896106e-06, "loss": 0.743, "step": 5050 }, { "epoch": 0.41, "grad_norm": 1.572896648088167, "learning_rate": 6.740180866742472e-06, "loss": 0.7349, "step": 5051 }, { "epoch": 0.41, "grad_norm": 1.4435258523313497, "learning_rate": 6.738962783294339e-06, "loss": 0.7944, "step": 5052 }, { "epoch": 0.41, "grad_norm": 0.785160021783493, "learning_rate": 6.737744582427464e-06, "loss": 1.0889, "step": 5053 }, { "epoch": 0.41, "grad_norm": 1.5948482406248132, "learning_rate": 6.736526264224101e-06, "loss": 0.7932, "step": 5054 }, { "epoch": 0.41, "grad_norm": 1.6944631745282728, "learning_rate": 6.735307828766515e-06, "loss": 0.8387, "step": 5055 }, { "epoch": 0.41, "grad_norm": 1.4503711240281798, "learning_rate": 6.734089276136977e-06, "loss": 0.7067, "step": 5056 }, { "epoch": 0.41, "grad_norm": 0.7968476304282831, "learning_rate": 6.732870606417764e-06, "loss": 1.0947, "step": 5057 }, { "epoch": 0.41, "grad_norm": 1.4521058090156567, "learning_rate": 6.7316518196911654e-06, "loss": 0.7915, "step": 5058 }, { "epoch": 0.41, "grad_norm": 1.5457251846000761, "learning_rate": 6.730432916039476e-06, "loss": 0.781, "step": 5059 }, { "epoch": 0.41, "grad_norm": 1.48439726042228, "learning_rate": 6.7292138955450005e-06, "loss": 0.7692, "step": 5060 }, { "epoch": 0.41, "grad_norm": 1.5960466298966511, "learning_rate": 6.727994758290048e-06, "loss": 0.8215, "step": 5061 }, { "epoch": 0.41, "grad_norm": 1.5011927950091615, "learning_rate": 6.726775504356939e-06, "loss": 0.7675, "step": 5062 }, { "epoch": 0.41, "grad_norm": 1.5902706011676242, "learning_rate": 6.725556133827998e-06, "loss": 0.648, "step": 5063 }, { "epoch": 0.41, "grad_norm": 1.4987346409577067, "learning_rate": 6.724336646785561e-06, "loss": 0.7903, "step": 5064 }, { "epoch": 0.41, "grad_norm": 1.4854535693793278, "learning_rate": 6.723117043311971e-06, "loss": 0.7689, "step": 5065 }, { "epoch": 0.41, "grad_norm": 1.5700939019860534, "learning_rate": 6.7218973234895805e-06, "loss": 0.7494, "step": 5066 }, { "epoch": 0.41, "grad_norm": 1.6870096465506468, "learning_rate": 6.7206774874007415e-06, "loss": 0.6651, "step": 5067 }, { "epoch": 0.41, "grad_norm": 1.4582543455797539, "learning_rate": 6.719457535127827e-06, "loss": 0.7589, "step": 5068 }, { "epoch": 0.41, "grad_norm": 1.4964858133449725, "learning_rate": 6.718237466753206e-06, "loss": 0.7601, "step": 5069 }, { "epoch": 0.41, "grad_norm": 1.440818931827845, "learning_rate": 6.717017282359263e-06, "loss": 0.7123, "step": 5070 }, { "epoch": 0.41, "grad_norm": 0.8923344600308764, "learning_rate": 6.715796982028386e-06, "loss": 1.1276, "step": 5071 }, { "epoch": 0.41, "grad_norm": 0.8495259493276508, "learning_rate": 6.714576565842976e-06, "loss": 1.1155, "step": 5072 }, { "epoch": 0.41, "grad_norm": 0.7779454828982353, "learning_rate": 6.713356033885434e-06, "loss": 1.073, "step": 5073 }, { "epoch": 0.41, "grad_norm": 1.5374235379354897, "learning_rate": 6.7121353862381746e-06, "loss": 0.8581, "step": 5074 }, { "epoch": 0.41, "grad_norm": 1.4550850967900268, "learning_rate": 6.710914622983619e-06, "loss": 0.7091, "step": 5075 }, { "epoch": 0.41, "grad_norm": 0.977205876697971, "learning_rate": 6.7096937442041956e-06, "loss": 1.1335, "step": 5076 }, { "epoch": 0.41, "grad_norm": 1.3945927607609554, "learning_rate": 6.708472749982341e-06, "loss": 0.7045, "step": 5077 }, { "epoch": 0.41, "grad_norm": 1.5620170742633996, "learning_rate": 6.707251640400501e-06, "loss": 0.8065, "step": 5078 }, { "epoch": 0.41, "grad_norm": 1.4289914203518963, "learning_rate": 6.706030415541125e-06, "loss": 0.7928, "step": 5079 }, { "epoch": 0.41, "grad_norm": 2.5835502971605004, "learning_rate": 6.704809075486674e-06, "loss": 0.7148, "step": 5080 }, { "epoch": 0.41, "grad_norm": 1.3973673551905534, "learning_rate": 6.703587620319616e-06, "loss": 0.7474, "step": 5081 }, { "epoch": 0.41, "grad_norm": 1.5370550876446254, "learning_rate": 6.702366050122428e-06, "loss": 0.8092, "step": 5082 }, { "epoch": 0.41, "grad_norm": 1.5290032699688738, "learning_rate": 6.701144364977591e-06, "loss": 0.7435, "step": 5083 }, { "epoch": 0.41, "grad_norm": 1.511879756661044, "learning_rate": 6.6999225649675955e-06, "loss": 0.7817, "step": 5084 }, { "epoch": 0.41, "grad_norm": 1.4754122675053818, "learning_rate": 6.698700650174943e-06, "loss": 0.8127, "step": 5085 }, { "epoch": 0.41, "grad_norm": 1.489005543125916, "learning_rate": 6.697478620682137e-06, "loss": 0.8221, "step": 5086 }, { "epoch": 0.41, "grad_norm": 1.4169914567587139, "learning_rate": 6.696256476571692e-06, "loss": 0.6785, "step": 5087 }, { "epoch": 0.41, "grad_norm": 1.4480620542066576, "learning_rate": 6.695034217926133e-06, "loss": 0.7187, "step": 5088 }, { "epoch": 0.41, "grad_norm": 1.4307090609999533, "learning_rate": 6.693811844827987e-06, "loss": 0.7619, "step": 5089 }, { "epoch": 0.41, "grad_norm": 1.5500147102983772, "learning_rate": 6.692589357359792e-06, "loss": 0.7921, "step": 5090 }, { "epoch": 0.41, "grad_norm": 1.4627049078015082, "learning_rate": 6.691366755604093e-06, "loss": 0.7047, "step": 5091 }, { "epoch": 0.41, "grad_norm": 1.4636887343590725, "learning_rate": 6.690144039643443e-06, "loss": 0.8126, "step": 5092 }, { "epoch": 0.41, "grad_norm": 1.4962124938115384, "learning_rate": 6.6889212095604036e-06, "loss": 0.8002, "step": 5093 }, { "epoch": 0.41, "grad_norm": 1.567767800125546, "learning_rate": 6.687698265437542e-06, "loss": 0.7422, "step": 5094 }, { "epoch": 0.41, "grad_norm": 1.6489763409068139, "learning_rate": 6.686475207357435e-06, "loss": 0.7398, "step": 5095 }, { "epoch": 0.41, "grad_norm": 1.5145787523676202, "learning_rate": 6.6852520354026625e-06, "loss": 0.7133, "step": 5096 }, { "epoch": 0.41, "grad_norm": 0.955426655707722, "learning_rate": 6.684028749655822e-06, "loss": 1.1258, "step": 5097 }, { "epoch": 0.41, "grad_norm": 0.8799552242984676, "learning_rate": 6.682805350199508e-06, "loss": 1.1338, "step": 5098 }, { "epoch": 0.41, "grad_norm": 1.5092073210863823, "learning_rate": 6.681581837116331e-06, "loss": 0.7238, "step": 5099 }, { "epoch": 0.41, "grad_norm": 0.772200211048803, "learning_rate": 6.680358210488902e-06, "loss": 1.1036, "step": 5100 }, { "epoch": 0.41, "grad_norm": 0.8424215179596961, "learning_rate": 6.679134470399843e-06, "loss": 1.077, "step": 5101 }, { "epoch": 0.41, "grad_norm": 1.5138503210315437, "learning_rate": 6.677910616931787e-06, "loss": 0.7778, "step": 5102 }, { "epoch": 0.41, "grad_norm": 1.5536808988422206, "learning_rate": 6.676686650167367e-06, "loss": 0.7322, "step": 5103 }, { "epoch": 0.41, "grad_norm": 0.8506095396258891, "learning_rate": 6.6754625701892325e-06, "loss": 1.1135, "step": 5104 }, { "epoch": 0.41, "grad_norm": 1.5438816161775908, "learning_rate": 6.674238377080034e-06, "loss": 0.791, "step": 5105 }, { "epoch": 0.41, "grad_norm": 1.4251306881380594, "learning_rate": 6.67301407092243e-06, "loss": 0.8141, "step": 5106 }, { "epoch": 0.41, "grad_norm": 1.5255984603090358, "learning_rate": 6.671789651799092e-06, "loss": 0.7204, "step": 5107 }, { "epoch": 0.41, "grad_norm": 1.572330300068239, "learning_rate": 6.670565119792694e-06, "loss": 0.7839, "step": 5108 }, { "epoch": 0.41, "grad_norm": 1.5161347506869107, "learning_rate": 6.669340474985918e-06, "loss": 0.8893, "step": 5109 }, { "epoch": 0.41, "grad_norm": 1.6198426630045863, "learning_rate": 6.6681157174614575e-06, "loss": 0.7937, "step": 5110 }, { "epoch": 0.41, "grad_norm": 1.5229523205137783, "learning_rate": 6.666890847302008e-06, "loss": 0.7341, "step": 5111 }, { "epoch": 0.41, "grad_norm": 1.509117549877118, "learning_rate": 6.665665864590277e-06, "loss": 0.7757, "step": 5112 }, { "epoch": 0.41, "grad_norm": 1.9794985704911618, "learning_rate": 6.664440769408977e-06, "loss": 0.775, "step": 5113 }, { "epoch": 0.41, "grad_norm": 1.6023062656879932, "learning_rate": 6.6632155618408335e-06, "loss": 0.8046, "step": 5114 }, { "epoch": 0.41, "grad_norm": 1.4492366153321568, "learning_rate": 6.66199024196857e-06, "loss": 0.8473, "step": 5115 }, { "epoch": 0.41, "grad_norm": 1.5590187531630764, "learning_rate": 6.6607648098749244e-06, "loss": 0.8577, "step": 5116 }, { "epoch": 0.41, "grad_norm": 1.6070371460118378, "learning_rate": 6.659539265642643e-06, "loss": 0.8596, "step": 5117 }, { "epoch": 0.41, "grad_norm": 1.695580628428527, "learning_rate": 6.658313609354474e-06, "loss": 0.8882, "step": 5118 }, { "epoch": 0.41, "grad_norm": 1.408800420256203, "learning_rate": 6.657087841093179e-06, "loss": 0.7806, "step": 5119 }, { "epoch": 0.41, "grad_norm": 1.4218064884357873, "learning_rate": 6.655861960941524e-06, "loss": 0.8707, "step": 5120 }, { "epoch": 0.41, "grad_norm": 1.5146069956536823, "learning_rate": 6.654635968982284e-06, "loss": 0.8131, "step": 5121 }, { "epoch": 0.41, "grad_norm": 1.4824627392713794, "learning_rate": 6.653409865298238e-06, "loss": 0.7709, "step": 5122 }, { "epoch": 0.41, "grad_norm": 1.584130625118169, "learning_rate": 6.652183649972177e-06, "loss": 0.8291, "step": 5123 }, { "epoch": 0.41, "grad_norm": 1.593476420095539, "learning_rate": 6.6509573230868995e-06, "loss": 0.7683, "step": 5124 }, { "epoch": 0.41, "grad_norm": 1.558735792463535, "learning_rate": 6.6497308847252074e-06, "loss": 0.7646, "step": 5125 }, { "epoch": 0.41, "grad_norm": 1.420576389298931, "learning_rate": 6.648504334969914e-06, "loss": 0.7846, "step": 5126 }, { "epoch": 0.41, "grad_norm": 0.8893763894071135, "learning_rate": 6.647277673903838e-06, "loss": 1.1154, "step": 5127 }, { "epoch": 0.41, "grad_norm": 1.4724727808839415, "learning_rate": 6.646050901609806e-06, "loss": 0.6825, "step": 5128 }, { "epoch": 0.41, "grad_norm": 1.4581446933854199, "learning_rate": 6.644824018170655e-06, "loss": 0.8903, "step": 5129 }, { "epoch": 0.41, "grad_norm": 1.4670498149691078, "learning_rate": 6.643597023669224e-06, "loss": 0.7702, "step": 5130 }, { "epoch": 0.41, "grad_norm": 1.5645346601026624, "learning_rate": 6.642369918188365e-06, "loss": 0.6753, "step": 5131 }, { "epoch": 0.41, "grad_norm": 1.7769710743553795, "learning_rate": 6.641142701810932e-06, "loss": 0.7957, "step": 5132 }, { "epoch": 0.41, "grad_norm": 1.5494821060273092, "learning_rate": 6.639915374619793e-06, "loss": 0.756, "step": 5133 }, { "epoch": 0.41, "grad_norm": 1.5338875636229348, "learning_rate": 6.638687936697816e-06, "loss": 0.7786, "step": 5134 }, { "epoch": 0.41, "grad_norm": 1.4983787649310027, "learning_rate": 6.637460388127882e-06, "loss": 0.7771, "step": 5135 }, { "epoch": 0.41, "grad_norm": 0.8539983648978486, "learning_rate": 6.6362327289928795e-06, "loss": 1.104, "step": 5136 }, { "epoch": 0.41, "grad_norm": 0.840596336146245, "learning_rate": 6.635004959375701e-06, "loss": 1.0664, "step": 5137 }, { "epoch": 0.41, "grad_norm": 1.3252389216348521, "learning_rate": 6.6337770793592515e-06, "loss": 0.7437, "step": 5138 }, { "epoch": 0.41, "grad_norm": 1.5390346101157333, "learning_rate": 6.632549089026435e-06, "loss": 0.7795, "step": 5139 }, { "epoch": 0.41, "grad_norm": 1.6717524875268783, "learning_rate": 6.631320988460172e-06, "loss": 0.7855, "step": 5140 }, { "epoch": 0.41, "grad_norm": 1.4760544431432925, "learning_rate": 6.6300927777433856e-06, "loss": 0.8559, "step": 5141 }, { "epoch": 0.41, "grad_norm": 1.5653357090723692, "learning_rate": 6.6288644569590065e-06, "loss": 0.764, "step": 5142 }, { "epoch": 0.41, "grad_norm": 1.4308848622185135, "learning_rate": 6.627636026189975e-06, "loss": 0.7637, "step": 5143 }, { "epoch": 0.41, "grad_norm": 1.3685992404361451, "learning_rate": 6.6264074855192385e-06, "loss": 0.7791, "step": 5144 }, { "epoch": 0.41, "grad_norm": 1.4991518368616403, "learning_rate": 6.625178835029749e-06, "loss": 0.757, "step": 5145 }, { "epoch": 0.41, "grad_norm": 0.9641363787914339, "learning_rate": 6.623950074804468e-06, "loss": 1.0753, "step": 5146 }, { "epoch": 0.41, "grad_norm": 1.456606393289079, "learning_rate": 6.622721204926363e-06, "loss": 0.8207, "step": 5147 }, { "epoch": 0.41, "grad_norm": 1.4734584338466337, "learning_rate": 6.6214922254784145e-06, "loss": 0.765, "step": 5148 }, { "epoch": 0.41, "grad_norm": 0.7983631399469822, "learning_rate": 6.620263136543602e-06, "loss": 1.0865, "step": 5149 }, { "epoch": 0.41, "grad_norm": 1.6012191451540079, "learning_rate": 6.619033938204917e-06, "loss": 0.7889, "step": 5150 }, { "epoch": 0.41, "grad_norm": 1.602507798884412, "learning_rate": 6.617804630545359e-06, "loss": 0.7615, "step": 5151 }, { "epoch": 0.41, "grad_norm": 1.4327616304972914, "learning_rate": 6.616575213647932e-06, "loss": 0.7766, "step": 5152 }, { "epoch": 0.41, "grad_norm": 1.5601012898187485, "learning_rate": 6.615345687595652e-06, "loss": 0.7794, "step": 5153 }, { "epoch": 0.41, "grad_norm": 0.8458255060409743, "learning_rate": 6.614116052471537e-06, "loss": 1.0786, "step": 5154 }, { "epoch": 0.41, "grad_norm": 1.4700300341889976, "learning_rate": 6.612886308358615e-06, "loss": 0.7635, "step": 5155 }, { "epoch": 0.41, "grad_norm": 1.551623399036214, "learning_rate": 6.61165645533992e-06, "loss": 0.7971, "step": 5156 }, { "epoch": 0.41, "grad_norm": 1.5364775948795695, "learning_rate": 6.610426493498496e-06, "loss": 0.8218, "step": 5157 }, { "epoch": 0.41, "grad_norm": 1.4052003778608038, "learning_rate": 6.609196422917394e-06, "loss": 0.7381, "step": 5158 }, { "epoch": 0.41, "grad_norm": 1.556766391257282, "learning_rate": 6.607966243679669e-06, "loss": 0.7958, "step": 5159 }, { "epoch": 0.41, "grad_norm": 1.5796470453708265, "learning_rate": 6.606735955868387e-06, "loss": 0.7808, "step": 5160 }, { "epoch": 0.41, "grad_norm": 1.5797931148574027, "learning_rate": 6.605505559566619e-06, "loss": 0.8879, "step": 5161 }, { "epoch": 0.41, "grad_norm": 1.4268791314212117, "learning_rate": 6.6042750548574455e-06, "loss": 0.7412, "step": 5162 }, { "epoch": 0.41, "grad_norm": 1.4746829057806619, "learning_rate": 6.6030444418239495e-06, "loss": 0.7214, "step": 5163 }, { "epoch": 0.41, "grad_norm": 1.4375031428668945, "learning_rate": 6.601813720549229e-06, "loss": 0.7641, "step": 5164 }, { "epoch": 0.41, "grad_norm": 1.511179755174562, "learning_rate": 6.600582891116383e-06, "loss": 0.7948, "step": 5165 }, { "epoch": 0.41, "grad_norm": 1.5348156923438556, "learning_rate": 6.599351953608519e-06, "loss": 0.8116, "step": 5166 }, { "epoch": 0.41, "grad_norm": 1.5039512406587, "learning_rate": 6.598120908108756e-06, "loss": 0.7429, "step": 5167 }, { "epoch": 0.41, "grad_norm": 1.5924894209712466, "learning_rate": 6.596889754700213e-06, "loss": 0.8116, "step": 5168 }, { "epoch": 0.41, "grad_norm": 1.5251115803419217, "learning_rate": 6.595658493466024e-06, "loss": 0.7482, "step": 5169 }, { "epoch": 0.41, "grad_norm": 1.5849991948936215, "learning_rate": 6.594427124489325e-06, "loss": 0.8505, "step": 5170 }, { "epoch": 0.41, "grad_norm": 1.417780668352426, "learning_rate": 6.5931956478532585e-06, "loss": 0.7258, "step": 5171 }, { "epoch": 0.41, "grad_norm": 0.8703729671655517, "learning_rate": 6.591964063640981e-06, "loss": 1.0598, "step": 5172 }, { "epoch": 0.42, "grad_norm": 1.3890323111777436, "learning_rate": 6.590732371935649e-06, "loss": 0.7876, "step": 5173 }, { "epoch": 0.42, "grad_norm": 0.8174734888178268, "learning_rate": 6.589500572820428e-06, "loss": 1.0945, "step": 5174 }, { "epoch": 0.42, "grad_norm": 0.8132054278186829, "learning_rate": 6.5882686663784955e-06, "loss": 1.0857, "step": 5175 }, { "epoch": 0.42, "grad_norm": 1.4553071516205818, "learning_rate": 6.587036652693031e-06, "loss": 0.832, "step": 5176 }, { "epoch": 0.42, "grad_norm": 1.6029592563547859, "learning_rate": 6.585804531847223e-06, "loss": 0.804, "step": 5177 }, { "epoch": 0.42, "grad_norm": 1.5450025744395206, "learning_rate": 6.584572303924266e-06, "loss": 0.7707, "step": 5178 }, { "epoch": 0.42, "grad_norm": 1.3923966540439565, "learning_rate": 6.583339969007364e-06, "loss": 0.7885, "step": 5179 }, { "epoch": 0.42, "grad_norm": 1.5078869105885953, "learning_rate": 6.582107527179726e-06, "loss": 0.7597, "step": 5180 }, { "epoch": 0.42, "grad_norm": 0.896617557544874, "learning_rate": 6.58087497852457e-06, "loss": 1.0477, "step": 5181 }, { "epoch": 0.42, "grad_norm": 1.8249240460012384, "learning_rate": 6.579642323125123e-06, "loss": 0.878, "step": 5182 }, { "epoch": 0.42, "grad_norm": 1.5048926217001046, "learning_rate": 6.5784095610646115e-06, "loss": 0.7826, "step": 5183 }, { "epoch": 0.42, "grad_norm": 1.5510802406580388, "learning_rate": 6.5771766924262795e-06, "loss": 0.8751, "step": 5184 }, { "epoch": 0.42, "grad_norm": 1.6257504568888683, "learning_rate": 6.575943717293368e-06, "loss": 0.8428, "step": 5185 }, { "epoch": 0.42, "grad_norm": 1.5806136248560991, "learning_rate": 6.574710635749134e-06, "loss": 0.8112, "step": 5186 }, { "epoch": 0.42, "grad_norm": 1.4005448024960188, "learning_rate": 6.573477447876838e-06, "loss": 0.7214, "step": 5187 }, { "epoch": 0.42, "grad_norm": 1.4501427343156157, "learning_rate": 6.572244153759747e-06, "loss": 0.7962, "step": 5188 }, { "epoch": 0.42, "grad_norm": 1.4383394430649246, "learning_rate": 6.571010753481135e-06, "loss": 0.7874, "step": 5189 }, { "epoch": 0.42, "grad_norm": 1.39161976603999, "learning_rate": 6.569777247124285e-06, "loss": 0.7453, "step": 5190 }, { "epoch": 0.42, "grad_norm": 1.4878496536596488, "learning_rate": 6.568543634772485e-06, "loss": 0.8082, "step": 5191 }, { "epoch": 0.42, "grad_norm": 1.4090363984372747, "learning_rate": 6.567309916509033e-06, "loss": 0.7249, "step": 5192 }, { "epoch": 0.42, "grad_norm": 0.9092238473112751, "learning_rate": 6.5660760924172304e-06, "loss": 1.1009, "step": 5193 }, { "epoch": 0.42, "grad_norm": 1.9281437165493032, "learning_rate": 6.56484216258039e-06, "loss": 0.7729, "step": 5194 }, { "epoch": 0.42, "grad_norm": 1.4644267617008637, "learning_rate": 6.563608127081827e-06, "loss": 0.8013, "step": 5195 }, { "epoch": 0.42, "grad_norm": 1.5283021595307977, "learning_rate": 6.562373986004871e-06, "loss": 0.7289, "step": 5196 }, { "epoch": 0.42, "grad_norm": 1.7148511513980296, "learning_rate": 6.5611397394328465e-06, "loss": 0.771, "step": 5197 }, { "epoch": 0.42, "grad_norm": 1.5959857301858007, "learning_rate": 6.5599053874491e-06, "loss": 0.7692, "step": 5198 }, { "epoch": 0.42, "grad_norm": 1.437633961481371, "learning_rate": 6.558670930136975e-06, "loss": 0.6625, "step": 5199 }, { "epoch": 0.42, "grad_norm": 1.5109525411461984, "learning_rate": 6.557436367579823e-06, "loss": 0.8966, "step": 5200 }, { "epoch": 0.42, "grad_norm": 1.485055052782625, "learning_rate": 6.556201699861008e-06, "loss": 0.758, "step": 5201 }, { "epoch": 0.42, "grad_norm": 1.349609233105596, "learning_rate": 6.554966927063895e-06, "loss": 0.6523, "step": 5202 }, { "epoch": 0.42, "grad_norm": 1.5062272927607419, "learning_rate": 6.55373204927186e-06, "loss": 0.7185, "step": 5203 }, { "epoch": 0.42, "grad_norm": 1.3966728842588125, "learning_rate": 6.552497066568282e-06, "loss": 0.7576, "step": 5204 }, { "epoch": 0.42, "grad_norm": 1.499573535320154, "learning_rate": 6.551261979036554e-06, "loss": 0.6838, "step": 5205 }, { "epoch": 0.42, "grad_norm": 1.4620457172275592, "learning_rate": 6.55002678676007e-06, "loss": 0.7419, "step": 5206 }, { "epoch": 0.42, "grad_norm": 1.48944356128856, "learning_rate": 6.548791489822232e-06, "loss": 0.8102, "step": 5207 }, { "epoch": 0.42, "grad_norm": 1.436678881564857, "learning_rate": 6.547556088306453e-06, "loss": 0.7478, "step": 5208 }, { "epoch": 0.42, "grad_norm": 1.4375720239595842, "learning_rate": 6.546320582296145e-06, "loss": 0.6943, "step": 5209 }, { "epoch": 0.42, "grad_norm": 1.4586176293922488, "learning_rate": 6.545084971874738e-06, "loss": 0.7885, "step": 5210 }, { "epoch": 0.42, "grad_norm": 1.5794478989789005, "learning_rate": 6.543849257125661e-06, "loss": 0.7313, "step": 5211 }, { "epoch": 0.42, "grad_norm": 1.528726653900992, "learning_rate": 6.542613438132349e-06, "loss": 0.8158, "step": 5212 }, { "epoch": 0.42, "grad_norm": 1.496523166353207, "learning_rate": 6.541377514978253e-06, "loss": 0.6831, "step": 5213 }, { "epoch": 0.42, "grad_norm": 0.9874270594359373, "learning_rate": 6.54014148774682e-06, "loss": 1.0966, "step": 5214 }, { "epoch": 0.42, "grad_norm": 0.8567491297679107, "learning_rate": 6.538905356521515e-06, "loss": 1.1355, "step": 5215 }, { "epoch": 0.42, "grad_norm": 1.599359279026421, "learning_rate": 6.537669121385801e-06, "loss": 0.8165, "step": 5216 }, { "epoch": 0.42, "grad_norm": 1.5347982026986946, "learning_rate": 6.53643278242315e-06, "loss": 0.7704, "step": 5217 }, { "epoch": 0.42, "grad_norm": 1.5925121857957893, "learning_rate": 6.535196339717046e-06, "loss": 0.8009, "step": 5218 }, { "epoch": 0.42, "grad_norm": 1.4858693246143697, "learning_rate": 6.533959793350974e-06, "loss": 0.8262, "step": 5219 }, { "epoch": 0.42, "grad_norm": 1.4574767012029153, "learning_rate": 6.532723143408428e-06, "loss": 0.7158, "step": 5220 }, { "epoch": 0.42, "grad_norm": 1.1637011275501026, "learning_rate": 6.531486389972913e-06, "loss": 1.0776, "step": 5221 }, { "epoch": 0.42, "grad_norm": 1.550112350985245, "learning_rate": 6.530249533127932e-06, "loss": 0.7893, "step": 5222 }, { "epoch": 0.42, "grad_norm": 1.4967564947691119, "learning_rate": 6.5290125729570066e-06, "loss": 0.8214, "step": 5223 }, { "epoch": 0.42, "grad_norm": 1.4121715877769112, "learning_rate": 6.527775509543653e-06, "loss": 0.734, "step": 5224 }, { "epoch": 0.42, "grad_norm": 1.4261991779203285, "learning_rate": 6.526538342971406e-06, "loss": 0.7186, "step": 5225 }, { "epoch": 0.42, "grad_norm": 2.459626269826093, "learning_rate": 6.525301073323798e-06, "loss": 0.7568, "step": 5226 }, { "epoch": 0.42, "grad_norm": 0.8547828232623059, "learning_rate": 6.524063700684375e-06, "loss": 1.0745, "step": 5227 }, { "epoch": 0.42, "grad_norm": 1.4707010946419086, "learning_rate": 6.522826225136685e-06, "loss": 0.7694, "step": 5228 }, { "epoch": 0.42, "grad_norm": 1.4557131832517571, "learning_rate": 6.5215886467642855e-06, "loss": 0.6333, "step": 5229 }, { "epoch": 0.42, "grad_norm": 0.7712295070343915, "learning_rate": 6.520350965650742e-06, "loss": 1.0863, "step": 5230 }, { "epoch": 0.42, "grad_norm": 1.4369618043305292, "learning_rate": 6.519113181879624e-06, "loss": 0.7632, "step": 5231 }, { "epoch": 0.42, "grad_norm": 1.4049921404386776, "learning_rate": 6.517875295534511e-06, "loss": 0.8069, "step": 5232 }, { "epoch": 0.42, "grad_norm": 1.6985905692974774, "learning_rate": 6.5166373066989885e-06, "loss": 0.8274, "step": 5233 }, { "epoch": 0.42, "grad_norm": 1.6612872054521255, "learning_rate": 6.5153992154566445e-06, "loss": 0.794, "step": 5234 }, { "epoch": 0.42, "grad_norm": 1.559978684126204, "learning_rate": 6.514161021891082e-06, "loss": 0.7599, "step": 5235 }, { "epoch": 0.42, "grad_norm": 1.4999739183267309, "learning_rate": 6.512922726085904e-06, "loss": 0.7276, "step": 5236 }, { "epoch": 0.42, "grad_norm": 1.4849937991500837, "learning_rate": 6.511684328124725e-06, "loss": 0.7094, "step": 5237 }, { "epoch": 0.42, "grad_norm": 1.5279540910222877, "learning_rate": 6.510445828091164e-06, "loss": 0.7954, "step": 5238 }, { "epoch": 0.42, "grad_norm": 0.9951782115026421, "learning_rate": 6.509207226068845e-06, "loss": 1.1128, "step": 5239 }, { "epoch": 0.42, "grad_norm": 1.5104256185754006, "learning_rate": 6.507968522141405e-06, "loss": 0.7559, "step": 5240 }, { "epoch": 0.42, "grad_norm": 1.4310896263894965, "learning_rate": 6.50672971639248e-06, "loss": 0.771, "step": 5241 }, { "epoch": 0.42, "grad_norm": 1.5793364432785386, "learning_rate": 6.505490808905721e-06, "loss": 0.7956, "step": 5242 }, { "epoch": 0.42, "grad_norm": 0.806615855347152, "learning_rate": 6.50425179976478e-06, "loss": 1.0759, "step": 5243 }, { "epoch": 0.42, "grad_norm": 0.7742593467529842, "learning_rate": 6.5030126890533165e-06, "loss": 1.0867, "step": 5244 }, { "epoch": 0.42, "grad_norm": 0.8065194775254936, "learning_rate": 6.501773476855e-06, "loss": 1.0647, "step": 5245 }, { "epoch": 0.42, "grad_norm": 1.540288359073978, "learning_rate": 6.5005341632535045e-06, "loss": 0.7725, "step": 5246 }, { "epoch": 0.42, "grad_norm": 1.7789658303300508, "learning_rate": 6.499294748332512e-06, "loss": 0.7726, "step": 5247 }, { "epoch": 0.42, "grad_norm": 1.852017610816155, "learning_rate": 6.498055232175708e-06, "loss": 0.8574, "step": 5248 }, { "epoch": 0.42, "grad_norm": 1.487844482988714, "learning_rate": 6.496815614866792e-06, "loss": 0.7859, "step": 5249 }, { "epoch": 0.42, "grad_norm": 0.8634870110044907, "learning_rate": 6.49557589648946e-06, "loss": 1.1476, "step": 5250 }, { "epoch": 0.42, "grad_norm": 1.4670722260763087, "learning_rate": 6.4943360771274235e-06, "loss": 0.802, "step": 5251 }, { "epoch": 0.42, "grad_norm": 0.8219069665444414, "learning_rate": 6.4930961568644e-06, "loss": 1.087, "step": 5252 }, { "epoch": 0.42, "grad_norm": 1.5159137378009837, "learning_rate": 6.491856135784109e-06, "loss": 0.8721, "step": 5253 }, { "epoch": 0.42, "grad_norm": 1.4188785100072878, "learning_rate": 6.490616013970281e-06, "loss": 0.7953, "step": 5254 }, { "epoch": 0.42, "grad_norm": 1.4380403964902955, "learning_rate": 6.489375791506651e-06, "loss": 0.7783, "step": 5255 }, { "epoch": 0.42, "grad_norm": 0.8258157029777003, "learning_rate": 6.48813546847696e-06, "loss": 1.1079, "step": 5256 }, { "epoch": 0.42, "grad_norm": 1.4168873158216369, "learning_rate": 6.486895044964963e-06, "loss": 0.7948, "step": 5257 }, { "epoch": 0.42, "grad_norm": 1.5052840745076674, "learning_rate": 6.485654521054408e-06, "loss": 0.7526, "step": 5258 }, { "epoch": 0.42, "grad_norm": 1.5066756195793956, "learning_rate": 6.484413896829067e-06, "loss": 0.7546, "step": 5259 }, { "epoch": 0.42, "grad_norm": 1.6006447154078212, "learning_rate": 6.4831731723727035e-06, "loss": 0.7806, "step": 5260 }, { "epoch": 0.42, "grad_norm": 0.8630537367780872, "learning_rate": 6.481932347769097e-06, "loss": 1.0906, "step": 5261 }, { "epoch": 0.42, "grad_norm": 1.507401652754367, "learning_rate": 6.480691423102028e-06, "loss": 0.7909, "step": 5262 }, { "epoch": 0.42, "grad_norm": 1.4855505832547082, "learning_rate": 6.479450398455287e-06, "loss": 0.7958, "step": 5263 }, { "epoch": 0.42, "grad_norm": 1.4156618333042317, "learning_rate": 6.478209273912675e-06, "loss": 0.8299, "step": 5264 }, { "epoch": 0.42, "grad_norm": 1.5044052714540666, "learning_rate": 6.476968049557993e-06, "loss": 0.7844, "step": 5265 }, { "epoch": 0.42, "grad_norm": 1.481148259038955, "learning_rate": 6.475726725475049e-06, "loss": 0.7787, "step": 5266 }, { "epoch": 0.42, "grad_norm": 1.5599739993062285, "learning_rate": 6.474485301747663e-06, "loss": 0.7952, "step": 5267 }, { "epoch": 0.42, "grad_norm": 1.6091669508372386, "learning_rate": 6.473243778459657e-06, "loss": 0.8427, "step": 5268 }, { "epoch": 0.42, "grad_norm": 1.5044858875186025, "learning_rate": 6.472002155694863e-06, "loss": 0.7513, "step": 5269 }, { "epoch": 0.42, "grad_norm": 1.4282311467118167, "learning_rate": 6.470760433537116e-06, "loss": 0.6772, "step": 5270 }, { "epoch": 0.42, "grad_norm": 1.5739984277706165, "learning_rate": 6.469518612070265e-06, "loss": 0.8621, "step": 5271 }, { "epoch": 0.42, "grad_norm": 1.5096571256541809, "learning_rate": 6.468276691378155e-06, "loss": 0.7788, "step": 5272 }, { "epoch": 0.42, "grad_norm": 1.430132676171242, "learning_rate": 6.467034671544644e-06, "loss": 0.7634, "step": 5273 }, { "epoch": 0.42, "grad_norm": 1.5277401212199342, "learning_rate": 6.4657925526535995e-06, "loss": 0.8105, "step": 5274 }, { "epoch": 0.42, "grad_norm": 1.5373378947787077, "learning_rate": 6.464550334788888e-06, "loss": 0.8413, "step": 5275 }, { "epoch": 0.42, "grad_norm": 1.4523577237646559, "learning_rate": 6.463308018034391e-06, "loss": 0.8318, "step": 5276 }, { "epoch": 0.42, "grad_norm": 1.4948652324714429, "learning_rate": 6.46206560247399e-06, "loss": 0.8731, "step": 5277 }, { "epoch": 0.42, "grad_norm": 1.5845131775561614, "learning_rate": 6.460823088191577e-06, "loss": 0.8073, "step": 5278 }, { "epoch": 0.42, "grad_norm": 1.5395145556534044, "learning_rate": 6.4595804752710475e-06, "loss": 0.7508, "step": 5279 }, { "epoch": 0.42, "grad_norm": 1.4984431359385824, "learning_rate": 6.458337763796306e-06, "loss": 0.8454, "step": 5280 }, { "epoch": 0.42, "grad_norm": 1.8819296806898511, "learning_rate": 6.457094953851266e-06, "loss": 0.8309, "step": 5281 }, { "epoch": 0.42, "grad_norm": 1.5214378575283345, "learning_rate": 6.455852045519843e-06, "loss": 0.7265, "step": 5282 }, { "epoch": 0.42, "grad_norm": 0.8903688142471105, "learning_rate": 6.454609038885959e-06, "loss": 1.1128, "step": 5283 }, { "epoch": 0.42, "grad_norm": 0.8235192477404827, "learning_rate": 6.453365934033548e-06, "loss": 1.1065, "step": 5284 }, { "epoch": 0.42, "grad_norm": 1.4719376572840779, "learning_rate": 6.452122731046544e-06, "loss": 0.7281, "step": 5285 }, { "epoch": 0.42, "grad_norm": 1.4953144851234383, "learning_rate": 6.450879430008895e-06, "loss": 0.7349, "step": 5286 }, { "epoch": 0.42, "grad_norm": 1.7378750273183305, "learning_rate": 6.449636031004548e-06, "loss": 0.8772, "step": 5287 }, { "epoch": 0.42, "grad_norm": 1.5683923217629299, "learning_rate": 6.4483925341174625e-06, "loss": 0.854, "step": 5288 }, { "epoch": 0.42, "grad_norm": 1.432686483084503, "learning_rate": 6.4471489394316e-06, "loss": 0.7757, "step": 5289 }, { "epoch": 0.42, "grad_norm": 1.451056603739025, "learning_rate": 6.4459052470309324e-06, "loss": 0.7772, "step": 5290 }, { "epoch": 0.42, "grad_norm": 0.90596064847353, "learning_rate": 6.444661456999435e-06, "loss": 1.0833, "step": 5291 }, { "epoch": 0.42, "grad_norm": 0.8609914777939631, "learning_rate": 6.443417569421093e-06, "loss": 1.1188, "step": 5292 }, { "epoch": 0.42, "grad_norm": 1.6317522249117764, "learning_rate": 6.442173584379898e-06, "loss": 0.8119, "step": 5293 }, { "epoch": 0.42, "grad_norm": 1.492164679637194, "learning_rate": 6.440929501959844e-06, "loss": 0.7717, "step": 5294 }, { "epoch": 0.42, "grad_norm": 0.850892898296131, "learning_rate": 6.439685322244935e-06, "loss": 1.1042, "step": 5295 }, { "epoch": 0.42, "grad_norm": 1.5534527895717405, "learning_rate": 6.43844104531918e-06, "loss": 0.8116, "step": 5296 }, { "epoch": 0.42, "grad_norm": 1.5015576121800887, "learning_rate": 6.437196671266597e-06, "loss": 0.8203, "step": 5297 }, { "epoch": 0.43, "grad_norm": 1.4322675531428575, "learning_rate": 6.435952200171209e-06, "loss": 0.8335, "step": 5298 }, { "epoch": 0.43, "grad_norm": 1.49500522110894, "learning_rate": 6.434707632117046e-06, "loss": 0.6922, "step": 5299 }, { "epoch": 0.43, "grad_norm": 1.5008745865828224, "learning_rate": 6.4334629671881425e-06, "loss": 0.7739, "step": 5300 }, { "epoch": 0.43, "grad_norm": 1.4689707691327347, "learning_rate": 6.432218205468539e-06, "loss": 0.7873, "step": 5301 }, { "epoch": 0.43, "grad_norm": 1.5660354672698713, "learning_rate": 6.430973347042289e-06, "loss": 0.8623, "step": 5302 }, { "epoch": 0.43, "grad_norm": 1.4245259342070176, "learning_rate": 6.429728391993446e-06, "loss": 0.7679, "step": 5303 }, { "epoch": 0.43, "grad_norm": 1.6841374778575604, "learning_rate": 6.428483340406074e-06, "loss": 0.8218, "step": 5304 }, { "epoch": 0.43, "grad_norm": 1.5850225382158867, "learning_rate": 6.42723819236424e-06, "loss": 0.7831, "step": 5305 }, { "epoch": 0.43, "grad_norm": 1.4401851269816714, "learning_rate": 6.42599294795202e-06, "loss": 0.6947, "step": 5306 }, { "epoch": 0.43, "grad_norm": 1.5635028122293435, "learning_rate": 6.424747607253494e-06, "loss": 0.7772, "step": 5307 }, { "epoch": 0.43, "grad_norm": 1.0031186748345458, "learning_rate": 6.423502170352752e-06, "loss": 1.0475, "step": 5308 }, { "epoch": 0.43, "grad_norm": 0.8849167020330146, "learning_rate": 6.42225663733389e-06, "loss": 1.0783, "step": 5309 }, { "epoch": 0.43, "grad_norm": 1.74501474209221, "learning_rate": 6.4210110082810076e-06, "loss": 0.7476, "step": 5310 }, { "epoch": 0.43, "grad_norm": 1.6241108858250781, "learning_rate": 6.41976528327821e-06, "loss": 0.8459, "step": 5311 }, { "epoch": 0.43, "grad_norm": 1.4048208009988643, "learning_rate": 6.418519462409616e-06, "loss": 0.6686, "step": 5312 }, { "epoch": 0.43, "grad_norm": 1.4492779302498808, "learning_rate": 6.4172735457593435e-06, "loss": 0.7703, "step": 5313 }, { "epoch": 0.43, "grad_norm": 1.5057880756249267, "learning_rate": 6.41602753341152e-06, "loss": 0.702, "step": 5314 }, { "epoch": 0.43, "grad_norm": 1.407104835889254, "learning_rate": 6.414781425450282e-06, "loss": 0.7783, "step": 5315 }, { "epoch": 0.43, "grad_norm": 1.2280257485360622, "learning_rate": 6.413535221959765e-06, "loss": 1.096, "step": 5316 }, { "epoch": 0.43, "grad_norm": 1.5530811519591503, "learning_rate": 6.412288923024118e-06, "loss": 0.8153, "step": 5317 }, { "epoch": 0.43, "grad_norm": 1.6268409014393808, "learning_rate": 6.411042528727492e-06, "loss": 0.8463, "step": 5318 }, { "epoch": 0.43, "grad_norm": 1.0005156558194832, "learning_rate": 6.4097960391540505e-06, "loss": 1.0893, "step": 5319 }, { "epoch": 0.43, "grad_norm": 1.5189762256756192, "learning_rate": 6.408549454387954e-06, "loss": 0.7862, "step": 5320 }, { "epoch": 0.43, "grad_norm": 1.53639011463977, "learning_rate": 6.40730277451338e-06, "loss": 0.7521, "step": 5321 }, { "epoch": 0.43, "grad_norm": 1.5430263105159505, "learning_rate": 6.406055999614504e-06, "loss": 0.7904, "step": 5322 }, { "epoch": 0.43, "grad_norm": 1.5434220777723873, "learning_rate": 6.404809129775511e-06, "loss": 0.7888, "step": 5323 }, { "epoch": 0.43, "grad_norm": 1.871134837480756, "learning_rate": 6.403562165080594e-06, "loss": 0.7186, "step": 5324 }, { "epoch": 0.43, "grad_norm": 1.4452661384284828, "learning_rate": 6.4023151056139495e-06, "loss": 0.7594, "step": 5325 }, { "epoch": 0.43, "grad_norm": 1.1087797049967127, "learning_rate": 6.401067951459783e-06, "loss": 1.0993, "step": 5326 }, { "epoch": 0.43, "grad_norm": 1.0562632902191567, "learning_rate": 6.3998207027023056e-06, "loss": 1.1241, "step": 5327 }, { "epoch": 0.43, "grad_norm": 1.6405202212246943, "learning_rate": 6.398573359425732e-06, "loss": 0.8116, "step": 5328 }, { "epoch": 0.43, "grad_norm": 1.6237800864190006, "learning_rate": 6.397325921714288e-06, "loss": 0.8108, "step": 5329 }, { "epoch": 0.43, "grad_norm": 1.557928162185094, "learning_rate": 6.396078389652201e-06, "loss": 0.7979, "step": 5330 }, { "epoch": 0.43, "grad_norm": 1.486344372118869, "learning_rate": 6.394830763323711e-06, "loss": 0.6812, "step": 5331 }, { "epoch": 0.43, "grad_norm": 1.0423822030650176, "learning_rate": 6.393583042813058e-06, "loss": 1.0886, "step": 5332 }, { "epoch": 0.43, "grad_norm": 1.5066837348423356, "learning_rate": 6.392335228204489e-06, "loss": 0.7758, "step": 5333 }, { "epoch": 0.43, "grad_norm": 1.500828606972683, "learning_rate": 6.391087319582264e-06, "loss": 0.742, "step": 5334 }, { "epoch": 0.43, "grad_norm": 0.9480205099603639, "learning_rate": 6.389839317030642e-06, "loss": 1.0778, "step": 5335 }, { "epoch": 0.43, "grad_norm": 0.8433307176323245, "learning_rate": 6.388591220633891e-06, "loss": 1.0834, "step": 5336 }, { "epoch": 0.43, "grad_norm": 1.5011799882531291, "learning_rate": 6.387343030476285e-06, "loss": 0.807, "step": 5337 }, { "epoch": 0.43, "grad_norm": 1.4303518314255452, "learning_rate": 6.386094746642105e-06, "loss": 0.7564, "step": 5338 }, { "epoch": 0.43, "grad_norm": 1.5741579910284678, "learning_rate": 6.3848463692156396e-06, "loss": 0.8296, "step": 5339 }, { "epoch": 0.43, "grad_norm": 1.4704493185583964, "learning_rate": 6.383597898281179e-06, "loss": 0.807, "step": 5340 }, { "epoch": 0.43, "grad_norm": 1.5783176542888324, "learning_rate": 6.382349333923026e-06, "loss": 0.8305, "step": 5341 }, { "epoch": 0.43, "grad_norm": 1.5145323393812957, "learning_rate": 6.3811006762254845e-06, "loss": 0.8426, "step": 5342 }, { "epoch": 0.43, "grad_norm": 1.5384758390645246, "learning_rate": 6.379851925272867e-06, "loss": 0.7776, "step": 5343 }, { "epoch": 0.43, "grad_norm": 1.6005859975629086, "learning_rate": 6.3786030811494935e-06, "loss": 0.7349, "step": 5344 }, { "epoch": 0.43, "grad_norm": 1.5708403110485372, "learning_rate": 6.377354143939686e-06, "loss": 0.8812, "step": 5345 }, { "epoch": 0.43, "grad_norm": 1.4941344981749325, "learning_rate": 6.376105113727778e-06, "loss": 0.7335, "step": 5346 }, { "epoch": 0.43, "grad_norm": 1.4040914038721555, "learning_rate": 6.374855990598106e-06, "loss": 0.8081, "step": 5347 }, { "epoch": 0.43, "grad_norm": 1.3906902034589184, "learning_rate": 6.3736067746350135e-06, "loss": 0.7892, "step": 5348 }, { "epoch": 0.43, "grad_norm": 1.5533349467866688, "learning_rate": 6.372357465922851e-06, "loss": 0.656, "step": 5349 }, { "epoch": 0.43, "grad_norm": 1.5143468171940913, "learning_rate": 6.371108064545974e-06, "loss": 0.7623, "step": 5350 }, { "epoch": 0.43, "grad_norm": 1.5109184872793924, "learning_rate": 6.369858570588745e-06, "loss": 0.8606, "step": 5351 }, { "epoch": 0.43, "grad_norm": 1.4834064476914546, "learning_rate": 6.368608984135534e-06, "loss": 0.7124, "step": 5352 }, { "epoch": 0.43, "grad_norm": 1.360531245082571, "learning_rate": 6.367359305270714e-06, "loss": 1.0811, "step": 5353 }, { "epoch": 0.43, "grad_norm": 1.469737752250483, "learning_rate": 6.366109534078667e-06, "loss": 0.7693, "step": 5354 }, { "epoch": 0.43, "grad_norm": 1.492891086326922, "learning_rate": 6.36485967064378e-06, "loss": 0.739, "step": 5355 }, { "epoch": 0.43, "grad_norm": 1.5032027491740074, "learning_rate": 6.363609715050449e-06, "loss": 0.7538, "step": 5356 }, { "epoch": 0.43, "grad_norm": 1.5648596322115182, "learning_rate": 6.36235966738307e-06, "loss": 0.7838, "step": 5357 }, { "epoch": 0.43, "grad_norm": 1.4848532760086242, "learning_rate": 6.361109527726052e-06, "loss": 0.7417, "step": 5358 }, { "epoch": 0.43, "grad_norm": 1.5640871417333742, "learning_rate": 6.3598592961638065e-06, "loss": 0.8714, "step": 5359 }, { "epoch": 0.43, "grad_norm": 1.4973083032988224, "learning_rate": 6.358608972780752e-06, "loss": 0.8038, "step": 5360 }, { "epoch": 0.43, "grad_norm": 0.8094706590089262, "learning_rate": 6.3573585576613115e-06, "loss": 1.1199, "step": 5361 }, { "epoch": 0.43, "grad_norm": 1.490765376706642, "learning_rate": 6.356108050889918e-06, "loss": 0.7659, "step": 5362 }, { "epoch": 0.43, "grad_norm": 1.5471169950442245, "learning_rate": 6.354857452551009e-06, "loss": 0.7897, "step": 5363 }, { "epoch": 0.43, "grad_norm": 1.428211377421227, "learning_rate": 6.353606762729025e-06, "loss": 0.7642, "step": 5364 }, { "epoch": 0.43, "grad_norm": 0.8250701724102718, "learning_rate": 6.35235598150842e-06, "loss": 1.1082, "step": 5365 }, { "epoch": 0.43, "grad_norm": 0.8869276971802629, "learning_rate": 6.351105108973644e-06, "loss": 1.1015, "step": 5366 }, { "epoch": 0.43, "grad_norm": 1.5247901050010868, "learning_rate": 6.349854145209162e-06, "loss": 0.8135, "step": 5367 }, { "epoch": 0.43, "grad_norm": 1.4789837187161965, "learning_rate": 6.348603090299442e-06, "loss": 0.8451, "step": 5368 }, { "epoch": 0.43, "grad_norm": 0.8155726006074527, "learning_rate": 6.347351944328958e-06, "loss": 1.1285, "step": 5369 }, { "epoch": 0.43, "grad_norm": 1.7652460094477846, "learning_rate": 6.346100707382189e-06, "loss": 0.7804, "step": 5370 }, { "epoch": 0.43, "grad_norm": 1.422822455002983, "learning_rate": 6.344849379543623e-06, "loss": 0.7176, "step": 5371 }, { "epoch": 0.43, "grad_norm": 1.4312015242790952, "learning_rate": 6.3435979608977515e-06, "loss": 0.8002, "step": 5372 }, { "epoch": 0.43, "grad_norm": 0.8722686912644217, "learning_rate": 6.342346451529073e-06, "loss": 1.1037, "step": 5373 }, { "epoch": 0.43, "grad_norm": 1.4678717194011006, "learning_rate": 6.341094851522093e-06, "loss": 0.7994, "step": 5374 }, { "epoch": 0.43, "grad_norm": 1.5052718083893966, "learning_rate": 6.339843160961321e-06, "loss": 0.8363, "step": 5375 }, { "epoch": 0.43, "grad_norm": 1.8808169447180074, "learning_rate": 6.338591379931277e-06, "loss": 0.7709, "step": 5376 }, { "epoch": 0.43, "grad_norm": 1.6200282130665045, "learning_rate": 6.337339508516481e-06, "loss": 0.864, "step": 5377 }, { "epoch": 0.43, "grad_norm": 1.418687446531194, "learning_rate": 6.336087546801464e-06, "loss": 0.7275, "step": 5378 }, { "epoch": 0.43, "grad_norm": 1.4966220671446786, "learning_rate": 6.334835494870759e-06, "loss": 0.7685, "step": 5379 }, { "epoch": 0.43, "grad_norm": 1.5859332949647846, "learning_rate": 6.33358335280891e-06, "loss": 0.7941, "step": 5380 }, { "epoch": 0.43, "grad_norm": 1.700895736693925, "learning_rate": 6.332331120700465e-06, "loss": 0.7966, "step": 5381 }, { "epoch": 0.43, "grad_norm": 2.4063372006106443, "learning_rate": 6.331078798629975e-06, "loss": 0.8023, "step": 5382 }, { "epoch": 0.43, "grad_norm": 1.969135685469333, "learning_rate": 6.329826386682e-06, "loss": 0.8289, "step": 5383 }, { "epoch": 0.43, "grad_norm": 1.655925897083058, "learning_rate": 6.328573884941107e-06, "loss": 0.7308, "step": 5384 }, { "epoch": 0.43, "grad_norm": 1.5208108060122223, "learning_rate": 6.327321293491868e-06, "loss": 0.8282, "step": 5385 }, { "epoch": 0.43, "grad_norm": 1.6485806240675949, "learning_rate": 6.326068612418859e-06, "loss": 0.8586, "step": 5386 }, { "epoch": 0.43, "grad_norm": 1.5968333937978891, "learning_rate": 6.324815841806668e-06, "loss": 0.7899, "step": 5387 }, { "epoch": 0.43, "grad_norm": 1.5334832002052323, "learning_rate": 6.323562981739878e-06, "loss": 0.6899, "step": 5388 }, { "epoch": 0.43, "grad_norm": 0.8720925399208505, "learning_rate": 6.322310032303092e-06, "loss": 1.0891, "step": 5389 }, { "epoch": 0.43, "grad_norm": 1.6615435238378606, "learning_rate": 6.3210569935809076e-06, "loss": 0.8083, "step": 5390 }, { "epoch": 0.43, "grad_norm": 1.4334545926458857, "learning_rate": 6.319803865657933e-06, "loss": 0.8002, "step": 5391 }, { "epoch": 0.43, "grad_norm": 1.5659303893462666, "learning_rate": 6.318550648618785e-06, "loss": 0.7595, "step": 5392 }, { "epoch": 0.43, "grad_norm": 1.386954472438171, "learning_rate": 6.317297342548083e-06, "loss": 0.7803, "step": 5393 }, { "epoch": 0.43, "grad_norm": 0.8645651950690447, "learning_rate": 6.3160439475304515e-06, "loss": 1.0884, "step": 5394 }, { "epoch": 0.43, "grad_norm": 1.5010219794307362, "learning_rate": 6.314790463650522e-06, "loss": 0.7613, "step": 5395 }, { "epoch": 0.43, "grad_norm": 1.5371773700821099, "learning_rate": 6.313536890992935e-06, "loss": 0.8612, "step": 5396 }, { "epoch": 0.43, "grad_norm": 1.5718175196200712, "learning_rate": 6.312283229642333e-06, "loss": 0.7411, "step": 5397 }, { "epoch": 0.43, "grad_norm": 1.7950852671335584, "learning_rate": 6.3110294796833685e-06, "loss": 0.782, "step": 5398 }, { "epoch": 0.43, "grad_norm": 1.5131312464469449, "learning_rate": 6.309775641200695e-06, "loss": 0.8505, "step": 5399 }, { "epoch": 0.43, "grad_norm": 1.5088457349755084, "learning_rate": 6.308521714278973e-06, "loss": 0.8246, "step": 5400 }, { "epoch": 0.43, "grad_norm": 1.4992540107373507, "learning_rate": 6.307267699002874e-06, "loss": 0.7985, "step": 5401 }, { "epoch": 0.43, "grad_norm": 1.562086029763132, "learning_rate": 6.306013595457072e-06, "loss": 0.8156, "step": 5402 }, { "epoch": 0.43, "grad_norm": 1.6973329738520702, "learning_rate": 6.304759403726245e-06, "loss": 0.7775, "step": 5403 }, { "epoch": 0.43, "grad_norm": 0.8584774214732819, "learning_rate": 6.303505123895079e-06, "loss": 1.1366, "step": 5404 }, { "epoch": 0.43, "grad_norm": 1.5135967721215173, "learning_rate": 6.302250756048267e-06, "loss": 0.7829, "step": 5405 }, { "epoch": 0.43, "grad_norm": 1.5016862737743029, "learning_rate": 6.300996300270505e-06, "loss": 0.7169, "step": 5406 }, { "epoch": 0.43, "grad_norm": 1.4742189834747996, "learning_rate": 6.299741756646499e-06, "loss": 0.7993, "step": 5407 }, { "epoch": 0.43, "grad_norm": 1.5389975462690597, "learning_rate": 6.298487125260957e-06, "loss": 0.7871, "step": 5408 }, { "epoch": 0.43, "grad_norm": 1.4227004133206302, "learning_rate": 6.297232406198597e-06, "loss": 0.7838, "step": 5409 }, { "epoch": 0.43, "grad_norm": 0.7893124859044485, "learning_rate": 6.295977599544137e-06, "loss": 1.0856, "step": 5410 }, { "epoch": 0.43, "grad_norm": 1.4701241116350714, "learning_rate": 6.2947227053823055e-06, "loss": 0.8232, "step": 5411 }, { "epoch": 0.43, "grad_norm": 1.5737045005292047, "learning_rate": 6.293467723797837e-06, "loss": 0.8197, "step": 5412 }, { "epoch": 0.43, "grad_norm": 1.542679663774191, "learning_rate": 6.2922126548754696e-06, "loss": 0.7851, "step": 5413 }, { "epoch": 0.43, "grad_norm": 0.7989419771611457, "learning_rate": 6.290957498699949e-06, "loss": 1.1112, "step": 5414 }, { "epoch": 0.43, "grad_norm": 1.4752573130822828, "learning_rate": 6.289702255356027e-06, "loss": 0.8467, "step": 5415 }, { "epoch": 0.43, "grad_norm": 1.5150567158880677, "learning_rate": 6.288446924928459e-06, "loss": 0.7914, "step": 5416 }, { "epoch": 0.43, "grad_norm": 0.7912596392762239, "learning_rate": 6.287191507502009e-06, "loss": 1.0817, "step": 5417 }, { "epoch": 0.43, "grad_norm": 1.6293098617223778, "learning_rate": 6.285936003161445e-06, "loss": 0.8351, "step": 5418 }, { "epoch": 0.43, "grad_norm": 0.7887330318299309, "learning_rate": 6.2846804119915405e-06, "loss": 1.0767, "step": 5419 }, { "epoch": 0.43, "grad_norm": 1.5155755324763134, "learning_rate": 6.283424734077078e-06, "loss": 0.8251, "step": 5420 }, { "epoch": 0.43, "grad_norm": 0.7867052919372468, "learning_rate": 6.282168969502843e-06, "loss": 1.1088, "step": 5421 }, { "epoch": 0.44, "grad_norm": 1.5179695081207245, "learning_rate": 6.280913118353627e-06, "loss": 0.7166, "step": 5422 }, { "epoch": 0.44, "grad_norm": 1.4487264724458715, "learning_rate": 6.279657180714227e-06, "loss": 0.7547, "step": 5423 }, { "epoch": 0.44, "grad_norm": 1.5195469849755592, "learning_rate": 6.278401156669449e-06, "loss": 0.7795, "step": 5424 }, { "epoch": 0.44, "grad_norm": 2.272245231206262, "learning_rate": 6.2771450463041015e-06, "loss": 0.8092, "step": 5425 }, { "epoch": 0.44, "grad_norm": 1.4005666108108732, "learning_rate": 6.275888849703001e-06, "loss": 0.7948, "step": 5426 }, { "epoch": 0.44, "grad_norm": 1.4864242039133861, "learning_rate": 6.274632566950967e-06, "loss": 0.8035, "step": 5427 }, { "epoch": 0.44, "grad_norm": 1.4236139898651898, "learning_rate": 6.273376198132828e-06, "loss": 0.7952, "step": 5428 }, { "epoch": 0.44, "grad_norm": 1.717549775192585, "learning_rate": 6.272119743333416e-06, "loss": 0.8176, "step": 5429 }, { "epoch": 0.44, "grad_norm": 1.565628430878305, "learning_rate": 6.270863202637569e-06, "loss": 0.7198, "step": 5430 }, { "epoch": 0.44, "grad_norm": 1.5062731928807451, "learning_rate": 6.269606576130135e-06, "loss": 0.8672, "step": 5431 }, { "epoch": 0.44, "grad_norm": 1.5522639693257139, "learning_rate": 6.268349863895959e-06, "loss": 0.7177, "step": 5432 }, { "epoch": 0.44, "grad_norm": 1.5697288331426367, "learning_rate": 6.2670930660199025e-06, "loss": 0.7857, "step": 5433 }, { "epoch": 0.44, "grad_norm": 1.4780266572910774, "learning_rate": 6.265836182586823e-06, "loss": 0.7931, "step": 5434 }, { "epoch": 0.44, "grad_norm": 1.5441673387737247, "learning_rate": 6.264579213681592e-06, "loss": 0.7781, "step": 5435 }, { "epoch": 0.44, "grad_norm": 1.4730138771942984, "learning_rate": 6.263322159389078e-06, "loss": 0.7246, "step": 5436 }, { "epoch": 0.44, "grad_norm": 1.658339698149235, "learning_rate": 6.262065019794165e-06, "loss": 0.8579, "step": 5437 }, { "epoch": 0.44, "grad_norm": 1.4682722853919492, "learning_rate": 6.260807794981736e-06, "loss": 0.8046, "step": 5438 }, { "epoch": 0.44, "grad_norm": 0.9080079513423435, "learning_rate": 6.259550485036681e-06, "loss": 1.0977, "step": 5439 }, { "epoch": 0.44, "grad_norm": 1.6034843319809537, "learning_rate": 6.2582930900438975e-06, "loss": 0.7421, "step": 5440 }, { "epoch": 0.44, "grad_norm": 0.8207860063121404, "learning_rate": 6.257035610088287e-06, "loss": 1.0766, "step": 5441 }, { "epoch": 0.44, "grad_norm": 1.5562070891453748, "learning_rate": 6.255778045254758e-06, "loss": 0.7825, "step": 5442 }, { "epoch": 0.44, "grad_norm": 0.8198184254046909, "learning_rate": 6.254520395628224e-06, "loss": 1.0962, "step": 5443 }, { "epoch": 0.44, "grad_norm": 0.8246407650671811, "learning_rate": 6.2532626612936035e-06, "loss": 1.0878, "step": 5444 }, { "epoch": 0.44, "grad_norm": 1.5159315743523272, "learning_rate": 6.252004842335824e-06, "loss": 0.8269, "step": 5445 }, { "epoch": 0.44, "grad_norm": 0.8121480318247445, "learning_rate": 6.250746938839811e-06, "loss": 1.0714, "step": 5446 }, { "epoch": 0.44, "grad_norm": 1.5219204678732232, "learning_rate": 6.249488950890509e-06, "loss": 0.7544, "step": 5447 }, { "epoch": 0.44, "grad_norm": 1.605321370470892, "learning_rate": 6.248230878572854e-06, "loss": 0.7475, "step": 5448 }, { "epoch": 0.44, "grad_norm": 1.4780324446307072, "learning_rate": 6.246972721971796e-06, "loss": 0.8557, "step": 5449 }, { "epoch": 0.44, "grad_norm": 1.5378015129041276, "learning_rate": 6.245714481172288e-06, "loss": 0.6901, "step": 5450 }, { "epoch": 0.44, "grad_norm": 1.4226277785703438, "learning_rate": 6.24445615625929e-06, "loss": 0.6884, "step": 5451 }, { "epoch": 0.44, "grad_norm": 1.4905823589889238, "learning_rate": 6.243197747317766e-06, "loss": 0.7551, "step": 5452 }, { "epoch": 0.44, "grad_norm": 1.3833002841437545, "learning_rate": 6.24193925443269e-06, "loss": 0.8042, "step": 5453 }, { "epoch": 0.44, "grad_norm": 1.5221016434073646, "learning_rate": 6.240680677689033e-06, "loss": 0.7531, "step": 5454 }, { "epoch": 0.44, "grad_norm": 0.9568446557218008, "learning_rate": 6.2394220171717805e-06, "loss": 1.07, "step": 5455 }, { "epoch": 0.44, "grad_norm": 1.5169510171456861, "learning_rate": 6.238163272965918e-06, "loss": 0.804, "step": 5456 }, { "epoch": 0.44, "grad_norm": 1.6034476659208345, "learning_rate": 6.236904445156442e-06, "loss": 0.867, "step": 5457 }, { "epoch": 0.44, "grad_norm": 1.540845701915718, "learning_rate": 6.235645533828348e-06, "loss": 0.8733, "step": 5458 }, { "epoch": 0.44, "grad_norm": 1.5236112078899997, "learning_rate": 6.234386539066643e-06, "loss": 0.7613, "step": 5459 }, { "epoch": 0.44, "grad_norm": 1.7793286555034444, "learning_rate": 6.233127460956337e-06, "loss": 0.7042, "step": 5460 }, { "epoch": 0.44, "grad_norm": 1.5919274978252669, "learning_rate": 6.231868299582444e-06, "loss": 0.6878, "step": 5461 }, { "epoch": 0.44, "grad_norm": 0.8631552062489987, "learning_rate": 6.230609055029987e-06, "loss": 1.1271, "step": 5462 }, { "epoch": 0.44, "grad_norm": 1.5268155917378479, "learning_rate": 6.229349727383992e-06, "loss": 0.7975, "step": 5463 }, { "epoch": 0.44, "grad_norm": 1.618781817005855, "learning_rate": 6.228090316729493e-06, "loss": 0.7134, "step": 5464 }, { "epoch": 0.44, "grad_norm": 1.4722772107996318, "learning_rate": 6.226830823151528e-06, "loss": 0.7715, "step": 5465 }, { "epoch": 0.44, "grad_norm": 1.4761464362424526, "learning_rate": 6.22557124673514e-06, "loss": 0.8188, "step": 5466 }, { "epoch": 0.44, "grad_norm": 1.6714219066089482, "learning_rate": 6.224311587565379e-06, "loss": 0.7785, "step": 5467 }, { "epoch": 0.44, "grad_norm": 1.5071692497754334, "learning_rate": 6.223051845727299e-06, "loss": 0.8151, "step": 5468 }, { "epoch": 0.44, "grad_norm": 1.5764130057880015, "learning_rate": 6.221792021305964e-06, "loss": 0.7856, "step": 5469 }, { "epoch": 0.44, "grad_norm": 0.8807057436212978, "learning_rate": 6.220532114386437e-06, "loss": 1.0866, "step": 5470 }, { "epoch": 0.44, "grad_norm": 1.5240455551862604, "learning_rate": 6.21927212505379e-06, "loss": 0.7511, "step": 5471 }, { "epoch": 0.44, "grad_norm": 0.8297996185513958, "learning_rate": 6.218012053393101e-06, "loss": 1.0808, "step": 5472 }, { "epoch": 0.44, "grad_norm": 1.4171828096522558, "learning_rate": 6.216751899489454e-06, "loss": 0.726, "step": 5473 }, { "epoch": 0.44, "grad_norm": 1.5126088564320275, "learning_rate": 6.215491663427936e-06, "loss": 0.7379, "step": 5474 }, { "epoch": 0.44, "grad_norm": 1.4770132312672042, "learning_rate": 6.214231345293641e-06, "loss": 0.8145, "step": 5475 }, { "epoch": 0.44, "grad_norm": 1.4604208677512673, "learning_rate": 6.21297094517167e-06, "loss": 0.8136, "step": 5476 }, { "epoch": 0.44, "grad_norm": 1.5079490522727337, "learning_rate": 6.211710463147127e-06, "loss": 0.7736, "step": 5477 }, { "epoch": 0.44, "grad_norm": 1.5011891920907696, "learning_rate": 6.210449899305121e-06, "loss": 0.7279, "step": 5478 }, { "epoch": 0.44, "grad_norm": 1.6481383781541319, "learning_rate": 6.209189253730772e-06, "loss": 0.7833, "step": 5479 }, { "epoch": 0.44, "grad_norm": 1.6731111948885569, "learning_rate": 6.207928526509198e-06, "loss": 0.7765, "step": 5480 }, { "epoch": 0.44, "grad_norm": 1.7038795674985152, "learning_rate": 6.206667717725529e-06, "loss": 0.7674, "step": 5481 }, { "epoch": 0.44, "grad_norm": 1.4988646318436027, "learning_rate": 6.205406827464897e-06, "loss": 0.8714, "step": 5482 }, { "epoch": 0.44, "grad_norm": 1.5094695545345682, "learning_rate": 6.204145855812439e-06, "loss": 0.7562, "step": 5483 }, { "epoch": 0.44, "grad_norm": 1.382218882544167, "learning_rate": 6.202884802853299e-06, "loss": 0.7441, "step": 5484 }, { "epoch": 0.44, "grad_norm": 1.4728695916855619, "learning_rate": 6.201623668672627e-06, "loss": 0.7311, "step": 5485 }, { "epoch": 0.44, "grad_norm": 1.3737410527556522, "learning_rate": 6.200362453355578e-06, "loss": 0.7437, "step": 5486 }, { "epoch": 0.44, "grad_norm": 1.4643724326947702, "learning_rate": 6.19910115698731e-06, "loss": 0.7836, "step": 5487 }, { "epoch": 0.44, "grad_norm": 1.477599177649633, "learning_rate": 6.197839779652991e-06, "loss": 0.807, "step": 5488 }, { "epoch": 0.44, "grad_norm": 1.775395684686117, "learning_rate": 6.1965783214377895e-06, "loss": 0.7791, "step": 5489 }, { "epoch": 0.44, "grad_norm": 1.492123924927538, "learning_rate": 6.195316782426884e-06, "loss": 0.7618, "step": 5490 }, { "epoch": 0.44, "grad_norm": 1.567247686616791, "learning_rate": 6.194055162705457e-06, "loss": 0.7902, "step": 5491 }, { "epoch": 0.44, "grad_norm": 1.5444411107796612, "learning_rate": 6.192793462358695e-06, "loss": 0.7651, "step": 5492 }, { "epoch": 0.44, "grad_norm": 1.4674369671918859, "learning_rate": 6.191531681471792e-06, "loss": 0.7291, "step": 5493 }, { "epoch": 0.44, "grad_norm": 1.5718302599513225, "learning_rate": 6.1902698201299425e-06, "loss": 0.7819, "step": 5494 }, { "epoch": 0.44, "grad_norm": 0.9416648752255887, "learning_rate": 6.189007878418354e-06, "loss": 1.1107, "step": 5495 }, { "epoch": 0.44, "grad_norm": 1.6305591640464028, "learning_rate": 6.187745856422236e-06, "loss": 0.7657, "step": 5496 }, { "epoch": 0.44, "grad_norm": 1.4954947775141298, "learning_rate": 6.1864837542268e-06, "loss": 0.8048, "step": 5497 }, { "epoch": 0.44, "grad_norm": 1.530189247898666, "learning_rate": 6.185221571917271e-06, "loss": 0.8644, "step": 5498 }, { "epoch": 0.44, "grad_norm": 1.5658013030075886, "learning_rate": 6.1839593095788675e-06, "loss": 0.7989, "step": 5499 }, { "epoch": 0.44, "grad_norm": 1.5576844021068639, "learning_rate": 6.182696967296825e-06, "loss": 0.7853, "step": 5500 }, { "epoch": 0.44, "grad_norm": 1.5178709036485662, "learning_rate": 6.181434545156379e-06, "loss": 0.7978, "step": 5501 }, { "epoch": 0.44, "grad_norm": 1.6681298000627816, "learning_rate": 6.180172043242772e-06, "loss": 0.8353, "step": 5502 }, { "epoch": 0.44, "grad_norm": 1.621568790014267, "learning_rate": 6.17890946164125e-06, "loss": 0.7352, "step": 5503 }, { "epoch": 0.44, "grad_norm": 1.6062805616744213, "learning_rate": 6.177646800437066e-06, "loss": 0.7327, "step": 5504 }, { "epoch": 0.44, "grad_norm": 1.7291237998637405, "learning_rate": 6.176384059715477e-06, "loss": 0.7475, "step": 5505 }, { "epoch": 0.44, "grad_norm": 1.562937862613243, "learning_rate": 6.175121239561745e-06, "loss": 0.7731, "step": 5506 }, { "epoch": 0.44, "grad_norm": 0.8820085519300088, "learning_rate": 6.173858340061138e-06, "loss": 1.1217, "step": 5507 }, { "epoch": 0.44, "grad_norm": 1.5033407823816525, "learning_rate": 6.172595361298935e-06, "loss": 0.7506, "step": 5508 }, { "epoch": 0.44, "grad_norm": 0.800638521062958, "learning_rate": 6.171332303360411e-06, "loss": 1.1081, "step": 5509 }, { "epoch": 0.44, "grad_norm": 1.5014196397592523, "learning_rate": 6.170069166330852e-06, "loss": 0.7662, "step": 5510 }, { "epoch": 0.44, "grad_norm": 1.4862517772029635, "learning_rate": 6.168805950295547e-06, "loss": 0.6755, "step": 5511 }, { "epoch": 0.44, "grad_norm": 1.4870704372687238, "learning_rate": 6.167542655339791e-06, "loss": 0.7667, "step": 5512 }, { "epoch": 0.44, "grad_norm": 1.6418751680938126, "learning_rate": 6.166279281548886e-06, "loss": 0.8638, "step": 5513 }, { "epoch": 0.44, "grad_norm": 1.6038263458979407, "learning_rate": 6.165015829008137e-06, "loss": 0.8035, "step": 5514 }, { "epoch": 0.44, "grad_norm": 1.4569311182060263, "learning_rate": 6.1637522978028545e-06, "loss": 0.7309, "step": 5515 }, { "epoch": 0.44, "grad_norm": 1.422291285646642, "learning_rate": 6.1624886880183555e-06, "loss": 0.769, "step": 5516 }, { "epoch": 0.44, "grad_norm": 0.9660891929188207, "learning_rate": 6.161224999739963e-06, "loss": 1.0942, "step": 5517 }, { "epoch": 0.44, "grad_norm": 1.5949811276579677, "learning_rate": 6.159961233053002e-06, "loss": 0.73, "step": 5518 }, { "epoch": 0.44, "grad_norm": 1.6248135231866787, "learning_rate": 6.158697388042807e-06, "loss": 0.7777, "step": 5519 }, { "epoch": 0.44, "grad_norm": 0.8076602535159536, "learning_rate": 6.157433464794717e-06, "loss": 1.1087, "step": 5520 }, { "epoch": 0.44, "grad_norm": 1.4792580752168412, "learning_rate": 6.15616946339407e-06, "loss": 0.7546, "step": 5521 }, { "epoch": 0.44, "grad_norm": 1.4323087527715166, "learning_rate": 6.154905383926218e-06, "loss": 0.751, "step": 5522 }, { "epoch": 0.44, "grad_norm": 0.8492462697722462, "learning_rate": 6.153641226476512e-06, "loss": 1.1035, "step": 5523 }, { "epoch": 0.44, "grad_norm": 1.5082179536207279, "learning_rate": 6.152376991130313e-06, "loss": 0.7956, "step": 5524 }, { "epoch": 0.44, "grad_norm": 1.6128802965916764, "learning_rate": 6.151112677972987e-06, "loss": 0.7954, "step": 5525 }, { "epoch": 0.44, "grad_norm": 1.4211362726795298, "learning_rate": 6.149848287089899e-06, "loss": 0.7191, "step": 5526 }, { "epoch": 0.44, "grad_norm": 1.569961725303403, "learning_rate": 6.148583818566426e-06, "loss": 0.7789, "step": 5527 }, { "epoch": 0.44, "grad_norm": 1.5241827231041294, "learning_rate": 6.147319272487946e-06, "loss": 0.7761, "step": 5528 }, { "epoch": 0.44, "grad_norm": 1.4395342122586952, "learning_rate": 6.146054648939845e-06, "loss": 0.7692, "step": 5529 }, { "epoch": 0.44, "grad_norm": 1.5127711941161888, "learning_rate": 6.144789948007516e-06, "loss": 0.7397, "step": 5530 }, { "epoch": 0.44, "grad_norm": 1.5006750947809413, "learning_rate": 6.14352516977635e-06, "loss": 0.7947, "step": 5531 }, { "epoch": 0.44, "grad_norm": 1.6278917518202463, "learning_rate": 6.142260314331751e-06, "loss": 0.6689, "step": 5532 }, { "epoch": 0.44, "grad_norm": 1.4843556926873132, "learning_rate": 6.140995381759121e-06, "loss": 0.8571, "step": 5533 }, { "epoch": 0.44, "grad_norm": 1.5062605893425838, "learning_rate": 6.139730372143877e-06, "loss": 0.7303, "step": 5534 }, { "epoch": 0.44, "grad_norm": 1.6026004885527438, "learning_rate": 6.1384652855714295e-06, "loss": 0.8356, "step": 5535 }, { "epoch": 0.44, "grad_norm": 0.8520758762043584, "learning_rate": 6.1372001221272045e-06, "loss": 1.0947, "step": 5536 }, { "epoch": 0.44, "grad_norm": 1.430515282655726, "learning_rate": 6.1359348818966265e-06, "loss": 0.7422, "step": 5537 }, { "epoch": 0.44, "grad_norm": 1.5081100186969714, "learning_rate": 6.134669564965128e-06, "loss": 0.7766, "step": 5538 }, { "epoch": 0.44, "grad_norm": 1.5508750650234953, "learning_rate": 6.133404171418145e-06, "loss": 0.7957, "step": 5539 }, { "epoch": 0.44, "grad_norm": 1.8601603479761468, "learning_rate": 6.13213870134112e-06, "loss": 0.7683, "step": 5540 }, { "epoch": 0.44, "grad_norm": 1.5426365835035472, "learning_rate": 6.1308731548195025e-06, "loss": 0.8316, "step": 5541 }, { "epoch": 0.44, "grad_norm": 1.5965920622988348, "learning_rate": 6.129607531938744e-06, "loss": 0.8433, "step": 5542 }, { "epoch": 0.44, "grad_norm": 1.483010589431316, "learning_rate": 6.128341832784301e-06, "loss": 0.7276, "step": 5543 }, { "epoch": 0.44, "grad_norm": 1.472950805673831, "learning_rate": 6.127076057441638e-06, "loss": 0.8164, "step": 5544 }, { "epoch": 0.44, "grad_norm": 1.6298862761138326, "learning_rate": 6.125810205996221e-06, "loss": 0.7912, "step": 5545 }, { "epoch": 0.44, "grad_norm": 1.5003767966940265, "learning_rate": 6.124544278533526e-06, "loss": 0.8195, "step": 5546 }, { "epoch": 0.45, "grad_norm": 1.5028119841929566, "learning_rate": 6.12327827513903e-06, "loss": 0.7714, "step": 5547 }, { "epoch": 0.45, "grad_norm": 1.4321050544791523, "learning_rate": 6.122012195898216e-06, "loss": 0.7605, "step": 5548 }, { "epoch": 0.45, "grad_norm": 0.8511037934342788, "learning_rate": 6.120746040896572e-06, "loss": 1.1185, "step": 5549 }, { "epoch": 0.45, "grad_norm": 1.536054342996016, "learning_rate": 6.119479810219593e-06, "loss": 0.8502, "step": 5550 }, { "epoch": 0.45, "grad_norm": 1.5510074210539517, "learning_rate": 6.118213503952779e-06, "loss": 0.7806, "step": 5551 }, { "epoch": 0.45, "grad_norm": 1.5212532338471112, "learning_rate": 6.11694712218163e-06, "loss": 0.7551, "step": 5552 }, { "epoch": 0.45, "grad_norm": 1.573339575859948, "learning_rate": 6.115680664991658e-06, "loss": 0.8611, "step": 5553 }, { "epoch": 0.45, "grad_norm": 0.8207003076126247, "learning_rate": 6.114414132468377e-06, "loss": 1.1008, "step": 5554 }, { "epoch": 0.45, "grad_norm": 1.5101746958483362, "learning_rate": 6.113147524697305e-06, "loss": 0.7543, "step": 5555 }, { "epoch": 0.45, "grad_norm": 1.4474904978496517, "learning_rate": 6.111880841763966e-06, "loss": 0.7546, "step": 5556 }, { "epoch": 0.45, "grad_norm": 0.8103151684072666, "learning_rate": 6.110614083753891e-06, "loss": 1.1013, "step": 5557 }, { "epoch": 0.45, "grad_norm": 1.6485552567099258, "learning_rate": 6.109347250752614e-06, "loss": 0.8573, "step": 5558 }, { "epoch": 0.45, "grad_norm": 1.4065521205048261, "learning_rate": 6.1080803428456735e-06, "loss": 0.7182, "step": 5559 }, { "epoch": 0.45, "grad_norm": 1.5373697658467078, "learning_rate": 6.106813360118614e-06, "loss": 0.7306, "step": 5560 }, { "epoch": 0.45, "grad_norm": 1.3751684246128415, "learning_rate": 6.105546302656986e-06, "loss": 0.7514, "step": 5561 }, { "epoch": 0.45, "grad_norm": 1.5026263110434799, "learning_rate": 6.104279170546344e-06, "loss": 0.7797, "step": 5562 }, { "epoch": 0.45, "grad_norm": 0.8602072999023384, "learning_rate": 6.103011963872246e-06, "loss": 1.1022, "step": 5563 }, { "epoch": 0.45, "grad_norm": 1.5592814840559779, "learning_rate": 6.10174468272026e-06, "loss": 0.7896, "step": 5564 }, { "epoch": 0.45, "grad_norm": 1.6421771581641462, "learning_rate": 6.100477327175951e-06, "loss": 0.7751, "step": 5565 }, { "epoch": 0.45, "grad_norm": 1.3676666686111933, "learning_rate": 6.0992098973249e-06, "loss": 0.6513, "step": 5566 }, { "epoch": 0.45, "grad_norm": 1.4876123858431767, "learning_rate": 6.09794239325268e-06, "loss": 0.687, "step": 5567 }, { "epoch": 0.45, "grad_norm": 1.3912679043820255, "learning_rate": 6.09667481504488e-06, "loss": 0.6871, "step": 5568 }, { "epoch": 0.45, "grad_norm": 1.6443611072099442, "learning_rate": 6.095407162787088e-06, "loss": 0.7504, "step": 5569 }, { "epoch": 0.45, "grad_norm": 1.5331678881960737, "learning_rate": 6.094139436564902e-06, "loss": 0.7454, "step": 5570 }, { "epoch": 0.45, "grad_norm": 1.5778201683310515, "learning_rate": 6.092871636463919e-06, "loss": 0.8287, "step": 5571 }, { "epoch": 0.45, "grad_norm": 1.4162117527184854, "learning_rate": 6.0916037625697425e-06, "loss": 0.7904, "step": 5572 }, { "epoch": 0.45, "grad_norm": 1.5103676347298858, "learning_rate": 6.090335814967984e-06, "loss": 0.8383, "step": 5573 }, { "epoch": 0.45, "grad_norm": 1.4415038126314506, "learning_rate": 6.089067793744258e-06, "loss": 0.7411, "step": 5574 }, { "epoch": 0.45, "grad_norm": 1.5519048847931312, "learning_rate": 6.0877996989841845e-06, "loss": 0.7356, "step": 5575 }, { "epoch": 0.45, "grad_norm": 1.6495779749869393, "learning_rate": 6.086531530773389e-06, "loss": 0.7407, "step": 5576 }, { "epoch": 0.45, "grad_norm": 1.5202203242824701, "learning_rate": 6.0852632891974986e-06, "loss": 0.8504, "step": 5577 }, { "epoch": 0.45, "grad_norm": 1.4787871729956095, "learning_rate": 6.083994974342151e-06, "loss": 0.7985, "step": 5578 }, { "epoch": 0.45, "grad_norm": 1.5504304657665704, "learning_rate": 6.082726586292982e-06, "loss": 0.7828, "step": 5579 }, { "epoch": 0.45, "grad_norm": 0.8978247823049745, "learning_rate": 6.081458125135639e-06, "loss": 1.1133, "step": 5580 }, { "epoch": 0.45, "grad_norm": 1.4942877754364783, "learning_rate": 6.080189590955772e-06, "loss": 0.7998, "step": 5581 }, { "epoch": 0.45, "grad_norm": 1.536676737204576, "learning_rate": 6.078920983839032e-06, "loss": 0.7578, "step": 5582 }, { "epoch": 0.45, "grad_norm": 1.4855675171969795, "learning_rate": 6.07765230387108e-06, "loss": 0.7457, "step": 5583 }, { "epoch": 0.45, "grad_norm": 1.6567148601544681, "learning_rate": 6.07638355113758e-06, "loss": 0.7723, "step": 5584 }, { "epoch": 0.45, "grad_norm": 1.5382448980473618, "learning_rate": 6.075114725724203e-06, "loss": 0.8082, "step": 5585 }, { "epoch": 0.45, "grad_norm": 1.490408788516059, "learning_rate": 6.07384582771662e-06, "loss": 0.851, "step": 5586 }, { "epoch": 0.45, "grad_norm": 1.5117484718138865, "learning_rate": 6.072576857200512e-06, "loss": 0.8081, "step": 5587 }, { "epoch": 0.45, "grad_norm": 1.472805435279507, "learning_rate": 6.071307814261561e-06, "loss": 0.7661, "step": 5588 }, { "epoch": 0.45, "grad_norm": 1.5540061508366736, "learning_rate": 6.070038698985457e-06, "loss": 0.8585, "step": 5589 }, { "epoch": 0.45, "grad_norm": 1.3545397713683067, "learning_rate": 6.068769511457894e-06, "loss": 0.6834, "step": 5590 }, { "epoch": 0.45, "grad_norm": 1.5024707412832095, "learning_rate": 6.0675002517645685e-06, "loss": 0.7546, "step": 5591 }, { "epoch": 0.45, "grad_norm": 0.818118551471803, "learning_rate": 6.0662309199911874e-06, "loss": 1.1065, "step": 5592 }, { "epoch": 0.45, "grad_norm": 0.868781219719909, "learning_rate": 6.064961516223453e-06, "loss": 1.1333, "step": 5593 }, { "epoch": 0.45, "grad_norm": 1.6369505096857158, "learning_rate": 6.063692040547083e-06, "loss": 0.7694, "step": 5594 }, { "epoch": 0.45, "grad_norm": 1.403317968425861, "learning_rate": 6.062422493047796e-06, "loss": 0.7115, "step": 5595 }, { "epoch": 0.45, "grad_norm": 1.475295784011105, "learning_rate": 6.061152873811311e-06, "loss": 0.73, "step": 5596 }, { "epoch": 0.45, "grad_norm": 1.4427827065806427, "learning_rate": 6.059883182923359e-06, "loss": 0.7404, "step": 5597 }, { "epoch": 0.45, "grad_norm": 1.69121367146256, "learning_rate": 6.05861342046967e-06, "loss": 0.7316, "step": 5598 }, { "epoch": 0.45, "grad_norm": 1.5629393375192662, "learning_rate": 6.057343586535982e-06, "loss": 0.8191, "step": 5599 }, { "epoch": 0.45, "grad_norm": 1.426722983580572, "learning_rate": 6.056073681208038e-06, "loss": 0.7803, "step": 5600 }, { "epoch": 0.45, "grad_norm": 1.470334387422037, "learning_rate": 6.054803704571582e-06, "loss": 0.8007, "step": 5601 }, { "epoch": 0.45, "grad_norm": 1.4462786215810641, "learning_rate": 6.0535336567123715e-06, "loss": 0.8064, "step": 5602 }, { "epoch": 0.45, "grad_norm": 0.9555436772052028, "learning_rate": 6.052263537716158e-06, "loss": 1.091, "step": 5603 }, { "epoch": 0.45, "grad_norm": 1.5104333040917042, "learning_rate": 6.0509933476687036e-06, "loss": 0.7518, "step": 5604 }, { "epoch": 0.45, "grad_norm": 1.5978506557201515, "learning_rate": 6.0497230866557746e-06, "loss": 0.8114, "step": 5605 }, { "epoch": 0.45, "grad_norm": 1.469934088289661, "learning_rate": 6.048452754763143e-06, "loss": 0.7422, "step": 5606 }, { "epoch": 0.45, "grad_norm": 1.459429660226803, "learning_rate": 6.047182352076585e-06, "loss": 0.773, "step": 5607 }, { "epoch": 0.45, "grad_norm": 1.5518583239230312, "learning_rate": 6.045911878681879e-06, "loss": 0.8757, "step": 5608 }, { "epoch": 0.45, "grad_norm": 1.5486071124802223, "learning_rate": 6.044641334664812e-06, "loss": 0.8106, "step": 5609 }, { "epoch": 0.45, "grad_norm": 1.5918920727015184, "learning_rate": 6.043370720111172e-06, "loss": 0.8088, "step": 5610 }, { "epoch": 0.45, "grad_norm": 0.8318583848826501, "learning_rate": 6.042100035106756e-06, "loss": 1.0843, "step": 5611 }, { "epoch": 0.45, "grad_norm": 1.4896308953045287, "learning_rate": 6.040829279737363e-06, "loss": 0.6898, "step": 5612 }, { "epoch": 0.45, "grad_norm": 1.435376309898992, "learning_rate": 6.039558454088796e-06, "loss": 0.7784, "step": 5613 }, { "epoch": 0.45, "grad_norm": 1.433405440349099, "learning_rate": 6.038287558246868e-06, "loss": 0.6801, "step": 5614 }, { "epoch": 0.45, "grad_norm": 1.5667811839552435, "learning_rate": 6.037016592297388e-06, "loss": 0.7946, "step": 5615 }, { "epoch": 0.45, "grad_norm": 2.3032381274246556, "learning_rate": 6.035745556326176e-06, "loss": 0.8212, "step": 5616 }, { "epoch": 0.45, "grad_norm": 1.5779041016914368, "learning_rate": 6.034474450419056e-06, "loss": 0.7203, "step": 5617 }, { "epoch": 0.45, "grad_norm": 0.813191361041576, "learning_rate": 6.033203274661854e-06, "loss": 1.079, "step": 5618 }, { "epoch": 0.45, "grad_norm": 0.8215572772447268, "learning_rate": 6.031932029140407e-06, "loss": 1.0935, "step": 5619 }, { "epoch": 0.45, "grad_norm": 1.5535889067523356, "learning_rate": 6.030660713940549e-06, "loss": 0.8094, "step": 5620 }, { "epoch": 0.45, "grad_norm": 0.7340064976542419, "learning_rate": 6.029389329148123e-06, "loss": 1.0866, "step": 5621 }, { "epoch": 0.45, "grad_norm": 1.481239515753897, "learning_rate": 6.0281178748489745e-06, "loss": 0.7314, "step": 5622 }, { "epoch": 0.45, "grad_norm": 0.8063177529147812, "learning_rate": 6.026846351128955e-06, "loss": 1.108, "step": 5623 }, { "epoch": 0.45, "grad_norm": 1.4603019031357471, "learning_rate": 6.025574758073925e-06, "loss": 0.6998, "step": 5624 }, { "epoch": 0.45, "grad_norm": 1.5665352387640574, "learning_rate": 6.024303095769741e-06, "loss": 0.7475, "step": 5625 }, { "epoch": 0.45, "grad_norm": 0.7919445066477342, "learning_rate": 6.02303136430227e-06, "loss": 1.116, "step": 5626 }, { "epoch": 0.45, "grad_norm": 1.4077201347659456, "learning_rate": 6.021759563757381e-06, "loss": 0.7817, "step": 5627 }, { "epoch": 0.45, "grad_norm": 1.5346187768313124, "learning_rate": 6.02048769422095e-06, "loss": 0.7108, "step": 5628 }, { "epoch": 0.45, "grad_norm": 1.4366248203901046, "learning_rate": 6.019215755778857e-06, "loss": 0.7652, "step": 5629 }, { "epoch": 0.45, "grad_norm": 1.576415782847079, "learning_rate": 6.017943748516987e-06, "loss": 0.8228, "step": 5630 }, { "epoch": 0.45, "grad_norm": 1.5775103902934209, "learning_rate": 6.016671672521226e-06, "loss": 0.7539, "step": 5631 }, { "epoch": 0.45, "grad_norm": 1.3751257650760778, "learning_rate": 6.015399527877468e-06, "loss": 0.7824, "step": 5632 }, { "epoch": 0.45, "grad_norm": 1.505940762259155, "learning_rate": 6.014127314671613e-06, "loss": 0.8309, "step": 5633 }, { "epoch": 0.45, "grad_norm": 0.8349901020570976, "learning_rate": 6.0128550329895615e-06, "loss": 1.1306, "step": 5634 }, { "epoch": 0.45, "grad_norm": 1.695651013117236, "learning_rate": 6.011582682917223e-06, "loss": 0.8057, "step": 5635 }, { "epoch": 0.45, "grad_norm": 1.5041700747473987, "learning_rate": 6.010310264540511e-06, "loss": 0.7831, "step": 5636 }, { "epoch": 0.45, "grad_norm": 1.535048492717353, "learning_rate": 6.009037777945337e-06, "loss": 0.8563, "step": 5637 }, { "epoch": 0.45, "grad_norm": 1.523477171557345, "learning_rate": 6.007765223217626e-06, "loss": 0.8115, "step": 5638 }, { "epoch": 0.45, "grad_norm": 1.5398949547576108, "learning_rate": 6.006492600443301e-06, "loss": 0.7932, "step": 5639 }, { "epoch": 0.45, "grad_norm": 0.7815058738985868, "learning_rate": 6.0052199097082955e-06, "loss": 1.0339, "step": 5640 }, { "epoch": 0.45, "grad_norm": 1.5159170873872643, "learning_rate": 6.003947151098543e-06, "loss": 0.7593, "step": 5641 }, { "epoch": 0.45, "grad_norm": 1.4409890915083312, "learning_rate": 6.002674324699983e-06, "loss": 0.7713, "step": 5642 }, { "epoch": 0.45, "grad_norm": 1.4965347827141842, "learning_rate": 6.001401430598561e-06, "loss": 0.7966, "step": 5643 }, { "epoch": 0.45, "grad_norm": 1.7098542854061902, "learning_rate": 6.000128468880223e-06, "loss": 0.7718, "step": 5644 }, { "epoch": 0.45, "grad_norm": 1.524801070568037, "learning_rate": 5.998855439630925e-06, "loss": 0.8508, "step": 5645 }, { "epoch": 0.45, "grad_norm": 1.5111958297374621, "learning_rate": 5.997582342936622e-06, "loss": 0.8444, "step": 5646 }, { "epoch": 0.45, "grad_norm": 1.439839326648737, "learning_rate": 5.99630917888328e-06, "loss": 0.8335, "step": 5647 }, { "epoch": 0.45, "grad_norm": 0.8348560747127013, "learning_rate": 5.9950359475568634e-06, "loss": 1.0826, "step": 5648 }, { "epoch": 0.45, "grad_norm": 1.5984925713805744, "learning_rate": 5.993762649043344e-06, "loss": 0.759, "step": 5649 }, { "epoch": 0.45, "grad_norm": 1.4774590515903818, "learning_rate": 5.992489283428699e-06, "loss": 0.7978, "step": 5650 }, { "epoch": 0.45, "grad_norm": 0.7722649379562184, "learning_rate": 5.991215850798906e-06, "loss": 1.0801, "step": 5651 }, { "epoch": 0.45, "grad_norm": 1.6057534380497958, "learning_rate": 5.989942351239954e-06, "loss": 0.8025, "step": 5652 }, { "epoch": 0.45, "grad_norm": 1.9095355562917857, "learning_rate": 5.988668784837831e-06, "loss": 0.8037, "step": 5653 }, { "epoch": 0.45, "grad_norm": 1.526420885177167, "learning_rate": 5.98739515167853e-06, "loss": 0.7798, "step": 5654 }, { "epoch": 0.45, "grad_norm": 1.4834909886340653, "learning_rate": 5.986121451848051e-06, "loss": 0.7411, "step": 5655 }, { "epoch": 0.45, "grad_norm": 1.5226607522669677, "learning_rate": 5.984847685432397e-06, "loss": 0.8023, "step": 5656 }, { "epoch": 0.45, "grad_norm": 1.526595001380897, "learning_rate": 5.983573852517575e-06, "loss": 0.7506, "step": 5657 }, { "epoch": 0.45, "grad_norm": 1.4268422495093869, "learning_rate": 5.982299953189598e-06, "loss": 0.7688, "step": 5658 }, { "epoch": 0.45, "grad_norm": 1.5672927211415975, "learning_rate": 5.9810259875344815e-06, "loss": 0.7405, "step": 5659 }, { "epoch": 0.45, "grad_norm": 1.3996835807789478, "learning_rate": 5.979751955638246e-06, "loss": 0.7606, "step": 5660 }, { "epoch": 0.45, "grad_norm": 1.6051727635080095, "learning_rate": 5.9784778575869185e-06, "loss": 0.8566, "step": 5661 }, { "epoch": 0.45, "grad_norm": 1.6809194025368093, "learning_rate": 5.97720369346653e-06, "loss": 0.757, "step": 5662 }, { "epoch": 0.45, "grad_norm": 0.9438614456665771, "learning_rate": 5.975929463363112e-06, "loss": 1.0925, "step": 5663 }, { "epoch": 0.45, "grad_norm": 1.5273297585187247, "learning_rate": 5.9746551673627065e-06, "loss": 0.7874, "step": 5664 }, { "epoch": 0.45, "grad_norm": 0.828386765177613, "learning_rate": 5.973380805551354e-06, "loss": 1.0894, "step": 5665 }, { "epoch": 0.45, "grad_norm": 1.4896897373247366, "learning_rate": 5.972106378015104e-06, "loss": 0.8328, "step": 5666 }, { "epoch": 0.45, "grad_norm": 1.5720608127532691, "learning_rate": 5.970831884840011e-06, "loss": 0.7886, "step": 5667 }, { "epoch": 0.45, "grad_norm": 1.4444822089161087, "learning_rate": 5.969557326112125e-06, "loss": 0.809, "step": 5668 }, { "epoch": 0.45, "grad_norm": 1.4218702735152222, "learning_rate": 5.968282701917514e-06, "loss": 0.7674, "step": 5669 }, { "epoch": 0.45, "grad_norm": 1.466692819167025, "learning_rate": 5.967008012342242e-06, "loss": 0.74, "step": 5670 }, { "epoch": 0.45, "grad_norm": 1.421509572885203, "learning_rate": 5.965733257472374e-06, "loss": 0.6694, "step": 5671 }, { "epoch": 0.46, "grad_norm": 1.4922104890042747, "learning_rate": 5.964458437393992e-06, "loss": 0.7952, "step": 5672 }, { "epoch": 0.46, "grad_norm": 1.5225312861199365, "learning_rate": 5.963183552193168e-06, "loss": 0.7426, "step": 5673 }, { "epoch": 0.46, "grad_norm": 0.9021362671752503, "learning_rate": 5.96190860195599e-06, "loss": 1.075, "step": 5674 }, { "epoch": 0.46, "grad_norm": 1.4305369146787803, "learning_rate": 5.9606335867685424e-06, "loss": 0.748, "step": 5675 }, { "epoch": 0.46, "grad_norm": 1.458889798144112, "learning_rate": 5.9593585067169195e-06, "loss": 0.8969, "step": 5676 }, { "epoch": 0.46, "grad_norm": 0.8543215626568816, "learning_rate": 5.958083361887216e-06, "loss": 1.1012, "step": 5677 }, { "epoch": 0.46, "grad_norm": 0.8110222326096646, "learning_rate": 5.956808152365532e-06, "loss": 1.1232, "step": 5678 }, { "epoch": 0.46, "grad_norm": 0.7940939975266098, "learning_rate": 5.9555328782379765e-06, "loss": 1.1021, "step": 5679 }, { "epoch": 0.46, "grad_norm": 0.8076291784401028, "learning_rate": 5.954257539590654e-06, "loss": 1.0965, "step": 5680 }, { "epoch": 0.46, "grad_norm": 1.6929953635629338, "learning_rate": 5.952982136509681e-06, "loss": 0.7546, "step": 5681 }, { "epoch": 0.46, "grad_norm": 1.4011541543137216, "learning_rate": 5.951706669081174e-06, "loss": 0.7925, "step": 5682 }, { "epoch": 0.46, "grad_norm": 1.4129079000263332, "learning_rate": 5.950431137391257e-06, "loss": 0.6795, "step": 5683 }, { "epoch": 0.46, "grad_norm": 1.6460306387291959, "learning_rate": 5.949155541526057e-06, "loss": 0.8413, "step": 5684 }, { "epoch": 0.46, "grad_norm": 1.4282296325226402, "learning_rate": 5.947879881571703e-06, "loss": 0.8495, "step": 5685 }, { "epoch": 0.46, "grad_norm": 1.6397249859688259, "learning_rate": 5.946604157614334e-06, "loss": 0.8121, "step": 5686 }, { "epoch": 0.46, "grad_norm": 1.6162467452259364, "learning_rate": 5.945328369740088e-06, "loss": 0.8288, "step": 5687 }, { "epoch": 0.46, "grad_norm": 1.4478603521480466, "learning_rate": 5.9440525180351064e-06, "loss": 0.8153, "step": 5688 }, { "epoch": 0.46, "grad_norm": 1.497948519969562, "learning_rate": 5.942776602585542e-06, "loss": 0.7353, "step": 5689 }, { "epoch": 0.46, "grad_norm": 1.6204420054989939, "learning_rate": 5.9415006234775445e-06, "loss": 0.7318, "step": 5690 }, { "epoch": 0.46, "grad_norm": 1.4135909404663896, "learning_rate": 5.940224580797272e-06, "loss": 0.743, "step": 5691 }, { "epoch": 0.46, "grad_norm": 1.539401407735776, "learning_rate": 5.9389484746308865e-06, "loss": 0.7527, "step": 5692 }, { "epoch": 0.46, "grad_norm": 2.0395637017567574, "learning_rate": 5.937672305064552e-06, "loss": 0.6611, "step": 5693 }, { "epoch": 0.46, "grad_norm": 1.6370687733939733, "learning_rate": 5.93639607218444e-06, "loss": 0.6842, "step": 5694 }, { "epoch": 0.46, "grad_norm": 1.5938905685071598, "learning_rate": 5.935119776076724e-06, "loss": 0.7207, "step": 5695 }, { "epoch": 0.46, "grad_norm": 1.6148030260303214, "learning_rate": 5.933843416827584e-06, "loss": 0.8154, "step": 5696 }, { "epoch": 0.46, "grad_norm": 1.5115429137134893, "learning_rate": 5.932566994523199e-06, "loss": 0.7641, "step": 5697 }, { "epoch": 0.46, "grad_norm": 1.5557822956375593, "learning_rate": 5.931290509249758e-06, "loss": 0.8198, "step": 5698 }, { "epoch": 0.46, "grad_norm": 1.54314141781923, "learning_rate": 5.930013961093454e-06, "loss": 0.7995, "step": 5699 }, { "epoch": 0.46, "grad_norm": 1.5116949930892296, "learning_rate": 5.9287373501404786e-06, "loss": 0.7689, "step": 5700 }, { "epoch": 0.46, "grad_norm": 1.7167777262377522, "learning_rate": 5.927460676477036e-06, "loss": 0.8324, "step": 5701 }, { "epoch": 0.46, "grad_norm": 0.954125251160797, "learning_rate": 5.926183940189327e-06, "loss": 1.0747, "step": 5702 }, { "epoch": 0.46, "grad_norm": 1.5178798232387414, "learning_rate": 5.92490714136356e-06, "loss": 0.7605, "step": 5703 }, { "epoch": 0.46, "grad_norm": 1.5597335039867501, "learning_rate": 5.923630280085948e-06, "loss": 0.8045, "step": 5704 }, { "epoch": 0.46, "grad_norm": 1.5186027824687112, "learning_rate": 5.922353356442706e-06, "loss": 0.7547, "step": 5705 }, { "epoch": 0.46, "grad_norm": 1.5648297348217117, "learning_rate": 5.921076370520058e-06, "loss": 0.8297, "step": 5706 }, { "epoch": 0.46, "grad_norm": 1.459351140188791, "learning_rate": 5.919799322404227e-06, "loss": 0.7997, "step": 5707 }, { "epoch": 0.46, "grad_norm": 1.5459568145277383, "learning_rate": 5.918522212181444e-06, "loss": 0.7875, "step": 5708 }, { "epoch": 0.46, "grad_norm": 1.4689232867288686, "learning_rate": 5.9172450399379385e-06, "loss": 0.7791, "step": 5709 }, { "epoch": 0.46, "grad_norm": 1.61753931111374, "learning_rate": 5.915967805759951e-06, "loss": 0.8298, "step": 5710 }, { "epoch": 0.46, "grad_norm": 0.7768477622530853, "learning_rate": 5.914690509733723e-06, "loss": 1.0899, "step": 5711 }, { "epoch": 0.46, "grad_norm": 1.5952476727372327, "learning_rate": 5.9134131519455005e-06, "loss": 0.856, "step": 5712 }, { "epoch": 0.46, "grad_norm": 0.8026911749478612, "learning_rate": 5.912135732481533e-06, "loss": 1.0508, "step": 5713 }, { "epoch": 0.46, "grad_norm": 1.5840419090575626, "learning_rate": 5.910858251428077e-06, "loss": 0.7421, "step": 5714 }, { "epoch": 0.46, "grad_norm": 1.4937544193307006, "learning_rate": 5.909580708871388e-06, "loss": 0.7678, "step": 5715 }, { "epoch": 0.46, "grad_norm": 0.778227386729819, "learning_rate": 5.908303104897728e-06, "loss": 1.095, "step": 5716 }, { "epoch": 0.46, "grad_norm": 1.5654589721766001, "learning_rate": 5.907025439593366e-06, "loss": 0.8198, "step": 5717 }, { "epoch": 0.46, "grad_norm": 0.7887550927219074, "learning_rate": 5.905747713044573e-06, "loss": 1.0722, "step": 5718 }, { "epoch": 0.46, "grad_norm": 1.6008553739828169, "learning_rate": 5.904469925337624e-06, "loss": 0.7557, "step": 5719 }, { "epoch": 0.46, "grad_norm": 1.5775484063354812, "learning_rate": 5.903192076558795e-06, "loss": 0.7345, "step": 5720 }, { "epoch": 0.46, "grad_norm": 1.4949331650301871, "learning_rate": 5.901914166794374e-06, "loss": 0.8018, "step": 5721 }, { "epoch": 0.46, "grad_norm": 1.5508956187707397, "learning_rate": 5.900636196130645e-06, "loss": 0.7628, "step": 5722 }, { "epoch": 0.46, "grad_norm": 1.568251182010219, "learning_rate": 5.899358164653901e-06, "loss": 0.7937, "step": 5723 }, { "epoch": 0.46, "grad_norm": 1.4155553890709534, "learning_rate": 5.898080072450437e-06, "loss": 0.7352, "step": 5724 }, { "epoch": 0.46, "grad_norm": 1.5461200075109482, "learning_rate": 5.896801919606554e-06, "loss": 0.7535, "step": 5725 }, { "epoch": 0.46, "grad_norm": 1.5370931150473581, "learning_rate": 5.895523706208552e-06, "loss": 0.7555, "step": 5726 }, { "epoch": 0.46, "grad_norm": 0.8453028187430656, "learning_rate": 5.894245432342743e-06, "loss": 1.0878, "step": 5727 }, { "epoch": 0.46, "grad_norm": 0.8178326000527647, "learning_rate": 5.892967098095439e-06, "loss": 1.0712, "step": 5728 }, { "epoch": 0.46, "grad_norm": 1.551627418309578, "learning_rate": 5.891688703552953e-06, "loss": 0.8184, "step": 5729 }, { "epoch": 0.46, "grad_norm": 0.773952446798454, "learning_rate": 5.890410248801608e-06, "loss": 1.0728, "step": 5730 }, { "epoch": 0.46, "grad_norm": 1.7022689027087656, "learning_rate": 5.889131733927726e-06, "loss": 0.7897, "step": 5731 }, { "epoch": 0.46, "grad_norm": 1.4912416571839358, "learning_rate": 5.887853159017638e-06, "loss": 0.727, "step": 5732 }, { "epoch": 0.46, "grad_norm": 1.4980657988746622, "learning_rate": 5.886574524157672e-06, "loss": 0.7159, "step": 5733 }, { "epoch": 0.46, "grad_norm": 1.5333745264976293, "learning_rate": 5.885295829434168e-06, "loss": 0.7615, "step": 5734 }, { "epoch": 0.46, "grad_norm": 1.48301351597407, "learning_rate": 5.884017074933466e-06, "loss": 0.773, "step": 5735 }, { "epoch": 0.46, "grad_norm": 1.4909182725753882, "learning_rate": 5.8827382607419084e-06, "loss": 0.6996, "step": 5736 }, { "epoch": 0.46, "grad_norm": 1.6268247693364153, "learning_rate": 5.8814593869458455e-06, "loss": 0.7597, "step": 5737 }, { "epoch": 0.46, "grad_norm": 1.5599869043112304, "learning_rate": 5.880180453631628e-06, "loss": 0.8173, "step": 5738 }, { "epoch": 0.46, "grad_norm": 1.4267192239917286, "learning_rate": 5.878901460885616e-06, "loss": 0.7975, "step": 5739 }, { "epoch": 0.46, "grad_norm": 1.5230330742961529, "learning_rate": 5.877622408794166e-06, "loss": 0.8192, "step": 5740 }, { "epoch": 0.46, "grad_norm": 1.3938911660760969, "learning_rate": 5.876343297443645e-06, "loss": 0.7536, "step": 5741 }, { "epoch": 0.46, "grad_norm": 1.4691804421664285, "learning_rate": 5.87506412692042e-06, "loss": 0.7638, "step": 5742 }, { "epoch": 0.46, "grad_norm": 1.4933921322777197, "learning_rate": 5.873784897310864e-06, "loss": 0.796, "step": 5743 }, { "epoch": 0.46, "grad_norm": 1.5429335377287474, "learning_rate": 5.872505608701354e-06, "loss": 0.8118, "step": 5744 }, { "epoch": 0.46, "grad_norm": 1.4337045354895883, "learning_rate": 5.87122626117827e-06, "loss": 0.7163, "step": 5745 }, { "epoch": 0.46, "grad_norm": 1.3980506077269867, "learning_rate": 5.869946854827996e-06, "loss": 0.7991, "step": 5746 }, { "epoch": 0.46, "grad_norm": 1.5373122358805782, "learning_rate": 5.868667389736924e-06, "loss": 0.7672, "step": 5747 }, { "epoch": 0.46, "grad_norm": 1.5772908897266074, "learning_rate": 5.8673878659914405e-06, "loss": 0.7159, "step": 5748 }, { "epoch": 0.46, "grad_norm": 1.5049659914457099, "learning_rate": 5.866108283677947e-06, "loss": 0.8005, "step": 5749 }, { "epoch": 0.46, "grad_norm": 1.0746114102377629, "learning_rate": 5.864828642882839e-06, "loss": 1.097, "step": 5750 }, { "epoch": 0.46, "grad_norm": 1.4832307584955144, "learning_rate": 5.863548943692525e-06, "loss": 0.804, "step": 5751 }, { "epoch": 0.46, "grad_norm": 1.459852551796001, "learning_rate": 5.862269186193412e-06, "loss": 0.7892, "step": 5752 }, { "epoch": 0.46, "grad_norm": 1.4014312083579374, "learning_rate": 5.860989370471912e-06, "loss": 0.7448, "step": 5753 }, { "epoch": 0.46, "grad_norm": 1.4734907190814381, "learning_rate": 5.859709496614442e-06, "loss": 0.7862, "step": 5754 }, { "epoch": 0.46, "grad_norm": 1.5761024659784348, "learning_rate": 5.858429564707419e-06, "loss": 0.8225, "step": 5755 }, { "epoch": 0.46, "grad_norm": 1.5611611426032175, "learning_rate": 5.857149574837269e-06, "loss": 0.8523, "step": 5756 }, { "epoch": 0.46, "grad_norm": 0.819837240186687, "learning_rate": 5.85586952709042e-06, "loss": 1.108, "step": 5757 }, { "epoch": 0.46, "grad_norm": 1.5287523348703764, "learning_rate": 5.854589421553304e-06, "loss": 0.7085, "step": 5758 }, { "epoch": 0.46, "grad_norm": 1.4916046645291923, "learning_rate": 5.853309258312356e-06, "loss": 0.7768, "step": 5759 }, { "epoch": 0.46, "grad_norm": 1.4800492909611327, "learning_rate": 5.852029037454014e-06, "loss": 0.7517, "step": 5760 }, { "epoch": 0.46, "grad_norm": 0.8400630704317602, "learning_rate": 5.850748759064726e-06, "loss": 1.0763, "step": 5761 }, { "epoch": 0.46, "grad_norm": 1.5147587332498131, "learning_rate": 5.849468423230934e-06, "loss": 0.7944, "step": 5762 }, { "epoch": 0.46, "grad_norm": 1.4493766481656487, "learning_rate": 5.848188030039093e-06, "loss": 0.8653, "step": 5763 }, { "epoch": 0.46, "grad_norm": 1.5471727076722313, "learning_rate": 5.846907579575657e-06, "loss": 0.7546, "step": 5764 }, { "epoch": 0.46, "grad_norm": 0.7829873502007327, "learning_rate": 5.8456270719270835e-06, "loss": 1.0988, "step": 5765 }, { "epoch": 0.46, "grad_norm": 1.5366256312020363, "learning_rate": 5.8443465071798365e-06, "loss": 0.6816, "step": 5766 }, { "epoch": 0.46, "grad_norm": 1.4195496763490516, "learning_rate": 5.843065885420382e-06, "loss": 0.6935, "step": 5767 }, { "epoch": 0.46, "grad_norm": 0.8481618002860285, "learning_rate": 5.841785206735192e-06, "loss": 1.0897, "step": 5768 }, { "epoch": 0.46, "grad_norm": 1.4802867012676408, "learning_rate": 5.840504471210742e-06, "loss": 0.818, "step": 5769 }, { "epoch": 0.46, "grad_norm": 1.501874456244142, "learning_rate": 5.839223678933505e-06, "loss": 0.7886, "step": 5770 }, { "epoch": 0.46, "grad_norm": 0.7738976921437907, "learning_rate": 5.837942829989969e-06, "loss": 1.1067, "step": 5771 }, { "epoch": 0.46, "grad_norm": 1.4535084052209677, "learning_rate": 5.836661924466614e-06, "loss": 0.7251, "step": 5772 }, { "epoch": 0.46, "grad_norm": 1.5280900206679697, "learning_rate": 5.835380962449936e-06, "loss": 0.8349, "step": 5773 }, { "epoch": 0.46, "grad_norm": 1.5793060713483904, "learning_rate": 5.834099944026422e-06, "loss": 0.7871, "step": 5774 }, { "epoch": 0.46, "grad_norm": 1.4253156786211705, "learning_rate": 5.832818869282575e-06, "loss": 0.7868, "step": 5775 }, { "epoch": 0.46, "grad_norm": 0.8244166392555515, "learning_rate": 5.831537738304893e-06, "loss": 1.069, "step": 5776 }, { "epoch": 0.46, "grad_norm": 1.539197233090516, "learning_rate": 5.8302565511798805e-06, "loss": 0.7491, "step": 5777 }, { "epoch": 0.46, "grad_norm": 1.3891245800445329, "learning_rate": 5.828975307994048e-06, "loss": 0.7212, "step": 5778 }, { "epoch": 0.46, "grad_norm": 1.535944895882464, "learning_rate": 5.827694008833906e-06, "loss": 0.8067, "step": 5779 }, { "epoch": 0.46, "grad_norm": 1.4852202194422794, "learning_rate": 5.826412653785974e-06, "loss": 0.7948, "step": 5780 }, { "epoch": 0.46, "grad_norm": 1.4701036338306253, "learning_rate": 5.825131242936768e-06, "loss": 0.7581, "step": 5781 }, { "epoch": 0.46, "grad_norm": 1.5637533344934782, "learning_rate": 5.823849776372814e-06, "loss": 0.7299, "step": 5782 }, { "epoch": 0.46, "grad_norm": 1.6298064591796844, "learning_rate": 5.82256825418064e-06, "loss": 0.8367, "step": 5783 }, { "epoch": 0.46, "grad_norm": 1.5563926281469649, "learning_rate": 5.821286676446776e-06, "loss": 0.8282, "step": 5784 }, { "epoch": 0.46, "grad_norm": 1.3784518230332832, "learning_rate": 5.820005043257758e-06, "loss": 0.7092, "step": 5785 }, { "epoch": 0.46, "grad_norm": 1.575577794914877, "learning_rate": 5.818723354700124e-06, "loss": 0.81, "step": 5786 }, { "epoch": 0.46, "grad_norm": 1.5200625037567643, "learning_rate": 5.817441610860417e-06, "loss": 0.7574, "step": 5787 }, { "epoch": 0.46, "grad_norm": 1.4372474016304093, "learning_rate": 5.816159811825184e-06, "loss": 0.712, "step": 5788 }, { "epoch": 0.46, "grad_norm": 1.582400819450848, "learning_rate": 5.814877957680973e-06, "loss": 0.7774, "step": 5789 }, { "epoch": 0.46, "grad_norm": 0.8066280226728584, "learning_rate": 5.81359604851434e-06, "loss": 1.0984, "step": 5790 }, { "epoch": 0.46, "grad_norm": 1.513530275943973, "learning_rate": 5.812314084411842e-06, "loss": 0.7394, "step": 5791 }, { "epoch": 0.46, "grad_norm": 0.7802058464359282, "learning_rate": 5.811032065460037e-06, "loss": 1.0883, "step": 5792 }, { "epoch": 0.46, "grad_norm": 1.5913157082885896, "learning_rate": 5.809749991745495e-06, "loss": 0.7676, "step": 5793 }, { "epoch": 0.46, "grad_norm": 1.5019773432266628, "learning_rate": 5.808467863354781e-06, "loss": 0.741, "step": 5794 }, { "epoch": 0.46, "grad_norm": 1.470996814837823, "learning_rate": 5.807185680374467e-06, "loss": 0.7939, "step": 5795 }, { "epoch": 0.47, "grad_norm": 0.8065435699176291, "learning_rate": 5.805903442891132e-06, "loss": 1.1002, "step": 5796 }, { "epoch": 0.47, "grad_norm": 1.5390604418206384, "learning_rate": 5.804621150991353e-06, "loss": 0.7494, "step": 5797 }, { "epoch": 0.47, "grad_norm": 1.5957927341076568, "learning_rate": 5.803338804761714e-06, "loss": 0.8009, "step": 5798 }, { "epoch": 0.47, "grad_norm": 1.588620312225338, "learning_rate": 5.8020564042888015e-06, "loss": 0.7788, "step": 5799 }, { "epoch": 0.47, "grad_norm": 1.5169071768935753, "learning_rate": 5.8007739496592075e-06, "loss": 0.7146, "step": 5800 }, { "epoch": 0.47, "grad_norm": 1.4721410937612827, "learning_rate": 5.7994914409595236e-06, "loss": 0.8114, "step": 5801 }, { "epoch": 0.47, "grad_norm": 1.433077801767643, "learning_rate": 5.798208878276352e-06, "loss": 0.794, "step": 5802 }, { "epoch": 0.47, "grad_norm": 1.5455097180833473, "learning_rate": 5.7969262616962905e-06, "loss": 0.7979, "step": 5803 }, { "epoch": 0.47, "grad_norm": 1.570103272976438, "learning_rate": 5.795643591305945e-06, "loss": 0.8431, "step": 5804 }, { "epoch": 0.47, "grad_norm": 0.797097769088055, "learning_rate": 5.794360867191926e-06, "loss": 1.0817, "step": 5805 }, { "epoch": 0.47, "grad_norm": 0.8128357728704698, "learning_rate": 5.7930780894408435e-06, "loss": 1.0477, "step": 5806 }, { "epoch": 0.47, "grad_norm": 1.5949078281655433, "learning_rate": 5.7917952581393155e-06, "loss": 0.8799, "step": 5807 }, { "epoch": 0.47, "grad_norm": 1.516985498254989, "learning_rate": 5.790512373373962e-06, "loss": 0.7622, "step": 5808 }, { "epoch": 0.47, "grad_norm": 0.7489844835485704, "learning_rate": 5.789229435231404e-06, "loss": 1.0504, "step": 5809 }, { "epoch": 0.47, "grad_norm": 1.4866152908634782, "learning_rate": 5.787946443798271e-06, "loss": 0.7616, "step": 5810 }, { "epoch": 0.47, "grad_norm": 1.4340081603790742, "learning_rate": 5.786663399161191e-06, "loss": 0.7242, "step": 5811 }, { "epoch": 0.47, "grad_norm": 1.4760383766182872, "learning_rate": 5.785380301406801e-06, "loss": 0.7569, "step": 5812 }, { "epoch": 0.47, "grad_norm": 1.6335384722939872, "learning_rate": 5.784097150621737e-06, "loss": 0.7062, "step": 5813 }, { "epoch": 0.47, "grad_norm": 1.487750567543447, "learning_rate": 5.782813946892639e-06, "loss": 0.7709, "step": 5814 }, { "epoch": 0.47, "grad_norm": 1.5574743909657494, "learning_rate": 5.781530690306156e-06, "loss": 0.7884, "step": 5815 }, { "epoch": 0.47, "grad_norm": 1.5996529275333928, "learning_rate": 5.78024738094893e-06, "loss": 0.8216, "step": 5816 }, { "epoch": 0.47, "grad_norm": 0.8544291642391503, "learning_rate": 5.778964018907619e-06, "loss": 1.1223, "step": 5817 }, { "epoch": 0.47, "grad_norm": 0.8169870089796534, "learning_rate": 5.777680604268876e-06, "loss": 1.0772, "step": 5818 }, { "epoch": 0.47, "grad_norm": 1.449767058791186, "learning_rate": 5.776397137119362e-06, "loss": 0.7769, "step": 5819 }, { "epoch": 0.47, "grad_norm": 1.5185048417343046, "learning_rate": 5.775113617545735e-06, "loss": 0.8004, "step": 5820 }, { "epoch": 0.47, "grad_norm": 1.4564064452685923, "learning_rate": 5.773830045634664e-06, "loss": 0.7787, "step": 5821 }, { "epoch": 0.47, "grad_norm": 1.599756428510654, "learning_rate": 5.772546421472821e-06, "loss": 0.7891, "step": 5822 }, { "epoch": 0.47, "grad_norm": 1.5164228984188264, "learning_rate": 5.771262745146876e-06, "loss": 0.7594, "step": 5823 }, { "epoch": 0.47, "grad_norm": 1.5034470824594237, "learning_rate": 5.769979016743508e-06, "loss": 0.743, "step": 5824 }, { "epoch": 0.47, "grad_norm": 1.7454844252835044, "learning_rate": 5.768695236349396e-06, "loss": 0.7202, "step": 5825 }, { "epoch": 0.47, "grad_norm": 1.4946602878420343, "learning_rate": 5.767411404051222e-06, "loss": 0.8346, "step": 5826 }, { "epoch": 0.47, "grad_norm": 0.9335828328616237, "learning_rate": 5.766127519935676e-06, "loss": 1.0634, "step": 5827 }, { "epoch": 0.47, "grad_norm": 1.4205622027225868, "learning_rate": 5.7648435840894475e-06, "loss": 0.7832, "step": 5828 }, { "epoch": 0.47, "grad_norm": 0.8380583209855147, "learning_rate": 5.763559596599233e-06, "loss": 1.1054, "step": 5829 }, { "epoch": 0.47, "grad_norm": 1.8501707813433197, "learning_rate": 5.762275557551728e-06, "loss": 0.7687, "step": 5830 }, { "epoch": 0.47, "grad_norm": 1.5985793843080427, "learning_rate": 5.760991467033634e-06, "loss": 0.7126, "step": 5831 }, { "epoch": 0.47, "grad_norm": 1.5647886047898651, "learning_rate": 5.759707325131656e-06, "loss": 0.7593, "step": 5832 }, { "epoch": 0.47, "grad_norm": 1.5474184707853607, "learning_rate": 5.758423131932501e-06, "loss": 0.7409, "step": 5833 }, { "epoch": 0.47, "grad_norm": 1.4768617684505911, "learning_rate": 5.757138887522884e-06, "loss": 0.7363, "step": 5834 }, { "epoch": 0.47, "grad_norm": 1.4760777722118392, "learning_rate": 5.755854591989518e-06, "loss": 0.7675, "step": 5835 }, { "epoch": 0.47, "grad_norm": 1.439785343754657, "learning_rate": 5.754570245419121e-06, "loss": 0.8068, "step": 5836 }, { "epoch": 0.47, "grad_norm": 1.640506322433333, "learning_rate": 5.7532858478984144e-06, "loss": 0.7746, "step": 5837 }, { "epoch": 0.47, "grad_norm": 1.562413802742019, "learning_rate": 5.752001399514125e-06, "loss": 0.7913, "step": 5838 }, { "epoch": 0.47, "grad_norm": 1.1005026651926941, "learning_rate": 5.750716900352983e-06, "loss": 1.088, "step": 5839 }, { "epoch": 0.47, "grad_norm": 1.5284037257601057, "learning_rate": 5.749432350501718e-06, "loss": 0.8084, "step": 5840 }, { "epoch": 0.47, "grad_norm": 1.5111665818828572, "learning_rate": 5.7481477500470695e-06, "loss": 0.7495, "step": 5841 }, { "epoch": 0.47, "grad_norm": 1.6688530806542463, "learning_rate": 5.746863099075771e-06, "loss": 0.8173, "step": 5842 }, { "epoch": 0.47, "grad_norm": 0.7953212401547314, "learning_rate": 5.74557839767457e-06, "loss": 1.0993, "step": 5843 }, { "epoch": 0.47, "grad_norm": 0.8436862181802415, "learning_rate": 5.74429364593021e-06, "loss": 1.0853, "step": 5844 }, { "epoch": 0.47, "grad_norm": 1.6110560579734319, "learning_rate": 5.743008843929441e-06, "loss": 0.765, "step": 5845 }, { "epoch": 0.47, "grad_norm": 1.5554662933301058, "learning_rate": 5.741723991759016e-06, "loss": 0.786, "step": 5846 }, { "epoch": 0.47, "grad_norm": 1.4614745592493747, "learning_rate": 5.740439089505691e-06, "loss": 0.6362, "step": 5847 }, { "epoch": 0.47, "grad_norm": 2.1711884828118513, "learning_rate": 5.739154137256227e-06, "loss": 0.7614, "step": 5848 }, { "epoch": 0.47, "grad_norm": 0.9381442857265868, "learning_rate": 5.7378691350973835e-06, "loss": 1.0614, "step": 5849 }, { "epoch": 0.47, "grad_norm": 1.4422838664769437, "learning_rate": 5.736584083115929e-06, "loss": 0.6935, "step": 5850 }, { "epoch": 0.47, "grad_norm": 1.4289419271289645, "learning_rate": 5.735298981398634e-06, "loss": 0.7549, "step": 5851 }, { "epoch": 0.47, "grad_norm": 1.46133656806265, "learning_rate": 5.73401383003227e-06, "loss": 0.742, "step": 5852 }, { "epoch": 0.47, "grad_norm": 1.608701101132847, "learning_rate": 5.732728629103615e-06, "loss": 0.7362, "step": 5853 }, { "epoch": 0.47, "grad_norm": 1.4837608511146556, "learning_rate": 5.731443378699445e-06, "loss": 0.7248, "step": 5854 }, { "epoch": 0.47, "grad_norm": 1.5114090157954212, "learning_rate": 5.730158078906546e-06, "loss": 0.8165, "step": 5855 }, { "epoch": 0.47, "grad_norm": 1.6099166435456116, "learning_rate": 5.728872729811705e-06, "loss": 0.795, "step": 5856 }, { "epoch": 0.47, "grad_norm": 1.5491046098025547, "learning_rate": 5.72758733150171e-06, "loss": 0.7501, "step": 5857 }, { "epoch": 0.47, "grad_norm": 1.5750484340442703, "learning_rate": 5.726301884063356e-06, "loss": 0.781, "step": 5858 }, { "epoch": 0.47, "grad_norm": 1.496490261892033, "learning_rate": 5.725016387583435e-06, "loss": 0.786, "step": 5859 }, { "epoch": 0.47, "grad_norm": 0.9315131779074122, "learning_rate": 5.723730842148752e-06, "loss": 1.0825, "step": 5860 }, { "epoch": 0.47, "grad_norm": 0.8808526810077502, "learning_rate": 5.722445247846107e-06, "loss": 1.1027, "step": 5861 }, { "epoch": 0.47, "grad_norm": 1.5362984546215928, "learning_rate": 5.721159604762307e-06, "loss": 0.8489, "step": 5862 }, { "epoch": 0.47, "grad_norm": 1.5994619417088973, "learning_rate": 5.719873912984163e-06, "loss": 0.8417, "step": 5863 }, { "epoch": 0.47, "grad_norm": 1.6334479676120663, "learning_rate": 5.7185881725984835e-06, "loss": 0.7365, "step": 5864 }, { "epoch": 0.47, "grad_norm": 1.5895892406269867, "learning_rate": 5.71730238369209e-06, "loss": 0.7966, "step": 5865 }, { "epoch": 0.47, "grad_norm": 1.5888211138816024, "learning_rate": 5.716016546351797e-06, "loss": 0.7566, "step": 5866 }, { "epoch": 0.47, "grad_norm": 1.4516495878645554, "learning_rate": 5.714730660664429e-06, "loss": 0.7459, "step": 5867 }, { "epoch": 0.47, "grad_norm": 1.5262962520940098, "learning_rate": 5.713444726716814e-06, "loss": 0.7556, "step": 5868 }, { "epoch": 0.47, "grad_norm": 1.5779654073220646, "learning_rate": 5.712158744595781e-06, "loss": 0.7889, "step": 5869 }, { "epoch": 0.47, "grad_norm": 1.607065773107172, "learning_rate": 5.71087271438816e-06, "loss": 0.7714, "step": 5870 }, { "epoch": 0.47, "grad_norm": 1.6125608877174562, "learning_rate": 5.709586636180787e-06, "loss": 0.7576, "step": 5871 }, { "epoch": 0.47, "grad_norm": 1.4907071487516352, "learning_rate": 5.708300510060502e-06, "loss": 0.7055, "step": 5872 }, { "epoch": 0.47, "grad_norm": 1.608952818578593, "learning_rate": 5.707014336114147e-06, "loss": 0.8863, "step": 5873 }, { "epoch": 0.47, "grad_norm": 1.5624774241030055, "learning_rate": 5.705728114428568e-06, "loss": 0.775, "step": 5874 }, { "epoch": 0.47, "grad_norm": 1.4638403840208445, "learning_rate": 5.704441845090614e-06, "loss": 0.7494, "step": 5875 }, { "epoch": 0.47, "grad_norm": 1.4980128401365091, "learning_rate": 5.703155528187133e-06, "loss": 0.7738, "step": 5876 }, { "epoch": 0.47, "grad_norm": 1.027160937238546, "learning_rate": 5.701869163804985e-06, "loss": 1.0507, "step": 5877 }, { "epoch": 0.47, "grad_norm": 1.5049704567862197, "learning_rate": 5.700582752031025e-06, "loss": 0.7648, "step": 5878 }, { "epoch": 0.47, "grad_norm": 1.512652132554367, "learning_rate": 5.699296292952117e-06, "loss": 0.7476, "step": 5879 }, { "epoch": 0.47, "grad_norm": 1.5847666308782775, "learning_rate": 5.698009786655123e-06, "loss": 0.7543, "step": 5880 }, { "epoch": 0.47, "grad_norm": 1.5143370777107985, "learning_rate": 5.6967232332269116e-06, "loss": 0.6776, "step": 5881 }, { "epoch": 0.47, "grad_norm": 1.4209244589937435, "learning_rate": 5.695436632754356e-06, "loss": 0.7523, "step": 5882 }, { "epoch": 0.47, "grad_norm": 1.6920724998689949, "learning_rate": 5.694149985324326e-06, "loss": 0.744, "step": 5883 }, { "epoch": 0.47, "grad_norm": 1.4988884922405947, "learning_rate": 5.6928632910237035e-06, "loss": 0.7717, "step": 5884 }, { "epoch": 0.47, "grad_norm": 1.4814773030328379, "learning_rate": 5.691576549939369e-06, "loss": 0.7758, "step": 5885 }, { "epoch": 0.47, "grad_norm": 1.6157679904086464, "learning_rate": 5.690289762158203e-06, "loss": 0.7836, "step": 5886 }, { "epoch": 0.47, "grad_norm": 1.588512627107001, "learning_rate": 5.689002927767094e-06, "loss": 0.7802, "step": 5887 }, { "epoch": 0.47, "grad_norm": 1.4468213962473515, "learning_rate": 5.687716046852931e-06, "loss": 0.7751, "step": 5888 }, { "epoch": 0.47, "grad_norm": 1.623802354204206, "learning_rate": 5.686429119502608e-06, "loss": 0.8245, "step": 5889 }, { "epoch": 0.47, "grad_norm": 1.5124256001021952, "learning_rate": 5.685142145803021e-06, "loss": 0.6293, "step": 5890 }, { "epoch": 0.47, "grad_norm": 1.4070623924393106, "learning_rate": 5.683855125841071e-06, "loss": 0.7372, "step": 5891 }, { "epoch": 0.47, "grad_norm": 0.9991227072035811, "learning_rate": 5.682568059703659e-06, "loss": 1.0781, "step": 5892 }, { "epoch": 0.47, "grad_norm": 1.407788834418454, "learning_rate": 5.68128094747769e-06, "loss": 0.724, "step": 5893 }, { "epoch": 0.47, "grad_norm": 0.8206247133721507, "learning_rate": 5.679993789250075e-06, "loss": 1.066, "step": 5894 }, { "epoch": 0.47, "grad_norm": 1.4889730438300162, "learning_rate": 5.678706585107721e-06, "loss": 0.791, "step": 5895 }, { "epoch": 0.47, "grad_norm": 1.5357798763881876, "learning_rate": 5.677419335137549e-06, "loss": 0.7585, "step": 5896 }, { "epoch": 0.47, "grad_norm": 1.6044705950576097, "learning_rate": 5.676132039426475e-06, "loss": 0.8208, "step": 5897 }, { "epoch": 0.47, "grad_norm": 1.5864313745933896, "learning_rate": 5.674844698061419e-06, "loss": 0.8241, "step": 5898 }, { "epoch": 0.47, "grad_norm": 0.8708834939219458, "learning_rate": 5.673557311129306e-06, "loss": 1.0945, "step": 5899 }, { "epoch": 0.47, "grad_norm": 1.5348855680610956, "learning_rate": 5.672269878717063e-06, "loss": 0.8096, "step": 5900 }, { "epoch": 0.47, "grad_norm": 1.4673275804687378, "learning_rate": 5.67098240091162e-06, "loss": 0.7726, "step": 5901 }, { "epoch": 0.47, "grad_norm": 1.6026638838786798, "learning_rate": 5.669694877799912e-06, "loss": 0.7184, "step": 5902 }, { "epoch": 0.47, "grad_norm": 1.4020098717446732, "learning_rate": 5.668407309468873e-06, "loss": 0.7547, "step": 5903 }, { "epoch": 0.47, "grad_norm": 0.7965778819140131, "learning_rate": 5.667119696005445e-06, "loss": 1.0741, "step": 5904 }, { "epoch": 0.47, "grad_norm": 1.5136555928844029, "learning_rate": 5.665832037496569e-06, "loss": 0.6591, "step": 5905 }, { "epoch": 0.47, "grad_norm": 1.525527203728997, "learning_rate": 5.664544334029193e-06, "loss": 0.7488, "step": 5906 }, { "epoch": 0.47, "grad_norm": 1.5670808951194715, "learning_rate": 5.663256585690263e-06, "loss": 0.8026, "step": 5907 }, { "epoch": 0.47, "grad_norm": 1.5789505974292397, "learning_rate": 5.661968792566731e-06, "loss": 0.7475, "step": 5908 }, { "epoch": 0.47, "grad_norm": 1.5566919617571968, "learning_rate": 5.660680954745554e-06, "loss": 0.7586, "step": 5909 }, { "epoch": 0.47, "grad_norm": 1.657428890143161, "learning_rate": 5.659393072313687e-06, "loss": 0.81, "step": 5910 }, { "epoch": 0.47, "grad_norm": 0.8462584332209059, "learning_rate": 5.658105145358093e-06, "loss": 1.075, "step": 5911 }, { "epoch": 0.47, "grad_norm": 0.7963206079158476, "learning_rate": 5.656817173965733e-06, "loss": 1.0993, "step": 5912 }, { "epoch": 0.47, "grad_norm": 1.50766264428043, "learning_rate": 5.655529158223577e-06, "loss": 0.7685, "step": 5913 }, { "epoch": 0.47, "grad_norm": 1.5245860187361033, "learning_rate": 5.654241098218594e-06, "loss": 0.7412, "step": 5914 }, { "epoch": 0.47, "grad_norm": 1.4493064630388122, "learning_rate": 5.6529529940377526e-06, "loss": 0.7498, "step": 5915 }, { "epoch": 0.47, "grad_norm": 1.4824137286193177, "learning_rate": 5.651664845768036e-06, "loss": 0.7086, "step": 5916 }, { "epoch": 0.47, "grad_norm": 0.8019281354706871, "learning_rate": 5.6503766534964156e-06, "loss": 1.0756, "step": 5917 }, { "epoch": 0.47, "grad_norm": 1.4536695958373542, "learning_rate": 5.649088417309878e-06, "loss": 0.7049, "step": 5918 }, { "epoch": 0.47, "grad_norm": 1.5041696370498356, "learning_rate": 5.647800137295407e-06, "loss": 0.7508, "step": 5919 }, { "epoch": 0.47, "grad_norm": 1.444701034643854, "learning_rate": 5.646511813539987e-06, "loss": 0.7732, "step": 5920 }, { "epoch": 0.48, "grad_norm": 1.6699440848371692, "learning_rate": 5.6452234461306145e-06, "loss": 0.7844, "step": 5921 }, { "epoch": 0.48, "grad_norm": 0.8523935507607763, "learning_rate": 5.6439350351542765e-06, "loss": 1.0795, "step": 5922 }, { "epoch": 0.48, "grad_norm": 1.5493872203455221, "learning_rate": 5.642646580697974e-06, "loss": 0.7777, "step": 5923 }, { "epoch": 0.48, "grad_norm": 1.463241233707051, "learning_rate": 5.641358082848705e-06, "loss": 0.7354, "step": 5924 }, { "epoch": 0.48, "grad_norm": 1.347545773481136, "learning_rate": 5.64006954169347e-06, "loss": 0.7244, "step": 5925 }, { "epoch": 0.48, "grad_norm": 1.3936973973131739, "learning_rate": 5.638780957319278e-06, "loss": 0.7214, "step": 5926 }, { "epoch": 0.48, "grad_norm": 1.4891159645152297, "learning_rate": 5.637492329813133e-06, "loss": 0.7874, "step": 5927 }, { "epoch": 0.48, "grad_norm": 1.5555941579779757, "learning_rate": 5.636203659262049e-06, "loss": 0.7314, "step": 5928 }, { "epoch": 0.48, "grad_norm": 1.4047199627721416, "learning_rate": 5.634914945753041e-06, "loss": 0.7842, "step": 5929 }, { "epoch": 0.48, "grad_norm": 0.8298289933968395, "learning_rate": 5.633626189373123e-06, "loss": 1.038, "step": 5930 }, { "epoch": 0.48, "grad_norm": 1.5751175922925589, "learning_rate": 5.632337390209315e-06, "loss": 0.7756, "step": 5931 }, { "epoch": 0.48, "grad_norm": 1.4201299297873249, "learning_rate": 5.63104854834864e-06, "loss": 0.7292, "step": 5932 }, { "epoch": 0.48, "grad_norm": 1.6040853553982801, "learning_rate": 5.629759663878125e-06, "loss": 0.8243, "step": 5933 }, { "epoch": 0.48, "grad_norm": 1.5352924023310548, "learning_rate": 5.628470736884797e-06, "loss": 0.7589, "step": 5934 }, { "epoch": 0.48, "grad_norm": 1.3445162260845536, "learning_rate": 5.627181767455688e-06, "loss": 0.7096, "step": 5935 }, { "epoch": 0.48, "grad_norm": 1.5821437914019594, "learning_rate": 5.625892755677833e-06, "loss": 0.8604, "step": 5936 }, { "epoch": 0.48, "grad_norm": 1.473341032007916, "learning_rate": 5.624603701638266e-06, "loss": 0.7553, "step": 5937 }, { "epoch": 0.48, "grad_norm": 1.6007093509528745, "learning_rate": 5.623314605424031e-06, "loss": 0.7745, "step": 5938 }, { "epoch": 0.48, "grad_norm": 1.507218267562719, "learning_rate": 5.622025467122167e-06, "loss": 0.8203, "step": 5939 }, { "epoch": 0.48, "grad_norm": 1.5314831656870473, "learning_rate": 5.620736286819721e-06, "loss": 0.8062, "step": 5940 }, { "epoch": 0.48, "grad_norm": 1.4992307863405472, "learning_rate": 5.619447064603743e-06, "loss": 0.7829, "step": 5941 }, { "epoch": 0.48, "grad_norm": 1.564854248079923, "learning_rate": 5.6181578005612805e-06, "loss": 0.8141, "step": 5942 }, { "epoch": 0.48, "grad_norm": 1.5333252733700806, "learning_rate": 5.616868494779391e-06, "loss": 0.8169, "step": 5943 }, { "epoch": 0.48, "grad_norm": 1.4499270751445121, "learning_rate": 5.61557914734513e-06, "loss": 0.7379, "step": 5944 }, { "epoch": 0.48, "grad_norm": 1.4100564604428272, "learning_rate": 5.614289758345558e-06, "loss": 0.8083, "step": 5945 }, { "epoch": 0.48, "grad_norm": 1.5580319585459659, "learning_rate": 5.613000327867737e-06, "loss": 0.7592, "step": 5946 }, { "epoch": 0.48, "grad_norm": 1.601481866364937, "learning_rate": 5.611710855998732e-06, "loss": 0.778, "step": 5947 }, { "epoch": 0.48, "grad_norm": 0.8199154944599473, "learning_rate": 5.610421342825611e-06, "loss": 1.095, "step": 5948 }, { "epoch": 0.48, "grad_norm": 0.8079723764775566, "learning_rate": 5.6091317884354435e-06, "loss": 1.0976, "step": 5949 }, { "epoch": 0.48, "grad_norm": 1.6121274992897798, "learning_rate": 5.607842192915307e-06, "loss": 0.7935, "step": 5950 }, { "epoch": 0.48, "grad_norm": 1.930476984581987, "learning_rate": 5.606552556352275e-06, "loss": 0.7101, "step": 5951 }, { "epoch": 0.48, "grad_norm": 1.415745758595045, "learning_rate": 5.6052628788334285e-06, "loss": 0.7505, "step": 5952 }, { "epoch": 0.48, "grad_norm": 1.6596705883597394, "learning_rate": 5.603973160445846e-06, "loss": 0.7445, "step": 5953 }, { "epoch": 0.48, "grad_norm": 2.0779013623514633, "learning_rate": 5.6026834012766155e-06, "loss": 0.8563, "step": 5954 }, { "epoch": 0.48, "grad_norm": 0.9553806677230609, "learning_rate": 5.601393601412825e-06, "loss": 1.1122, "step": 5955 }, { "epoch": 0.48, "grad_norm": 0.8319055871420634, "learning_rate": 5.600103760941561e-06, "loss": 1.0582, "step": 5956 }, { "epoch": 0.48, "grad_norm": 1.5860280972272478, "learning_rate": 5.598813879949922e-06, "loss": 0.7836, "step": 5957 }, { "epoch": 0.48, "grad_norm": 1.4875453002150707, "learning_rate": 5.597523958524999e-06, "loss": 0.8224, "step": 5958 }, { "epoch": 0.48, "grad_norm": 1.4599186957439978, "learning_rate": 5.5962339967538915e-06, "loss": 0.7772, "step": 5959 }, { "epoch": 0.48, "grad_norm": 1.4302685095143886, "learning_rate": 5.5949439947237004e-06, "loss": 0.81, "step": 5960 }, { "epoch": 0.48, "grad_norm": 0.84489409005719, "learning_rate": 5.593653952521532e-06, "loss": 1.0758, "step": 5961 }, { "epoch": 0.48, "grad_norm": 1.4123792497786907, "learning_rate": 5.5923638702344905e-06, "loss": 0.7303, "step": 5962 }, { "epoch": 0.48, "grad_norm": 1.406677621001775, "learning_rate": 5.591073747949687e-06, "loss": 0.7461, "step": 5963 }, { "epoch": 0.48, "grad_norm": 1.8485569446136199, "learning_rate": 5.5897835857542315e-06, "loss": 0.8364, "step": 5964 }, { "epoch": 0.48, "grad_norm": 1.4700971802809797, "learning_rate": 5.588493383735239e-06, "loss": 0.7785, "step": 5965 }, { "epoch": 0.48, "grad_norm": 1.5219937783052218, "learning_rate": 5.587203141979828e-06, "loss": 0.7831, "step": 5966 }, { "epoch": 0.48, "grad_norm": 1.5962996102716793, "learning_rate": 5.585912860575119e-06, "loss": 0.7269, "step": 5967 }, { "epoch": 0.48, "grad_norm": 1.4927877264214104, "learning_rate": 5.584622539608234e-06, "loss": 0.7713, "step": 5968 }, { "epoch": 0.48, "grad_norm": 1.565317310355898, "learning_rate": 5.5833321791662975e-06, "loss": 0.8127, "step": 5969 }, { "epoch": 0.48, "grad_norm": 2.0281768717100572, "learning_rate": 5.582041779336438e-06, "loss": 0.8077, "step": 5970 }, { "epoch": 0.48, "grad_norm": 1.5184433380331699, "learning_rate": 5.580751340205788e-06, "loss": 0.8087, "step": 5971 }, { "epoch": 0.48, "grad_norm": 0.7930397032455248, "learning_rate": 5.579460861861477e-06, "loss": 1.0772, "step": 5972 }, { "epoch": 0.48, "grad_norm": 1.445560748985988, "learning_rate": 5.578170344390647e-06, "loss": 0.8262, "step": 5973 }, { "epoch": 0.48, "grad_norm": 1.7424757517189902, "learning_rate": 5.576879787880432e-06, "loss": 0.7891, "step": 5974 }, { "epoch": 0.48, "grad_norm": 1.574812538181466, "learning_rate": 5.575589192417973e-06, "loss": 0.8085, "step": 5975 }, { "epoch": 0.48, "grad_norm": 1.5688656199310413, "learning_rate": 5.574298558090418e-06, "loss": 0.7845, "step": 5976 }, { "epoch": 0.48, "grad_norm": 1.5534326478579819, "learning_rate": 5.573007884984909e-06, "loss": 0.7844, "step": 5977 }, { "epoch": 0.48, "grad_norm": 0.7929497828212151, "learning_rate": 5.571717173188597e-06, "loss": 1.0588, "step": 5978 }, { "epoch": 0.48, "grad_norm": 1.646593233246233, "learning_rate": 5.570426422788636e-06, "loss": 0.7443, "step": 5979 }, { "epoch": 0.48, "grad_norm": 1.5418093913835174, "learning_rate": 5.569135633872178e-06, "loss": 0.7411, "step": 5980 }, { "epoch": 0.48, "grad_norm": 1.5499448781662335, "learning_rate": 5.567844806526381e-06, "loss": 0.7305, "step": 5981 }, { "epoch": 0.48, "grad_norm": 1.5329073609701704, "learning_rate": 5.566553940838404e-06, "loss": 0.7944, "step": 5982 }, { "epoch": 0.48, "grad_norm": 1.5794353471686298, "learning_rate": 5.565263036895409e-06, "loss": 0.8752, "step": 5983 }, { "epoch": 0.48, "grad_norm": 1.4822037241526296, "learning_rate": 5.563972094784561e-06, "loss": 0.728, "step": 5984 }, { "epoch": 0.48, "grad_norm": 1.48288199500195, "learning_rate": 5.562681114593028e-06, "loss": 0.8409, "step": 5985 }, { "epoch": 0.48, "grad_norm": 1.5708098794069498, "learning_rate": 5.5613900964079805e-06, "loss": 0.7635, "step": 5986 }, { "epoch": 0.48, "grad_norm": 1.4902254962709587, "learning_rate": 5.560099040316588e-06, "loss": 0.8317, "step": 5987 }, { "epoch": 0.48, "grad_norm": 1.5802122031892938, "learning_rate": 5.5588079464060285e-06, "loss": 0.78, "step": 5988 }, { "epoch": 0.48, "grad_norm": 1.4117193943684558, "learning_rate": 5.557516814763478e-06, "loss": 0.6466, "step": 5989 }, { "epoch": 0.48, "grad_norm": 1.6150346105788806, "learning_rate": 5.556225645476119e-06, "loss": 0.7792, "step": 5990 }, { "epoch": 0.48, "grad_norm": 1.4755418146474861, "learning_rate": 5.5549344386311325e-06, "loss": 0.755, "step": 5991 }, { "epoch": 0.48, "grad_norm": 0.8332869822352872, "learning_rate": 5.553643194315702e-06, "loss": 1.1144, "step": 5992 }, { "epoch": 0.48, "grad_norm": 1.5628245970710906, "learning_rate": 5.552351912617017e-06, "loss": 0.8442, "step": 5993 }, { "epoch": 0.48, "grad_norm": 0.7902071376801086, "learning_rate": 5.551060593622269e-06, "loss": 1.0962, "step": 5994 }, { "epoch": 0.48, "grad_norm": 1.5274324628015281, "learning_rate": 5.549769237418649e-06, "loss": 0.7402, "step": 5995 }, { "epoch": 0.48, "grad_norm": 1.6032663134152718, "learning_rate": 5.548477844093354e-06, "loss": 0.7946, "step": 5996 }, { "epoch": 0.48, "grad_norm": 1.6166058572186293, "learning_rate": 5.547186413733579e-06, "loss": 0.8306, "step": 5997 }, { "epoch": 0.48, "grad_norm": 1.565688609490214, "learning_rate": 5.545894946426529e-06, "loss": 0.7431, "step": 5998 }, { "epoch": 0.48, "grad_norm": 1.4712657595278058, "learning_rate": 5.544603442259401e-06, "loss": 0.7215, "step": 5999 }, { "epoch": 0.48, "grad_norm": 1.3837086071775688, "learning_rate": 5.543311901319405e-06, "loss": 0.7301, "step": 6000 }, { "epoch": 0.48, "grad_norm": 1.4271384867310195, "learning_rate": 5.542020323693745e-06, "loss": 0.7714, "step": 6001 }, { "epoch": 0.48, "grad_norm": 2.1842659078429074, "learning_rate": 5.540728709469636e-06, "loss": 0.7552, "step": 6002 }, { "epoch": 0.48, "grad_norm": 0.855122248399551, "learning_rate": 5.539437058734287e-06, "loss": 1.0891, "step": 6003 }, { "epoch": 0.48, "grad_norm": 1.580855109539842, "learning_rate": 5.538145371574913e-06, "loss": 0.7704, "step": 6004 }, { "epoch": 0.48, "grad_norm": 1.464140866988004, "learning_rate": 5.536853648078735e-06, "loss": 0.8278, "step": 6005 }, { "epoch": 0.48, "grad_norm": 1.4618854422445564, "learning_rate": 5.53556188833297e-06, "loss": 0.7636, "step": 6006 }, { "epoch": 0.48, "grad_norm": 1.6363193168488492, "learning_rate": 5.534270092424843e-06, "loss": 0.751, "step": 6007 }, { "epoch": 0.48, "grad_norm": 1.5727034029029818, "learning_rate": 5.532978260441576e-06, "loss": 0.8132, "step": 6008 }, { "epoch": 0.48, "grad_norm": 1.7747358686722856, "learning_rate": 5.5316863924703986e-06, "loss": 0.7117, "step": 6009 }, { "epoch": 0.48, "grad_norm": 2.0564444029218185, "learning_rate": 5.5303944885985405e-06, "loss": 0.8769, "step": 6010 }, { "epoch": 0.48, "grad_norm": 1.7045341414835142, "learning_rate": 5.529102548913233e-06, "loss": 0.7454, "step": 6011 }, { "epoch": 0.48, "grad_norm": 1.5579177349516862, "learning_rate": 5.527810573501713e-06, "loss": 0.7409, "step": 6012 }, { "epoch": 0.48, "grad_norm": 1.422780670899157, "learning_rate": 5.526518562451215e-06, "loss": 0.7363, "step": 6013 }, { "epoch": 0.48, "grad_norm": 1.5106972941620882, "learning_rate": 5.525226515848979e-06, "loss": 0.7387, "step": 6014 }, { "epoch": 0.48, "grad_norm": 1.6265852660676492, "learning_rate": 5.52393443378225e-06, "loss": 0.7616, "step": 6015 }, { "epoch": 0.48, "grad_norm": 1.6410639198854176, "learning_rate": 5.522642316338268e-06, "loss": 0.7269, "step": 6016 }, { "epoch": 0.48, "grad_norm": 1.5610594468328525, "learning_rate": 5.521350163604282e-06, "loss": 0.7979, "step": 6017 }, { "epoch": 0.48, "grad_norm": 1.4629038144503204, "learning_rate": 5.520057975667542e-06, "loss": 0.7269, "step": 6018 }, { "epoch": 0.48, "grad_norm": 1.4288792448484384, "learning_rate": 5.518765752615297e-06, "loss": 0.7394, "step": 6019 }, { "epoch": 0.48, "grad_norm": 1.5063641645832095, "learning_rate": 5.517473494534803e-06, "loss": 0.7594, "step": 6020 }, { "epoch": 0.48, "grad_norm": 1.5038044975225866, "learning_rate": 5.516181201513314e-06, "loss": 0.7605, "step": 6021 }, { "epoch": 0.48, "grad_norm": 1.3836185044801907, "learning_rate": 5.514888873638091e-06, "loss": 0.7421, "step": 6022 }, { "epoch": 0.48, "grad_norm": 0.8421231505930131, "learning_rate": 5.513596510996393e-06, "loss": 1.0979, "step": 6023 }, { "epoch": 0.48, "grad_norm": 1.553669247391853, "learning_rate": 5.5123041136754865e-06, "loss": 0.888, "step": 6024 }, { "epoch": 0.48, "grad_norm": 1.4512625940203483, "learning_rate": 5.5110116817626335e-06, "loss": 0.7492, "step": 6025 }, { "epoch": 0.48, "grad_norm": 1.4862261204081997, "learning_rate": 5.5097192153451014e-06, "loss": 0.7676, "step": 6026 }, { "epoch": 0.48, "grad_norm": 1.5556693695866262, "learning_rate": 5.508426714510164e-06, "loss": 0.761, "step": 6027 }, { "epoch": 0.48, "grad_norm": 1.442847396954611, "learning_rate": 5.507134179345093e-06, "loss": 0.7573, "step": 6028 }, { "epoch": 0.48, "grad_norm": 0.8330335174866577, "learning_rate": 5.505841609937162e-06, "loss": 1.1024, "step": 6029 }, { "epoch": 0.48, "grad_norm": 1.6866642327377033, "learning_rate": 5.504549006373649e-06, "loss": 0.83, "step": 6030 }, { "epoch": 0.48, "grad_norm": 1.570547416682292, "learning_rate": 5.503256368741832e-06, "loss": 0.828, "step": 6031 }, { "epoch": 0.48, "grad_norm": 0.7814164692416823, "learning_rate": 5.501963697128995e-06, "loss": 1.0829, "step": 6032 }, { "epoch": 0.48, "grad_norm": 1.5516604901744246, "learning_rate": 5.500670991622421e-06, "loss": 0.8551, "step": 6033 }, { "epoch": 0.48, "grad_norm": 1.4275974473526831, "learning_rate": 5.499378252309397e-06, "loss": 0.8281, "step": 6034 }, { "epoch": 0.48, "grad_norm": 1.4586287409128862, "learning_rate": 5.498085479277213e-06, "loss": 0.8356, "step": 6035 }, { "epoch": 0.48, "grad_norm": 1.4996655813804465, "learning_rate": 5.496792672613157e-06, "loss": 0.7862, "step": 6036 }, { "epoch": 0.48, "grad_norm": 1.8486346400343505, "learning_rate": 5.495499832404525e-06, "loss": 0.7251, "step": 6037 }, { "epoch": 0.48, "grad_norm": 1.564745757573283, "learning_rate": 5.494206958738609e-06, "loss": 0.7719, "step": 6038 }, { "epoch": 0.48, "grad_norm": 0.8354563046202366, "learning_rate": 5.492914051702711e-06, "loss": 1.0724, "step": 6039 }, { "epoch": 0.48, "grad_norm": 1.541223225414002, "learning_rate": 5.491621111384129e-06, "loss": 0.8607, "step": 6040 }, { "epoch": 0.48, "grad_norm": 1.5908053992177627, "learning_rate": 5.490328137870164e-06, "loss": 0.7671, "step": 6041 }, { "epoch": 0.48, "grad_norm": 0.7886766426600219, "learning_rate": 5.489035131248124e-06, "loss": 1.078, "step": 6042 }, { "epoch": 0.48, "grad_norm": 0.8007623465262053, "learning_rate": 5.487742091605311e-06, "loss": 1.0837, "step": 6043 }, { "epoch": 0.48, "grad_norm": 1.6578550735102104, "learning_rate": 5.4864490190290386e-06, "loss": 0.7808, "step": 6044 }, { "epoch": 0.48, "grad_norm": 1.5148744820641664, "learning_rate": 5.4851559136066154e-06, "loss": 0.6848, "step": 6045 }, { "epoch": 0.49, "grad_norm": 1.3716273320120578, "learning_rate": 5.483862775425358e-06, "loss": 0.7081, "step": 6046 }, { "epoch": 0.49, "grad_norm": 1.5378673225917214, "learning_rate": 5.482569604572577e-06, "loss": 0.7926, "step": 6047 }, { "epoch": 0.49, "grad_norm": 1.4797577436293656, "learning_rate": 5.481276401135592e-06, "loss": 0.8041, "step": 6048 }, { "epoch": 0.49, "grad_norm": 1.5336177656956764, "learning_rate": 5.479983165201726e-06, "loss": 0.7295, "step": 6049 }, { "epoch": 0.49, "grad_norm": 1.3875361825796522, "learning_rate": 5.478689896858298e-06, "loss": 0.7232, "step": 6050 }, { "epoch": 0.49, "grad_norm": 0.8449530839207916, "learning_rate": 5.477396596192633e-06, "loss": 1.0746, "step": 6051 }, { "epoch": 0.49, "grad_norm": 1.4499606630082549, "learning_rate": 5.476103263292061e-06, "loss": 0.8577, "step": 6052 }, { "epoch": 0.49, "grad_norm": 1.4614611194311344, "learning_rate": 5.474809898243905e-06, "loss": 0.8287, "step": 6053 }, { "epoch": 0.49, "grad_norm": 1.525757170750006, "learning_rate": 5.4735165011355005e-06, "loss": 0.8469, "step": 6054 }, { "epoch": 0.49, "grad_norm": 1.4898577419330654, "learning_rate": 5.472223072054178e-06, "loss": 0.7924, "step": 6055 }, { "epoch": 0.49, "grad_norm": 1.3858086720299816, "learning_rate": 5.470929611087274e-06, "loss": 0.7982, "step": 6056 }, { "epoch": 0.49, "grad_norm": 1.523584581811622, "learning_rate": 5.469636118322128e-06, "loss": 0.7631, "step": 6057 }, { "epoch": 0.49, "grad_norm": 1.4794027085902963, "learning_rate": 5.468342593846075e-06, "loss": 0.7928, "step": 6058 }, { "epoch": 0.49, "grad_norm": 1.4930709201544579, "learning_rate": 5.46704903774646e-06, "loss": 0.7905, "step": 6059 }, { "epoch": 0.49, "grad_norm": 1.6941601014354108, "learning_rate": 5.465755450110624e-06, "loss": 0.7748, "step": 6060 }, { "epoch": 0.49, "grad_norm": 1.5269076739738683, "learning_rate": 5.464461831025918e-06, "loss": 0.6925, "step": 6061 }, { "epoch": 0.49, "grad_norm": 1.3731826313223685, "learning_rate": 5.463168180579686e-06, "loss": 0.7844, "step": 6062 }, { "epoch": 0.49, "grad_norm": 1.5827418051772875, "learning_rate": 5.461874498859281e-06, "loss": 0.8311, "step": 6063 }, { "epoch": 0.49, "grad_norm": 0.7952820429839367, "learning_rate": 5.4605807859520506e-06, "loss": 1.0773, "step": 6064 }, { "epoch": 0.49, "grad_norm": 1.7079853422440503, "learning_rate": 5.4592870419453534e-06, "loss": 0.8215, "step": 6065 }, { "epoch": 0.49, "grad_norm": 1.4577486148060619, "learning_rate": 5.457993266926546e-06, "loss": 0.8119, "step": 6066 }, { "epoch": 0.49, "grad_norm": 0.7888077510417876, "learning_rate": 5.456699460982983e-06, "loss": 1.0781, "step": 6067 }, { "epoch": 0.49, "grad_norm": 1.5937897025592462, "learning_rate": 5.455405624202032e-06, "loss": 0.7633, "step": 6068 }, { "epoch": 0.49, "grad_norm": 1.4554191009533146, "learning_rate": 5.45411175667105e-06, "loss": 0.677, "step": 6069 }, { "epoch": 0.49, "grad_norm": 0.791576231802028, "learning_rate": 5.452817858477404e-06, "loss": 1.0731, "step": 6070 }, { "epoch": 0.49, "grad_norm": 0.7966236637647214, "learning_rate": 5.451523929708461e-06, "loss": 1.1007, "step": 6071 }, { "epoch": 0.49, "grad_norm": 1.4841474244410227, "learning_rate": 5.45022997045159e-06, "loss": 0.7929, "step": 6072 }, { "epoch": 0.49, "grad_norm": 1.500995350369392, "learning_rate": 5.448935980794161e-06, "loss": 0.7996, "step": 6073 }, { "epoch": 0.49, "grad_norm": 1.5378250893131635, "learning_rate": 5.447641960823549e-06, "loss": 0.7429, "step": 6074 }, { "epoch": 0.49, "grad_norm": 1.4551455566952733, "learning_rate": 5.446347910627128e-06, "loss": 0.7279, "step": 6075 }, { "epoch": 0.49, "grad_norm": 1.592300245576374, "learning_rate": 5.445053830292274e-06, "loss": 0.7401, "step": 6076 }, { "epoch": 0.49, "grad_norm": 1.5037374430158446, "learning_rate": 5.443759719906369e-06, "loss": 0.7925, "step": 6077 }, { "epoch": 0.49, "grad_norm": 1.534221426740775, "learning_rate": 5.442465579556793e-06, "loss": 0.7847, "step": 6078 }, { "epoch": 0.49, "grad_norm": 0.828382904565253, "learning_rate": 5.4411714093309295e-06, "loss": 1.0995, "step": 6079 }, { "epoch": 0.49, "grad_norm": 1.5234031233884195, "learning_rate": 5.4398772093161635e-06, "loss": 0.775, "step": 6080 }, { "epoch": 0.49, "grad_norm": 1.4966825722778214, "learning_rate": 5.4385829795998815e-06, "loss": 0.7703, "step": 6081 }, { "epoch": 0.49, "grad_norm": 1.5006025877376132, "learning_rate": 5.4372887202694735e-06, "loss": 0.7246, "step": 6082 }, { "epoch": 0.49, "grad_norm": 0.7611038898183065, "learning_rate": 5.435994431412334e-06, "loss": 1.0626, "step": 6083 }, { "epoch": 0.49, "grad_norm": 1.5312711852464498, "learning_rate": 5.434700113115852e-06, "loss": 0.7951, "step": 6084 }, { "epoch": 0.49, "grad_norm": 1.5152597241907526, "learning_rate": 5.433405765467424e-06, "loss": 0.8271, "step": 6085 }, { "epoch": 0.49, "grad_norm": 1.5183001346406908, "learning_rate": 5.432111388554448e-06, "loss": 0.7933, "step": 6086 }, { "epoch": 0.49, "grad_norm": 1.4737755240970125, "learning_rate": 5.430816982464322e-06, "loss": 0.8196, "step": 6087 }, { "epoch": 0.49, "grad_norm": 1.5043825152904817, "learning_rate": 5.429522547284449e-06, "loss": 0.7877, "step": 6088 }, { "epoch": 0.49, "grad_norm": 1.5971530205552558, "learning_rate": 5.42822808310223e-06, "loss": 0.733, "step": 6089 }, { "epoch": 0.49, "grad_norm": 1.673784769977393, "learning_rate": 5.426933590005076e-06, "loss": 0.8405, "step": 6090 }, { "epoch": 0.49, "grad_norm": 1.5460038854258638, "learning_rate": 5.425639068080386e-06, "loss": 0.8078, "step": 6091 }, { "epoch": 0.49, "grad_norm": 1.6909644001045452, "learning_rate": 5.424344517415574e-06, "loss": 0.7843, "step": 6092 }, { "epoch": 0.49, "grad_norm": 1.612815367268557, "learning_rate": 5.423049938098048e-06, "loss": 0.801, "step": 6093 }, { "epoch": 0.49, "grad_norm": 1.4592798146681034, "learning_rate": 5.421755330215223e-06, "loss": 0.7615, "step": 6094 }, { "epoch": 0.49, "grad_norm": 1.434752420565882, "learning_rate": 5.420460693854517e-06, "loss": 0.7936, "step": 6095 }, { "epoch": 0.49, "grad_norm": 1.5426359385359758, "learning_rate": 5.419166029103342e-06, "loss": 0.7573, "step": 6096 }, { "epoch": 0.49, "grad_norm": 1.5516441126522518, "learning_rate": 5.417871336049119e-06, "loss": 0.8267, "step": 6097 }, { "epoch": 0.49, "grad_norm": 0.8718892416100053, "learning_rate": 5.416576614779265e-06, "loss": 1.0703, "step": 6098 }, { "epoch": 0.49, "grad_norm": 1.702384028174739, "learning_rate": 5.415281865381207e-06, "loss": 0.8367, "step": 6099 }, { "epoch": 0.49, "grad_norm": 0.7920825337805188, "learning_rate": 5.413987087942369e-06, "loss": 1.0711, "step": 6100 }, { "epoch": 0.49, "grad_norm": 1.4040724025534994, "learning_rate": 5.412692282550175e-06, "loss": 0.6389, "step": 6101 }, { "epoch": 0.49, "grad_norm": 1.632530556748743, "learning_rate": 5.411397449292054e-06, "loss": 0.8328, "step": 6102 }, { "epoch": 0.49, "grad_norm": 1.3902769735570726, "learning_rate": 5.410102588255437e-06, "loss": 0.8013, "step": 6103 }, { "epoch": 0.49, "grad_norm": 1.5398472466172348, "learning_rate": 5.4088076995277564e-06, "loss": 0.7995, "step": 6104 }, { "epoch": 0.49, "grad_norm": 1.6350555583317152, "learning_rate": 5.407512783196443e-06, "loss": 0.7636, "step": 6105 }, { "epoch": 0.49, "grad_norm": 1.6663666022064754, "learning_rate": 5.406217839348936e-06, "loss": 0.7771, "step": 6106 }, { "epoch": 0.49, "grad_norm": 1.6400131034928693, "learning_rate": 5.404922868072673e-06, "loss": 0.7562, "step": 6107 }, { "epoch": 0.49, "grad_norm": 1.2447378219478407, "learning_rate": 5.403627869455089e-06, "loss": 1.0713, "step": 6108 }, { "epoch": 0.49, "grad_norm": 1.3918762533679103, "learning_rate": 5.402332843583631e-06, "loss": 0.7488, "step": 6109 }, { "epoch": 0.49, "grad_norm": 0.8477741799867561, "learning_rate": 5.401037790545737e-06, "loss": 1.1029, "step": 6110 }, { "epoch": 0.49, "grad_norm": 2.4835920265437306, "learning_rate": 5.399742710428855e-06, "loss": 0.7269, "step": 6111 }, { "epoch": 0.49, "grad_norm": 1.5338598153774472, "learning_rate": 5.398447603320433e-06, "loss": 0.7006, "step": 6112 }, { "epoch": 0.49, "grad_norm": 1.5027443722809748, "learning_rate": 5.397152469307916e-06, "loss": 0.7093, "step": 6113 }, { "epoch": 0.49, "grad_norm": 1.5820138990129178, "learning_rate": 5.395857308478757e-06, "loss": 0.6186, "step": 6114 }, { "epoch": 0.49, "grad_norm": 1.482190458505023, "learning_rate": 5.394562120920407e-06, "loss": 0.7594, "step": 6115 }, { "epoch": 0.49, "grad_norm": 1.4613754207850298, "learning_rate": 5.39326690672032e-06, "loss": 0.8068, "step": 6116 }, { "epoch": 0.49, "grad_norm": 1.4781941154565186, "learning_rate": 5.3919716659659515e-06, "loss": 0.7615, "step": 6117 }, { "epoch": 0.49, "grad_norm": 1.5085945361103859, "learning_rate": 5.390676398744762e-06, "loss": 0.8003, "step": 6118 }, { "epoch": 0.49, "grad_norm": 1.4974122857455892, "learning_rate": 5.389381105144208e-06, "loss": 0.77, "step": 6119 }, { "epoch": 0.49, "grad_norm": 1.656554571389215, "learning_rate": 5.38808578525175e-06, "loss": 0.7482, "step": 6120 }, { "epoch": 0.49, "grad_norm": 1.635294938167728, "learning_rate": 5.386790439154854e-06, "loss": 0.7551, "step": 6121 }, { "epoch": 0.49, "grad_norm": 1.4292380946156966, "learning_rate": 5.3854950669409825e-06, "loss": 0.6948, "step": 6122 }, { "epoch": 0.49, "grad_norm": 1.490584988066094, "learning_rate": 5.384199668697602e-06, "loss": 0.7984, "step": 6123 }, { "epoch": 0.49, "grad_norm": 1.496409333953262, "learning_rate": 5.3829042445121825e-06, "loss": 0.8403, "step": 6124 }, { "epoch": 0.49, "grad_norm": 1.531335377402628, "learning_rate": 5.381608794472192e-06, "loss": 0.827, "step": 6125 }, { "epoch": 0.49, "grad_norm": 1.4531957694380173, "learning_rate": 5.380313318665103e-06, "loss": 0.7984, "step": 6126 }, { "epoch": 0.49, "grad_norm": 1.4278714069304286, "learning_rate": 5.379017817178389e-06, "loss": 0.7817, "step": 6127 }, { "epoch": 0.49, "grad_norm": 1.5572813012510762, "learning_rate": 5.377722290099526e-06, "loss": 0.7135, "step": 6128 }, { "epoch": 0.49, "grad_norm": 1.5391938288480635, "learning_rate": 5.37642673751599e-06, "loss": 0.766, "step": 6129 }, { "epoch": 0.49, "grad_norm": 1.5430414446150635, "learning_rate": 5.37513115951526e-06, "loss": 0.7054, "step": 6130 }, { "epoch": 0.49, "grad_norm": 1.6244777525499756, "learning_rate": 5.373835556184817e-06, "loss": 0.8931, "step": 6131 }, { "epoch": 0.49, "grad_norm": 1.5196654266250602, "learning_rate": 5.37253992761214e-06, "loss": 0.7817, "step": 6132 }, { "epoch": 0.49, "grad_norm": 1.6153873714957685, "learning_rate": 5.371244273884718e-06, "loss": 0.7855, "step": 6133 }, { "epoch": 0.49, "grad_norm": 1.0082471817337435, "learning_rate": 5.369948595090033e-06, "loss": 1.0878, "step": 6134 }, { "epoch": 0.49, "grad_norm": 1.5451451206590685, "learning_rate": 5.368652891315571e-06, "loss": 0.7485, "step": 6135 }, { "epoch": 0.49, "grad_norm": 0.8741249094930904, "learning_rate": 5.3673571626488254e-06, "loss": 1.0283, "step": 6136 }, { "epoch": 0.49, "grad_norm": 1.378494873608061, "learning_rate": 5.3660614091772826e-06, "loss": 0.7324, "step": 6137 }, { "epoch": 0.49, "grad_norm": 1.6350155457402777, "learning_rate": 5.3647656309884365e-06, "loss": 0.7822, "step": 6138 }, { "epoch": 0.49, "grad_norm": 1.4566786766397117, "learning_rate": 5.363469828169782e-06, "loss": 0.7019, "step": 6139 }, { "epoch": 0.49, "grad_norm": 1.4413445666294156, "learning_rate": 5.362174000808813e-06, "loss": 0.8039, "step": 6140 }, { "epoch": 0.49, "grad_norm": 1.466224689543455, "learning_rate": 5.360878148993027e-06, "loss": 0.7871, "step": 6141 }, { "epoch": 0.49, "grad_norm": 1.0772611602818163, "learning_rate": 5.359582272809922e-06, "loss": 1.0855, "step": 6142 }, { "epoch": 0.49, "grad_norm": 1.529564707849868, "learning_rate": 5.358286372347002e-06, "loss": 0.7655, "step": 6143 }, { "epoch": 0.49, "grad_norm": 1.541330666300403, "learning_rate": 5.356990447691765e-06, "loss": 0.7838, "step": 6144 }, { "epoch": 0.49, "grad_norm": 1.5903178148146007, "learning_rate": 5.355694498931718e-06, "loss": 0.8331, "step": 6145 }, { "epoch": 0.49, "grad_norm": 1.4850842033569462, "learning_rate": 5.354398526154365e-06, "loss": 0.7853, "step": 6146 }, { "epoch": 0.49, "grad_norm": 1.5073178084707142, "learning_rate": 5.353102529447213e-06, "loss": 0.7854, "step": 6147 }, { "epoch": 0.49, "grad_norm": 1.5816679685000012, "learning_rate": 5.351806508897771e-06, "loss": 0.76, "step": 6148 }, { "epoch": 0.49, "grad_norm": 1.418381058901958, "learning_rate": 5.350510464593548e-06, "loss": 0.7104, "step": 6149 }, { "epoch": 0.49, "grad_norm": 1.6113929285438835, "learning_rate": 5.349214396622058e-06, "loss": 0.84, "step": 6150 }, { "epoch": 0.49, "grad_norm": 1.583936523510558, "learning_rate": 5.347918305070813e-06, "loss": 0.7816, "step": 6151 }, { "epoch": 0.49, "grad_norm": 1.5221054852777243, "learning_rate": 5.346622190027329e-06, "loss": 0.7551, "step": 6152 }, { "epoch": 0.49, "grad_norm": 0.7980098339808834, "learning_rate": 5.3453260515791216e-06, "loss": 1.0953, "step": 6153 }, { "epoch": 0.49, "grad_norm": 1.554246521776231, "learning_rate": 5.3440298898137084e-06, "loss": 0.8085, "step": 6154 }, { "epoch": 0.49, "grad_norm": 1.4549040998466947, "learning_rate": 5.3427337048186124e-06, "loss": 0.7355, "step": 6155 }, { "epoch": 0.49, "grad_norm": 1.4698179408359375, "learning_rate": 5.341437496681352e-06, "loss": 0.7503, "step": 6156 }, { "epoch": 0.49, "grad_norm": 1.5312447022158004, "learning_rate": 5.340141265489451e-06, "loss": 0.7929, "step": 6157 }, { "epoch": 0.49, "grad_norm": 1.5381990604151663, "learning_rate": 5.338845011330435e-06, "loss": 0.8005, "step": 6158 }, { "epoch": 0.49, "grad_norm": 1.5433112818826693, "learning_rate": 5.337548734291827e-06, "loss": 0.7997, "step": 6159 }, { "epoch": 0.49, "grad_norm": 0.810840244039006, "learning_rate": 5.336252434461158e-06, "loss": 1.0592, "step": 6160 }, { "epoch": 0.49, "grad_norm": 1.592939409200986, "learning_rate": 5.3349561119259555e-06, "loss": 0.8555, "step": 6161 }, { "epoch": 0.49, "grad_norm": 1.541202972602141, "learning_rate": 5.33365976677375e-06, "loss": 0.8041, "step": 6162 }, { "epoch": 0.49, "grad_norm": 0.7996924278983366, "learning_rate": 5.332363399092076e-06, "loss": 1.1015, "step": 6163 }, { "epoch": 0.49, "grad_norm": 1.5142760393107522, "learning_rate": 5.331067008968462e-06, "loss": 0.8158, "step": 6164 }, { "epoch": 0.49, "grad_norm": 1.481805833712582, "learning_rate": 5.329770596490449e-06, "loss": 0.7794, "step": 6165 }, { "epoch": 0.49, "grad_norm": 1.4994044699031979, "learning_rate": 5.328474161745571e-06, "loss": 0.6773, "step": 6166 }, { "epoch": 0.49, "grad_norm": 1.5419100892792639, "learning_rate": 5.327177704821366e-06, "loss": 0.7793, "step": 6167 }, { "epoch": 0.49, "grad_norm": 1.533158550656812, "learning_rate": 5.325881225805373e-06, "loss": 0.8013, "step": 6168 }, { "epoch": 0.49, "grad_norm": 1.5295844200043938, "learning_rate": 5.324584724785137e-06, "loss": 0.7692, "step": 6169 }, { "epoch": 0.5, "grad_norm": 1.4522664998350614, "learning_rate": 5.323288201848197e-06, "loss": 0.7406, "step": 6170 }, { "epoch": 0.5, "grad_norm": 1.5884272705708322, "learning_rate": 5.3219916570820976e-06, "loss": 0.8644, "step": 6171 }, { "epoch": 0.5, "grad_norm": 1.410326554784251, "learning_rate": 5.320695090574386e-06, "loss": 0.6637, "step": 6172 }, { "epoch": 0.5, "grad_norm": 1.5747870133251298, "learning_rate": 5.319398502412609e-06, "loss": 0.7521, "step": 6173 }, { "epoch": 0.5, "grad_norm": 1.5645256145372708, "learning_rate": 5.318101892684315e-06, "loss": 0.822, "step": 6174 }, { "epoch": 0.5, "grad_norm": 1.4569519390039, "learning_rate": 5.316805261477052e-06, "loss": 0.7591, "step": 6175 }, { "epoch": 0.5, "grad_norm": 1.5783260109934827, "learning_rate": 5.315508608878375e-06, "loss": 0.708, "step": 6176 }, { "epoch": 0.5, "grad_norm": 1.5682445085522791, "learning_rate": 5.314211934975835e-06, "loss": 0.7619, "step": 6177 }, { "epoch": 0.5, "grad_norm": 1.5671645778327399, "learning_rate": 5.312915239856986e-06, "loss": 0.7714, "step": 6178 }, { "epoch": 0.5, "grad_norm": 0.8714206214517676, "learning_rate": 5.311618523609386e-06, "loss": 1.0774, "step": 6179 }, { "epoch": 0.5, "grad_norm": 1.3927122495351656, "learning_rate": 5.310321786320588e-06, "loss": 0.7807, "step": 6180 }, { "epoch": 0.5, "grad_norm": 1.5985220415854617, "learning_rate": 5.309025028078155e-06, "loss": 0.8846, "step": 6181 }, { "epoch": 0.5, "grad_norm": 1.46899464382845, "learning_rate": 5.307728248969646e-06, "loss": 0.7032, "step": 6182 }, { "epoch": 0.5, "grad_norm": 1.5128566782425388, "learning_rate": 5.306431449082621e-06, "loss": 0.7722, "step": 6183 }, { "epoch": 0.5, "grad_norm": 1.400965973592896, "learning_rate": 5.3051346285046445e-06, "loss": 0.7942, "step": 6184 }, { "epoch": 0.5, "grad_norm": 1.4547480601478833, "learning_rate": 5.30383778732328e-06, "loss": 0.7191, "step": 6185 }, { "epoch": 0.5, "grad_norm": 1.5908646063455092, "learning_rate": 5.302540925626094e-06, "loss": 0.8474, "step": 6186 }, { "epoch": 0.5, "grad_norm": 1.7489266843177886, "learning_rate": 5.301244043500651e-06, "loss": 0.7549, "step": 6187 }, { "epoch": 0.5, "grad_norm": 1.5658987608567778, "learning_rate": 5.299947141034521e-06, "loss": 0.8284, "step": 6188 }, { "epoch": 0.5, "grad_norm": 1.4484850789891373, "learning_rate": 5.298650218315277e-06, "loss": 0.7353, "step": 6189 }, { "epoch": 0.5, "grad_norm": 1.6286243928904662, "learning_rate": 5.297353275430487e-06, "loss": 0.8623, "step": 6190 }, { "epoch": 0.5, "grad_norm": 1.5104690239109555, "learning_rate": 5.296056312467723e-06, "loss": 0.7441, "step": 6191 }, { "epoch": 0.5, "grad_norm": 0.9034051548329042, "learning_rate": 5.29475932951456e-06, "loss": 1.0686, "step": 6192 }, { "epoch": 0.5, "grad_norm": 0.8961401342661431, "learning_rate": 5.293462326658572e-06, "loss": 1.0518, "step": 6193 }, { "epoch": 0.5, "grad_norm": 1.4309438637927183, "learning_rate": 5.292165303987336e-06, "loss": 0.6935, "step": 6194 }, { "epoch": 0.5, "grad_norm": 1.512715868398929, "learning_rate": 5.290868261588433e-06, "loss": 0.6855, "step": 6195 }, { "epoch": 0.5, "grad_norm": 1.5007440179323912, "learning_rate": 5.28957119954944e-06, "loss": 0.8193, "step": 6196 }, { "epoch": 0.5, "grad_norm": 1.4424621682927343, "learning_rate": 5.288274117957936e-06, "loss": 0.8322, "step": 6197 }, { "epoch": 0.5, "grad_norm": 1.561701661660001, "learning_rate": 5.286977016901503e-06, "loss": 0.6842, "step": 6198 }, { "epoch": 0.5, "grad_norm": 1.6708980385910963, "learning_rate": 5.285679896467729e-06, "loss": 0.7983, "step": 6199 }, { "epoch": 0.5, "grad_norm": 1.4369534492140668, "learning_rate": 5.284382756744194e-06, "loss": 0.7119, "step": 6200 }, { "epoch": 0.5, "grad_norm": 1.6830686851561298, "learning_rate": 5.283085597818485e-06, "loss": 0.8005, "step": 6201 }, { "epoch": 0.5, "grad_norm": 1.484900894403216, "learning_rate": 5.281788419778187e-06, "loss": 0.8058, "step": 6202 }, { "epoch": 0.5, "grad_norm": 1.5114127391963805, "learning_rate": 5.280491222710893e-06, "loss": 0.7907, "step": 6203 }, { "epoch": 0.5, "grad_norm": 1.5084261404031585, "learning_rate": 5.279194006704189e-06, "loss": 0.7445, "step": 6204 }, { "epoch": 0.5, "grad_norm": 1.5003009157600762, "learning_rate": 5.277896771845668e-06, "loss": 0.7848, "step": 6205 }, { "epoch": 0.5, "grad_norm": 1.5616288779906593, "learning_rate": 5.27659951822292e-06, "loss": 0.7609, "step": 6206 }, { "epoch": 0.5, "grad_norm": 1.4854748643269604, "learning_rate": 5.275302245923543e-06, "loss": 0.7471, "step": 6207 }, { "epoch": 0.5, "grad_norm": 1.4991651236280947, "learning_rate": 5.2740049550351266e-06, "loss": 0.756, "step": 6208 }, { "epoch": 0.5, "grad_norm": 0.9779216431791927, "learning_rate": 5.27270764564527e-06, "loss": 1.1001, "step": 6209 }, { "epoch": 0.5, "grad_norm": 1.4724426143611455, "learning_rate": 5.271410317841568e-06, "loss": 0.6746, "step": 6210 }, { "epoch": 0.5, "grad_norm": 1.5257229090314113, "learning_rate": 5.2701129717116215e-06, "loss": 0.8869, "step": 6211 }, { "epoch": 0.5, "grad_norm": 1.4874201051517941, "learning_rate": 5.26881560734303e-06, "loss": 0.8118, "step": 6212 }, { "epoch": 0.5, "grad_norm": 1.4992883257663459, "learning_rate": 5.267518224823395e-06, "loss": 0.7743, "step": 6213 }, { "epoch": 0.5, "grad_norm": 1.5045865352606573, "learning_rate": 5.266220824240316e-06, "loss": 0.7756, "step": 6214 }, { "epoch": 0.5, "grad_norm": 1.6468691718427197, "learning_rate": 5.264923405681399e-06, "loss": 0.791, "step": 6215 }, { "epoch": 0.5, "grad_norm": 1.3672834532321414, "learning_rate": 5.263625969234247e-06, "loss": 0.7074, "step": 6216 }, { "epoch": 0.5, "grad_norm": 1.5252195707331477, "learning_rate": 5.262328514986468e-06, "loss": 0.8207, "step": 6217 }, { "epoch": 0.5, "grad_norm": 1.453961311876116, "learning_rate": 5.261031043025669e-06, "loss": 0.6847, "step": 6218 }, { "epoch": 0.5, "grad_norm": 1.4810590671863757, "learning_rate": 5.259733553439453e-06, "loss": 0.8028, "step": 6219 }, { "epoch": 0.5, "grad_norm": 1.5803318118291212, "learning_rate": 5.258436046315437e-06, "loss": 0.7664, "step": 6220 }, { "epoch": 0.5, "grad_norm": 1.5123040499463003, "learning_rate": 5.257138521741226e-06, "loss": 0.7067, "step": 6221 }, { "epoch": 0.5, "grad_norm": 1.729258935668166, "learning_rate": 5.255840979804436e-06, "loss": 0.7802, "step": 6222 }, { "epoch": 0.5, "grad_norm": 1.8243649301993219, "learning_rate": 5.254543420592677e-06, "loss": 0.7732, "step": 6223 }, { "epoch": 0.5, "grad_norm": 1.4521535339958287, "learning_rate": 5.253245844193564e-06, "loss": 0.7393, "step": 6224 }, { "epoch": 0.5, "grad_norm": 1.4852544705772817, "learning_rate": 5.2519482506947135e-06, "loss": 0.7899, "step": 6225 }, { "epoch": 0.5, "grad_norm": 1.3879269507442464, "learning_rate": 5.25065064018374e-06, "loss": 0.8284, "step": 6226 }, { "epoch": 0.5, "grad_norm": 1.5569475949901304, "learning_rate": 5.2493530127482624e-06, "loss": 0.7264, "step": 6227 }, { "epoch": 0.5, "grad_norm": 1.3638706482737752, "learning_rate": 5.248055368475899e-06, "loss": 0.7721, "step": 6228 }, { "epoch": 0.5, "grad_norm": 0.8463547639684167, "learning_rate": 5.246757707454271e-06, "loss": 1.0576, "step": 6229 }, { "epoch": 0.5, "grad_norm": 1.4700921487954783, "learning_rate": 5.245460029770998e-06, "loss": 0.767, "step": 6230 }, { "epoch": 0.5, "grad_norm": 1.5000611219789426, "learning_rate": 5.244162335513701e-06, "loss": 0.797, "step": 6231 }, { "epoch": 0.5, "grad_norm": 1.416677568241714, "learning_rate": 5.242864624770007e-06, "loss": 0.7819, "step": 6232 }, { "epoch": 0.5, "grad_norm": 1.4689530775184194, "learning_rate": 5.241566897627536e-06, "loss": 0.8362, "step": 6233 }, { "epoch": 0.5, "grad_norm": 1.3741261745692872, "learning_rate": 5.240269154173917e-06, "loss": 0.7145, "step": 6234 }, { "epoch": 0.5, "grad_norm": 0.7735346046973679, "learning_rate": 5.238971394496776e-06, "loss": 1.0754, "step": 6235 }, { "epoch": 0.5, "grad_norm": 1.3907519883119903, "learning_rate": 5.237673618683737e-06, "loss": 0.7581, "step": 6236 }, { "epoch": 0.5, "grad_norm": 0.8092572074832497, "learning_rate": 5.236375826822435e-06, "loss": 1.0802, "step": 6237 }, { "epoch": 0.5, "grad_norm": 1.564227758128665, "learning_rate": 5.235078019000495e-06, "loss": 0.8006, "step": 6238 }, { "epoch": 0.5, "grad_norm": 1.6283526178546928, "learning_rate": 5.23378019530555e-06, "loss": 0.75, "step": 6239 }, { "epoch": 0.5, "grad_norm": 1.5230778898536241, "learning_rate": 5.232482355825233e-06, "loss": 0.7897, "step": 6240 }, { "epoch": 0.5, "grad_norm": 0.8051200030726031, "learning_rate": 5.231184500647173e-06, "loss": 1.112, "step": 6241 }, { "epoch": 0.5, "grad_norm": 1.4907451576605, "learning_rate": 5.229886629859009e-06, "loss": 0.7798, "step": 6242 }, { "epoch": 0.5, "grad_norm": 1.4477576365288327, "learning_rate": 5.228588743548373e-06, "loss": 0.77, "step": 6243 }, { "epoch": 0.5, "grad_norm": 1.42725829228119, "learning_rate": 5.227290841802903e-06, "loss": 0.8002, "step": 6244 }, { "epoch": 0.5, "grad_norm": 1.4605560020704147, "learning_rate": 5.225992924710236e-06, "loss": 0.6835, "step": 6245 }, { "epoch": 0.5, "grad_norm": 1.3778136800964071, "learning_rate": 5.224694992358009e-06, "loss": 0.7018, "step": 6246 }, { "epoch": 0.5, "grad_norm": 1.446889355460579, "learning_rate": 5.223397044833863e-06, "loss": 0.6648, "step": 6247 }, { "epoch": 0.5, "grad_norm": 1.436088098732948, "learning_rate": 5.222099082225437e-06, "loss": 0.7799, "step": 6248 }, { "epoch": 0.5, "grad_norm": 1.4704426557874466, "learning_rate": 5.2208011046203735e-06, "loss": 0.6749, "step": 6249 }, { "epoch": 0.5, "grad_norm": 1.4802288567188224, "learning_rate": 5.2195031121063145e-06, "loss": 0.7463, "step": 6250 }, { "epoch": 0.5, "grad_norm": 1.4050833510604857, "learning_rate": 5.2182051047709035e-06, "loss": 0.7849, "step": 6251 }, { "epoch": 0.5, "grad_norm": 0.8451818055126844, "learning_rate": 5.2169070827017855e-06, "loss": 1.0693, "step": 6252 }, { "epoch": 0.5, "grad_norm": 1.5830983878856357, "learning_rate": 5.215609045986604e-06, "loss": 0.7405, "step": 6253 }, { "epoch": 0.5, "grad_norm": 1.547912743420679, "learning_rate": 5.214310994713008e-06, "loss": 0.8108, "step": 6254 }, { "epoch": 0.5, "grad_norm": 0.8070808826537955, "learning_rate": 5.213012928968642e-06, "loss": 1.0768, "step": 6255 }, { "epoch": 0.5, "grad_norm": 0.8150422292930273, "learning_rate": 5.211714848841157e-06, "loss": 1.0579, "step": 6256 }, { "epoch": 0.5, "grad_norm": 1.5243946443021381, "learning_rate": 5.210416754418202e-06, "loss": 0.8226, "step": 6257 }, { "epoch": 0.5, "grad_norm": 1.498951266337785, "learning_rate": 5.209118645787425e-06, "loss": 0.7314, "step": 6258 }, { "epoch": 0.5, "grad_norm": 1.543515035118257, "learning_rate": 5.2078205230364795e-06, "loss": 0.7619, "step": 6259 }, { "epoch": 0.5, "grad_norm": 0.8208229037548771, "learning_rate": 5.206522386253017e-06, "loss": 1.0976, "step": 6260 }, { "epoch": 0.5, "grad_norm": 1.532222719468825, "learning_rate": 5.205224235524692e-06, "loss": 0.7399, "step": 6261 }, { "epoch": 0.5, "grad_norm": 1.3776954098364917, "learning_rate": 5.203926070939156e-06, "loss": 0.7549, "step": 6262 }, { "epoch": 0.5, "grad_norm": 1.610515748635863, "learning_rate": 5.2026278925840656e-06, "loss": 0.8061, "step": 6263 }, { "epoch": 0.5, "grad_norm": 1.3471024878310238, "learning_rate": 5.201329700547077e-06, "loss": 0.7441, "step": 6264 }, { "epoch": 0.5, "grad_norm": 1.4872570384346513, "learning_rate": 5.2000314949158445e-06, "loss": 0.7436, "step": 6265 }, { "epoch": 0.5, "grad_norm": 1.4658078972471462, "learning_rate": 5.198733275778031e-06, "loss": 0.7997, "step": 6266 }, { "epoch": 0.5, "grad_norm": 1.5012307342720812, "learning_rate": 5.197435043221291e-06, "loss": 0.7275, "step": 6267 }, { "epoch": 0.5, "grad_norm": 1.539153282332061, "learning_rate": 5.196136797333285e-06, "loss": 0.733, "step": 6268 }, { "epoch": 0.5, "grad_norm": 1.6338612016312521, "learning_rate": 5.194838538201676e-06, "loss": 0.776, "step": 6269 }, { "epoch": 0.5, "grad_norm": 1.498938571411505, "learning_rate": 5.193540265914121e-06, "loss": 0.6951, "step": 6270 }, { "epoch": 0.5, "grad_norm": 1.4989844858971688, "learning_rate": 5.192241980558286e-06, "loss": 0.7822, "step": 6271 }, { "epoch": 0.5, "grad_norm": 1.6202017559003221, "learning_rate": 5.1909436822218316e-06, "loss": 0.7284, "step": 6272 }, { "epoch": 0.5, "grad_norm": 1.5909837443241308, "learning_rate": 5.189645370992426e-06, "loss": 0.7786, "step": 6273 }, { "epoch": 0.5, "grad_norm": 1.5732940045044745, "learning_rate": 5.188347046957728e-06, "loss": 0.7136, "step": 6274 }, { "epoch": 0.5, "grad_norm": 1.5030399260076257, "learning_rate": 5.187048710205407e-06, "loss": 0.7658, "step": 6275 }, { "epoch": 0.5, "grad_norm": 1.5076804418392957, "learning_rate": 5.18575036082313e-06, "loss": 0.7279, "step": 6276 }, { "epoch": 0.5, "grad_norm": 1.5980246452668478, "learning_rate": 5.184451998898565e-06, "loss": 0.8237, "step": 6277 }, { "epoch": 0.5, "grad_norm": 0.8480911893618831, "learning_rate": 5.1831536245193795e-06, "loss": 1.0797, "step": 6278 }, { "epoch": 0.5, "grad_norm": 1.597148773607799, "learning_rate": 5.181855237773242e-06, "loss": 0.86, "step": 6279 }, { "epoch": 0.5, "grad_norm": 1.634317052136927, "learning_rate": 5.180556838747821e-06, "loss": 0.7782, "step": 6280 }, { "epoch": 0.5, "grad_norm": 1.5478313643937323, "learning_rate": 5.179258427530791e-06, "loss": 0.7518, "step": 6281 }, { "epoch": 0.5, "grad_norm": 1.341627850169706, "learning_rate": 5.177960004209822e-06, "loss": 0.7667, "step": 6282 }, { "epoch": 0.5, "grad_norm": 1.466622320972047, "learning_rate": 5.1766615688725865e-06, "loss": 0.8626, "step": 6283 }, { "epoch": 0.5, "grad_norm": 1.4233475731692977, "learning_rate": 5.175363121606759e-06, "loss": 0.7703, "step": 6284 }, { "epoch": 0.5, "grad_norm": 1.5357054352525195, "learning_rate": 5.174064662500011e-06, "loss": 0.7055, "step": 6285 }, { "epoch": 0.5, "grad_norm": 1.5791718042416973, "learning_rate": 5.1727661916400195e-06, "loss": 0.7831, "step": 6286 }, { "epoch": 0.5, "grad_norm": 1.4349237759769478, "learning_rate": 5.171467709114458e-06, "loss": 0.7848, "step": 6287 }, { "epoch": 0.5, "grad_norm": 1.5755525187639967, "learning_rate": 5.170169215011007e-06, "loss": 0.8502, "step": 6288 }, { "epoch": 0.5, "grad_norm": 1.5565650422455792, "learning_rate": 5.168870709417342e-06, "loss": 0.8066, "step": 6289 }, { "epoch": 0.5, "grad_norm": 1.5082161257822573, "learning_rate": 5.16757219242114e-06, "loss": 0.6827, "step": 6290 }, { "epoch": 0.5, "grad_norm": 1.4002100339848378, "learning_rate": 5.166273664110079e-06, "loss": 0.7315, "step": 6291 }, { "epoch": 0.5, "grad_norm": 1.3792027610106983, "learning_rate": 5.16497512457184e-06, "loss": 0.7131, "step": 6292 }, { "epoch": 0.5, "grad_norm": 0.8479786058355041, "learning_rate": 5.163676573894104e-06, "loss": 1.1027, "step": 6293 }, { "epoch": 0.5, "grad_norm": 1.4984035639360112, "learning_rate": 5.162378012164552e-06, "loss": 0.7994, "step": 6294 }, { "epoch": 0.51, "grad_norm": 1.6558855934789058, "learning_rate": 5.1610794394708665e-06, "loss": 0.7824, "step": 6295 }, { "epoch": 0.51, "grad_norm": 1.6890517177535387, "learning_rate": 5.159780855900725e-06, "loss": 0.79, "step": 6296 }, { "epoch": 0.51, "grad_norm": 1.5318677375742684, "learning_rate": 5.158482261541817e-06, "loss": 0.8145, "step": 6297 }, { "epoch": 0.51, "grad_norm": 1.439137895224054, "learning_rate": 5.157183656481826e-06, "loss": 0.6736, "step": 6298 }, { "epoch": 0.51, "grad_norm": 1.3980259286074601, "learning_rate": 5.155885040808432e-06, "loss": 0.7377, "step": 6299 }, { "epoch": 0.51, "grad_norm": 0.8093400694190801, "learning_rate": 5.154586414609326e-06, "loss": 1.0899, "step": 6300 }, { "epoch": 0.51, "grad_norm": 1.4856023835425332, "learning_rate": 5.153287777972192e-06, "loss": 0.6975, "step": 6301 }, { "epoch": 0.51, "grad_norm": 1.6179470723165938, "learning_rate": 5.151989130984715e-06, "loss": 0.8213, "step": 6302 }, { "epoch": 0.51, "grad_norm": 1.47335456747346, "learning_rate": 5.150690473734584e-06, "loss": 0.8258, "step": 6303 }, { "epoch": 0.51, "grad_norm": 1.6263330133461826, "learning_rate": 5.149391806309488e-06, "loss": 0.7669, "step": 6304 }, { "epoch": 0.51, "grad_norm": 1.5019543844458962, "learning_rate": 5.148093128797117e-06, "loss": 0.8397, "step": 6305 }, { "epoch": 0.51, "grad_norm": 1.4809467184384197, "learning_rate": 5.146794441285159e-06, "loss": 0.6856, "step": 6306 }, { "epoch": 0.51, "grad_norm": 1.6253536015432246, "learning_rate": 5.145495743861304e-06, "loss": 0.8301, "step": 6307 }, { "epoch": 0.51, "grad_norm": 0.8038636453113016, "learning_rate": 5.144197036613243e-06, "loss": 1.0945, "step": 6308 }, { "epoch": 0.51, "grad_norm": 1.5330312498696057, "learning_rate": 5.1428983196286686e-06, "loss": 0.9082, "step": 6309 }, { "epoch": 0.51, "grad_norm": 1.4984920602458138, "learning_rate": 5.141599592995274e-06, "loss": 0.7644, "step": 6310 }, { "epoch": 0.51, "grad_norm": 1.5829593380869562, "learning_rate": 5.1403008568007505e-06, "loss": 0.7534, "step": 6311 }, { "epoch": 0.51, "grad_norm": 1.504188723090126, "learning_rate": 5.1390021111327936e-06, "loss": 0.7858, "step": 6312 }, { "epoch": 0.51, "grad_norm": 1.4828443407424783, "learning_rate": 5.137703356079095e-06, "loss": 0.809, "step": 6313 }, { "epoch": 0.51, "grad_norm": 1.590355347944815, "learning_rate": 5.1364045917273505e-06, "loss": 0.8366, "step": 6314 }, { "epoch": 0.51, "grad_norm": 1.5293534689011408, "learning_rate": 5.135105818165256e-06, "loss": 0.8159, "step": 6315 }, { "epoch": 0.51, "grad_norm": 0.7846283051512121, "learning_rate": 5.133807035480508e-06, "loss": 1.061, "step": 6316 }, { "epoch": 0.51, "grad_norm": 1.5373527013718988, "learning_rate": 5.132508243760806e-06, "loss": 0.7902, "step": 6317 }, { "epoch": 0.51, "grad_norm": 1.5338596721113498, "learning_rate": 5.13120944309384e-06, "loss": 0.7358, "step": 6318 }, { "epoch": 0.51, "grad_norm": 1.5616794078188856, "learning_rate": 5.1299106335673144e-06, "loss": 0.7798, "step": 6319 }, { "epoch": 0.51, "grad_norm": 1.5792581936431642, "learning_rate": 5.128611815268925e-06, "loss": 0.8413, "step": 6320 }, { "epoch": 0.51, "grad_norm": 1.4424436700636187, "learning_rate": 5.127312988286372e-06, "loss": 0.7856, "step": 6321 }, { "epoch": 0.51, "grad_norm": 0.7841208530391793, "learning_rate": 5.126014152707355e-06, "loss": 1.0787, "step": 6322 }, { "epoch": 0.51, "grad_norm": 1.6034464136476305, "learning_rate": 5.124715308619574e-06, "loss": 0.8095, "step": 6323 }, { "epoch": 0.51, "grad_norm": 1.914177150048868, "learning_rate": 5.123416456110731e-06, "loss": 0.7562, "step": 6324 }, { "epoch": 0.51, "grad_norm": 1.5674434510431392, "learning_rate": 5.122117595268526e-06, "loss": 0.7148, "step": 6325 }, { "epoch": 0.51, "grad_norm": 1.604859048898425, "learning_rate": 5.120818726180662e-06, "loss": 0.9276, "step": 6326 }, { "epoch": 0.51, "grad_norm": 1.5296910767538998, "learning_rate": 5.1195198489348405e-06, "loss": 0.6923, "step": 6327 }, { "epoch": 0.51, "grad_norm": 1.3836411107291229, "learning_rate": 5.118220963618767e-06, "loss": 0.7472, "step": 6328 }, { "epoch": 0.51, "grad_norm": 1.552679806726167, "learning_rate": 5.116922070320144e-06, "loss": 0.7667, "step": 6329 }, { "epoch": 0.51, "grad_norm": 1.5622838632213487, "learning_rate": 5.115623169126673e-06, "loss": 0.8165, "step": 6330 }, { "epoch": 0.51, "grad_norm": 0.8507202984399388, "learning_rate": 5.114324260126064e-06, "loss": 1.0845, "step": 6331 }, { "epoch": 0.51, "grad_norm": 1.468214840008952, "learning_rate": 5.113025343406017e-06, "loss": 0.7392, "step": 6332 }, { "epoch": 0.51, "grad_norm": 1.6308985279254928, "learning_rate": 5.111726419054242e-06, "loss": 0.8039, "step": 6333 }, { "epoch": 0.51, "grad_norm": 1.4530759579821584, "learning_rate": 5.110427487158444e-06, "loss": 0.7428, "step": 6334 }, { "epoch": 0.51, "grad_norm": 1.4202822869210754, "learning_rate": 5.109128547806328e-06, "loss": 0.72, "step": 6335 }, { "epoch": 0.51, "grad_norm": 1.559381639892466, "learning_rate": 5.107829601085604e-06, "loss": 0.8204, "step": 6336 }, { "epoch": 0.51, "grad_norm": 1.625347603541948, "learning_rate": 5.106530647083978e-06, "loss": 0.7856, "step": 6337 }, { "epoch": 0.51, "grad_norm": 1.554905981476767, "learning_rate": 5.10523168588916e-06, "loss": 0.7889, "step": 6338 }, { "epoch": 0.51, "grad_norm": 1.5198566646427836, "learning_rate": 5.1039327175888585e-06, "loss": 0.8241, "step": 6339 }, { "epoch": 0.51, "grad_norm": 1.385689419156612, "learning_rate": 5.10263374227078e-06, "loss": 0.7428, "step": 6340 }, { "epoch": 0.51, "grad_norm": 1.4938588711925693, "learning_rate": 5.101334760022639e-06, "loss": 0.7629, "step": 6341 }, { "epoch": 0.51, "grad_norm": 1.5898394665264874, "learning_rate": 5.100035770932141e-06, "loss": 0.7857, "step": 6342 }, { "epoch": 0.51, "grad_norm": 1.429099673467969, "learning_rate": 5.0987367750870005e-06, "loss": 0.7813, "step": 6343 }, { "epoch": 0.51, "grad_norm": 1.4774877246599243, "learning_rate": 5.097437772574927e-06, "loss": 0.7703, "step": 6344 }, { "epoch": 0.51, "grad_norm": 1.5408308207461083, "learning_rate": 5.0961387634836324e-06, "loss": 0.8715, "step": 6345 }, { "epoch": 0.51, "grad_norm": 1.7682416502229155, "learning_rate": 5.094839747900828e-06, "loss": 0.7207, "step": 6346 }, { "epoch": 0.51, "grad_norm": 0.8858592722579279, "learning_rate": 5.093540725914227e-06, "loss": 1.0874, "step": 6347 }, { "epoch": 0.51, "grad_norm": 1.531501842613457, "learning_rate": 5.092241697611543e-06, "loss": 0.7867, "step": 6348 }, { "epoch": 0.51, "grad_norm": 1.7234342824434714, "learning_rate": 5.090942663080488e-06, "loss": 0.7466, "step": 6349 }, { "epoch": 0.51, "grad_norm": 1.5729967492639005, "learning_rate": 5.089643622408778e-06, "loss": 0.7776, "step": 6350 }, { "epoch": 0.51, "grad_norm": 1.5015747903395165, "learning_rate": 5.0883445756841244e-06, "loss": 0.8023, "step": 6351 }, { "epoch": 0.51, "grad_norm": 1.4958402633714873, "learning_rate": 5.087045522994242e-06, "loss": 0.8147, "step": 6352 }, { "epoch": 0.51, "grad_norm": 1.5883149036764286, "learning_rate": 5.085746464426848e-06, "loss": 0.7238, "step": 6353 }, { "epoch": 0.51, "grad_norm": 0.8135767148113574, "learning_rate": 5.084447400069656e-06, "loss": 1.0873, "step": 6354 }, { "epoch": 0.51, "grad_norm": 1.8966465564032653, "learning_rate": 5.083148330010383e-06, "loss": 0.7108, "step": 6355 }, { "epoch": 0.51, "grad_norm": 1.6419829852724594, "learning_rate": 5.081849254336745e-06, "loss": 0.7841, "step": 6356 }, { "epoch": 0.51, "grad_norm": 0.8303448188712281, "learning_rate": 5.080550173136457e-06, "loss": 1.0727, "step": 6357 }, { "epoch": 0.51, "grad_norm": 1.5195709333018521, "learning_rate": 5.0792510864972384e-06, "loss": 0.801, "step": 6358 }, { "epoch": 0.51, "grad_norm": 1.5353032152237684, "learning_rate": 5.077951994506805e-06, "loss": 0.7642, "step": 6359 }, { "epoch": 0.51, "grad_norm": 0.7614957714211149, "learning_rate": 5.076652897252874e-06, "loss": 1.0531, "step": 6360 }, { "epoch": 0.51, "grad_norm": 1.5753872024648066, "learning_rate": 5.075353794823165e-06, "loss": 0.7606, "step": 6361 }, { "epoch": 0.51, "grad_norm": 1.5769985310783925, "learning_rate": 5.074054687305394e-06, "loss": 0.7463, "step": 6362 }, { "epoch": 0.51, "grad_norm": 1.4779834692586884, "learning_rate": 5.072755574787282e-06, "loss": 0.6905, "step": 6363 }, { "epoch": 0.51, "grad_norm": 1.6272880194156245, "learning_rate": 5.071456457356547e-06, "loss": 0.8267, "step": 6364 }, { "epoch": 0.51, "grad_norm": 1.3544468644296723, "learning_rate": 5.0701573351009105e-06, "loss": 0.7809, "step": 6365 }, { "epoch": 0.51, "grad_norm": 1.527362411624617, "learning_rate": 5.068858208108087e-06, "loss": 0.7284, "step": 6366 }, { "epoch": 0.51, "grad_norm": 1.5937254363767703, "learning_rate": 5.067559076465803e-06, "loss": 0.7465, "step": 6367 }, { "epoch": 0.51, "grad_norm": 1.534774681394336, "learning_rate": 5.066259940261774e-06, "loss": 0.742, "step": 6368 }, { "epoch": 0.51, "grad_norm": 1.5850870979047744, "learning_rate": 5.064960799583722e-06, "loss": 0.7882, "step": 6369 }, { "epoch": 0.51, "grad_norm": 1.5680986240909527, "learning_rate": 5.06366165451937e-06, "loss": 0.7611, "step": 6370 }, { "epoch": 0.51, "grad_norm": 0.8702628794300165, "learning_rate": 5.062362505156435e-06, "loss": 1.083, "step": 6371 }, { "epoch": 0.51, "grad_norm": 1.4658764015925108, "learning_rate": 5.061063351582642e-06, "loss": 0.7924, "step": 6372 }, { "epoch": 0.51, "grad_norm": 1.429584741878266, "learning_rate": 5.059764193885713e-06, "loss": 0.8118, "step": 6373 }, { "epoch": 0.51, "grad_norm": 0.7622081421603959, "learning_rate": 5.058465032153368e-06, "loss": 1.0623, "step": 6374 }, { "epoch": 0.51, "grad_norm": 1.5040150115713196, "learning_rate": 5.0571658664733314e-06, "loss": 0.7748, "step": 6375 }, { "epoch": 0.51, "grad_norm": 1.4945317693496563, "learning_rate": 5.055866696933324e-06, "loss": 0.76, "step": 6376 }, { "epoch": 0.51, "grad_norm": 1.4716468271220315, "learning_rate": 5.054567523621069e-06, "loss": 0.7768, "step": 6377 }, { "epoch": 0.51, "grad_norm": 1.4944777603568808, "learning_rate": 5.05326834662429e-06, "loss": 0.7679, "step": 6378 }, { "epoch": 0.51, "grad_norm": 1.4585884389243409, "learning_rate": 5.051969166030711e-06, "loss": 0.7831, "step": 6379 }, { "epoch": 0.51, "grad_norm": 0.8436007164477644, "learning_rate": 5.050669981928056e-06, "loss": 1.0759, "step": 6380 }, { "epoch": 0.51, "grad_norm": 1.4459808975680575, "learning_rate": 5.049370794404046e-06, "loss": 0.6919, "step": 6381 }, { "epoch": 0.51, "grad_norm": 1.4843535144244686, "learning_rate": 5.048071603546409e-06, "loss": 0.7285, "step": 6382 }, { "epoch": 0.51, "grad_norm": 1.6233102113993472, "learning_rate": 5.046772409442866e-06, "loss": 0.731, "step": 6383 }, { "epoch": 0.51, "grad_norm": 1.5898849085363784, "learning_rate": 5.045473212181145e-06, "loss": 0.7084, "step": 6384 }, { "epoch": 0.51, "grad_norm": 1.5719279533844586, "learning_rate": 5.044174011848966e-06, "loss": 0.8415, "step": 6385 }, { "epoch": 0.51, "grad_norm": 1.5729072149969299, "learning_rate": 5.0428748085340565e-06, "loss": 0.7244, "step": 6386 }, { "epoch": 0.51, "grad_norm": 1.590232042453039, "learning_rate": 5.041575602324144e-06, "loss": 0.8272, "step": 6387 }, { "epoch": 0.51, "grad_norm": 1.4691885332548085, "learning_rate": 5.0402763933069496e-06, "loss": 0.758, "step": 6388 }, { "epoch": 0.51, "grad_norm": 1.558791481962644, "learning_rate": 5.038977181570204e-06, "loss": 0.7495, "step": 6389 }, { "epoch": 0.51, "grad_norm": 1.648121755013111, "learning_rate": 5.037677967201629e-06, "loss": 0.741, "step": 6390 }, { "epoch": 0.51, "grad_norm": 1.5216222837170945, "learning_rate": 5.036378750288949e-06, "loss": 0.8026, "step": 6391 }, { "epoch": 0.51, "grad_norm": 0.8403329930315464, "learning_rate": 5.035079530919895e-06, "loss": 1.0758, "step": 6392 }, { "epoch": 0.51, "grad_norm": 1.5093342943380332, "learning_rate": 5.0337803091821905e-06, "loss": 0.7862, "step": 6393 }, { "epoch": 0.51, "grad_norm": 1.5528835208864047, "learning_rate": 5.032481085163562e-06, "loss": 0.7089, "step": 6394 }, { "epoch": 0.51, "grad_norm": 1.460099150259266, "learning_rate": 5.031181858951737e-06, "loss": 0.7927, "step": 6395 }, { "epoch": 0.51, "grad_norm": 1.4695033236469097, "learning_rate": 5.029882630634441e-06, "loss": 0.7267, "step": 6396 }, { "epoch": 0.51, "grad_norm": 1.6073685610750217, "learning_rate": 5.028583400299402e-06, "loss": 0.7544, "step": 6397 }, { "epoch": 0.51, "grad_norm": 1.4886265473622153, "learning_rate": 5.027284168034344e-06, "loss": 0.8229, "step": 6398 }, { "epoch": 0.51, "grad_norm": 1.4648639091058069, "learning_rate": 5.025984933927e-06, "loss": 0.7515, "step": 6399 }, { "epoch": 0.51, "grad_norm": 1.5335129972263561, "learning_rate": 5.024685698065093e-06, "loss": 0.8724, "step": 6400 }, { "epoch": 0.51, "grad_norm": 1.4028259466929902, "learning_rate": 5.02338646053635e-06, "loss": 0.651, "step": 6401 }, { "epoch": 0.51, "grad_norm": 1.653342939596997, "learning_rate": 5.0220872214285e-06, "loss": 0.7757, "step": 6402 }, { "epoch": 0.51, "grad_norm": 0.8498046992252434, "learning_rate": 5.02078798082927e-06, "loss": 1.0417, "step": 6403 }, { "epoch": 0.51, "grad_norm": 1.433729468480321, "learning_rate": 5.0194887388263895e-06, "loss": 0.8026, "step": 6404 }, { "epoch": 0.51, "grad_norm": 1.4374156885205223, "learning_rate": 5.018189495507584e-06, "loss": 0.7691, "step": 6405 }, { "epoch": 0.51, "grad_norm": 1.5013101089067444, "learning_rate": 5.016890250960582e-06, "loss": 0.8012, "step": 6406 }, { "epoch": 0.51, "grad_norm": 1.9077921372843638, "learning_rate": 5.0155910052731116e-06, "loss": 0.6503, "step": 6407 }, { "epoch": 0.51, "grad_norm": 1.5132613150844731, "learning_rate": 5.0142917585329e-06, "loss": 0.7665, "step": 6408 }, { "epoch": 0.51, "grad_norm": 1.4194916087658804, "learning_rate": 5.012992510827678e-06, "loss": 0.7717, "step": 6409 }, { "epoch": 0.51, "grad_norm": 1.553717623276083, "learning_rate": 5.01169326224517e-06, "loss": 0.7442, "step": 6410 }, { "epoch": 0.51, "grad_norm": 1.4710493304345436, "learning_rate": 5.010394012873107e-06, "loss": 0.7425, "step": 6411 }, { "epoch": 0.51, "grad_norm": 1.5840795676703907, "learning_rate": 5.009094762799218e-06, "loss": 0.81, "step": 6412 }, { "epoch": 0.51, "grad_norm": 1.4886871770917565, "learning_rate": 5.0077955121112285e-06, "loss": 0.776, "step": 6413 }, { "epoch": 0.51, "grad_norm": 1.4991076060960011, "learning_rate": 5.006496260896868e-06, "loss": 0.791, "step": 6414 }, { "epoch": 0.51, "grad_norm": 0.8336997986581897, "learning_rate": 5.0051970092438655e-06, "loss": 1.0739, "step": 6415 }, { "epoch": 0.51, "grad_norm": 1.5294706846268395, "learning_rate": 5.003897757239949e-06, "loss": 0.6606, "step": 6416 }, { "epoch": 0.51, "grad_norm": 1.5886204267215926, "learning_rate": 5.002598504972848e-06, "loss": 0.7312, "step": 6417 }, { "epoch": 0.51, "grad_norm": 1.543066086084952, "learning_rate": 5.0012992525302885e-06, "loss": 0.7258, "step": 6418 }, { "epoch": 0.52, "grad_norm": 1.6073384626028389, "learning_rate": 5e-06, "loss": 0.8081, "step": 6419 }, { "epoch": 0.52, "grad_norm": 1.545236148980762, "learning_rate": 4.998700747469713e-06, "loss": 0.8214, "step": 6420 }, { "epoch": 0.52, "grad_norm": 0.7892700117370993, "learning_rate": 4.997401495027154e-06, "loss": 1.0722, "step": 6421 }, { "epoch": 0.52, "grad_norm": 1.5323551721387514, "learning_rate": 4.996102242760053e-06, "loss": 0.7485, "step": 6422 }, { "epoch": 0.52, "grad_norm": 1.4635867370863458, "learning_rate": 4.994802990756136e-06, "loss": 0.7849, "step": 6423 }, { "epoch": 0.52, "grad_norm": 1.8497808674492808, "learning_rate": 4.9935037391031346e-06, "loss": 0.7867, "step": 6424 }, { "epoch": 0.52, "grad_norm": 1.628240885918961, "learning_rate": 4.992204487888772e-06, "loss": 0.8272, "step": 6425 }, { "epoch": 0.52, "grad_norm": 1.4648870900713535, "learning_rate": 4.9909052372007834e-06, "loss": 0.8433, "step": 6426 }, { "epoch": 0.52, "grad_norm": 1.753603882304953, "learning_rate": 4.9896059871268934e-06, "loss": 0.7618, "step": 6427 }, { "epoch": 0.52, "grad_norm": 1.4434484660624407, "learning_rate": 4.98830673775483e-06, "loss": 0.6977, "step": 6428 }, { "epoch": 0.52, "grad_norm": 1.5401244116421686, "learning_rate": 4.987007489172323e-06, "loss": 0.8304, "step": 6429 }, { "epoch": 0.52, "grad_norm": 1.39287256694849, "learning_rate": 4.9857082414671015e-06, "loss": 0.7178, "step": 6430 }, { "epoch": 0.52, "grad_norm": 0.8505393133893913, "learning_rate": 4.984408994726889e-06, "loss": 1.084, "step": 6431 }, { "epoch": 0.52, "grad_norm": 1.3981005307893708, "learning_rate": 4.9831097490394195e-06, "loss": 0.7084, "step": 6432 }, { "epoch": 0.52, "grad_norm": 0.8209200825415741, "learning_rate": 4.981810504492418e-06, "loss": 1.1023, "step": 6433 }, { "epoch": 0.52, "grad_norm": 1.5500416468655098, "learning_rate": 4.980511261173613e-06, "loss": 0.7412, "step": 6434 }, { "epoch": 0.52, "grad_norm": 1.4428367219170273, "learning_rate": 4.979212019170731e-06, "loss": 0.829, "step": 6435 }, { "epoch": 0.52, "grad_norm": 1.5994690143916275, "learning_rate": 4.977912778571501e-06, "loss": 0.7845, "step": 6436 }, { "epoch": 0.52, "grad_norm": 1.4698746203639657, "learning_rate": 4.976613539463652e-06, "loss": 0.7885, "step": 6437 }, { "epoch": 0.52, "grad_norm": 1.4901436956164573, "learning_rate": 4.975314301934909e-06, "loss": 0.827, "step": 6438 }, { "epoch": 0.52, "grad_norm": 1.513267267325777, "learning_rate": 4.974015066073002e-06, "loss": 0.7124, "step": 6439 }, { "epoch": 0.52, "grad_norm": 0.8326463811820981, "learning_rate": 4.972715831965657e-06, "loss": 1.1035, "step": 6440 }, { "epoch": 0.52, "grad_norm": 1.4966616094063074, "learning_rate": 4.971416599700601e-06, "loss": 0.8403, "step": 6441 }, { "epoch": 0.52, "grad_norm": 1.600038443020443, "learning_rate": 4.97011736936556e-06, "loss": 0.8186, "step": 6442 }, { "epoch": 0.52, "grad_norm": 0.8004497456864529, "learning_rate": 4.968818141048264e-06, "loss": 1.0671, "step": 6443 }, { "epoch": 0.52, "grad_norm": 1.4736674124986657, "learning_rate": 4.967518914836439e-06, "loss": 0.7743, "step": 6444 }, { "epoch": 0.52, "grad_norm": 1.6120093520659209, "learning_rate": 4.96621969081781e-06, "loss": 0.8447, "step": 6445 }, { "epoch": 0.52, "grad_norm": 1.5142589748478543, "learning_rate": 4.964920469080107e-06, "loss": 0.8229, "step": 6446 }, { "epoch": 0.52, "grad_norm": 0.7585248264790571, "learning_rate": 4.963621249711052e-06, "loss": 1.0734, "step": 6447 }, { "epoch": 0.52, "grad_norm": 1.536379949646187, "learning_rate": 4.9623220327983745e-06, "loss": 0.7324, "step": 6448 }, { "epoch": 0.52, "grad_norm": 1.4701693102670197, "learning_rate": 4.961022818429798e-06, "loss": 0.7885, "step": 6449 }, { "epoch": 0.52, "grad_norm": 0.7583910439565497, "learning_rate": 4.959723606693051e-06, "loss": 1.0724, "step": 6450 }, { "epoch": 0.52, "grad_norm": 2.112456729763788, "learning_rate": 4.958424397675859e-06, "loss": 0.7197, "step": 6451 }, { "epoch": 0.52, "grad_norm": 1.480326143205103, "learning_rate": 4.9571251914659435e-06, "loss": 0.8343, "step": 6452 }, { "epoch": 0.52, "grad_norm": 1.5408453259952195, "learning_rate": 4.955825988151036e-06, "loss": 0.8417, "step": 6453 }, { "epoch": 0.52, "grad_norm": 1.5958346627945268, "learning_rate": 4.9545267878188585e-06, "loss": 0.7745, "step": 6454 }, { "epoch": 0.52, "grad_norm": 1.6089433669065925, "learning_rate": 4.953227590557136e-06, "loss": 0.7606, "step": 6455 }, { "epoch": 0.52, "grad_norm": 1.654648816503151, "learning_rate": 4.951928396453593e-06, "loss": 0.8564, "step": 6456 }, { "epoch": 0.52, "grad_norm": 1.7326092986383894, "learning_rate": 4.950629205595955e-06, "loss": 0.7881, "step": 6457 }, { "epoch": 0.52, "grad_norm": 1.43304510259923, "learning_rate": 4.949330018071947e-06, "loss": 0.781, "step": 6458 }, { "epoch": 0.52, "grad_norm": 1.4726382085397431, "learning_rate": 4.948030833969289e-06, "loss": 0.6617, "step": 6459 }, { "epoch": 0.52, "grad_norm": 0.8473671837899007, "learning_rate": 4.946731653375711e-06, "loss": 1.0935, "step": 6460 }, { "epoch": 0.52, "grad_norm": 1.4080591134682687, "learning_rate": 4.945432476378933e-06, "loss": 0.7394, "step": 6461 }, { "epoch": 0.52, "grad_norm": 0.8013557886990337, "learning_rate": 4.944133303066677e-06, "loss": 1.0512, "step": 6462 }, { "epoch": 0.52, "grad_norm": 1.5449880110854264, "learning_rate": 4.94283413352667e-06, "loss": 0.7629, "step": 6463 }, { "epoch": 0.52, "grad_norm": 1.4889939833507277, "learning_rate": 4.9415349678466335e-06, "loss": 0.8013, "step": 6464 }, { "epoch": 0.52, "grad_norm": 1.5810427920849335, "learning_rate": 4.940235806114289e-06, "loss": 0.8321, "step": 6465 }, { "epoch": 0.52, "grad_norm": 1.5402087370488495, "learning_rate": 4.938936648417359e-06, "loss": 0.8168, "step": 6466 }, { "epoch": 0.52, "grad_norm": 1.528993746268958, "learning_rate": 4.937637494843566e-06, "loss": 0.8974, "step": 6467 }, { "epoch": 0.52, "grad_norm": 1.6206708296205121, "learning_rate": 4.936338345480633e-06, "loss": 0.7658, "step": 6468 }, { "epoch": 0.52, "grad_norm": 1.4459501312953147, "learning_rate": 4.935039200416279e-06, "loss": 0.7242, "step": 6469 }, { "epoch": 0.52, "grad_norm": 0.9083455540425515, "learning_rate": 4.933740059738227e-06, "loss": 1.1002, "step": 6470 }, { "epoch": 0.52, "grad_norm": 1.5135210114614037, "learning_rate": 4.932440923534199e-06, "loss": 0.7984, "step": 6471 }, { "epoch": 0.52, "grad_norm": 1.3949659506944094, "learning_rate": 4.931141791891913e-06, "loss": 0.7469, "step": 6472 }, { "epoch": 0.52, "grad_norm": 1.4223973909458398, "learning_rate": 4.929842664899092e-06, "loss": 0.7895, "step": 6473 }, { "epoch": 0.52, "grad_norm": 1.4874815384217368, "learning_rate": 4.928543542643454e-06, "loss": 0.7694, "step": 6474 }, { "epoch": 0.52, "grad_norm": 1.5752708452894981, "learning_rate": 4.92724442521272e-06, "loss": 0.7733, "step": 6475 }, { "epoch": 0.52, "grad_norm": 1.560095584104035, "learning_rate": 4.925945312694606e-06, "loss": 0.7367, "step": 6476 }, { "epoch": 0.52, "grad_norm": 1.5517567295584944, "learning_rate": 4.924646205176836e-06, "loss": 0.6818, "step": 6477 }, { "epoch": 0.52, "grad_norm": 1.581536756637996, "learning_rate": 4.923347102747129e-06, "loss": 0.7659, "step": 6478 }, { "epoch": 0.52, "grad_norm": 1.4952480089883156, "learning_rate": 4.922048005493196e-06, "loss": 0.8041, "step": 6479 }, { "epoch": 0.52, "grad_norm": 0.7937958434535268, "learning_rate": 4.920748913502763e-06, "loss": 1.0632, "step": 6480 }, { "epoch": 0.52, "grad_norm": 1.5143591022040594, "learning_rate": 4.919449826863544e-06, "loss": 0.7789, "step": 6481 }, { "epoch": 0.52, "grad_norm": 0.7650382053721767, "learning_rate": 4.9181507456632574e-06, "loss": 1.0765, "step": 6482 }, { "epoch": 0.52, "grad_norm": 1.4838996972233147, "learning_rate": 4.9168516699896185e-06, "loss": 0.7727, "step": 6483 }, { "epoch": 0.52, "grad_norm": 1.4814017018105492, "learning_rate": 4.915552599930345e-06, "loss": 0.8288, "step": 6484 }, { "epoch": 0.52, "grad_norm": 1.4958479633315331, "learning_rate": 4.914253535573154e-06, "loss": 0.7299, "step": 6485 }, { "epoch": 0.52, "grad_norm": 1.5254318313121316, "learning_rate": 4.912954477005758e-06, "loss": 0.8068, "step": 6486 }, { "epoch": 0.52, "grad_norm": 1.5583065834647298, "learning_rate": 4.911655424315877e-06, "loss": 0.8065, "step": 6487 }, { "epoch": 0.52, "grad_norm": 1.5029390966784038, "learning_rate": 4.910356377591224e-06, "loss": 0.7343, "step": 6488 }, { "epoch": 0.52, "grad_norm": 1.5030473924130376, "learning_rate": 4.909057336919513e-06, "loss": 0.8166, "step": 6489 }, { "epoch": 0.52, "grad_norm": 1.7721701774058407, "learning_rate": 4.907758302388458e-06, "loss": 0.7579, "step": 6490 }, { "epoch": 0.52, "grad_norm": 1.5892450038549366, "learning_rate": 4.906459274085774e-06, "loss": 0.8303, "step": 6491 }, { "epoch": 0.52, "grad_norm": 1.457407435370129, "learning_rate": 4.905160252099174e-06, "loss": 0.7807, "step": 6492 }, { "epoch": 0.52, "grad_norm": 1.5168357037440405, "learning_rate": 4.903861236516369e-06, "loss": 0.7567, "step": 6493 }, { "epoch": 0.52, "grad_norm": 1.4856418847093935, "learning_rate": 4.902562227425075e-06, "loss": 0.7529, "step": 6494 }, { "epoch": 0.52, "grad_norm": 1.7761629502836265, "learning_rate": 4.901263224913001e-06, "loss": 0.6403, "step": 6495 }, { "epoch": 0.52, "grad_norm": 1.5850458293113319, "learning_rate": 4.899964229067859e-06, "loss": 0.7808, "step": 6496 }, { "epoch": 0.52, "grad_norm": 1.510097095601767, "learning_rate": 4.8986652399773625e-06, "loss": 0.7779, "step": 6497 }, { "epoch": 0.52, "grad_norm": 1.6499328812896474, "learning_rate": 4.897366257729221e-06, "loss": 0.8087, "step": 6498 }, { "epoch": 0.52, "grad_norm": 1.5950171976264655, "learning_rate": 4.896067282411144e-06, "loss": 0.7711, "step": 6499 }, { "epoch": 0.52, "grad_norm": 1.541596010190185, "learning_rate": 4.894768314110841e-06, "loss": 0.7226, "step": 6500 }, { "epoch": 0.52, "grad_norm": 1.4682725118606523, "learning_rate": 4.893469352916023e-06, "loss": 0.7334, "step": 6501 }, { "epoch": 0.52, "grad_norm": 1.5529400185493567, "learning_rate": 4.892170398914398e-06, "loss": 0.8329, "step": 6502 }, { "epoch": 0.52, "grad_norm": 1.683749494991563, "learning_rate": 4.890871452193673e-06, "loss": 0.7923, "step": 6503 }, { "epoch": 0.52, "grad_norm": 1.513640823612606, "learning_rate": 4.889572512841557e-06, "loss": 0.799, "step": 6504 }, { "epoch": 0.52, "grad_norm": 1.5157349898236623, "learning_rate": 4.8882735809457594e-06, "loss": 0.7884, "step": 6505 }, { "epoch": 0.52, "grad_norm": 1.6013235478292636, "learning_rate": 4.886974656593986e-06, "loss": 0.7134, "step": 6506 }, { "epoch": 0.52, "grad_norm": 1.3466769949823978, "learning_rate": 4.885675739873938e-06, "loss": 0.6386, "step": 6507 }, { "epoch": 0.52, "grad_norm": 1.6083519080816646, "learning_rate": 4.8843768308733285e-06, "loss": 0.7664, "step": 6508 }, { "epoch": 0.52, "grad_norm": 0.855208814150592, "learning_rate": 4.883077929679859e-06, "loss": 1.109, "step": 6509 }, { "epoch": 0.52, "grad_norm": 1.587428529222084, "learning_rate": 4.881779036381234e-06, "loss": 0.7355, "step": 6510 }, { "epoch": 0.52, "grad_norm": 0.786876803078171, "learning_rate": 4.88048015106516e-06, "loss": 1.0711, "step": 6511 }, { "epoch": 0.52, "grad_norm": 1.4901684854756414, "learning_rate": 4.87918127381934e-06, "loss": 0.7869, "step": 6512 }, { "epoch": 0.52, "grad_norm": 1.5176874694101439, "learning_rate": 4.877882404731474e-06, "loss": 0.7193, "step": 6513 }, { "epoch": 0.52, "grad_norm": 1.5817594973821638, "learning_rate": 4.87658354388927e-06, "loss": 0.7906, "step": 6514 }, { "epoch": 0.52, "grad_norm": 1.4679859826111616, "learning_rate": 4.875284691380427e-06, "loss": 0.7983, "step": 6515 }, { "epoch": 0.52, "grad_norm": 1.51493781893582, "learning_rate": 4.873985847292647e-06, "loss": 0.6716, "step": 6516 }, { "epoch": 0.52, "grad_norm": 1.6487592101352864, "learning_rate": 4.872687011713629e-06, "loss": 0.764, "step": 6517 }, { "epoch": 0.52, "grad_norm": 0.7723148776336929, "learning_rate": 4.871388184731077e-06, "loss": 1.1063, "step": 6518 }, { "epoch": 0.52, "grad_norm": 1.8898810051464694, "learning_rate": 4.870089366432688e-06, "loss": 0.9017, "step": 6519 }, { "epoch": 0.52, "grad_norm": 1.4574292176576356, "learning_rate": 4.868790556906161e-06, "loss": 0.6978, "step": 6520 }, { "epoch": 0.52, "grad_norm": 1.4497915377880644, "learning_rate": 4.867491756239197e-06, "loss": 0.8322, "step": 6521 }, { "epoch": 0.52, "grad_norm": 1.5643138437015354, "learning_rate": 4.866192964519493e-06, "loss": 0.7335, "step": 6522 }, { "epoch": 0.52, "grad_norm": 1.459124303262117, "learning_rate": 4.8648941818347465e-06, "loss": 0.7189, "step": 6523 }, { "epoch": 0.52, "grad_norm": 1.5768394452809897, "learning_rate": 4.86359540827265e-06, "loss": 0.7792, "step": 6524 }, { "epoch": 0.52, "grad_norm": 1.6226952554231497, "learning_rate": 4.862296643920907e-06, "loss": 0.8023, "step": 6525 }, { "epoch": 0.52, "grad_norm": 1.5795545153804817, "learning_rate": 4.860997888867209e-06, "loss": 0.8735, "step": 6526 }, { "epoch": 0.52, "grad_norm": 1.4891096834423039, "learning_rate": 4.85969914319925e-06, "loss": 0.8081, "step": 6527 }, { "epoch": 0.52, "grad_norm": 1.461571651889349, "learning_rate": 4.8584004070047275e-06, "loss": 0.8013, "step": 6528 }, { "epoch": 0.52, "grad_norm": 1.5774315639224543, "learning_rate": 4.857101680371333e-06, "loss": 0.7547, "step": 6529 }, { "epoch": 0.52, "grad_norm": 1.4853511122903726, "learning_rate": 4.855802963386757e-06, "loss": 0.8448, "step": 6530 }, { "epoch": 0.52, "grad_norm": 0.8819504396452725, "learning_rate": 4.8545042561386975e-06, "loss": 1.0534, "step": 6531 }, { "epoch": 0.52, "grad_norm": 0.8360991669671466, "learning_rate": 4.853205558714843e-06, "loss": 1.115, "step": 6532 }, { "epoch": 0.52, "grad_norm": 1.5964781297985573, "learning_rate": 4.851906871202885e-06, "loss": 0.8192, "step": 6533 }, { "epoch": 0.52, "grad_norm": 1.5270888157884512, "learning_rate": 4.8506081936905124e-06, "loss": 0.6651, "step": 6534 }, { "epoch": 0.52, "grad_norm": 0.795856466773291, "learning_rate": 4.849309526265417e-06, "loss": 1.0788, "step": 6535 }, { "epoch": 0.52, "grad_norm": 1.4791727051628718, "learning_rate": 4.848010869015288e-06, "loss": 0.7942, "step": 6536 }, { "epoch": 0.52, "grad_norm": 1.6964711065071454, "learning_rate": 4.846712222027811e-06, "loss": 0.7051, "step": 6537 }, { "epoch": 0.52, "grad_norm": 0.8408982701451679, "learning_rate": 4.845413585390676e-06, "loss": 1.0865, "step": 6538 }, { "epoch": 0.52, "grad_norm": 1.5695823285103387, "learning_rate": 4.844114959191569e-06, "loss": 0.8251, "step": 6539 }, { "epoch": 0.52, "grad_norm": 0.8068832269713099, "learning_rate": 4.842816343518178e-06, "loss": 1.1215, "step": 6540 }, { "epoch": 0.52, "grad_norm": 1.6567438701114299, "learning_rate": 4.841517738458183e-06, "loss": 0.7997, "step": 6541 }, { "epoch": 0.52, "grad_norm": 1.558436974948719, "learning_rate": 4.8402191440992755e-06, "loss": 0.8806, "step": 6542 }, { "epoch": 0.52, "grad_norm": 1.6005547111940115, "learning_rate": 4.838920560529137e-06, "loss": 0.7358, "step": 6543 }, { "epoch": 0.53, "grad_norm": 1.447219648912299, "learning_rate": 4.837621987835449e-06, "loss": 0.6843, "step": 6544 }, { "epoch": 0.53, "grad_norm": 1.5275517450200786, "learning_rate": 4.836323426105897e-06, "loss": 0.7065, "step": 6545 }, { "epoch": 0.53, "grad_norm": 1.6046749707827714, "learning_rate": 4.835024875428162e-06, "loss": 0.7428, "step": 6546 }, { "epoch": 0.53, "grad_norm": 1.450644595828523, "learning_rate": 4.833726335889922e-06, "loss": 0.7833, "step": 6547 }, { "epoch": 0.53, "grad_norm": 1.4265858344694498, "learning_rate": 4.832427807578862e-06, "loss": 0.6018, "step": 6548 }, { "epoch": 0.53, "grad_norm": 1.4650526391866825, "learning_rate": 4.83112929058266e-06, "loss": 0.67, "step": 6549 }, { "epoch": 0.53, "grad_norm": 1.4116807058159213, "learning_rate": 4.829830784988995e-06, "loss": 0.7114, "step": 6550 }, { "epoch": 0.53, "grad_norm": 1.5155028453601673, "learning_rate": 4.828532290885541e-06, "loss": 0.727, "step": 6551 }, { "epoch": 0.53, "grad_norm": 1.4730596879557711, "learning_rate": 4.827233808359982e-06, "loss": 0.7682, "step": 6552 }, { "epoch": 0.53, "grad_norm": 1.6789068550740676, "learning_rate": 4.825935337499991e-06, "loss": 0.7937, "step": 6553 }, { "epoch": 0.53, "grad_norm": 1.524247026722769, "learning_rate": 4.824636878393243e-06, "loss": 0.758, "step": 6554 }, { "epoch": 0.53, "grad_norm": 1.6160458163196136, "learning_rate": 4.823338431127414e-06, "loss": 0.7635, "step": 6555 }, { "epoch": 0.53, "grad_norm": 1.9287572237546142, "learning_rate": 4.82203999579018e-06, "loss": 0.769, "step": 6556 }, { "epoch": 0.53, "grad_norm": 1.5451878340476455, "learning_rate": 4.820741572469211e-06, "loss": 0.7867, "step": 6557 }, { "epoch": 0.53, "grad_norm": 1.634336163996332, "learning_rate": 4.819443161252179e-06, "loss": 0.7258, "step": 6558 }, { "epoch": 0.53, "grad_norm": 1.5855406592570065, "learning_rate": 4.81814476222676e-06, "loss": 0.7431, "step": 6559 }, { "epoch": 0.53, "grad_norm": 1.4849348566967928, "learning_rate": 4.816846375480623e-06, "loss": 0.8275, "step": 6560 }, { "epoch": 0.53, "grad_norm": 1.5604195180300142, "learning_rate": 4.8155480011014354e-06, "loss": 0.7768, "step": 6561 }, { "epoch": 0.53, "grad_norm": 1.3982437802804302, "learning_rate": 4.81424963917687e-06, "loss": 0.6944, "step": 6562 }, { "epoch": 0.53, "grad_norm": 1.4706612670790948, "learning_rate": 4.812951289794594e-06, "loss": 0.7543, "step": 6563 }, { "epoch": 0.53, "grad_norm": 1.473060152420395, "learning_rate": 4.8116529530422745e-06, "loss": 0.7794, "step": 6564 }, { "epoch": 0.53, "grad_norm": 1.4739685231525774, "learning_rate": 4.810354629007576e-06, "loss": 0.7409, "step": 6565 }, { "epoch": 0.53, "grad_norm": 1.5522301016839524, "learning_rate": 4.80905631777817e-06, "loss": 0.8364, "step": 6566 }, { "epoch": 0.53, "grad_norm": 1.3985813543188996, "learning_rate": 4.807758019441717e-06, "loss": 0.8074, "step": 6567 }, { "epoch": 0.53, "grad_norm": 1.4350509556647661, "learning_rate": 4.80645973408588e-06, "loss": 0.7596, "step": 6568 }, { "epoch": 0.53, "grad_norm": 1.6133734009431189, "learning_rate": 4.805161461798326e-06, "loss": 0.6666, "step": 6569 }, { "epoch": 0.53, "grad_norm": 1.6890135242823225, "learning_rate": 4.803863202666716e-06, "loss": 0.7603, "step": 6570 }, { "epoch": 0.53, "grad_norm": 1.423206687258015, "learning_rate": 4.8025649567787095e-06, "loss": 0.7557, "step": 6571 }, { "epoch": 0.53, "grad_norm": 1.5713056267825107, "learning_rate": 4.80126672422197e-06, "loss": 0.7547, "step": 6572 }, { "epoch": 0.53, "grad_norm": 0.9055934158401004, "learning_rate": 4.799968505084156e-06, "loss": 1.0475, "step": 6573 }, { "epoch": 0.53, "grad_norm": 1.537480410269998, "learning_rate": 4.798670299452926e-06, "loss": 0.7665, "step": 6574 }, { "epoch": 0.53, "grad_norm": 1.5558199405836304, "learning_rate": 4.797372107415935e-06, "loss": 0.7477, "step": 6575 }, { "epoch": 0.53, "grad_norm": 1.477980826324111, "learning_rate": 4.796073929060845e-06, "loss": 0.7734, "step": 6576 }, { "epoch": 0.53, "grad_norm": 0.8358758564906432, "learning_rate": 4.79477576447531e-06, "loss": 1.1205, "step": 6577 }, { "epoch": 0.53, "grad_norm": 1.4751800684559153, "learning_rate": 4.793477613746984e-06, "loss": 0.7461, "step": 6578 }, { "epoch": 0.53, "grad_norm": 0.7848411182580356, "learning_rate": 4.792179476963521e-06, "loss": 1.1015, "step": 6579 }, { "epoch": 0.53, "grad_norm": 1.4898781789431677, "learning_rate": 4.7908813542125765e-06, "loss": 0.7582, "step": 6580 }, { "epoch": 0.53, "grad_norm": 1.549735385242299, "learning_rate": 4.789583245581801e-06, "loss": 0.8243, "step": 6581 }, { "epoch": 0.53, "grad_norm": 1.4603272206459428, "learning_rate": 4.788285151158844e-06, "loss": 0.7331, "step": 6582 }, { "epoch": 0.53, "grad_norm": 2.22606750674253, "learning_rate": 4.786987071031359e-06, "loss": 0.716, "step": 6583 }, { "epoch": 0.53, "grad_norm": 1.5144539851123386, "learning_rate": 4.785689005286995e-06, "loss": 0.8057, "step": 6584 }, { "epoch": 0.53, "grad_norm": 1.4946502857559942, "learning_rate": 4.784390954013396e-06, "loss": 0.7978, "step": 6585 }, { "epoch": 0.53, "grad_norm": 1.5295029981199595, "learning_rate": 4.783092917298216e-06, "loss": 0.8071, "step": 6586 }, { "epoch": 0.53, "grad_norm": 1.4831591224238339, "learning_rate": 4.781794895229097e-06, "loss": 0.7285, "step": 6587 }, { "epoch": 0.53, "grad_norm": 1.592945357557965, "learning_rate": 4.780496887893686e-06, "loss": 0.7339, "step": 6588 }, { "epoch": 0.53, "grad_norm": 1.478260215958859, "learning_rate": 4.779198895379627e-06, "loss": 0.7624, "step": 6589 }, { "epoch": 0.53, "grad_norm": 1.5758860468213067, "learning_rate": 4.7779009177745645e-06, "loss": 0.809, "step": 6590 }, { "epoch": 0.53, "grad_norm": 1.5029767172651876, "learning_rate": 4.7766029551661395e-06, "loss": 0.8688, "step": 6591 }, { "epoch": 0.53, "grad_norm": 1.5017567519992474, "learning_rate": 4.7753050076419916e-06, "loss": 0.8281, "step": 6592 }, { "epoch": 0.53, "grad_norm": 1.5476474533874085, "learning_rate": 4.774007075289766e-06, "loss": 0.773, "step": 6593 }, { "epoch": 0.53, "grad_norm": 1.6972990946699873, "learning_rate": 4.772709158197098e-06, "loss": 0.7139, "step": 6594 }, { "epoch": 0.53, "grad_norm": 1.6436218683370412, "learning_rate": 4.771411256451628e-06, "loss": 0.7867, "step": 6595 }, { "epoch": 0.53, "grad_norm": 1.5401917620855783, "learning_rate": 4.770113370140992e-06, "loss": 0.7726, "step": 6596 }, { "epoch": 0.53, "grad_norm": 1.54804670125832, "learning_rate": 4.768815499352828e-06, "loss": 0.8179, "step": 6597 }, { "epoch": 0.53, "grad_norm": 1.5303551405090337, "learning_rate": 4.76751764417477e-06, "loss": 0.8369, "step": 6598 }, { "epoch": 0.53, "grad_norm": 1.46431807576495, "learning_rate": 4.766219804694451e-06, "loss": 0.8001, "step": 6599 }, { "epoch": 0.53, "grad_norm": 0.9709887101807445, "learning_rate": 4.764921980999507e-06, "loss": 1.0667, "step": 6600 }, { "epoch": 0.53, "grad_norm": 1.5151494384730637, "learning_rate": 4.763624173177568e-06, "loss": 0.7344, "step": 6601 }, { "epoch": 0.53, "grad_norm": 1.4684602563638292, "learning_rate": 4.762326381316263e-06, "loss": 0.6811, "step": 6602 }, { "epoch": 0.53, "grad_norm": 1.9315114926891812, "learning_rate": 4.761028605503226e-06, "loss": 0.7966, "step": 6603 }, { "epoch": 0.53, "grad_norm": 1.5611713414289543, "learning_rate": 4.7597308458260845e-06, "loss": 0.8578, "step": 6604 }, { "epoch": 0.53, "grad_norm": 1.512347446742377, "learning_rate": 4.758433102372466e-06, "loss": 0.784, "step": 6605 }, { "epoch": 0.53, "grad_norm": 0.8842981610607556, "learning_rate": 4.7571353752299955e-06, "loss": 1.1221, "step": 6606 }, { "epoch": 0.53, "grad_norm": 0.8607453401366636, "learning_rate": 4.7558376644863e-06, "loss": 1.1085, "step": 6607 }, { "epoch": 0.53, "grad_norm": 1.6030697077930964, "learning_rate": 4.754539970229005e-06, "loss": 0.8012, "step": 6608 }, { "epoch": 0.53, "grad_norm": 1.5573501820373252, "learning_rate": 4.75324229254573e-06, "loss": 0.7428, "step": 6609 }, { "epoch": 0.53, "grad_norm": 1.5920673672094048, "learning_rate": 4.7519446315241025e-06, "loss": 0.7747, "step": 6610 }, { "epoch": 0.53, "grad_norm": 1.4065127768083898, "learning_rate": 4.75064698725174e-06, "loss": 0.8176, "step": 6611 }, { "epoch": 0.53, "grad_norm": 0.8256040648198439, "learning_rate": 4.749349359816261e-06, "loss": 1.11, "step": 6612 }, { "epoch": 0.53, "grad_norm": 1.5204605439929173, "learning_rate": 4.748051749305288e-06, "loss": 0.837, "step": 6613 }, { "epoch": 0.53, "grad_norm": 0.8001378820079399, "learning_rate": 4.746754155806437e-06, "loss": 1.0689, "step": 6614 }, { "epoch": 0.53, "grad_norm": 1.5075738639922274, "learning_rate": 4.7454565794073244e-06, "loss": 0.7958, "step": 6615 }, { "epoch": 0.53, "grad_norm": 1.5241153675449426, "learning_rate": 4.744159020195566e-06, "loss": 0.7396, "step": 6616 }, { "epoch": 0.53, "grad_norm": 1.4688654206279637, "learning_rate": 4.742861478258775e-06, "loss": 0.6956, "step": 6617 }, { "epoch": 0.53, "grad_norm": 1.4931990326876936, "learning_rate": 4.741563953684566e-06, "loss": 0.7882, "step": 6618 }, { "epoch": 0.53, "grad_norm": 0.806189694085349, "learning_rate": 4.740266446560547e-06, "loss": 1.1117, "step": 6619 }, { "epoch": 0.53, "grad_norm": 0.7870570703100621, "learning_rate": 4.738968956974334e-06, "loss": 1.1343, "step": 6620 }, { "epoch": 0.53, "grad_norm": 1.9967818814112137, "learning_rate": 4.737671485013533e-06, "loss": 0.7473, "step": 6621 }, { "epoch": 0.53, "grad_norm": 1.573910938302091, "learning_rate": 4.736374030765754e-06, "loss": 0.8197, "step": 6622 }, { "epoch": 0.53, "grad_norm": 1.5808421440003961, "learning_rate": 4.735076594318602e-06, "loss": 0.7474, "step": 6623 }, { "epoch": 0.53, "grad_norm": 0.7922501525137619, "learning_rate": 4.733779175759685e-06, "loss": 1.1262, "step": 6624 }, { "epoch": 0.53, "grad_norm": 1.5901335743839164, "learning_rate": 4.732481775176607e-06, "loss": 0.8136, "step": 6625 }, { "epoch": 0.53, "grad_norm": 0.8138392900959909, "learning_rate": 4.7311843926569704e-06, "loss": 1.0774, "step": 6626 }, { "epoch": 0.53, "grad_norm": 1.6722990043666195, "learning_rate": 4.729887028288379e-06, "loss": 0.8477, "step": 6627 }, { "epoch": 0.53, "grad_norm": 1.4525515458959124, "learning_rate": 4.728589682158434e-06, "loss": 0.7842, "step": 6628 }, { "epoch": 0.53, "grad_norm": 1.5248037366533527, "learning_rate": 4.727292354354731e-06, "loss": 0.7462, "step": 6629 }, { "epoch": 0.53, "grad_norm": 1.6017422043822407, "learning_rate": 4.725995044964874e-06, "loss": 0.8397, "step": 6630 }, { "epoch": 0.53, "grad_norm": 0.7841607134810539, "learning_rate": 4.724697754076459e-06, "loss": 1.0739, "step": 6631 }, { "epoch": 0.53, "grad_norm": 1.5029753538437, "learning_rate": 4.723400481777081e-06, "loss": 0.7911, "step": 6632 }, { "epoch": 0.53, "grad_norm": 1.538012276920181, "learning_rate": 4.722103228154333e-06, "loss": 0.7769, "step": 6633 }, { "epoch": 0.53, "grad_norm": 1.4976018561001678, "learning_rate": 4.7208059932958125e-06, "loss": 0.7673, "step": 6634 }, { "epoch": 0.53, "grad_norm": 1.4790421783976868, "learning_rate": 4.7195087772891096e-06, "loss": 0.7318, "step": 6635 }, { "epoch": 0.53, "grad_norm": 1.606771523702515, "learning_rate": 4.718211580221813e-06, "loss": 0.8078, "step": 6636 }, { "epoch": 0.53, "grad_norm": 0.7898932202575697, "learning_rate": 4.716914402181517e-06, "loss": 1.082, "step": 6637 }, { "epoch": 0.53, "grad_norm": 1.4350198102498026, "learning_rate": 4.7156172432558075e-06, "loss": 0.7396, "step": 6638 }, { "epoch": 0.53, "grad_norm": 1.4583377218324256, "learning_rate": 4.7143201035322735e-06, "loss": 0.7377, "step": 6639 }, { "epoch": 0.53, "grad_norm": 1.5104258124647387, "learning_rate": 4.713022983098496e-06, "loss": 0.7698, "step": 6640 }, { "epoch": 0.53, "grad_norm": 1.4191909123602346, "learning_rate": 4.711725882042066e-06, "loss": 0.7738, "step": 6641 }, { "epoch": 0.53, "grad_norm": 1.4676470109201796, "learning_rate": 4.710428800450562e-06, "loss": 0.7384, "step": 6642 }, { "epoch": 0.53, "grad_norm": 1.596183862664256, "learning_rate": 4.7091317384115675e-06, "loss": 0.7628, "step": 6643 }, { "epoch": 0.53, "grad_norm": 1.4655418121031003, "learning_rate": 4.7078346960126645e-06, "loss": 0.8198, "step": 6644 }, { "epoch": 0.53, "grad_norm": 1.479252919465189, "learning_rate": 4.70653767334143e-06, "loss": 0.7871, "step": 6645 }, { "epoch": 0.53, "grad_norm": 1.5370994950701813, "learning_rate": 4.705240670485441e-06, "loss": 0.6917, "step": 6646 }, { "epoch": 0.53, "grad_norm": 0.8581882244882423, "learning_rate": 4.703943687532279e-06, "loss": 1.0672, "step": 6647 }, { "epoch": 0.53, "grad_norm": 1.4835967447244494, "learning_rate": 4.7026467245695155e-06, "loss": 0.8103, "step": 6648 }, { "epoch": 0.53, "grad_norm": 1.5687801600978966, "learning_rate": 4.701349781684724e-06, "loss": 0.7876, "step": 6649 }, { "epoch": 0.53, "grad_norm": 1.5663925726685595, "learning_rate": 4.700052858965478e-06, "loss": 0.8471, "step": 6650 }, { "epoch": 0.53, "grad_norm": 1.5506410608758596, "learning_rate": 4.69875595649935e-06, "loss": 0.8428, "step": 6651 }, { "epoch": 0.53, "grad_norm": 1.4200634579855331, "learning_rate": 4.697459074373909e-06, "loss": 0.7736, "step": 6652 }, { "epoch": 0.53, "grad_norm": 0.8038886612715305, "learning_rate": 4.696162212676721e-06, "loss": 1.086, "step": 6653 }, { "epoch": 0.53, "grad_norm": 1.5144043076476261, "learning_rate": 4.694865371495357e-06, "loss": 0.7628, "step": 6654 }, { "epoch": 0.53, "grad_norm": 1.560584275191394, "learning_rate": 4.6935685509173815e-06, "loss": 0.7415, "step": 6655 }, { "epoch": 0.53, "grad_norm": 1.564844451295664, "learning_rate": 4.6922717510303565e-06, "loss": 0.7083, "step": 6656 }, { "epoch": 0.53, "grad_norm": 1.6757220491581997, "learning_rate": 4.690974971921846e-06, "loss": 0.7848, "step": 6657 }, { "epoch": 0.53, "grad_norm": 0.7746224590638965, "learning_rate": 4.6896782136794126e-06, "loss": 1.1006, "step": 6658 }, { "epoch": 0.53, "grad_norm": 1.6090707877798003, "learning_rate": 4.688381476390617e-06, "loss": 0.7793, "step": 6659 }, { "epoch": 0.53, "grad_norm": 1.498298428451919, "learning_rate": 4.687084760143015e-06, "loss": 0.7936, "step": 6660 }, { "epoch": 0.53, "grad_norm": 1.4635141617602387, "learning_rate": 4.685788065024167e-06, "loss": 0.7034, "step": 6661 }, { "epoch": 0.53, "grad_norm": 1.5398026829545355, "learning_rate": 4.684491391121628e-06, "loss": 0.8135, "step": 6662 }, { "epoch": 0.53, "grad_norm": 1.5833776514688946, "learning_rate": 4.68319473852295e-06, "loss": 0.8264, "step": 6663 }, { "epoch": 0.53, "grad_norm": 0.7604121489925689, "learning_rate": 4.681898107315687e-06, "loss": 1.0501, "step": 6664 }, { "epoch": 0.53, "grad_norm": 2.2173088905073084, "learning_rate": 4.680601497587392e-06, "loss": 0.7406, "step": 6665 }, { "epoch": 0.53, "grad_norm": 1.5345188888686891, "learning_rate": 4.679304909425615e-06, "loss": 0.7522, "step": 6666 }, { "epoch": 0.53, "grad_norm": 1.4288529756206767, "learning_rate": 4.678008342917903e-06, "loss": 0.6677, "step": 6667 }, { "epoch": 0.53, "grad_norm": 1.394454320906747, "learning_rate": 4.676711798151805e-06, "loss": 0.7554, "step": 6668 }, { "epoch": 0.54, "grad_norm": 1.4339047722445055, "learning_rate": 4.675415275214865e-06, "loss": 0.7756, "step": 6669 }, { "epoch": 0.54, "grad_norm": 1.367358724300926, "learning_rate": 4.674118774194627e-06, "loss": 0.6872, "step": 6670 }, { "epoch": 0.54, "grad_norm": 0.8111319230339418, "learning_rate": 4.672822295178636e-06, "loss": 1.0796, "step": 6671 }, { "epoch": 0.54, "grad_norm": 1.5157160066564959, "learning_rate": 4.671525838254432e-06, "loss": 0.8594, "step": 6672 }, { "epoch": 0.54, "grad_norm": 1.570599103074908, "learning_rate": 4.670229403509554e-06, "loss": 0.7373, "step": 6673 }, { "epoch": 0.54, "grad_norm": 1.6292486363137393, "learning_rate": 4.668932991031538e-06, "loss": 0.8171, "step": 6674 }, { "epoch": 0.54, "grad_norm": 1.6925484597669624, "learning_rate": 4.667636600907926e-06, "loss": 0.7265, "step": 6675 }, { "epoch": 0.54, "grad_norm": 1.5343612639345021, "learning_rate": 4.666340233226251e-06, "loss": 0.7719, "step": 6676 }, { "epoch": 0.54, "grad_norm": 1.5317379202004135, "learning_rate": 4.665043888074045e-06, "loss": 0.8363, "step": 6677 }, { "epoch": 0.54, "grad_norm": 0.78170101378087, "learning_rate": 4.663747565538843e-06, "loss": 1.0703, "step": 6678 }, { "epoch": 0.54, "grad_norm": 1.4259998089535644, "learning_rate": 4.662451265708174e-06, "loss": 0.6877, "step": 6679 }, { "epoch": 0.54, "grad_norm": 1.4984130877000916, "learning_rate": 4.661154988669569e-06, "loss": 0.6923, "step": 6680 }, { "epoch": 0.54, "grad_norm": 1.463762241120263, "learning_rate": 4.65985873451055e-06, "loss": 0.8314, "step": 6681 }, { "epoch": 0.54, "grad_norm": 1.5525144411041807, "learning_rate": 4.658562503318649e-06, "loss": 0.8054, "step": 6682 }, { "epoch": 0.54, "grad_norm": 1.4240767501890976, "learning_rate": 4.657266295181391e-06, "loss": 0.7769, "step": 6683 }, { "epoch": 0.54, "grad_norm": 1.4783289076551482, "learning_rate": 4.655970110186292e-06, "loss": 0.7553, "step": 6684 }, { "epoch": 0.54, "grad_norm": 1.3379860833532575, "learning_rate": 4.65467394842088e-06, "loss": 0.6806, "step": 6685 }, { "epoch": 0.54, "grad_norm": 1.514312800849696, "learning_rate": 4.653377809972673e-06, "loss": 0.8235, "step": 6686 }, { "epoch": 0.54, "grad_norm": 1.4768252689611825, "learning_rate": 4.652081694929188e-06, "loss": 0.7278, "step": 6687 }, { "epoch": 0.54, "grad_norm": 1.589736341131553, "learning_rate": 4.6507856033779435e-06, "loss": 0.8412, "step": 6688 }, { "epoch": 0.54, "grad_norm": 0.823976052903158, "learning_rate": 4.649489535406454e-06, "loss": 1.0525, "step": 6689 }, { "epoch": 0.54, "grad_norm": 1.5354660524114252, "learning_rate": 4.648193491102232e-06, "loss": 0.84, "step": 6690 }, { "epoch": 0.54, "grad_norm": 1.6244741935801714, "learning_rate": 4.646897470552787e-06, "loss": 0.7628, "step": 6691 }, { "epoch": 0.54, "grad_norm": 1.770626869681101, "learning_rate": 4.645601473845636e-06, "loss": 0.8245, "step": 6692 }, { "epoch": 0.54, "grad_norm": 1.6251600825371855, "learning_rate": 4.644305501068283e-06, "loss": 0.8183, "step": 6693 }, { "epoch": 0.54, "grad_norm": 1.5718121726417718, "learning_rate": 4.643009552308235e-06, "loss": 0.7205, "step": 6694 }, { "epoch": 0.54, "grad_norm": 0.7952940710994577, "learning_rate": 4.641713627652999e-06, "loss": 1.0761, "step": 6695 }, { "epoch": 0.54, "grad_norm": 1.4311785122973297, "learning_rate": 4.6404177271900785e-06, "loss": 0.7139, "step": 6696 }, { "epoch": 0.54, "grad_norm": 2.4215280386417883, "learning_rate": 4.639121851006976e-06, "loss": 0.7649, "step": 6697 }, { "epoch": 0.54, "grad_norm": 1.4802841135858886, "learning_rate": 4.637825999191189e-06, "loss": 0.7783, "step": 6698 }, { "epoch": 0.54, "grad_norm": 1.5564792881890266, "learning_rate": 4.63653017183022e-06, "loss": 0.7016, "step": 6699 }, { "epoch": 0.54, "grad_norm": 1.547901424012035, "learning_rate": 4.635234369011565e-06, "loss": 0.7907, "step": 6700 }, { "epoch": 0.54, "grad_norm": 1.5468952830747797, "learning_rate": 4.633938590822718e-06, "loss": 0.7879, "step": 6701 }, { "epoch": 0.54, "grad_norm": 1.6453643022066053, "learning_rate": 4.632642837351176e-06, "loss": 0.7652, "step": 6702 }, { "epoch": 0.54, "grad_norm": 1.531767100423295, "learning_rate": 4.63134710868443e-06, "loss": 0.7335, "step": 6703 }, { "epoch": 0.54, "grad_norm": 0.7732116298994447, "learning_rate": 4.6300514049099694e-06, "loss": 1.0656, "step": 6704 }, { "epoch": 0.54, "grad_norm": 1.5176588417586472, "learning_rate": 4.628755726115284e-06, "loss": 0.9211, "step": 6705 }, { "epoch": 0.54, "grad_norm": 1.5950946284854635, "learning_rate": 4.627460072387861e-06, "loss": 0.6642, "step": 6706 }, { "epoch": 0.54, "grad_norm": 1.4651150744273913, "learning_rate": 4.626164443815186e-06, "loss": 0.7216, "step": 6707 }, { "epoch": 0.54, "grad_norm": 1.642438497613779, "learning_rate": 4.62486884048474e-06, "loss": 0.7551, "step": 6708 }, { "epoch": 0.54, "grad_norm": 1.5932013396363436, "learning_rate": 4.623573262484011e-06, "loss": 0.749, "step": 6709 }, { "epoch": 0.54, "grad_norm": 1.4579368862463598, "learning_rate": 4.622277709900475e-06, "loss": 0.8149, "step": 6710 }, { "epoch": 0.54, "grad_norm": 1.4552237990496306, "learning_rate": 4.620982182821611e-06, "loss": 0.7842, "step": 6711 }, { "epoch": 0.54, "grad_norm": 1.4373146392670582, "learning_rate": 4.6196866813348985e-06, "loss": 0.7655, "step": 6712 }, { "epoch": 0.54, "grad_norm": 0.754321596951422, "learning_rate": 4.61839120552781e-06, "loss": 1.0464, "step": 6713 }, { "epoch": 0.54, "grad_norm": 0.8243537298753041, "learning_rate": 4.61709575548782e-06, "loss": 1.062, "step": 6714 }, { "epoch": 0.54, "grad_norm": 1.4478369578708192, "learning_rate": 4.615800331302399e-06, "loss": 0.7067, "step": 6715 }, { "epoch": 0.54, "grad_norm": 0.7899929874187903, "learning_rate": 4.61450493305902e-06, "loss": 1.1153, "step": 6716 }, { "epoch": 0.54, "grad_norm": 1.4875675701222748, "learning_rate": 4.613209560845148e-06, "loss": 0.7452, "step": 6717 }, { "epoch": 0.54, "grad_norm": 1.5550751965291805, "learning_rate": 4.61191421474825e-06, "loss": 0.7919, "step": 6718 }, { "epoch": 0.54, "grad_norm": 0.8034537556591744, "learning_rate": 4.6106188948557935e-06, "loss": 1.1145, "step": 6719 }, { "epoch": 0.54, "grad_norm": 1.647604902067715, "learning_rate": 4.6093236012552394e-06, "loss": 0.734, "step": 6720 }, { "epoch": 0.54, "grad_norm": 1.4318057570190756, "learning_rate": 4.608028334034049e-06, "loss": 0.7742, "step": 6721 }, { "epoch": 0.54, "grad_norm": 1.4718741665836244, "learning_rate": 4.606733093279681e-06, "loss": 0.7239, "step": 6722 }, { "epoch": 0.54, "grad_norm": 1.5676067548902917, "learning_rate": 4.605437879079595e-06, "loss": 0.733, "step": 6723 }, { "epoch": 0.54, "grad_norm": 1.5594260972025653, "learning_rate": 4.604142691521245e-06, "loss": 0.7478, "step": 6724 }, { "epoch": 0.54, "grad_norm": 1.575096258415649, "learning_rate": 4.602847530692084e-06, "loss": 0.806, "step": 6725 }, { "epoch": 0.54, "grad_norm": 0.795613161803088, "learning_rate": 4.601552396679568e-06, "loss": 1.0592, "step": 6726 }, { "epoch": 0.54, "grad_norm": 1.543929949722361, "learning_rate": 4.600257289571146e-06, "loss": 0.7689, "step": 6727 }, { "epoch": 0.54, "grad_norm": 1.6078631085204615, "learning_rate": 4.598962209454263e-06, "loss": 0.7019, "step": 6728 }, { "epoch": 0.54, "grad_norm": 1.5021400011444175, "learning_rate": 4.597667156416371e-06, "loss": 0.697, "step": 6729 }, { "epoch": 0.54, "grad_norm": 1.468244648144051, "learning_rate": 4.5963721305449125e-06, "loss": 0.7837, "step": 6730 }, { "epoch": 0.54, "grad_norm": 1.5081647617230125, "learning_rate": 4.59507713192733e-06, "loss": 0.7281, "step": 6731 }, { "epoch": 0.54, "grad_norm": 1.5319842812390967, "learning_rate": 4.5937821606510656e-06, "loss": 0.7644, "step": 6732 }, { "epoch": 0.54, "grad_norm": 1.496813507738929, "learning_rate": 4.592487216803558e-06, "loss": 0.7571, "step": 6733 }, { "epoch": 0.54, "grad_norm": 1.5574620071011724, "learning_rate": 4.591192300472247e-06, "loss": 0.8295, "step": 6734 }, { "epoch": 0.54, "grad_norm": 1.7133051998494504, "learning_rate": 4.589897411744563e-06, "loss": 0.8317, "step": 6735 }, { "epoch": 0.54, "grad_norm": 1.5166679878226084, "learning_rate": 4.5886025507079465e-06, "loss": 0.7132, "step": 6736 }, { "epoch": 0.54, "grad_norm": 1.4906586720634227, "learning_rate": 4.587307717449827e-06, "loss": 0.6493, "step": 6737 }, { "epoch": 0.54, "grad_norm": 1.437571371418521, "learning_rate": 4.586012912057634e-06, "loss": 0.6981, "step": 6738 }, { "epoch": 0.54, "grad_norm": 1.5305128465394449, "learning_rate": 4.584718134618793e-06, "loss": 0.7461, "step": 6739 }, { "epoch": 0.54, "grad_norm": 1.4664683633004731, "learning_rate": 4.583423385220736e-06, "loss": 0.7419, "step": 6740 }, { "epoch": 0.54, "grad_norm": 1.5316537848328047, "learning_rate": 4.582128663950884e-06, "loss": 0.8022, "step": 6741 }, { "epoch": 0.54, "grad_norm": 1.4631190526919073, "learning_rate": 4.58083397089666e-06, "loss": 0.7128, "step": 6742 }, { "epoch": 0.54, "grad_norm": 1.533318776570989, "learning_rate": 4.579539306145485e-06, "loss": 0.7611, "step": 6743 }, { "epoch": 0.54, "grad_norm": 1.4369447996994582, "learning_rate": 4.5782446697847775e-06, "loss": 0.8259, "step": 6744 }, { "epoch": 0.54, "grad_norm": 1.4803991955468436, "learning_rate": 4.5769500619019516e-06, "loss": 0.7081, "step": 6745 }, { "epoch": 0.54, "grad_norm": 0.8516004550012849, "learning_rate": 4.575655482584428e-06, "loss": 1.0909, "step": 6746 }, { "epoch": 0.54, "grad_norm": 1.6489730746279472, "learning_rate": 4.574360931919616e-06, "loss": 0.8371, "step": 6747 }, { "epoch": 0.54, "grad_norm": 1.4775898985832043, "learning_rate": 4.573066409994927e-06, "loss": 0.8301, "step": 6748 }, { "epoch": 0.54, "grad_norm": 1.4464101695235616, "learning_rate": 4.5717719168977696e-06, "loss": 0.6815, "step": 6749 }, { "epoch": 0.54, "grad_norm": 1.3881902265020774, "learning_rate": 4.570477452715553e-06, "loss": 0.8009, "step": 6750 }, { "epoch": 0.54, "grad_norm": 1.485082877876475, "learning_rate": 4.56918301753568e-06, "loss": 0.7971, "step": 6751 }, { "epoch": 0.54, "grad_norm": 1.5469149809237595, "learning_rate": 4.567888611445553e-06, "loss": 0.7328, "step": 6752 }, { "epoch": 0.54, "grad_norm": 1.5075360125769597, "learning_rate": 4.5665942345325776e-06, "loss": 0.779, "step": 6753 }, { "epoch": 0.54, "grad_norm": 1.5420553817821978, "learning_rate": 4.56529988688415e-06, "loss": 0.8087, "step": 6754 }, { "epoch": 0.54, "grad_norm": 0.8241051131335586, "learning_rate": 4.5640055685876695e-06, "loss": 1.0852, "step": 6755 }, { "epoch": 0.54, "grad_norm": 1.5289716567793399, "learning_rate": 4.562711279730526e-06, "loss": 0.7025, "step": 6756 }, { "epoch": 0.54, "grad_norm": 1.5251346371663899, "learning_rate": 4.561417020400119e-06, "loss": 0.7689, "step": 6757 }, { "epoch": 0.54, "grad_norm": 1.4357136554797612, "learning_rate": 4.560122790683839e-06, "loss": 0.7106, "step": 6758 }, { "epoch": 0.54, "grad_norm": 1.329206321389052, "learning_rate": 4.558828590669072e-06, "loss": 0.6608, "step": 6759 }, { "epoch": 0.54, "grad_norm": 1.4967299395728209, "learning_rate": 4.557534420443209e-06, "loss": 0.8004, "step": 6760 }, { "epoch": 0.54, "grad_norm": 0.8183998303087598, "learning_rate": 4.556240280093633e-06, "loss": 1.1025, "step": 6761 }, { "epoch": 0.54, "grad_norm": 1.5034617970270898, "learning_rate": 4.554946169707728e-06, "loss": 0.7609, "step": 6762 }, { "epoch": 0.54, "grad_norm": 1.7153262492485235, "learning_rate": 4.5536520893728735e-06, "loss": 0.738, "step": 6763 }, { "epoch": 0.54, "grad_norm": 1.4252224006931138, "learning_rate": 4.552358039176453e-06, "loss": 0.8231, "step": 6764 }, { "epoch": 0.54, "grad_norm": 1.5104147479047756, "learning_rate": 4.551064019205841e-06, "loss": 0.7451, "step": 6765 }, { "epoch": 0.54, "grad_norm": 1.472387305723786, "learning_rate": 4.549770029548411e-06, "loss": 0.7371, "step": 6766 }, { "epoch": 0.54, "grad_norm": 1.5169088508942212, "learning_rate": 4.548476070291541e-06, "loss": 0.776, "step": 6767 }, { "epoch": 0.54, "grad_norm": 1.4477797162462265, "learning_rate": 4.547182141522598e-06, "loss": 0.7409, "step": 6768 }, { "epoch": 0.54, "grad_norm": 1.4456352776386698, "learning_rate": 4.54588824332895e-06, "loss": 0.7651, "step": 6769 }, { "epoch": 0.54, "grad_norm": 1.5538468128265064, "learning_rate": 4.544594375797969e-06, "loss": 0.8117, "step": 6770 }, { "epoch": 0.54, "grad_norm": 1.5246530488803955, "learning_rate": 4.5433005390170174e-06, "loss": 0.727, "step": 6771 }, { "epoch": 0.54, "grad_norm": 1.5097774995946178, "learning_rate": 4.542006733073457e-06, "loss": 0.7329, "step": 6772 }, { "epoch": 0.54, "grad_norm": 1.460901067941363, "learning_rate": 4.540712958054647e-06, "loss": 0.7599, "step": 6773 }, { "epoch": 0.54, "grad_norm": 1.677362165291922, "learning_rate": 4.53941921404795e-06, "loss": 0.793, "step": 6774 }, { "epoch": 0.54, "grad_norm": 1.546771481604218, "learning_rate": 4.5381255011407225e-06, "loss": 0.742, "step": 6775 }, { "epoch": 0.54, "grad_norm": 1.5594299280006725, "learning_rate": 4.5368318194203145e-06, "loss": 0.6976, "step": 6776 }, { "epoch": 0.54, "grad_norm": 1.4846287433488095, "learning_rate": 4.535538168974083e-06, "loss": 0.7257, "step": 6777 }, { "epoch": 0.54, "grad_norm": 0.8533699156102753, "learning_rate": 4.534244549889376e-06, "loss": 1.0611, "step": 6778 }, { "epoch": 0.54, "grad_norm": 3.1710059510526634, "learning_rate": 4.532950962253543e-06, "loss": 0.7825, "step": 6779 }, { "epoch": 0.54, "grad_norm": 1.4619918530137257, "learning_rate": 4.531657406153926e-06, "loss": 0.8072, "step": 6780 }, { "epoch": 0.54, "grad_norm": 0.7656421734625081, "learning_rate": 4.530363881677874e-06, "loss": 1.0531, "step": 6781 }, { "epoch": 0.54, "grad_norm": 1.5744699718255917, "learning_rate": 4.529070388912727e-06, "loss": 0.7794, "step": 6782 }, { "epoch": 0.54, "grad_norm": 0.7671498696004015, "learning_rate": 4.527776927945823e-06, "loss": 1.0708, "step": 6783 }, { "epoch": 0.54, "grad_norm": 1.5388952735090162, "learning_rate": 4.526483498864501e-06, "loss": 0.8377, "step": 6784 }, { "epoch": 0.54, "grad_norm": 0.7803529163336377, "learning_rate": 4.525190101756097e-06, "loss": 1.1138, "step": 6785 }, { "epoch": 0.54, "grad_norm": 1.534583703032621, "learning_rate": 4.523896736707941e-06, "loss": 0.7754, "step": 6786 }, { "epoch": 0.54, "grad_norm": 1.4803642475633196, "learning_rate": 4.5226034038073675e-06, "loss": 0.7295, "step": 6787 }, { "epoch": 0.54, "grad_norm": 1.5412103038710805, "learning_rate": 4.521310103141704e-06, "loss": 0.698, "step": 6788 }, { "epoch": 0.54, "grad_norm": 1.4889724304111946, "learning_rate": 4.520016834798277e-06, "loss": 0.7737, "step": 6789 }, { "epoch": 0.54, "grad_norm": 1.6293551368593064, "learning_rate": 4.518723598864408e-06, "loss": 0.814, "step": 6790 }, { "epoch": 0.54, "grad_norm": 1.526822594562841, "learning_rate": 4.517430395427424e-06, "loss": 0.7998, "step": 6791 }, { "epoch": 0.54, "grad_norm": 0.8317215622899846, "learning_rate": 4.516137224574645e-06, "loss": 1.1073, "step": 6792 }, { "epoch": 0.55, "grad_norm": 1.3779015406401227, "learning_rate": 4.5148440863933845e-06, "loss": 0.7987, "step": 6793 }, { "epoch": 0.55, "grad_norm": 1.591676456570048, "learning_rate": 4.513550980970962e-06, "loss": 0.8106, "step": 6794 }, { "epoch": 0.55, "grad_norm": 1.469022891554638, "learning_rate": 4.51225790839469e-06, "loss": 0.7631, "step": 6795 }, { "epoch": 0.55, "grad_norm": 1.3530491833764033, "learning_rate": 4.510964868751879e-06, "loss": 0.7212, "step": 6796 }, { "epoch": 0.55, "grad_norm": 1.3702536841853044, "learning_rate": 4.509671862129837e-06, "loss": 0.6864, "step": 6797 }, { "epoch": 0.55, "grad_norm": 1.4569782211077371, "learning_rate": 4.508378888615872e-06, "loss": 0.8171, "step": 6798 }, { "epoch": 0.55, "grad_norm": 2.0067336125626194, "learning_rate": 4.5070859482972915e-06, "loss": 0.8205, "step": 6799 }, { "epoch": 0.55, "grad_norm": 1.4790912918879442, "learning_rate": 4.505793041261391e-06, "loss": 0.7102, "step": 6800 }, { "epoch": 0.55, "grad_norm": 1.4889596548626505, "learning_rate": 4.504500167595477e-06, "loss": 0.7772, "step": 6801 }, { "epoch": 0.55, "grad_norm": 1.5312732414528996, "learning_rate": 4.5032073273868445e-06, "loss": 0.8229, "step": 6802 }, { "epoch": 0.55, "grad_norm": 0.8174747478934806, "learning_rate": 4.501914520722788e-06, "loss": 1.0636, "step": 6803 }, { "epoch": 0.55, "grad_norm": 1.6080743361931922, "learning_rate": 4.500621747690604e-06, "loss": 0.8181, "step": 6804 }, { "epoch": 0.55, "grad_norm": 1.5343800880222516, "learning_rate": 4.499329008377581e-06, "loss": 0.7295, "step": 6805 }, { "epoch": 0.55, "grad_norm": 1.6019152815234603, "learning_rate": 4.498036302871007e-06, "loss": 0.7587, "step": 6806 }, { "epoch": 0.55, "grad_norm": 1.482259681491783, "learning_rate": 4.496743631258169e-06, "loss": 0.7672, "step": 6807 }, { "epoch": 0.55, "grad_norm": 1.4286051518796945, "learning_rate": 4.495450993626353e-06, "loss": 0.7804, "step": 6808 }, { "epoch": 0.55, "grad_norm": 1.5064271914105436, "learning_rate": 4.49415839006284e-06, "loss": 0.7615, "step": 6809 }, { "epoch": 0.55, "grad_norm": 1.5735823783458043, "learning_rate": 4.492865820654908e-06, "loss": 0.7235, "step": 6810 }, { "epoch": 0.55, "grad_norm": 0.8481495383951919, "learning_rate": 4.4915732854898365e-06, "loss": 1.0824, "step": 6811 }, { "epoch": 0.55, "grad_norm": 1.581371367471879, "learning_rate": 4.490280784654899e-06, "loss": 0.8587, "step": 6812 }, { "epoch": 0.55, "grad_norm": 1.5775968935822104, "learning_rate": 4.48898831823737e-06, "loss": 0.7906, "step": 6813 }, { "epoch": 0.55, "grad_norm": 0.8127386339365587, "learning_rate": 4.487695886324514e-06, "loss": 1.0742, "step": 6814 }, { "epoch": 0.55, "grad_norm": 0.7955865719098081, "learning_rate": 4.486403489003608e-06, "loss": 1.1129, "step": 6815 }, { "epoch": 0.55, "grad_norm": 1.5065994240678835, "learning_rate": 4.485111126361911e-06, "loss": 0.7565, "step": 6816 }, { "epoch": 0.55, "grad_norm": 0.8171745746729149, "learning_rate": 4.4838187984866865e-06, "loss": 1.0816, "step": 6817 }, { "epoch": 0.55, "grad_norm": 1.4392642299641591, "learning_rate": 4.482526505465199e-06, "loss": 0.7791, "step": 6818 }, { "epoch": 0.55, "grad_norm": 1.5144736251670543, "learning_rate": 4.481234247384705e-06, "loss": 0.7552, "step": 6819 }, { "epoch": 0.55, "grad_norm": 1.5553505809898935, "learning_rate": 4.4799420243324605e-06, "loss": 0.8053, "step": 6820 }, { "epoch": 0.55, "grad_norm": 1.5110060358236679, "learning_rate": 4.478649836395719e-06, "loss": 0.8194, "step": 6821 }, { "epoch": 0.55, "grad_norm": 1.6714828521175757, "learning_rate": 4.477357683661734e-06, "loss": 0.7787, "step": 6822 }, { "epoch": 0.55, "grad_norm": 1.5798657477435094, "learning_rate": 4.476065566217753e-06, "loss": 0.7194, "step": 6823 }, { "epoch": 0.55, "grad_norm": 1.4557559539125728, "learning_rate": 4.474773484151021e-06, "loss": 0.7453, "step": 6824 }, { "epoch": 0.55, "grad_norm": 0.7990492611754859, "learning_rate": 4.473481437548786e-06, "loss": 1.0847, "step": 6825 }, { "epoch": 0.55, "grad_norm": 1.4928686871888033, "learning_rate": 4.472189426498289e-06, "loss": 0.7229, "step": 6826 }, { "epoch": 0.55, "grad_norm": 1.4366613209559382, "learning_rate": 4.470897451086767e-06, "loss": 0.6576, "step": 6827 }, { "epoch": 0.55, "grad_norm": 1.7463674199635106, "learning_rate": 4.469605511401461e-06, "loss": 0.8391, "step": 6828 }, { "epoch": 0.55, "grad_norm": 1.5735965916233225, "learning_rate": 4.468313607529603e-06, "loss": 0.8312, "step": 6829 }, { "epoch": 0.55, "grad_norm": 1.4658736325721764, "learning_rate": 4.467021739558426e-06, "loss": 0.796, "step": 6830 }, { "epoch": 0.55, "grad_norm": 1.559203935588992, "learning_rate": 4.4657299075751596e-06, "loss": 0.8094, "step": 6831 }, { "epoch": 0.55, "grad_norm": 1.7442613403578175, "learning_rate": 4.464438111667031e-06, "loss": 0.7753, "step": 6832 }, { "epoch": 0.55, "grad_norm": 0.8484585453808401, "learning_rate": 4.463146351921267e-06, "loss": 1.1134, "step": 6833 }, { "epoch": 0.55, "grad_norm": 1.5871504871585123, "learning_rate": 4.461854628425087e-06, "loss": 0.7893, "step": 6834 }, { "epoch": 0.55, "grad_norm": 1.5863662854629188, "learning_rate": 4.4605629412657145e-06, "loss": 0.8106, "step": 6835 }, { "epoch": 0.55, "grad_norm": 1.3995744465396254, "learning_rate": 4.459271290530365e-06, "loss": 0.7633, "step": 6836 }, { "epoch": 0.55, "grad_norm": 1.6132150650393045, "learning_rate": 4.457979676306256e-06, "loss": 0.7676, "step": 6837 }, { "epoch": 0.55, "grad_norm": 1.572213405310079, "learning_rate": 4.456688098680597e-06, "loss": 0.7734, "step": 6838 }, { "epoch": 0.55, "grad_norm": 1.5445084477246462, "learning_rate": 4.4553965577406006e-06, "loss": 0.8467, "step": 6839 }, { "epoch": 0.55, "grad_norm": 0.822237315842328, "learning_rate": 4.454105053573474e-06, "loss": 1.1095, "step": 6840 }, { "epoch": 0.55, "grad_norm": 1.4632640660036722, "learning_rate": 4.4528135862664206e-06, "loss": 0.7737, "step": 6841 }, { "epoch": 0.55, "grad_norm": 1.5004340984924578, "learning_rate": 4.451522155906647e-06, "loss": 0.7785, "step": 6842 }, { "epoch": 0.55, "grad_norm": 1.602922472570306, "learning_rate": 4.4502307625813515e-06, "loss": 0.7143, "step": 6843 }, { "epoch": 0.55, "grad_norm": 1.628518624088369, "learning_rate": 4.448939406377732e-06, "loss": 0.8366, "step": 6844 }, { "epoch": 0.55, "grad_norm": 1.551950771814103, "learning_rate": 4.4476480873829834e-06, "loss": 0.778, "step": 6845 }, { "epoch": 0.55, "grad_norm": 1.5587565547229294, "learning_rate": 4.4463568056842995e-06, "loss": 0.7627, "step": 6846 }, { "epoch": 0.55, "grad_norm": 1.41804815579874, "learning_rate": 4.44506556136887e-06, "loss": 0.7066, "step": 6847 }, { "epoch": 0.55, "grad_norm": 1.5650562127947183, "learning_rate": 4.443774354523883e-06, "loss": 0.7421, "step": 6848 }, { "epoch": 0.55, "grad_norm": 1.5881646035115946, "learning_rate": 4.442483185236523e-06, "loss": 0.8756, "step": 6849 }, { "epoch": 0.55, "grad_norm": 1.5877180095434507, "learning_rate": 4.441192053593973e-06, "loss": 0.7431, "step": 6850 }, { "epoch": 0.55, "grad_norm": 1.5628029742277691, "learning_rate": 4.439900959683412e-06, "loss": 0.7619, "step": 6851 }, { "epoch": 0.55, "grad_norm": 1.6823109328360657, "learning_rate": 4.438609903592021e-06, "loss": 0.7013, "step": 6852 }, { "epoch": 0.55, "grad_norm": 0.8106627077664682, "learning_rate": 4.4373188854069736e-06, "loss": 1.1043, "step": 6853 }, { "epoch": 0.55, "grad_norm": 0.78660753323529, "learning_rate": 4.4360279052154406e-06, "loss": 1.0485, "step": 6854 }, { "epoch": 0.55, "grad_norm": 1.4861430833492006, "learning_rate": 4.434736963104592e-06, "loss": 0.7971, "step": 6855 }, { "epoch": 0.55, "grad_norm": 1.5599612476771783, "learning_rate": 4.433446059161598e-06, "loss": 0.7865, "step": 6856 }, { "epoch": 0.55, "grad_norm": 1.5782015687109299, "learning_rate": 4.432155193473621e-06, "loss": 0.754, "step": 6857 }, { "epoch": 0.55, "grad_norm": 1.373247083168306, "learning_rate": 4.430864366127821e-06, "loss": 0.7023, "step": 6858 }, { "epoch": 0.55, "grad_norm": 1.4244475681101365, "learning_rate": 4.429573577211365e-06, "loss": 0.7233, "step": 6859 }, { "epoch": 0.55, "grad_norm": 1.5563731633318956, "learning_rate": 4.428282826811404e-06, "loss": 0.6454, "step": 6860 }, { "epoch": 0.55, "grad_norm": 1.5231249732247683, "learning_rate": 4.426992115015094e-06, "loss": 0.7635, "step": 6861 }, { "epoch": 0.55, "grad_norm": 1.6113718413426183, "learning_rate": 4.425701441909584e-06, "loss": 0.7714, "step": 6862 }, { "epoch": 0.55, "grad_norm": 1.5221013186223222, "learning_rate": 4.424410807582029e-06, "loss": 0.6973, "step": 6863 }, { "epoch": 0.55, "grad_norm": 1.5326281527469157, "learning_rate": 4.423120212119571e-06, "loss": 0.7012, "step": 6864 }, { "epoch": 0.55, "grad_norm": 1.4115488339092077, "learning_rate": 4.421829655609355e-06, "loss": 0.7309, "step": 6865 }, { "epoch": 0.55, "grad_norm": 1.6106645423358947, "learning_rate": 4.4205391381385235e-06, "loss": 0.8064, "step": 6866 }, { "epoch": 0.55, "grad_norm": 1.4136202897703214, "learning_rate": 4.419248659794215e-06, "loss": 0.7297, "step": 6867 }, { "epoch": 0.55, "grad_norm": 0.9110755975678478, "learning_rate": 4.417958220663563e-06, "loss": 1.0842, "step": 6868 }, { "epoch": 0.55, "grad_norm": 1.4892462545815033, "learning_rate": 4.416667820833704e-06, "loss": 0.7586, "step": 6869 }, { "epoch": 0.55, "grad_norm": 1.6092782770430807, "learning_rate": 4.415377460391768e-06, "loss": 0.7154, "step": 6870 }, { "epoch": 0.55, "grad_norm": 1.4767368090081745, "learning_rate": 4.414087139424883e-06, "loss": 0.7858, "step": 6871 }, { "epoch": 0.55, "grad_norm": 1.7227041638770426, "learning_rate": 4.412796858020173e-06, "loss": 0.7789, "step": 6872 }, { "epoch": 0.55, "grad_norm": 1.5746817532874446, "learning_rate": 4.4115066162647625e-06, "loss": 0.8315, "step": 6873 }, { "epoch": 0.55, "grad_norm": 0.7933377422207455, "learning_rate": 4.410216414245771e-06, "loss": 1.0791, "step": 6874 }, { "epoch": 0.55, "grad_norm": 1.6720518601149836, "learning_rate": 4.408926252050315e-06, "loss": 0.767, "step": 6875 }, { "epoch": 0.55, "grad_norm": 1.592339115271695, "learning_rate": 4.407636129765511e-06, "loss": 0.8208, "step": 6876 }, { "epoch": 0.55, "grad_norm": 1.4224869843135501, "learning_rate": 4.406346047478471e-06, "loss": 0.7421, "step": 6877 }, { "epoch": 0.55, "grad_norm": 1.5082418924718668, "learning_rate": 4.405056005276302e-06, "loss": 0.8334, "step": 6878 }, { "epoch": 0.55, "grad_norm": 1.5676682238551272, "learning_rate": 4.40376600324611e-06, "loss": 0.7218, "step": 6879 }, { "epoch": 0.55, "grad_norm": 1.4171720635387464, "learning_rate": 4.402476041475004e-06, "loss": 0.8057, "step": 6880 }, { "epoch": 0.55, "grad_norm": 1.629833326510161, "learning_rate": 4.401186120050081e-06, "loss": 0.8157, "step": 6881 }, { "epoch": 0.55, "grad_norm": 1.6027327796867108, "learning_rate": 4.399896239058439e-06, "loss": 0.82, "step": 6882 }, { "epoch": 0.55, "grad_norm": 1.5842443885304018, "learning_rate": 4.398606398587177e-06, "loss": 0.7632, "step": 6883 }, { "epoch": 0.55, "grad_norm": 1.5861413624681564, "learning_rate": 4.397316598723385e-06, "loss": 0.7398, "step": 6884 }, { "epoch": 0.55, "grad_norm": 1.5173027743671035, "learning_rate": 4.396026839554154e-06, "loss": 0.7533, "step": 6885 }, { "epoch": 0.55, "grad_norm": 1.5004790126881262, "learning_rate": 4.394737121166573e-06, "loss": 0.7174, "step": 6886 }, { "epoch": 0.55, "grad_norm": 1.5437648118791238, "learning_rate": 4.393447443647726e-06, "loss": 0.705, "step": 6887 }, { "epoch": 0.55, "grad_norm": 1.5522244124785598, "learning_rate": 4.392157807084696e-06, "loss": 0.7704, "step": 6888 }, { "epoch": 0.55, "grad_norm": 1.6022007338449897, "learning_rate": 4.3908682115645565e-06, "loss": 0.7716, "step": 6889 }, { "epoch": 0.55, "grad_norm": 1.5616732588163402, "learning_rate": 4.389578657174391e-06, "loss": 0.7422, "step": 6890 }, { "epoch": 0.55, "grad_norm": 1.6171168510841951, "learning_rate": 4.38828914400127e-06, "loss": 0.7797, "step": 6891 }, { "epoch": 0.55, "grad_norm": 1.640118408610226, "learning_rate": 4.386999672132264e-06, "loss": 0.7773, "step": 6892 }, { "epoch": 0.55, "grad_norm": 1.476944075019624, "learning_rate": 4.385710241654443e-06, "loss": 0.7847, "step": 6893 }, { "epoch": 0.55, "grad_norm": 1.4528551323397358, "learning_rate": 4.3844208526548705e-06, "loss": 0.794, "step": 6894 }, { "epoch": 0.55, "grad_norm": 1.610822339479896, "learning_rate": 4.38313150522061e-06, "loss": 0.7502, "step": 6895 }, { "epoch": 0.55, "grad_norm": 1.4554778106596893, "learning_rate": 4.3818421994387194e-06, "loss": 0.7536, "step": 6896 }, { "epoch": 0.55, "grad_norm": 1.4687121775242085, "learning_rate": 4.380552935396259e-06, "loss": 0.6813, "step": 6897 }, { "epoch": 0.55, "grad_norm": 0.8181727482761814, "learning_rate": 4.3792637131802805e-06, "loss": 1.0776, "step": 6898 }, { "epoch": 0.55, "grad_norm": 1.3823714627796473, "learning_rate": 4.377974532877834e-06, "loss": 0.8764, "step": 6899 }, { "epoch": 0.55, "grad_norm": 0.7953269306952987, "learning_rate": 4.376685394575971e-06, "loss": 1.0971, "step": 6900 }, { "epoch": 0.55, "grad_norm": 0.8007194022591374, "learning_rate": 4.375396298361735e-06, "loss": 1.0967, "step": 6901 }, { "epoch": 0.55, "grad_norm": 0.7493237587248764, "learning_rate": 4.374107244322167e-06, "loss": 1.1089, "step": 6902 }, { "epoch": 0.55, "grad_norm": 1.4121823042023827, "learning_rate": 4.372818232544313e-06, "loss": 0.7301, "step": 6903 }, { "epoch": 0.55, "grad_norm": 1.5795239805200996, "learning_rate": 4.3715292631152045e-06, "loss": 0.7039, "step": 6904 }, { "epoch": 0.55, "grad_norm": 1.47498526910125, "learning_rate": 4.370240336121877e-06, "loss": 0.7258, "step": 6905 }, { "epoch": 0.55, "grad_norm": 0.8499374479124103, "learning_rate": 4.36895145165136e-06, "loss": 1.1284, "step": 6906 }, { "epoch": 0.55, "grad_norm": 1.533146721066811, "learning_rate": 4.3676626097906865e-06, "loss": 0.7324, "step": 6907 }, { "epoch": 0.55, "grad_norm": 0.8523982157150912, "learning_rate": 4.36637381062688e-06, "loss": 1.0426, "step": 6908 }, { "epoch": 0.55, "grad_norm": 1.4547962368763625, "learning_rate": 4.36508505424696e-06, "loss": 0.7518, "step": 6909 }, { "epoch": 0.55, "grad_norm": 1.4919433299993383, "learning_rate": 4.3637963407379515e-06, "loss": 0.7194, "step": 6910 }, { "epoch": 0.55, "grad_norm": 1.518883604910818, "learning_rate": 4.362507670186868e-06, "loss": 0.7686, "step": 6911 }, { "epoch": 0.55, "grad_norm": 1.4195179663838116, "learning_rate": 4.361219042680725e-06, "loss": 0.7521, "step": 6912 }, { "epoch": 0.55, "grad_norm": 0.8047798541250969, "learning_rate": 4.35993045830653e-06, "loss": 1.098, "step": 6913 }, { "epoch": 0.55, "grad_norm": 1.547962436599547, "learning_rate": 4.358641917151297e-06, "loss": 0.7756, "step": 6914 }, { "epoch": 0.55, "grad_norm": 1.6050091722781497, "learning_rate": 4.357353419302028e-06, "loss": 0.81, "step": 6915 }, { "epoch": 0.55, "grad_norm": 1.4500043309851394, "learning_rate": 4.356064964845724e-06, "loss": 0.7347, "step": 6916 }, { "epoch": 0.55, "grad_norm": 1.8878544672012398, "learning_rate": 4.354776553869387e-06, "loss": 0.8111, "step": 6917 }, { "epoch": 0.56, "grad_norm": 1.6642289457875687, "learning_rate": 4.3534881864600135e-06, "loss": 0.8092, "step": 6918 }, { "epoch": 0.56, "grad_norm": 1.4723799994708773, "learning_rate": 4.352199862704596e-06, "loss": 0.8402, "step": 6919 }, { "epoch": 0.56, "grad_norm": 1.561344852843685, "learning_rate": 4.3509115826901235e-06, "loss": 0.8075, "step": 6920 }, { "epoch": 0.56, "grad_norm": 1.5666981384899021, "learning_rate": 4.349623346503586e-06, "loss": 0.8227, "step": 6921 }, { "epoch": 0.56, "grad_norm": 0.7717022688955494, "learning_rate": 4.348335154231967e-06, "loss": 1.0715, "step": 6922 }, { "epoch": 0.56, "grad_norm": 0.8404357962483123, "learning_rate": 4.347047005962247e-06, "loss": 1.0741, "step": 6923 }, { "epoch": 0.56, "grad_norm": 1.4997388002546004, "learning_rate": 4.345758901781408e-06, "loss": 0.8208, "step": 6924 }, { "epoch": 0.56, "grad_norm": 0.7913310757988739, "learning_rate": 4.344470841776424e-06, "loss": 1.0882, "step": 6925 }, { "epoch": 0.56, "grad_norm": 1.4613413673643725, "learning_rate": 4.343182826034268e-06, "loss": 0.7903, "step": 6926 }, { "epoch": 0.56, "grad_norm": 1.5100074754962065, "learning_rate": 4.34189485464191e-06, "loss": 0.7588, "step": 6927 }, { "epoch": 0.56, "grad_norm": 1.5058973473258286, "learning_rate": 4.340606927686315e-06, "loss": 0.6989, "step": 6928 }, { "epoch": 0.56, "grad_norm": 1.4963407646823559, "learning_rate": 4.339319045254448e-06, "loss": 0.7585, "step": 6929 }, { "epoch": 0.56, "grad_norm": 1.4822583221026402, "learning_rate": 4.338031207433268e-06, "loss": 0.7716, "step": 6930 }, { "epoch": 0.56, "grad_norm": 1.4661476956748791, "learning_rate": 4.336743414309738e-06, "loss": 0.7061, "step": 6931 }, { "epoch": 0.56, "grad_norm": 1.575986646681442, "learning_rate": 4.33545566597081e-06, "loss": 0.7497, "step": 6932 }, { "epoch": 0.56, "grad_norm": 1.6679979777982052, "learning_rate": 4.33416796250343e-06, "loss": 0.7469, "step": 6933 }, { "epoch": 0.56, "grad_norm": 1.4893703705346955, "learning_rate": 4.3328803039945555e-06, "loss": 0.7557, "step": 6934 }, { "epoch": 0.56, "grad_norm": 1.5335983244726297, "learning_rate": 4.331592690531128e-06, "loss": 0.7944, "step": 6935 }, { "epoch": 0.56, "grad_norm": 1.5554704593814996, "learning_rate": 4.330305122200091e-06, "loss": 0.7952, "step": 6936 }, { "epoch": 0.56, "grad_norm": 1.4166634751349931, "learning_rate": 4.3290175990883815e-06, "loss": 0.8234, "step": 6937 }, { "epoch": 0.56, "grad_norm": 0.8724591301767475, "learning_rate": 4.327730121282939e-06, "loss": 1.0937, "step": 6938 }, { "epoch": 0.56, "grad_norm": 1.7730911623411953, "learning_rate": 4.326442688870697e-06, "loss": 0.7686, "step": 6939 }, { "epoch": 0.56, "grad_norm": 1.4488985979335967, "learning_rate": 4.325155301938582e-06, "loss": 0.8032, "step": 6940 }, { "epoch": 0.56, "grad_norm": 1.5411220689957446, "learning_rate": 4.323867960573526e-06, "loss": 0.7719, "step": 6941 }, { "epoch": 0.56, "grad_norm": 0.7921506584053002, "learning_rate": 4.3225806648624516e-06, "loss": 1.1082, "step": 6942 }, { "epoch": 0.56, "grad_norm": 0.7910349842892567, "learning_rate": 4.3212934148922785e-06, "loss": 1.0549, "step": 6943 }, { "epoch": 0.56, "grad_norm": 1.5122529600131875, "learning_rate": 4.320006210749928e-06, "loss": 0.7701, "step": 6944 }, { "epoch": 0.56, "grad_norm": 1.5044272456969652, "learning_rate": 4.318719052522312e-06, "loss": 0.7584, "step": 6945 }, { "epoch": 0.56, "grad_norm": 1.4504793080707057, "learning_rate": 4.3174319402963436e-06, "loss": 0.7462, "step": 6946 }, { "epoch": 0.56, "grad_norm": 1.474591126001228, "learning_rate": 4.3161448741589305e-06, "loss": 0.8647, "step": 6947 }, { "epoch": 0.56, "grad_norm": 1.5933982697412945, "learning_rate": 4.31485785419698e-06, "loss": 0.7856, "step": 6948 }, { "epoch": 0.56, "grad_norm": 0.8999830664151784, "learning_rate": 4.313570880497394e-06, "loss": 1.111, "step": 6949 }, { "epoch": 0.56, "grad_norm": 1.540896401430285, "learning_rate": 4.312283953147069e-06, "loss": 0.7803, "step": 6950 }, { "epoch": 0.56, "grad_norm": 0.8168646182965901, "learning_rate": 4.310997072232907e-06, "loss": 1.0933, "step": 6951 }, { "epoch": 0.56, "grad_norm": 1.5886395803885651, "learning_rate": 4.3097102378417985e-06, "loss": 0.8706, "step": 6952 }, { "epoch": 0.56, "grad_norm": 1.5487830453006384, "learning_rate": 4.3084234500606334e-06, "loss": 0.8355, "step": 6953 }, { "epoch": 0.56, "grad_norm": 0.7578411591482668, "learning_rate": 4.3071367089762965e-06, "loss": 1.0814, "step": 6954 }, { "epoch": 0.56, "grad_norm": 1.4641970801517272, "learning_rate": 4.305850014675675e-06, "loss": 0.8014, "step": 6955 }, { "epoch": 0.56, "grad_norm": 0.7586025420028498, "learning_rate": 4.304563367245646e-06, "loss": 1.1198, "step": 6956 }, { "epoch": 0.56, "grad_norm": 1.6697543356987439, "learning_rate": 4.303276766773088e-06, "loss": 0.6382, "step": 6957 }, { "epoch": 0.56, "grad_norm": 1.6743491278819924, "learning_rate": 4.301990213344878e-06, "loss": 0.8116, "step": 6958 }, { "epoch": 0.56, "grad_norm": 0.7713396386967131, "learning_rate": 4.3007037070478855e-06, "loss": 1.0757, "step": 6959 }, { "epoch": 0.56, "grad_norm": 1.5203509873255936, "learning_rate": 4.299417247968978e-06, "loss": 0.7973, "step": 6960 }, { "epoch": 0.56, "grad_norm": 1.647129247588905, "learning_rate": 4.298130836195017e-06, "loss": 0.6954, "step": 6961 }, { "epoch": 0.56, "grad_norm": 1.4668755552320825, "learning_rate": 4.296844471812868e-06, "loss": 0.7819, "step": 6962 }, { "epoch": 0.56, "grad_norm": 1.7106202133608355, "learning_rate": 4.295558154909389e-06, "loss": 0.7567, "step": 6963 }, { "epoch": 0.56, "grad_norm": 0.7827394005865854, "learning_rate": 4.294271885571433e-06, "loss": 1.0605, "step": 6964 }, { "epoch": 0.56, "grad_norm": 1.5445077440366493, "learning_rate": 4.292985663885854e-06, "loss": 0.7717, "step": 6965 }, { "epoch": 0.56, "grad_norm": 0.7926750189186063, "learning_rate": 4.291699489939499e-06, "loss": 1.0698, "step": 6966 }, { "epoch": 0.56, "grad_norm": 1.5417004091368554, "learning_rate": 4.290413363819213e-06, "loss": 0.7775, "step": 6967 }, { "epoch": 0.56, "grad_norm": 1.5838542939159044, "learning_rate": 4.2891272856118415e-06, "loss": 0.8278, "step": 6968 }, { "epoch": 0.56, "grad_norm": 1.4723974559251585, "learning_rate": 4.28784125540422e-06, "loss": 0.7631, "step": 6969 }, { "epoch": 0.56, "grad_norm": 0.8110665782725236, "learning_rate": 4.2865552732831864e-06, "loss": 1.0759, "step": 6970 }, { "epoch": 0.56, "grad_norm": 1.458405662978379, "learning_rate": 4.285269339335571e-06, "loss": 0.7241, "step": 6971 }, { "epoch": 0.56, "grad_norm": 0.7882538328398603, "learning_rate": 4.283983453648205e-06, "loss": 1.0965, "step": 6972 }, { "epoch": 0.56, "grad_norm": 1.6873386588233177, "learning_rate": 4.282697616307913e-06, "loss": 0.7431, "step": 6973 }, { "epoch": 0.56, "grad_norm": 0.7411033990284964, "learning_rate": 4.281411827401517e-06, "loss": 1.0536, "step": 6974 }, { "epoch": 0.56, "grad_norm": 1.4888270837813484, "learning_rate": 4.280126087015839e-06, "loss": 0.7285, "step": 6975 }, { "epoch": 0.56, "grad_norm": 0.774768537152064, "learning_rate": 4.278840395237695e-06, "loss": 1.0832, "step": 6976 }, { "epoch": 0.56, "grad_norm": 1.4552873350324782, "learning_rate": 4.277554752153895e-06, "loss": 0.7154, "step": 6977 }, { "epoch": 0.56, "grad_norm": 0.7632505453358764, "learning_rate": 4.2762691578512485e-06, "loss": 1.0882, "step": 6978 }, { "epoch": 0.56, "grad_norm": 1.6134987582781806, "learning_rate": 4.274983612416566e-06, "loss": 0.8242, "step": 6979 }, { "epoch": 0.56, "grad_norm": 0.7700126464417867, "learning_rate": 4.273698115936647e-06, "loss": 1.0606, "step": 6980 }, { "epoch": 0.56, "grad_norm": 1.47960890502507, "learning_rate": 4.272412668498291e-06, "loss": 0.6921, "step": 6981 }, { "epoch": 0.56, "grad_norm": 1.8985859201454274, "learning_rate": 4.271127270188297e-06, "loss": 0.6952, "step": 6982 }, { "epoch": 0.56, "grad_norm": 1.5274719121498368, "learning_rate": 4.269841921093456e-06, "loss": 0.7485, "step": 6983 }, { "epoch": 0.56, "grad_norm": 1.5031179881657442, "learning_rate": 4.268556621300555e-06, "loss": 0.8557, "step": 6984 }, { "epoch": 0.56, "grad_norm": 1.5398428784746296, "learning_rate": 4.267271370896387e-06, "loss": 0.7467, "step": 6985 }, { "epoch": 0.56, "grad_norm": 1.4947655459650928, "learning_rate": 4.265986169967731e-06, "loss": 0.7929, "step": 6986 }, { "epoch": 0.56, "grad_norm": 1.628079580521067, "learning_rate": 4.264701018601367e-06, "loss": 0.7485, "step": 6987 }, { "epoch": 0.56, "grad_norm": 1.509337783990883, "learning_rate": 4.263415916884071e-06, "loss": 0.7077, "step": 6988 }, { "epoch": 0.56, "grad_norm": 0.8153642249167848, "learning_rate": 4.262130864902617e-06, "loss": 1.1147, "step": 6989 }, { "epoch": 0.56, "grad_norm": 1.5214981222882862, "learning_rate": 4.260845862743775e-06, "loss": 0.7776, "step": 6990 }, { "epoch": 0.56, "grad_norm": 1.5446307601604488, "learning_rate": 4.2595609104943095e-06, "loss": 0.77, "step": 6991 }, { "epoch": 0.56, "grad_norm": 1.5728453655360544, "learning_rate": 4.258276008240985e-06, "loss": 0.7544, "step": 6992 }, { "epoch": 0.56, "grad_norm": 1.59908816769836, "learning_rate": 4.256991156070561e-06, "loss": 0.7655, "step": 6993 }, { "epoch": 0.56, "grad_norm": 1.5783748937893796, "learning_rate": 4.255706354069793e-06, "loss": 0.7476, "step": 6994 }, { "epoch": 0.56, "grad_norm": 1.508834813556522, "learning_rate": 4.2544216023254314e-06, "loss": 0.7728, "step": 6995 }, { "epoch": 0.56, "grad_norm": 1.5237966803246454, "learning_rate": 4.25313690092423e-06, "loss": 0.7689, "step": 6996 }, { "epoch": 0.56, "grad_norm": 1.456342898373902, "learning_rate": 4.251852249952934e-06, "loss": 0.7446, "step": 6997 }, { "epoch": 0.56, "grad_norm": 1.3549555836380007, "learning_rate": 4.250567649498283e-06, "loss": 0.695, "step": 6998 }, { "epoch": 0.56, "grad_norm": 0.8136640184360785, "learning_rate": 4.249283099647019e-06, "loss": 1.0903, "step": 6999 }, { "epoch": 0.56, "grad_norm": 1.4900308069928334, "learning_rate": 4.2479986004858766e-06, "loss": 0.7796, "step": 7000 }, { "epoch": 0.56, "grad_norm": 1.5922725822319352, "learning_rate": 4.2467141521015855e-06, "loss": 0.8098, "step": 7001 }, { "epoch": 0.56, "grad_norm": 0.7609079542638947, "learning_rate": 4.245429754580881e-06, "loss": 1.0564, "step": 7002 }, { "epoch": 0.56, "grad_norm": 0.7412150750411018, "learning_rate": 4.244145408010484e-06, "loss": 1.0917, "step": 7003 }, { "epoch": 0.56, "grad_norm": 1.498145633574176, "learning_rate": 4.2428611124771184e-06, "loss": 0.7407, "step": 7004 }, { "epoch": 0.56, "grad_norm": 0.7657978138650661, "learning_rate": 4.241576868067499e-06, "loss": 1.0622, "step": 7005 }, { "epoch": 0.56, "grad_norm": 1.5851893718000987, "learning_rate": 4.240292674868346e-06, "loss": 0.8088, "step": 7006 }, { "epoch": 0.56, "grad_norm": 1.5951092461965612, "learning_rate": 4.2390085329663685e-06, "loss": 0.6999, "step": 7007 }, { "epoch": 0.56, "grad_norm": 1.4204506282037903, "learning_rate": 4.237724442448273e-06, "loss": 0.7327, "step": 7008 }, { "epoch": 0.56, "grad_norm": 1.5050806888376942, "learning_rate": 4.2364404034007685e-06, "loss": 0.744, "step": 7009 }, { "epoch": 0.56, "grad_norm": 1.588904689268676, "learning_rate": 4.235156415910553e-06, "loss": 0.7144, "step": 7010 }, { "epoch": 0.56, "grad_norm": 1.6311589989690745, "learning_rate": 4.233872480064326e-06, "loss": 0.822, "step": 7011 }, { "epoch": 0.56, "grad_norm": 1.5129676403662289, "learning_rate": 4.232588595948779e-06, "loss": 0.7731, "step": 7012 }, { "epoch": 0.56, "grad_norm": 1.6153881229756593, "learning_rate": 4.231304763650607e-06, "loss": 0.8254, "step": 7013 }, { "epoch": 0.56, "grad_norm": 1.5326578790820926, "learning_rate": 4.230020983256494e-06, "loss": 0.7526, "step": 7014 }, { "epoch": 0.56, "grad_norm": 1.6830427670101706, "learning_rate": 4.2287372548531245e-06, "loss": 0.7418, "step": 7015 }, { "epoch": 0.56, "grad_norm": 0.859381105343199, "learning_rate": 4.22745357852718e-06, "loss": 1.0806, "step": 7016 }, { "epoch": 0.56, "grad_norm": 1.5803470729949507, "learning_rate": 4.226169954365337e-06, "loss": 0.7951, "step": 7017 }, { "epoch": 0.56, "grad_norm": 1.4768766708041745, "learning_rate": 4.224886382454267e-06, "loss": 0.6884, "step": 7018 }, { "epoch": 0.56, "grad_norm": 1.5598983693446669, "learning_rate": 4.22360286288064e-06, "loss": 0.8311, "step": 7019 }, { "epoch": 0.56, "grad_norm": 1.557215804039948, "learning_rate": 4.222319395731126e-06, "loss": 0.8038, "step": 7020 }, { "epoch": 0.56, "grad_norm": 1.4260877791722688, "learning_rate": 4.221035981092383e-06, "loss": 0.6871, "step": 7021 }, { "epoch": 0.56, "grad_norm": 1.4865103488792937, "learning_rate": 4.21975261905107e-06, "loss": 0.7186, "step": 7022 }, { "epoch": 0.56, "grad_norm": 1.5847607138771493, "learning_rate": 4.218469309693847e-06, "loss": 0.7653, "step": 7023 }, { "epoch": 0.56, "grad_norm": 1.4244366139360238, "learning_rate": 4.2171860531073624e-06, "loss": 0.73, "step": 7024 }, { "epoch": 0.56, "grad_norm": 1.62391589702249, "learning_rate": 4.215902849378265e-06, "loss": 0.841, "step": 7025 }, { "epoch": 0.56, "grad_norm": 1.5325421561933692, "learning_rate": 4.2146196985932e-06, "loss": 0.8372, "step": 7026 }, { "epoch": 0.56, "grad_norm": 1.628463471836452, "learning_rate": 4.21333660083881e-06, "loss": 0.8097, "step": 7027 }, { "epoch": 0.56, "grad_norm": 1.4640618983708393, "learning_rate": 4.2120535562017314e-06, "loss": 0.7433, "step": 7028 }, { "epoch": 0.56, "grad_norm": 1.520796711312939, "learning_rate": 4.210770564768597e-06, "loss": 0.7102, "step": 7029 }, { "epoch": 0.56, "grad_norm": 1.4002023455669288, "learning_rate": 4.209487626626039e-06, "loss": 0.6997, "step": 7030 }, { "epoch": 0.56, "grad_norm": 1.6954458248045787, "learning_rate": 4.208204741860685e-06, "loss": 0.8332, "step": 7031 }, { "epoch": 0.56, "grad_norm": 1.4576887262480682, "learning_rate": 4.2069219105591565e-06, "loss": 0.7248, "step": 7032 }, { "epoch": 0.56, "grad_norm": 1.6260693807469435, "learning_rate": 4.205639132808076e-06, "loss": 0.8395, "step": 7033 }, { "epoch": 0.56, "grad_norm": 0.790259541690348, "learning_rate": 4.2043564086940565e-06, "loss": 1.0823, "step": 7034 }, { "epoch": 0.56, "grad_norm": 0.7708139741847582, "learning_rate": 4.203073738303712e-06, "loss": 1.079, "step": 7035 }, { "epoch": 0.56, "grad_norm": 1.5079869990972794, "learning_rate": 4.2017911217236495e-06, "loss": 0.7572, "step": 7036 }, { "epoch": 0.56, "grad_norm": 1.4936906135003922, "learning_rate": 4.200508559040477e-06, "loss": 0.823, "step": 7037 }, { "epoch": 0.56, "grad_norm": 1.562509591020829, "learning_rate": 4.199226050340795e-06, "loss": 0.7383, "step": 7038 }, { "epoch": 0.56, "grad_norm": 1.6457173716580216, "learning_rate": 4.1979435957111984e-06, "loss": 0.7311, "step": 7039 }, { "epoch": 0.56, "grad_norm": 1.498161336457439, "learning_rate": 4.196661195238287e-06, "loss": 0.8183, "step": 7040 }, { "epoch": 0.56, "grad_norm": 1.5579801595343497, "learning_rate": 4.1953788490086486e-06, "loss": 0.7756, "step": 7041 }, { "epoch": 0.56, "grad_norm": 1.5379747325824085, "learning_rate": 4.194096557108869e-06, "loss": 0.7261, "step": 7042 }, { "epoch": 0.57, "grad_norm": 1.4153470707217075, "learning_rate": 4.192814319625534e-06, "loss": 0.7058, "step": 7043 }, { "epoch": 0.57, "grad_norm": 1.5605623246040383, "learning_rate": 4.191532136645221e-06, "loss": 0.7261, "step": 7044 }, { "epoch": 0.57, "grad_norm": 1.4855251932528117, "learning_rate": 4.190250008254507e-06, "loss": 0.7666, "step": 7045 }, { "epoch": 0.57, "grad_norm": 0.8656495892936655, "learning_rate": 4.188967934539962e-06, "loss": 1.0962, "step": 7046 }, { "epoch": 0.57, "grad_norm": 1.521638648883849, "learning_rate": 4.18768591558816e-06, "loss": 0.7501, "step": 7047 }, { "epoch": 0.57, "grad_norm": 0.8015441036999291, "learning_rate": 4.186403951485662e-06, "loss": 1.0585, "step": 7048 }, { "epoch": 0.57, "grad_norm": 1.4427445827182785, "learning_rate": 4.185122042319027e-06, "loss": 0.6736, "step": 7049 }, { "epoch": 0.57, "grad_norm": 1.5024840170398595, "learning_rate": 4.183840188174818e-06, "loss": 0.8035, "step": 7050 }, { "epoch": 0.57, "grad_norm": 1.5366742961891835, "learning_rate": 4.182558389139584e-06, "loss": 0.7646, "step": 7051 }, { "epoch": 0.57, "grad_norm": 1.6096158506389084, "learning_rate": 4.181276645299878e-06, "loss": 0.7317, "step": 7052 }, { "epoch": 0.57, "grad_norm": 1.4626238991871474, "learning_rate": 4.179994956742244e-06, "loss": 0.7298, "step": 7053 }, { "epoch": 0.57, "grad_norm": 1.6837791660950916, "learning_rate": 4.178713323553226e-06, "loss": 0.7543, "step": 7054 }, { "epoch": 0.57, "grad_norm": 1.3996684354699744, "learning_rate": 4.177431745819362e-06, "loss": 0.7572, "step": 7055 }, { "epoch": 0.57, "grad_norm": 2.429596213021206, "learning_rate": 4.176150223627186e-06, "loss": 0.7634, "step": 7056 }, { "epoch": 0.57, "grad_norm": 1.504858365311278, "learning_rate": 4.174868757063233e-06, "loss": 0.7439, "step": 7057 }, { "epoch": 0.57, "grad_norm": 0.8982884163587705, "learning_rate": 4.173587346214028e-06, "loss": 1.0581, "step": 7058 }, { "epoch": 0.57, "grad_norm": 1.448651302094281, "learning_rate": 4.172305991166094e-06, "loss": 0.7716, "step": 7059 }, { "epoch": 0.57, "grad_norm": 1.392905060304066, "learning_rate": 4.1710246920059535e-06, "loss": 0.7871, "step": 7060 }, { "epoch": 0.57, "grad_norm": 1.4036449295265534, "learning_rate": 4.169743448820121e-06, "loss": 0.7267, "step": 7061 }, { "epoch": 0.57, "grad_norm": 1.5812031997751954, "learning_rate": 4.16846226169511e-06, "loss": 0.7543, "step": 7062 }, { "epoch": 0.57, "grad_norm": 1.4617419129280593, "learning_rate": 4.1671811307174255e-06, "loss": 0.8186, "step": 7063 }, { "epoch": 0.57, "grad_norm": 1.71415105127411, "learning_rate": 4.165900055973579e-06, "loss": 0.7659, "step": 7064 }, { "epoch": 0.57, "grad_norm": 1.5515863318389964, "learning_rate": 4.164619037550067e-06, "loss": 0.7325, "step": 7065 }, { "epoch": 0.57, "grad_norm": 1.4722172655048211, "learning_rate": 4.163338075533385e-06, "loss": 0.7374, "step": 7066 }, { "epoch": 0.57, "grad_norm": 1.6225362486119685, "learning_rate": 4.162057170010033e-06, "loss": 0.7705, "step": 7067 }, { "epoch": 0.57, "grad_norm": 1.5303833819523325, "learning_rate": 4.1607763210664955e-06, "loss": 0.8069, "step": 7068 }, { "epoch": 0.57, "grad_norm": 1.4746485196179766, "learning_rate": 4.15949552878926e-06, "loss": 0.8053, "step": 7069 }, { "epoch": 0.57, "grad_norm": 1.6048993047749815, "learning_rate": 4.158214793264808e-06, "loss": 0.8076, "step": 7070 }, { "epoch": 0.57, "grad_norm": 1.8685964959809596, "learning_rate": 4.1569341145796185e-06, "loss": 0.8002, "step": 7071 }, { "epoch": 0.57, "grad_norm": 1.704742785930983, "learning_rate": 4.155653492820165e-06, "loss": 0.7721, "step": 7072 }, { "epoch": 0.57, "grad_norm": 1.5604548516654537, "learning_rate": 4.154372928072917e-06, "loss": 0.797, "step": 7073 }, { "epoch": 0.57, "grad_norm": 1.5219394785559033, "learning_rate": 4.153092420424344e-06, "loss": 0.7394, "step": 7074 }, { "epoch": 0.57, "grad_norm": 1.567205663730935, "learning_rate": 4.151811969960908e-06, "loss": 0.7167, "step": 7075 }, { "epoch": 0.57, "grad_norm": 1.569289973905083, "learning_rate": 4.1505315767690675e-06, "loss": 0.7335, "step": 7076 }, { "epoch": 0.57, "grad_norm": 1.4887889751178849, "learning_rate": 4.1492512409352755e-06, "loss": 0.709, "step": 7077 }, { "epoch": 0.57, "grad_norm": 1.5513860166690452, "learning_rate": 4.147970962545987e-06, "loss": 0.8265, "step": 7078 }, { "epoch": 0.57, "grad_norm": 1.6011317947612889, "learning_rate": 4.1466907416876466e-06, "loss": 0.7545, "step": 7079 }, { "epoch": 0.57, "grad_norm": 1.5216050097762117, "learning_rate": 4.145410578446697e-06, "loss": 0.7224, "step": 7080 }, { "epoch": 0.57, "grad_norm": 0.8386451667464562, "learning_rate": 4.1441304729095815e-06, "loss": 1.0935, "step": 7081 }, { "epoch": 0.57, "grad_norm": 0.7934165042921365, "learning_rate": 4.1428504251627335e-06, "loss": 1.103, "step": 7082 }, { "epoch": 0.57, "grad_norm": 1.5135287846568213, "learning_rate": 4.141570435292582e-06, "loss": 0.8418, "step": 7083 }, { "epoch": 0.57, "grad_norm": 1.564432601813079, "learning_rate": 4.14029050338556e-06, "loss": 0.7662, "step": 7084 }, { "epoch": 0.57, "grad_norm": 1.387086604553629, "learning_rate": 4.139010629528089e-06, "loss": 0.6856, "step": 7085 }, { "epoch": 0.57, "grad_norm": 1.5262350938140619, "learning_rate": 4.137730813806589e-06, "loss": 0.756, "step": 7086 }, { "epoch": 0.57, "grad_norm": 1.7430656879929525, "learning_rate": 4.136451056307475e-06, "loss": 0.7107, "step": 7087 }, { "epoch": 0.57, "grad_norm": 2.4981729085503597, "learning_rate": 4.135171357117162e-06, "loss": 0.7791, "step": 7088 }, { "epoch": 0.57, "grad_norm": 1.4365290621828308, "learning_rate": 4.133891716322056e-06, "loss": 0.7624, "step": 7089 }, { "epoch": 0.57, "grad_norm": 1.526314980811003, "learning_rate": 4.1326121340085595e-06, "loss": 0.713, "step": 7090 }, { "epoch": 0.57, "grad_norm": 1.494077388323042, "learning_rate": 4.1313326102630775e-06, "loss": 0.7278, "step": 7091 }, { "epoch": 0.57, "grad_norm": 1.468433708559831, "learning_rate": 4.130053145172005e-06, "loss": 0.7733, "step": 7092 }, { "epoch": 0.57, "grad_norm": 1.6169622928077265, "learning_rate": 4.1287737388217325e-06, "loss": 0.718, "step": 7093 }, { "epoch": 0.57, "grad_norm": 0.8358845661212813, "learning_rate": 4.127494391298647e-06, "loss": 1.1133, "step": 7094 }, { "epoch": 0.57, "grad_norm": 1.547959506487438, "learning_rate": 4.126215102689137e-06, "loss": 0.7541, "step": 7095 }, { "epoch": 0.57, "grad_norm": 1.5624047570482738, "learning_rate": 4.124935873079582e-06, "loss": 0.8297, "step": 7096 }, { "epoch": 0.57, "grad_norm": 0.8074977996225164, "learning_rate": 4.1236567025563565e-06, "loss": 1.0849, "step": 7097 }, { "epoch": 0.57, "grad_norm": 1.6408462949691376, "learning_rate": 4.122377591205835e-06, "loss": 0.8198, "step": 7098 }, { "epoch": 0.57, "grad_norm": 1.5271300867825561, "learning_rate": 4.121098539114387e-06, "loss": 0.7557, "step": 7099 }, { "epoch": 0.57, "grad_norm": 1.6176042525155987, "learning_rate": 4.1198195463683716e-06, "loss": 0.6571, "step": 7100 }, { "epoch": 0.57, "grad_norm": 1.5919570317737428, "learning_rate": 4.118540613054155e-06, "loss": 0.7298, "step": 7101 }, { "epoch": 0.57, "grad_norm": 1.5860673838245258, "learning_rate": 4.117261739258092e-06, "loss": 0.7613, "step": 7102 }, { "epoch": 0.57, "grad_norm": 0.7720359422638717, "learning_rate": 4.115982925066536e-06, "loss": 1.0755, "step": 7103 }, { "epoch": 0.57, "grad_norm": 1.6631120422585277, "learning_rate": 4.114704170565833e-06, "loss": 0.7826, "step": 7104 }, { "epoch": 0.57, "grad_norm": 1.543269038483739, "learning_rate": 4.113425475842329e-06, "loss": 0.6782, "step": 7105 }, { "epoch": 0.57, "grad_norm": 1.5918814080879675, "learning_rate": 4.112146840982365e-06, "loss": 0.7164, "step": 7106 }, { "epoch": 0.57, "grad_norm": 1.6396402610293275, "learning_rate": 4.110868266072273e-06, "loss": 0.8204, "step": 7107 }, { "epoch": 0.57, "grad_norm": 1.6092472191273226, "learning_rate": 4.109589751198393e-06, "loss": 0.7221, "step": 7108 }, { "epoch": 0.57, "grad_norm": 1.388014418878276, "learning_rate": 4.108311296447048e-06, "loss": 0.7559, "step": 7109 }, { "epoch": 0.57, "grad_norm": 1.5446557287113962, "learning_rate": 4.107032901904564e-06, "loss": 0.7103, "step": 7110 }, { "epoch": 0.57, "grad_norm": 0.8279707242164354, "learning_rate": 4.105754567657257e-06, "loss": 1.0756, "step": 7111 }, { "epoch": 0.57, "grad_norm": 1.4002401523237993, "learning_rate": 4.104476293791449e-06, "loss": 0.73, "step": 7112 }, { "epoch": 0.57, "grad_norm": 1.4691121826274531, "learning_rate": 4.103198080393449e-06, "loss": 0.8337, "step": 7113 }, { "epoch": 0.57, "grad_norm": 0.754328160845997, "learning_rate": 4.101919927549564e-06, "loss": 1.0551, "step": 7114 }, { "epoch": 0.57, "grad_norm": 0.8024385858466563, "learning_rate": 4.100641835346101e-06, "loss": 1.0801, "step": 7115 }, { "epoch": 0.57, "grad_norm": 1.4729572008807656, "learning_rate": 4.0993638038693575e-06, "loss": 0.7158, "step": 7116 }, { "epoch": 0.57, "grad_norm": 1.5147121738263984, "learning_rate": 4.098085833205629e-06, "loss": 0.8078, "step": 7117 }, { "epoch": 0.57, "grad_norm": 1.5947104671273853, "learning_rate": 4.0968079234412054e-06, "loss": 0.7671, "step": 7118 }, { "epoch": 0.57, "grad_norm": 1.3856971064534735, "learning_rate": 4.0955300746623785e-06, "loss": 0.7249, "step": 7119 }, { "epoch": 0.57, "grad_norm": 1.5281177458021948, "learning_rate": 4.094252286955429e-06, "loss": 0.7894, "step": 7120 }, { "epoch": 0.57, "grad_norm": 1.4479367430242662, "learning_rate": 4.092974560406635e-06, "loss": 0.7044, "step": 7121 }, { "epoch": 0.57, "grad_norm": 1.54243591201938, "learning_rate": 4.091696895102274e-06, "loss": 0.7868, "step": 7122 }, { "epoch": 0.57, "grad_norm": 1.540491417062718, "learning_rate": 4.090419291128616e-06, "loss": 0.7339, "step": 7123 }, { "epoch": 0.57, "grad_norm": 1.574562157142487, "learning_rate": 4.089141748571926e-06, "loss": 0.7537, "step": 7124 }, { "epoch": 0.57, "grad_norm": 1.4927405041267854, "learning_rate": 4.0878642675184675e-06, "loss": 0.7024, "step": 7125 }, { "epoch": 0.57, "grad_norm": 2.0882468373744474, "learning_rate": 4.086586848054501e-06, "loss": 0.7429, "step": 7126 }, { "epoch": 0.57, "grad_norm": 1.6335344463837769, "learning_rate": 4.085309490266278e-06, "loss": 0.8015, "step": 7127 }, { "epoch": 0.57, "grad_norm": 1.5072018266943266, "learning_rate": 4.08403219424005e-06, "loss": 0.7725, "step": 7128 }, { "epoch": 0.57, "grad_norm": 1.6210881311302356, "learning_rate": 4.082754960062062e-06, "loss": 0.8355, "step": 7129 }, { "epoch": 0.57, "grad_norm": 1.4632537188742267, "learning_rate": 4.081477787818559e-06, "loss": 0.7874, "step": 7130 }, { "epoch": 0.57, "grad_norm": 1.4712006850891277, "learning_rate": 4.0802006775957735e-06, "loss": 0.7807, "step": 7131 }, { "epoch": 0.57, "grad_norm": 1.468836921041486, "learning_rate": 4.0789236294799425e-06, "loss": 0.7548, "step": 7132 }, { "epoch": 0.57, "grad_norm": 1.5308232808269413, "learning_rate": 4.077646643557295e-06, "loss": 0.8063, "step": 7133 }, { "epoch": 0.57, "grad_norm": 1.5666626068333973, "learning_rate": 4.076369719914055e-06, "loss": 0.7795, "step": 7134 }, { "epoch": 0.57, "grad_norm": 1.5377358265677132, "learning_rate": 4.075092858636441e-06, "loss": 0.8133, "step": 7135 }, { "epoch": 0.57, "grad_norm": 1.4571039319355938, "learning_rate": 4.073816059810675e-06, "loss": 0.7501, "step": 7136 }, { "epoch": 0.57, "grad_norm": 1.5473773043761014, "learning_rate": 4.072539323522967e-06, "loss": 0.8034, "step": 7137 }, { "epoch": 0.57, "grad_norm": 1.4905016995132188, "learning_rate": 4.0712626498595206e-06, "loss": 0.7857, "step": 7138 }, { "epoch": 0.57, "grad_norm": 1.4818524041242678, "learning_rate": 4.069986038906547e-06, "loss": 0.7469, "step": 7139 }, { "epoch": 0.57, "grad_norm": 1.4368787817807676, "learning_rate": 4.0687094907502425e-06, "loss": 0.7852, "step": 7140 }, { "epoch": 0.57, "grad_norm": 1.5433060516578183, "learning_rate": 4.067433005476802e-06, "loss": 0.7259, "step": 7141 }, { "epoch": 0.57, "grad_norm": 1.3948417656051415, "learning_rate": 4.0661565831724185e-06, "loss": 0.7725, "step": 7142 }, { "epoch": 0.57, "grad_norm": 1.467304525198999, "learning_rate": 4.064880223923277e-06, "loss": 0.7411, "step": 7143 }, { "epoch": 0.57, "grad_norm": 1.7018906813780617, "learning_rate": 4.063603927815561e-06, "loss": 0.8394, "step": 7144 }, { "epoch": 0.57, "grad_norm": 1.488822441960508, "learning_rate": 4.062327694935448e-06, "loss": 0.7173, "step": 7145 }, { "epoch": 0.57, "grad_norm": 1.8228050794284565, "learning_rate": 4.061051525369114e-06, "loss": 0.7253, "step": 7146 }, { "epoch": 0.57, "grad_norm": 1.540225282192984, "learning_rate": 4.059775419202729e-06, "loss": 0.7666, "step": 7147 }, { "epoch": 0.57, "grad_norm": 1.3949528797560709, "learning_rate": 4.058499376522456e-06, "loss": 0.7436, "step": 7148 }, { "epoch": 0.57, "grad_norm": 1.5317956426315347, "learning_rate": 4.05722339741446e-06, "loss": 0.7675, "step": 7149 }, { "epoch": 0.57, "grad_norm": 1.4902789622961525, "learning_rate": 4.055947481964895e-06, "loss": 0.7456, "step": 7150 }, { "epoch": 0.57, "grad_norm": 1.4922505916641091, "learning_rate": 4.0546716302599156e-06, "loss": 0.7847, "step": 7151 }, { "epoch": 0.57, "grad_norm": 0.9157573078400717, "learning_rate": 4.053395842385668e-06, "loss": 1.0402, "step": 7152 }, { "epoch": 0.57, "grad_norm": 0.8669598843367065, "learning_rate": 4.052120118428298e-06, "loss": 1.0821, "step": 7153 }, { "epoch": 0.57, "grad_norm": 1.496477479700937, "learning_rate": 4.050844458473945e-06, "loss": 0.8051, "step": 7154 }, { "epoch": 0.57, "grad_norm": 1.572690363581884, "learning_rate": 4.049568862608743e-06, "loss": 0.8392, "step": 7155 }, { "epoch": 0.57, "grad_norm": 0.7810375592555158, "learning_rate": 4.048293330918827e-06, "loss": 1.0474, "step": 7156 }, { "epoch": 0.57, "grad_norm": 1.49694165920929, "learning_rate": 4.047017863490322e-06, "loss": 0.7374, "step": 7157 }, { "epoch": 0.57, "grad_norm": 1.5580336522446159, "learning_rate": 4.045742460409348e-06, "loss": 0.8219, "step": 7158 }, { "epoch": 0.57, "grad_norm": 0.9045209913316393, "learning_rate": 4.044467121762026e-06, "loss": 1.0623, "step": 7159 }, { "epoch": 0.57, "grad_norm": 1.6309969299574245, "learning_rate": 4.043191847634469e-06, "loss": 0.8006, "step": 7160 }, { "epoch": 0.57, "grad_norm": 1.4166723294111496, "learning_rate": 4.0419166381127865e-06, "loss": 0.6948, "step": 7161 }, { "epoch": 0.57, "grad_norm": 1.7649859680725803, "learning_rate": 4.040641493283081e-06, "loss": 0.7796, "step": 7162 }, { "epoch": 0.57, "grad_norm": 1.5247404594546232, "learning_rate": 4.039366413231458e-06, "loss": 0.8558, "step": 7163 }, { "epoch": 0.57, "grad_norm": 1.3311251339927925, "learning_rate": 4.038091398044012e-06, "loss": 0.6425, "step": 7164 }, { "epoch": 0.57, "grad_norm": 1.463494458220714, "learning_rate": 4.036816447806832e-06, "loss": 0.7241, "step": 7165 }, { "epoch": 0.57, "grad_norm": 1.4064974063687137, "learning_rate": 4.03554156260601e-06, "loss": 0.7029, "step": 7166 }, { "epoch": 0.58, "grad_norm": 1.64264977071479, "learning_rate": 4.0342667425276265e-06, "loss": 0.7495, "step": 7167 }, { "epoch": 0.58, "grad_norm": 0.8355448835502453, "learning_rate": 4.032991987657762e-06, "loss": 1.0702, "step": 7168 }, { "epoch": 0.58, "grad_norm": 1.4976396585244225, "learning_rate": 4.031717298082487e-06, "loss": 0.7245, "step": 7169 }, { "epoch": 0.58, "grad_norm": 1.522291765206997, "learning_rate": 4.030442673887876e-06, "loss": 0.8049, "step": 7170 }, { "epoch": 0.58, "grad_norm": 0.8028540636940037, "learning_rate": 4.029168115159993e-06, "loss": 1.0519, "step": 7171 }, { "epoch": 0.58, "grad_norm": 0.7799469344630628, "learning_rate": 4.027893621984896e-06, "loss": 1.0702, "step": 7172 }, { "epoch": 0.58, "grad_norm": 1.5155806798282636, "learning_rate": 4.026619194448647e-06, "loss": 0.7645, "step": 7173 }, { "epoch": 0.58, "grad_norm": 0.7918685161233011, "learning_rate": 4.025344832637295e-06, "loss": 1.0695, "step": 7174 }, { "epoch": 0.58, "grad_norm": 0.7875785102457681, "learning_rate": 4.024070536636889e-06, "loss": 1.0672, "step": 7175 }, { "epoch": 0.58, "grad_norm": 1.5275750469430311, "learning_rate": 4.022796306533472e-06, "loss": 0.7003, "step": 7176 }, { "epoch": 0.58, "grad_norm": 1.7011304514495424, "learning_rate": 4.021522142413082e-06, "loss": 0.8243, "step": 7177 }, { "epoch": 0.58, "grad_norm": 1.5928614131373477, "learning_rate": 4.020248044361756e-06, "loss": 0.7684, "step": 7178 }, { "epoch": 0.58, "grad_norm": 1.640593197921435, "learning_rate": 4.018974012465519e-06, "loss": 0.8355, "step": 7179 }, { "epoch": 0.58, "grad_norm": 1.5924582890077175, "learning_rate": 4.017700046810403e-06, "loss": 0.6925, "step": 7180 }, { "epoch": 0.58, "grad_norm": 1.6120998079711137, "learning_rate": 4.016426147482427e-06, "loss": 0.6828, "step": 7181 }, { "epoch": 0.58, "grad_norm": 1.491434826315475, "learning_rate": 4.015152314567603e-06, "loss": 0.8034, "step": 7182 }, { "epoch": 0.58, "grad_norm": 1.4402109082201846, "learning_rate": 4.01387854815195e-06, "loss": 0.7823, "step": 7183 }, { "epoch": 0.58, "grad_norm": 0.8672487359496766, "learning_rate": 4.012604848321471e-06, "loss": 1.0685, "step": 7184 }, { "epoch": 0.58, "grad_norm": 1.4447653310971225, "learning_rate": 4.011331215162171e-06, "loss": 0.7733, "step": 7185 }, { "epoch": 0.58, "grad_norm": 1.4886240948485858, "learning_rate": 4.0100576487600465e-06, "loss": 0.7523, "step": 7186 }, { "epoch": 0.58, "grad_norm": 0.8081762635676816, "learning_rate": 4.0087841492010946e-06, "loss": 1.0758, "step": 7187 }, { "epoch": 0.58, "grad_norm": 0.7902171925632017, "learning_rate": 4.007510716571304e-06, "loss": 1.0607, "step": 7188 }, { "epoch": 0.58, "grad_norm": 1.5008392014435952, "learning_rate": 4.006237350956657e-06, "loss": 0.7271, "step": 7189 }, { "epoch": 0.58, "grad_norm": 1.56291557809456, "learning_rate": 4.004964052443137e-06, "loss": 0.7348, "step": 7190 }, { "epoch": 0.58, "grad_norm": 1.5482025357115359, "learning_rate": 4.003690821116721e-06, "loss": 0.8193, "step": 7191 }, { "epoch": 0.58, "grad_norm": 1.4489551284704332, "learning_rate": 4.002417657063379e-06, "loss": 0.701, "step": 7192 }, { "epoch": 0.58, "grad_norm": 0.8850259789319137, "learning_rate": 4.001144560369077e-06, "loss": 1.11, "step": 7193 }, { "epoch": 0.58, "grad_norm": 1.562665657748395, "learning_rate": 3.999871531119779e-06, "loss": 0.7592, "step": 7194 }, { "epoch": 0.58, "grad_norm": 1.4764821556510002, "learning_rate": 3.9985985694014414e-06, "loss": 0.7855, "step": 7195 }, { "epoch": 0.58, "grad_norm": 1.42402710793973, "learning_rate": 3.997325675300018e-06, "loss": 0.7294, "step": 7196 }, { "epoch": 0.58, "grad_norm": 1.4747840896582316, "learning_rate": 3.996052848901459e-06, "loss": 0.7424, "step": 7197 }, { "epoch": 0.58, "grad_norm": 1.6050726523879517, "learning_rate": 3.994780090291707e-06, "loss": 0.9039, "step": 7198 }, { "epoch": 0.58, "grad_norm": 1.481324804103119, "learning_rate": 3.993507399556699e-06, "loss": 0.7522, "step": 7199 }, { "epoch": 0.58, "grad_norm": 1.489211148366926, "learning_rate": 3.992234776782376e-06, "loss": 0.7794, "step": 7200 }, { "epoch": 0.58, "grad_norm": 1.5084992866168918, "learning_rate": 3.990962222054665e-06, "loss": 0.7749, "step": 7201 }, { "epoch": 0.58, "grad_norm": 0.7745093260620298, "learning_rate": 3.989689735459492e-06, "loss": 1.0571, "step": 7202 }, { "epoch": 0.58, "grad_norm": 1.4285179785574975, "learning_rate": 3.988417317082777e-06, "loss": 0.7757, "step": 7203 }, { "epoch": 0.58, "grad_norm": 0.7672536960448711, "learning_rate": 3.987144967010439e-06, "loss": 1.0768, "step": 7204 }, { "epoch": 0.58, "grad_norm": 1.4535208576265604, "learning_rate": 3.985872685328389e-06, "loss": 0.8318, "step": 7205 }, { "epoch": 0.58, "grad_norm": 1.6911978542740556, "learning_rate": 3.984600472122533e-06, "loss": 0.8724, "step": 7206 }, { "epoch": 0.58, "grad_norm": 1.5166547046459407, "learning_rate": 3.983328327478776e-06, "loss": 0.7843, "step": 7207 }, { "epoch": 0.58, "grad_norm": 0.774598094722898, "learning_rate": 3.982056251483016e-06, "loss": 1.0544, "step": 7208 }, { "epoch": 0.58, "grad_norm": 0.7680335511216643, "learning_rate": 3.980784244221145e-06, "loss": 1.0984, "step": 7209 }, { "epoch": 0.58, "grad_norm": 1.5189397268679563, "learning_rate": 3.97951230577905e-06, "loss": 0.8037, "step": 7210 }, { "epoch": 0.58, "grad_norm": 1.5115252510365207, "learning_rate": 3.97824043624262e-06, "loss": 0.7727, "step": 7211 }, { "epoch": 0.58, "grad_norm": 1.4637110269781521, "learning_rate": 3.976968635697732e-06, "loss": 0.7508, "step": 7212 }, { "epoch": 0.58, "grad_norm": 0.7696970144061797, "learning_rate": 3.9756969042302605e-06, "loss": 1.0681, "step": 7213 }, { "epoch": 0.58, "grad_norm": 1.3377641952242485, "learning_rate": 3.974425241926076e-06, "loss": 0.73, "step": 7214 }, { "epoch": 0.58, "grad_norm": 0.7651638855504689, "learning_rate": 3.973153648871045e-06, "loss": 1.0534, "step": 7215 }, { "epoch": 0.58, "grad_norm": 1.5942248006454918, "learning_rate": 3.971882125151028e-06, "loss": 0.6832, "step": 7216 }, { "epoch": 0.58, "grad_norm": 1.5474020869809793, "learning_rate": 3.9706106708518785e-06, "loss": 0.7531, "step": 7217 }, { "epoch": 0.58, "grad_norm": 1.7448428516691636, "learning_rate": 3.969339286059452e-06, "loss": 0.8421, "step": 7218 }, { "epoch": 0.58, "grad_norm": 0.7638557319296726, "learning_rate": 3.968067970859595e-06, "loss": 1.0656, "step": 7219 }, { "epoch": 0.58, "grad_norm": 1.6102123518751517, "learning_rate": 3.9667967253381455e-06, "loss": 0.7031, "step": 7220 }, { "epoch": 0.58, "grad_norm": 1.4903400877596606, "learning_rate": 3.965525549580946e-06, "loss": 0.7343, "step": 7221 }, { "epoch": 0.58, "grad_norm": 1.500826808133186, "learning_rate": 3.964254443673826e-06, "loss": 0.8153, "step": 7222 }, { "epoch": 0.58, "grad_norm": 1.521775024667985, "learning_rate": 3.962983407702613e-06, "loss": 0.8091, "step": 7223 }, { "epoch": 0.58, "grad_norm": 0.7706669857353786, "learning_rate": 3.961712441753134e-06, "loss": 1.1003, "step": 7224 }, { "epoch": 0.58, "grad_norm": 1.612001896996319, "learning_rate": 3.960441545911205e-06, "loss": 0.8214, "step": 7225 }, { "epoch": 0.58, "grad_norm": 1.465306544644467, "learning_rate": 3.959170720262639e-06, "loss": 0.6893, "step": 7226 }, { "epoch": 0.58, "grad_norm": 1.629457248808697, "learning_rate": 3.957899964893245e-06, "loss": 0.7579, "step": 7227 }, { "epoch": 0.58, "grad_norm": 1.606871026288243, "learning_rate": 3.956629279888829e-06, "loss": 0.8282, "step": 7228 }, { "epoch": 0.58, "grad_norm": 1.5834350249503308, "learning_rate": 3.95535866533519e-06, "loss": 0.8092, "step": 7229 }, { "epoch": 0.58, "grad_norm": 1.6944650576956282, "learning_rate": 3.954088121318122e-06, "loss": 0.7154, "step": 7230 }, { "epoch": 0.58, "grad_norm": 1.909970700743625, "learning_rate": 3.952817647923417e-06, "loss": 0.7503, "step": 7231 }, { "epoch": 0.58, "grad_norm": 1.4593757728239447, "learning_rate": 3.951547245236859e-06, "loss": 0.8069, "step": 7232 }, { "epoch": 0.58, "grad_norm": 1.5802182234298403, "learning_rate": 3.950276913344228e-06, "loss": 0.7361, "step": 7233 }, { "epoch": 0.58, "grad_norm": 0.7900477374880661, "learning_rate": 3.949006652331297e-06, "loss": 1.0528, "step": 7234 }, { "epoch": 0.58, "grad_norm": 1.611913455115591, "learning_rate": 3.947736462283844e-06, "loss": 0.7637, "step": 7235 }, { "epoch": 0.58, "grad_norm": 1.5990893846695369, "learning_rate": 3.94646634328763e-06, "loss": 0.7433, "step": 7236 }, { "epoch": 0.58, "grad_norm": 1.6338897741126617, "learning_rate": 3.945196295428417e-06, "loss": 0.7386, "step": 7237 }, { "epoch": 0.58, "grad_norm": 1.4570129378017322, "learning_rate": 3.9439263187919635e-06, "loss": 0.8337, "step": 7238 }, { "epoch": 0.58, "grad_norm": 0.7819692770130315, "learning_rate": 3.94265641346402e-06, "loss": 1.0865, "step": 7239 }, { "epoch": 0.58, "grad_norm": 1.4714070156452312, "learning_rate": 3.941386579530331e-06, "loss": 0.7898, "step": 7240 }, { "epoch": 0.58, "grad_norm": 1.5076082529055248, "learning_rate": 3.940116817076643e-06, "loss": 0.8176, "step": 7241 }, { "epoch": 0.58, "grad_norm": 1.4608453612371899, "learning_rate": 3.9388471261886905e-06, "loss": 0.8235, "step": 7242 }, { "epoch": 0.58, "grad_norm": 1.4415209308338504, "learning_rate": 3.937577506952206e-06, "loss": 0.7928, "step": 7243 }, { "epoch": 0.58, "grad_norm": 1.57786500990223, "learning_rate": 3.936307959452917e-06, "loss": 0.7192, "step": 7244 }, { "epoch": 0.58, "grad_norm": 1.5064447392020524, "learning_rate": 3.9350384837765475e-06, "loss": 0.7984, "step": 7245 }, { "epoch": 0.58, "grad_norm": 1.4603479484311506, "learning_rate": 3.933769080008816e-06, "loss": 0.7732, "step": 7246 }, { "epoch": 0.58, "grad_norm": 1.4705219740445552, "learning_rate": 3.932499748235432e-06, "loss": 0.662, "step": 7247 }, { "epoch": 0.58, "grad_norm": 1.6423931859978949, "learning_rate": 3.931230488542107e-06, "loss": 0.8321, "step": 7248 }, { "epoch": 0.58, "grad_norm": 1.596457386454085, "learning_rate": 3.929961301014544e-06, "loss": 0.8058, "step": 7249 }, { "epoch": 0.58, "grad_norm": 0.811745916970097, "learning_rate": 3.928692185738442e-06, "loss": 1.0619, "step": 7250 }, { "epoch": 0.58, "grad_norm": 1.4991928120466487, "learning_rate": 3.927423142799489e-06, "loss": 0.8787, "step": 7251 }, { "epoch": 0.58, "grad_norm": 0.8357500037904331, "learning_rate": 3.926154172283382e-06, "loss": 1.0782, "step": 7252 }, { "epoch": 0.58, "grad_norm": 1.6684433144819075, "learning_rate": 3.9248852742758e-06, "loss": 0.9182, "step": 7253 }, { "epoch": 0.58, "grad_norm": 1.5029255778264479, "learning_rate": 3.92361644886242e-06, "loss": 0.7524, "step": 7254 }, { "epoch": 0.58, "grad_norm": 1.4371308284618662, "learning_rate": 3.922347696128922e-06, "loss": 0.8178, "step": 7255 }, { "epoch": 0.58, "grad_norm": 1.4066092504500654, "learning_rate": 3.92107901616097e-06, "loss": 0.7023, "step": 7256 }, { "epoch": 0.58, "grad_norm": 1.430323907540675, "learning_rate": 3.9198104090442305e-06, "loss": 0.7535, "step": 7257 }, { "epoch": 0.58, "grad_norm": 0.7953937174839691, "learning_rate": 3.918541874864362e-06, "loss": 1.127, "step": 7258 }, { "epoch": 0.58, "grad_norm": 1.4827601836319217, "learning_rate": 3.91727341370702e-06, "loss": 0.7348, "step": 7259 }, { "epoch": 0.58, "grad_norm": 1.489728992173413, "learning_rate": 3.916005025657852e-06, "loss": 0.8275, "step": 7260 }, { "epoch": 0.58, "grad_norm": 1.4559106202284222, "learning_rate": 3.914736710802501e-06, "loss": 0.6996, "step": 7261 }, { "epoch": 0.58, "grad_norm": 1.4018381218421112, "learning_rate": 3.913468469226612e-06, "loss": 0.6509, "step": 7262 }, { "epoch": 0.58, "grad_norm": 1.5657357977665278, "learning_rate": 3.912200301015816e-06, "loss": 0.8539, "step": 7263 }, { "epoch": 0.58, "grad_norm": 1.527746666623484, "learning_rate": 3.910932206255742e-06, "loss": 0.7652, "step": 7264 }, { "epoch": 0.58, "grad_norm": 1.5151709967475193, "learning_rate": 3.909664185032017e-06, "loss": 0.7109, "step": 7265 }, { "epoch": 0.58, "grad_norm": 1.4422402938074113, "learning_rate": 3.90839623743026e-06, "loss": 0.734, "step": 7266 }, { "epoch": 0.58, "grad_norm": 1.4691318886615006, "learning_rate": 3.907128363536084e-06, "loss": 0.7702, "step": 7267 }, { "epoch": 0.58, "grad_norm": 1.466381231199717, "learning_rate": 3.905860563435099e-06, "loss": 0.6965, "step": 7268 }, { "epoch": 0.58, "grad_norm": 1.5225750236345617, "learning_rate": 3.904592837212913e-06, "loss": 0.7092, "step": 7269 }, { "epoch": 0.58, "grad_norm": 1.6807187248985447, "learning_rate": 3.903325184955122e-06, "loss": 0.6976, "step": 7270 }, { "epoch": 0.58, "grad_norm": 1.5298721728670528, "learning_rate": 3.902057606747321e-06, "loss": 0.7754, "step": 7271 }, { "epoch": 0.58, "grad_norm": 1.3847728691913903, "learning_rate": 3.900790102675103e-06, "loss": 0.6332, "step": 7272 }, { "epoch": 0.58, "grad_norm": 1.60183246828662, "learning_rate": 3.89952267282405e-06, "loss": 0.7818, "step": 7273 }, { "epoch": 0.58, "grad_norm": 1.4668105011430197, "learning_rate": 3.898255317279744e-06, "loss": 0.6872, "step": 7274 }, { "epoch": 0.58, "grad_norm": 1.7112537134267514, "learning_rate": 3.896988036127755e-06, "loss": 0.8407, "step": 7275 }, { "epoch": 0.58, "grad_norm": 1.5342072574995707, "learning_rate": 3.895720829453659e-06, "loss": 0.7808, "step": 7276 }, { "epoch": 0.58, "grad_norm": 1.6429465386039408, "learning_rate": 3.894453697343016e-06, "loss": 0.8413, "step": 7277 }, { "epoch": 0.58, "grad_norm": 0.7659567168942542, "learning_rate": 3.893186639881387e-06, "loss": 1.0209, "step": 7278 }, { "epoch": 0.58, "grad_norm": 1.5960865966893847, "learning_rate": 3.891919657154328e-06, "loss": 0.745, "step": 7279 }, { "epoch": 0.58, "grad_norm": 0.7859465767957946, "learning_rate": 3.890652749247388e-06, "loss": 1.0599, "step": 7280 }, { "epoch": 0.58, "grad_norm": 1.4617661998974252, "learning_rate": 3.889385916246109e-06, "loss": 0.6816, "step": 7281 }, { "epoch": 0.58, "grad_norm": 0.7882271712682697, "learning_rate": 3.8881191582360345e-06, "loss": 1.0704, "step": 7282 }, { "epoch": 0.58, "grad_norm": 1.9900396175960753, "learning_rate": 3.886852475302697e-06, "loss": 0.786, "step": 7283 }, { "epoch": 0.58, "grad_norm": 1.6297709174841188, "learning_rate": 3.885585867531625e-06, "loss": 0.7931, "step": 7284 }, { "epoch": 0.58, "grad_norm": 1.5818358078510768, "learning_rate": 3.884319335008343e-06, "loss": 0.7772, "step": 7285 }, { "epoch": 0.58, "grad_norm": 1.6013702708466255, "learning_rate": 3.883052877818372e-06, "loss": 0.6957, "step": 7286 }, { "epoch": 0.58, "grad_norm": 1.6107152889100762, "learning_rate": 3.881786496047224e-06, "loss": 0.7863, "step": 7287 }, { "epoch": 0.58, "grad_norm": 1.5081901825096355, "learning_rate": 3.880520189780407e-06, "loss": 0.8017, "step": 7288 }, { "epoch": 0.58, "grad_norm": 1.4333904460399978, "learning_rate": 3.879253959103429e-06, "loss": 0.739, "step": 7289 }, { "epoch": 0.58, "grad_norm": 1.524853359880095, "learning_rate": 3.877987804101786e-06, "loss": 0.7132, "step": 7290 }, { "epoch": 0.58, "grad_norm": 1.5255864929187066, "learning_rate": 3.876721724860973e-06, "loss": 0.8256, "step": 7291 }, { "epoch": 0.59, "grad_norm": 1.4809916081614716, "learning_rate": 3.875455721466475e-06, "loss": 0.7967, "step": 7292 }, { "epoch": 0.59, "grad_norm": 1.5063175408707346, "learning_rate": 3.87418979400378e-06, "loss": 0.7666, "step": 7293 }, { "epoch": 0.59, "grad_norm": 1.3947333158682247, "learning_rate": 3.872923942558365e-06, "loss": 0.7267, "step": 7294 }, { "epoch": 0.59, "grad_norm": 1.5420871675481762, "learning_rate": 3.8716581672157e-06, "loss": 0.7522, "step": 7295 }, { "epoch": 0.59, "grad_norm": 2.1644049950088515, "learning_rate": 3.870392468061257e-06, "loss": 0.6944, "step": 7296 }, { "epoch": 0.59, "grad_norm": 1.5208831919183567, "learning_rate": 3.8691268451805e-06, "loss": 0.7724, "step": 7297 }, { "epoch": 0.59, "grad_norm": 1.4628982276672735, "learning_rate": 3.86786129865888e-06, "loss": 0.7781, "step": 7298 }, { "epoch": 0.59, "grad_norm": 1.5369249098757705, "learning_rate": 3.866595828581856e-06, "loss": 0.819, "step": 7299 }, { "epoch": 0.59, "grad_norm": 1.4691486200291137, "learning_rate": 3.8653304350348745e-06, "loss": 0.8088, "step": 7300 }, { "epoch": 0.59, "grad_norm": 1.391961157299506, "learning_rate": 3.864065118103376e-06, "loss": 0.7804, "step": 7301 }, { "epoch": 0.59, "grad_norm": 1.5505624560736497, "learning_rate": 3.862799877872796e-06, "loss": 0.7596, "step": 7302 }, { "epoch": 0.59, "grad_norm": 1.551283146732892, "learning_rate": 3.861534714428571e-06, "loss": 0.7651, "step": 7303 }, { "epoch": 0.59, "grad_norm": 1.8823664557875566, "learning_rate": 3.860269627856126e-06, "loss": 0.7587, "step": 7304 }, { "epoch": 0.59, "grad_norm": 1.7020252179726705, "learning_rate": 3.859004618240879e-06, "loss": 0.7152, "step": 7305 }, { "epoch": 0.59, "grad_norm": 1.409754639235626, "learning_rate": 3.85773968566825e-06, "loss": 0.6968, "step": 7306 }, { "epoch": 0.59, "grad_norm": 1.4692027053334245, "learning_rate": 3.856474830223651e-06, "loss": 0.6956, "step": 7307 }, { "epoch": 0.59, "grad_norm": 1.618271946960897, "learning_rate": 3.855210051992486e-06, "loss": 0.7458, "step": 7308 }, { "epoch": 0.59, "grad_norm": 1.555962867037443, "learning_rate": 3.853945351060155e-06, "loss": 0.7985, "step": 7309 }, { "epoch": 0.59, "grad_norm": 1.6269011833685136, "learning_rate": 3.852680727512056e-06, "loss": 0.8567, "step": 7310 }, { "epoch": 0.59, "grad_norm": 1.6498263761361962, "learning_rate": 3.851416181433576e-06, "loss": 0.7745, "step": 7311 }, { "epoch": 0.59, "grad_norm": 1.4905189746794514, "learning_rate": 3.8501517129101015e-06, "loss": 0.7827, "step": 7312 }, { "epoch": 0.59, "grad_norm": 1.4469199766504233, "learning_rate": 3.848887322027015e-06, "loss": 0.6875, "step": 7313 }, { "epoch": 0.59, "grad_norm": 1.3758673718646328, "learning_rate": 3.8476230088696875e-06, "loss": 0.6676, "step": 7314 }, { "epoch": 0.59, "grad_norm": 1.590790565075438, "learning_rate": 3.846358773523488e-06, "loss": 0.7632, "step": 7315 }, { "epoch": 0.59, "grad_norm": 1.5841692263929703, "learning_rate": 3.845094616073783e-06, "loss": 0.8482, "step": 7316 }, { "epoch": 0.59, "grad_norm": 1.6164802624724364, "learning_rate": 3.843830536605932e-06, "loss": 0.8091, "step": 7317 }, { "epoch": 0.59, "grad_norm": 1.5397880433194941, "learning_rate": 3.842566535205286e-06, "loss": 0.7441, "step": 7318 }, { "epoch": 0.59, "grad_norm": 1.4153542080469386, "learning_rate": 3.841302611957193e-06, "loss": 0.677, "step": 7319 }, { "epoch": 0.59, "grad_norm": 1.5292731818864507, "learning_rate": 3.840038766946999e-06, "loss": 0.7189, "step": 7320 }, { "epoch": 0.59, "grad_norm": 1.6504548822632614, "learning_rate": 3.8387750002600395e-06, "loss": 0.7586, "step": 7321 }, { "epoch": 0.59, "grad_norm": 0.832586143342959, "learning_rate": 3.8375113119816444e-06, "loss": 1.0582, "step": 7322 }, { "epoch": 0.59, "grad_norm": 0.8253660075751077, "learning_rate": 3.836247702197146e-06, "loss": 1.0879, "step": 7323 }, { "epoch": 0.59, "grad_norm": 1.4641661586255243, "learning_rate": 3.834984170991865e-06, "loss": 0.7152, "step": 7324 }, { "epoch": 0.59, "grad_norm": 1.5009724049939883, "learning_rate": 3.833720718451116e-06, "loss": 0.6923, "step": 7325 }, { "epoch": 0.59, "grad_norm": 1.5183310870000626, "learning_rate": 3.83245734466021e-06, "loss": 0.7339, "step": 7326 }, { "epoch": 0.59, "grad_norm": 0.7832400776252512, "learning_rate": 3.831194049704455e-06, "loss": 1.1181, "step": 7327 }, { "epoch": 0.59, "grad_norm": 1.5619617677336655, "learning_rate": 3.82993083366915e-06, "loss": 0.7442, "step": 7328 }, { "epoch": 0.59, "grad_norm": 1.6059698170557184, "learning_rate": 3.8286676966395895e-06, "loss": 0.7455, "step": 7329 }, { "epoch": 0.59, "grad_norm": 1.5173039119208334, "learning_rate": 3.827404638701066e-06, "loss": 0.792, "step": 7330 }, { "epoch": 0.59, "grad_norm": 1.5130347418810082, "learning_rate": 3.8261416599388625e-06, "loss": 0.8407, "step": 7331 }, { "epoch": 0.59, "grad_norm": 1.4966770129808409, "learning_rate": 3.824878760438259e-06, "loss": 0.6693, "step": 7332 }, { "epoch": 0.59, "grad_norm": 0.7907774121554396, "learning_rate": 3.823615940284525e-06, "loss": 1.0254, "step": 7333 }, { "epoch": 0.59, "grad_norm": 1.4614747281160294, "learning_rate": 3.822353199562936e-06, "loss": 0.7143, "step": 7334 }, { "epoch": 0.59, "grad_norm": 0.8016914734912023, "learning_rate": 3.821090538358751e-06, "loss": 1.0544, "step": 7335 }, { "epoch": 0.59, "grad_norm": 1.5509233814899828, "learning_rate": 3.819827956757228e-06, "loss": 0.6739, "step": 7336 }, { "epoch": 0.59, "grad_norm": 1.5738399458052668, "learning_rate": 3.8185654548436215e-06, "loss": 0.8006, "step": 7337 }, { "epoch": 0.59, "grad_norm": 1.5121767171645342, "learning_rate": 3.817303032703176e-06, "loss": 0.808, "step": 7338 }, { "epoch": 0.59, "grad_norm": 1.556457245322098, "learning_rate": 3.8160406904211325e-06, "loss": 0.7896, "step": 7339 }, { "epoch": 0.59, "grad_norm": 1.4690313190232334, "learning_rate": 3.814778428082732e-06, "loss": 0.8441, "step": 7340 }, { "epoch": 0.59, "grad_norm": 1.4812244762413158, "learning_rate": 3.8135162457732017e-06, "loss": 0.784, "step": 7341 }, { "epoch": 0.59, "grad_norm": 1.4653686194351543, "learning_rate": 3.812254143577767e-06, "loss": 0.6927, "step": 7342 }, { "epoch": 0.59, "grad_norm": 0.8059576722181655, "learning_rate": 3.8109921215816466e-06, "loss": 1.1038, "step": 7343 }, { "epoch": 0.59, "grad_norm": 1.4475618268190038, "learning_rate": 3.8097301798700587e-06, "loss": 0.8229, "step": 7344 }, { "epoch": 0.59, "grad_norm": 1.4763860867286513, "learning_rate": 3.808468318528211e-06, "loss": 0.7147, "step": 7345 }, { "epoch": 0.59, "grad_norm": 1.5561827068760734, "learning_rate": 3.807206537641306e-06, "loss": 0.8029, "step": 7346 }, { "epoch": 0.59, "grad_norm": 1.4858360555654384, "learning_rate": 3.805944837294544e-06, "loss": 0.7675, "step": 7347 }, { "epoch": 0.59, "grad_norm": 1.4600143183590961, "learning_rate": 3.8046832175731175e-06, "loss": 0.7755, "step": 7348 }, { "epoch": 0.59, "grad_norm": 1.5053323533686256, "learning_rate": 3.803421678562213e-06, "loss": 0.7511, "step": 7349 }, { "epoch": 0.59, "grad_norm": 0.7817044963104299, "learning_rate": 3.8021602203470102e-06, "loss": 1.0917, "step": 7350 }, { "epoch": 0.59, "grad_norm": 1.473930409870113, "learning_rate": 3.8008988430126916e-06, "loss": 0.7964, "step": 7351 }, { "epoch": 0.59, "grad_norm": 1.5882573601170922, "learning_rate": 3.799637546644424e-06, "loss": 0.7583, "step": 7352 }, { "epoch": 0.59, "grad_norm": 1.552596843494377, "learning_rate": 3.7983763313273737e-06, "loss": 0.8129, "step": 7353 }, { "epoch": 0.59, "grad_norm": 0.7893785600951179, "learning_rate": 3.797115197146702e-06, "loss": 1.0598, "step": 7354 }, { "epoch": 0.59, "grad_norm": 1.4336710165606827, "learning_rate": 3.7958541441875628e-06, "loss": 0.7283, "step": 7355 }, { "epoch": 0.59, "grad_norm": 1.5507858915597676, "learning_rate": 3.7945931725351028e-06, "loss": 0.7937, "step": 7356 }, { "epoch": 0.59, "grad_norm": 0.7599082928879205, "learning_rate": 3.793332282274472e-06, "loss": 1.0721, "step": 7357 }, { "epoch": 0.59, "grad_norm": 1.5298738186014904, "learning_rate": 3.7920714734908025e-06, "loss": 0.8233, "step": 7358 }, { "epoch": 0.59, "grad_norm": 1.525855890796849, "learning_rate": 3.7908107462692303e-06, "loss": 0.819, "step": 7359 }, { "epoch": 0.59, "grad_norm": 1.6410259680649963, "learning_rate": 3.7895501006948787e-06, "loss": 0.8454, "step": 7360 }, { "epoch": 0.59, "grad_norm": 1.6146231268004767, "learning_rate": 3.788289536852875e-06, "loss": 0.8079, "step": 7361 }, { "epoch": 0.59, "grad_norm": 1.5296447053937747, "learning_rate": 3.787029054828332e-06, "loss": 0.714, "step": 7362 }, { "epoch": 0.59, "grad_norm": 1.8332501096478866, "learning_rate": 3.78576865470636e-06, "loss": 0.7782, "step": 7363 }, { "epoch": 0.59, "grad_norm": 1.6043405674147053, "learning_rate": 3.784508336572066e-06, "loss": 0.7574, "step": 7364 }, { "epoch": 0.59, "grad_norm": 0.8093002599400496, "learning_rate": 3.7832481005105483e-06, "loss": 1.0902, "step": 7365 }, { "epoch": 0.59, "grad_norm": 1.58074598069486, "learning_rate": 3.781987946606901e-06, "loss": 0.7326, "step": 7366 }, { "epoch": 0.59, "grad_norm": 1.535106978721027, "learning_rate": 3.7807278749462105e-06, "loss": 0.8655, "step": 7367 }, { "epoch": 0.59, "grad_norm": 1.453255371436978, "learning_rate": 3.7794678856135647e-06, "loss": 0.6712, "step": 7368 }, { "epoch": 0.59, "grad_norm": 0.800621606898038, "learning_rate": 3.7782079786940372e-06, "loss": 1.0949, "step": 7369 }, { "epoch": 0.59, "grad_norm": 1.489905992777797, "learning_rate": 3.7769481542727003e-06, "loss": 0.741, "step": 7370 }, { "epoch": 0.59, "grad_norm": 1.4852413468690266, "learning_rate": 3.775688412434622e-06, "loss": 0.7107, "step": 7371 }, { "epoch": 0.59, "grad_norm": 0.7973512280662108, "learning_rate": 3.7744287532648615e-06, "loss": 1.0725, "step": 7372 }, { "epoch": 0.59, "grad_norm": 1.514581351722614, "learning_rate": 3.773169176848474e-06, "loss": 0.719, "step": 7373 }, { "epoch": 0.59, "grad_norm": 1.507294900990096, "learning_rate": 3.7719096832705075e-06, "loss": 0.6512, "step": 7374 }, { "epoch": 0.59, "grad_norm": 1.539479914073825, "learning_rate": 3.7706502726160087e-06, "loss": 0.7643, "step": 7375 }, { "epoch": 0.59, "grad_norm": 1.4602429916934363, "learning_rate": 3.769390944970015e-06, "loss": 0.8454, "step": 7376 }, { "epoch": 0.59, "grad_norm": 1.3974324064067387, "learning_rate": 3.7681317004175565e-06, "loss": 0.7436, "step": 7377 }, { "epoch": 0.59, "grad_norm": 1.6187271235495457, "learning_rate": 3.766872539043664e-06, "loss": 0.7088, "step": 7378 }, { "epoch": 0.59, "grad_norm": 1.4891667363261964, "learning_rate": 3.7656134609333576e-06, "loss": 0.7714, "step": 7379 }, { "epoch": 0.59, "grad_norm": 0.7674804633558427, "learning_rate": 3.7643544661716518e-06, "loss": 1.0402, "step": 7380 }, { "epoch": 0.59, "grad_norm": 1.4234147235527612, "learning_rate": 3.7630955548435595e-06, "loss": 0.663, "step": 7381 }, { "epoch": 0.59, "grad_norm": 0.7955822923746192, "learning_rate": 3.7618367270340825e-06, "loss": 1.0627, "step": 7382 }, { "epoch": 0.59, "grad_norm": 1.6212872339326139, "learning_rate": 3.7605779828282225e-06, "loss": 0.8007, "step": 7383 }, { "epoch": 0.59, "grad_norm": 1.4818167617211282, "learning_rate": 3.759319322310968e-06, "loss": 0.731, "step": 7384 }, { "epoch": 0.59, "grad_norm": 1.5498789048333175, "learning_rate": 3.7580607455673125e-06, "loss": 0.7093, "step": 7385 }, { "epoch": 0.59, "grad_norm": 1.5100507732795363, "learning_rate": 3.756802252682236e-06, "loss": 0.6817, "step": 7386 }, { "epoch": 0.59, "grad_norm": 1.4810427564087147, "learning_rate": 3.755543843740711e-06, "loss": 0.7911, "step": 7387 }, { "epoch": 0.59, "grad_norm": 2.6202274024307886, "learning_rate": 3.7542855188277134e-06, "loss": 0.7577, "step": 7388 }, { "epoch": 0.59, "grad_norm": 1.6622735449146322, "learning_rate": 3.753027278028206e-06, "loss": 0.8221, "step": 7389 }, { "epoch": 0.59, "grad_norm": 1.4163571684248732, "learning_rate": 3.7517691214271485e-06, "loss": 0.7396, "step": 7390 }, { "epoch": 0.59, "grad_norm": 1.597486007782243, "learning_rate": 3.750511049109493e-06, "loss": 0.7301, "step": 7391 }, { "epoch": 0.59, "grad_norm": 0.8155423970724165, "learning_rate": 3.7492530611601897e-06, "loss": 1.0752, "step": 7392 }, { "epoch": 0.59, "grad_norm": 1.6642958448896712, "learning_rate": 3.7479951576641793e-06, "loss": 0.766, "step": 7393 }, { "epoch": 0.59, "grad_norm": 1.409533414988318, "learning_rate": 3.7467373387063973e-06, "loss": 0.7641, "step": 7394 }, { "epoch": 0.59, "grad_norm": 0.7765479073115865, "learning_rate": 3.7454796043717777e-06, "loss": 1.0641, "step": 7395 }, { "epoch": 0.59, "grad_norm": 1.4830021631958124, "learning_rate": 3.7442219547452436e-06, "loss": 0.7464, "step": 7396 }, { "epoch": 0.59, "grad_norm": 1.5817053298355144, "learning_rate": 3.742964389911714e-06, "loss": 0.8078, "step": 7397 }, { "epoch": 0.59, "grad_norm": 0.7684778887267976, "learning_rate": 3.7417069099561038e-06, "loss": 1.0874, "step": 7398 }, { "epoch": 0.59, "grad_norm": 1.4971847832416363, "learning_rate": 3.74044951496332e-06, "loss": 0.6988, "step": 7399 }, { "epoch": 0.59, "grad_norm": 1.5027559398271524, "learning_rate": 3.739192205018266e-06, "loss": 0.7522, "step": 7400 }, { "epoch": 0.59, "grad_norm": 1.3450194841763936, "learning_rate": 3.7379349802058363e-06, "loss": 0.7766, "step": 7401 }, { "epoch": 0.59, "grad_norm": 1.4745294786764742, "learning_rate": 3.7366778406109228e-06, "loss": 0.8139, "step": 7402 }, { "epoch": 0.59, "grad_norm": 1.5872990671565737, "learning_rate": 3.735420786318411e-06, "loss": 0.8585, "step": 7403 }, { "epoch": 0.59, "grad_norm": 1.485961198404866, "learning_rate": 3.734163817413177e-06, "loss": 0.8167, "step": 7404 }, { "epoch": 0.59, "grad_norm": 1.5008803204971517, "learning_rate": 3.7329069339800984e-06, "loss": 0.7545, "step": 7405 }, { "epoch": 0.59, "grad_norm": 1.5635454740994974, "learning_rate": 3.7316501361040412e-06, "loss": 0.7583, "step": 7406 }, { "epoch": 0.59, "grad_norm": 0.787959548858775, "learning_rate": 3.7303934238698675e-06, "loss": 1.0938, "step": 7407 }, { "epoch": 0.59, "grad_norm": 1.6638546370108085, "learning_rate": 3.7291367973624314e-06, "loss": 0.7608, "step": 7408 }, { "epoch": 0.59, "grad_norm": 0.7930433138419533, "learning_rate": 3.727880256666586e-06, "loss": 1.0948, "step": 7409 }, { "epoch": 0.59, "grad_norm": 1.4661730351027815, "learning_rate": 3.726623801867174e-06, "loss": 0.7288, "step": 7410 }, { "epoch": 0.59, "grad_norm": 1.5418319307299484, "learning_rate": 3.725367433049033e-06, "loss": 0.707, "step": 7411 }, { "epoch": 0.59, "grad_norm": 1.52582904045691, "learning_rate": 3.7241111502970003e-06, "loss": 0.8268, "step": 7412 }, { "epoch": 0.59, "grad_norm": 1.61620839076135, "learning_rate": 3.722854953695899e-06, "loss": 0.7008, "step": 7413 }, { "epoch": 0.59, "grad_norm": 1.6293365605025272, "learning_rate": 3.721598843330552e-06, "loss": 0.8251, "step": 7414 }, { "epoch": 0.59, "grad_norm": 1.538634350645867, "learning_rate": 3.720342819285774e-06, "loss": 0.7262, "step": 7415 }, { "epoch": 0.59, "grad_norm": 0.8028660882328434, "learning_rate": 3.7190868816463753e-06, "loss": 1.0225, "step": 7416 }, { "epoch": 0.6, "grad_norm": 1.4902581256384901, "learning_rate": 3.71783103049716e-06, "loss": 0.7409, "step": 7417 }, { "epoch": 0.6, "grad_norm": 1.442191482254818, "learning_rate": 3.716575265922924e-06, "loss": 0.7646, "step": 7418 }, { "epoch": 0.6, "grad_norm": 1.531356555045697, "learning_rate": 3.7153195880084616e-06, "loss": 0.7279, "step": 7419 }, { "epoch": 0.6, "grad_norm": 1.638918668571168, "learning_rate": 3.714063996838558e-06, "loss": 0.7801, "step": 7420 }, { "epoch": 0.6, "grad_norm": 1.609986561207907, "learning_rate": 3.712808492497992e-06, "loss": 0.8274, "step": 7421 }, { "epoch": 0.6, "grad_norm": 1.604873756549189, "learning_rate": 3.7115530750715416e-06, "loss": 0.6997, "step": 7422 }, { "epoch": 0.6, "grad_norm": 1.5792596942921844, "learning_rate": 3.7102977446439743e-06, "loss": 0.7824, "step": 7423 }, { "epoch": 0.6, "grad_norm": 1.4567298072558976, "learning_rate": 3.709042501300052e-06, "loss": 0.7585, "step": 7424 }, { "epoch": 0.6, "grad_norm": 1.5044455599525024, "learning_rate": 3.7077873451245317e-06, "loss": 0.8378, "step": 7425 }, { "epoch": 0.6, "grad_norm": 1.543329414681131, "learning_rate": 3.706532276202165e-06, "loss": 0.7475, "step": 7426 }, { "epoch": 0.6, "grad_norm": 1.4822820965374717, "learning_rate": 3.705277294617697e-06, "loss": 0.8105, "step": 7427 }, { "epoch": 0.6, "grad_norm": 1.5915595274848706, "learning_rate": 3.7040224004558646e-06, "loss": 0.7157, "step": 7428 }, { "epoch": 0.6, "grad_norm": 1.4901148608981263, "learning_rate": 3.7027675938014046e-06, "loss": 0.838, "step": 7429 }, { "epoch": 0.6, "grad_norm": 1.4240608376326165, "learning_rate": 3.701512874739045e-06, "loss": 0.7563, "step": 7430 }, { "epoch": 0.6, "grad_norm": 1.8780802672923584, "learning_rate": 3.7002582433535035e-06, "loss": 0.764, "step": 7431 }, { "epoch": 0.6, "grad_norm": 1.479349948993328, "learning_rate": 3.6990036997294953e-06, "loss": 0.8256, "step": 7432 }, { "epoch": 0.6, "grad_norm": 1.5342389113323844, "learning_rate": 3.697749243951735e-06, "loss": 0.7805, "step": 7433 }, { "epoch": 0.6, "grad_norm": 1.601046986047214, "learning_rate": 3.6964948761049225e-06, "loss": 0.8128, "step": 7434 }, { "epoch": 0.6, "grad_norm": 0.8247698010786512, "learning_rate": 3.6952405962737565e-06, "loss": 1.0913, "step": 7435 }, { "epoch": 0.6, "grad_norm": 1.5534110593666266, "learning_rate": 3.69398640454293e-06, "loss": 0.8417, "step": 7436 }, { "epoch": 0.6, "grad_norm": 1.5317333900049852, "learning_rate": 3.6927323009971273e-06, "loss": 0.7323, "step": 7437 }, { "epoch": 0.6, "grad_norm": 0.79144289539661, "learning_rate": 3.6914782857210263e-06, "loss": 1.0855, "step": 7438 }, { "epoch": 0.6, "grad_norm": 1.5561843607522912, "learning_rate": 3.6902243587993068e-06, "loss": 0.7702, "step": 7439 }, { "epoch": 0.6, "grad_norm": 1.4871913752751227, "learning_rate": 3.6889705203166327e-06, "loss": 0.8021, "step": 7440 }, { "epoch": 0.6, "grad_norm": 1.478124789051195, "learning_rate": 3.6877167703576676e-06, "loss": 0.8387, "step": 7441 }, { "epoch": 0.6, "grad_norm": 1.502879393457538, "learning_rate": 3.6864631090070656e-06, "loss": 0.7783, "step": 7442 }, { "epoch": 0.6, "grad_norm": 1.4754543991489188, "learning_rate": 3.6852095363494788e-06, "loss": 0.7649, "step": 7443 }, { "epoch": 0.6, "grad_norm": 1.3434870234784428, "learning_rate": 3.683956052469551e-06, "loss": 0.7454, "step": 7444 }, { "epoch": 0.6, "grad_norm": 1.7796303859245608, "learning_rate": 3.682702657451919e-06, "loss": 0.7892, "step": 7445 }, { "epoch": 0.6, "grad_norm": 1.4735209345789462, "learning_rate": 3.6814493513812165e-06, "loss": 0.7183, "step": 7446 }, { "epoch": 0.6, "grad_norm": 1.4333034360833905, "learning_rate": 3.680196134342069e-06, "loss": 0.7697, "step": 7447 }, { "epoch": 0.6, "grad_norm": 0.8212285741732758, "learning_rate": 3.678943006419096e-06, "loss": 1.0763, "step": 7448 }, { "epoch": 0.6, "grad_norm": 1.5991135859803531, "learning_rate": 3.6776899676969104e-06, "loss": 0.8305, "step": 7449 }, { "epoch": 0.6, "grad_norm": 1.4209175598954586, "learning_rate": 3.676437018260123e-06, "loss": 0.7585, "step": 7450 }, { "epoch": 0.6, "grad_norm": 0.7693564731624345, "learning_rate": 3.6751841581933356e-06, "loss": 1.068, "step": 7451 }, { "epoch": 0.6, "grad_norm": 1.4966292402736916, "learning_rate": 3.673931387581142e-06, "loss": 0.7539, "step": 7452 }, { "epoch": 0.6, "grad_norm": 1.5901003647995444, "learning_rate": 3.672678706508134e-06, "loss": 0.6898, "step": 7453 }, { "epoch": 0.6, "grad_norm": 1.489348035385817, "learning_rate": 3.6714261150588947e-06, "loss": 0.7758, "step": 7454 }, { "epoch": 0.6, "grad_norm": 1.495575771224626, "learning_rate": 3.6701736133180007e-06, "loss": 0.7646, "step": 7455 }, { "epoch": 0.6, "grad_norm": 1.508689074723911, "learning_rate": 3.668921201370027e-06, "loss": 0.7496, "step": 7456 }, { "epoch": 0.6, "grad_norm": 1.5867446683589597, "learning_rate": 3.6676688792995375e-06, "loss": 0.7608, "step": 7457 }, { "epoch": 0.6, "grad_norm": 1.622250690845507, "learning_rate": 3.6664166471910924e-06, "loss": 0.7735, "step": 7458 }, { "epoch": 0.6, "grad_norm": 1.5436070501283474, "learning_rate": 3.6651645051292415e-06, "loss": 0.8228, "step": 7459 }, { "epoch": 0.6, "grad_norm": 1.5205480888480039, "learning_rate": 3.663912453198538e-06, "loss": 0.7456, "step": 7460 }, { "epoch": 0.6, "grad_norm": 1.4712395186948484, "learning_rate": 3.662660491483521e-06, "loss": 0.7433, "step": 7461 }, { "epoch": 0.6, "grad_norm": 1.647927048030299, "learning_rate": 3.661408620068725e-06, "loss": 0.7773, "step": 7462 }, { "epoch": 0.6, "grad_norm": 1.4465968719848012, "learning_rate": 3.6601568390386797e-06, "loss": 0.7907, "step": 7463 }, { "epoch": 0.6, "grad_norm": 0.8033852491291057, "learning_rate": 3.6589051484779094e-06, "loss": 1.0552, "step": 7464 }, { "epoch": 0.6, "grad_norm": 1.517223224277276, "learning_rate": 3.6576535484709298e-06, "loss": 0.7942, "step": 7465 }, { "epoch": 0.6, "grad_norm": 1.5985065234193088, "learning_rate": 3.6564020391022493e-06, "loss": 0.7714, "step": 7466 }, { "epoch": 0.6, "grad_norm": 0.7808844474651351, "learning_rate": 3.655150620456378e-06, "loss": 1.082, "step": 7467 }, { "epoch": 0.6, "grad_norm": 1.430752475521129, "learning_rate": 3.6538992926178117e-06, "loss": 0.7998, "step": 7468 }, { "epoch": 0.6, "grad_norm": 1.478408396577184, "learning_rate": 3.652648055671043e-06, "loss": 0.7465, "step": 7469 }, { "epoch": 0.6, "grad_norm": 1.7165214602779304, "learning_rate": 3.6513969097005585e-06, "loss": 0.7936, "step": 7470 }, { "epoch": 0.6, "grad_norm": 1.5893213405403046, "learning_rate": 3.6501458547908396e-06, "loss": 0.7432, "step": 7471 }, { "epoch": 0.6, "grad_norm": 1.3558036662466073, "learning_rate": 3.648894891026358e-06, "loss": 0.7619, "step": 7472 }, { "epoch": 0.6, "grad_norm": 1.4491292086726095, "learning_rate": 3.6476440184915817e-06, "loss": 0.7198, "step": 7473 }, { "epoch": 0.6, "grad_norm": 1.5502836031649407, "learning_rate": 3.6463932372709763e-06, "loss": 0.7147, "step": 7474 }, { "epoch": 0.6, "grad_norm": 1.611648355810139, "learning_rate": 3.645142547448994e-06, "loss": 0.7352, "step": 7475 }, { "epoch": 0.6, "grad_norm": 1.5605110910287165, "learning_rate": 3.643891949110082e-06, "loss": 0.6964, "step": 7476 }, { "epoch": 0.6, "grad_norm": 1.5266792476351092, "learning_rate": 3.6426414423386898e-06, "loss": 0.729, "step": 7477 }, { "epoch": 0.6, "grad_norm": 1.5356754900388923, "learning_rate": 3.6413910272192504e-06, "loss": 0.7914, "step": 7478 }, { "epoch": 0.6, "grad_norm": 1.5691789932641294, "learning_rate": 3.6401407038361948e-06, "loss": 0.7635, "step": 7479 }, { "epoch": 0.6, "grad_norm": 1.6550119612651897, "learning_rate": 3.6388904722739493e-06, "loss": 0.8254, "step": 7480 }, { "epoch": 0.6, "grad_norm": 1.5627594822313946, "learning_rate": 3.6376403326169317e-06, "loss": 0.8637, "step": 7481 }, { "epoch": 0.6, "grad_norm": 1.5556852287939968, "learning_rate": 3.6363902849495535e-06, "loss": 0.711, "step": 7482 }, { "epoch": 0.6, "grad_norm": 0.7837348707008455, "learning_rate": 3.63514032935622e-06, "loss": 1.0432, "step": 7483 }, { "epoch": 0.6, "grad_norm": 1.521539874865236, "learning_rate": 3.6338904659213335e-06, "loss": 0.7743, "step": 7484 }, { "epoch": 0.6, "grad_norm": 1.5269743497254862, "learning_rate": 3.6326406947292875e-06, "loss": 0.7468, "step": 7485 }, { "epoch": 0.6, "grad_norm": 1.4778940461699037, "learning_rate": 3.631391015864467e-06, "loss": 0.7115, "step": 7486 }, { "epoch": 0.6, "grad_norm": 1.462959661288377, "learning_rate": 3.6301414294112557e-06, "loss": 0.6991, "step": 7487 }, { "epoch": 0.6, "grad_norm": 0.8016299241323159, "learning_rate": 3.6288919354540276e-06, "loss": 1.0723, "step": 7488 }, { "epoch": 0.6, "grad_norm": 1.6100458622934315, "learning_rate": 3.6276425340771517e-06, "loss": 0.7724, "step": 7489 }, { "epoch": 0.6, "grad_norm": 1.61142448095863, "learning_rate": 3.626393225364988e-06, "loss": 0.7882, "step": 7490 }, { "epoch": 0.6, "grad_norm": 1.4781464947015273, "learning_rate": 3.6251440094018956e-06, "loss": 0.7763, "step": 7491 }, { "epoch": 0.6, "grad_norm": 1.716731936028379, "learning_rate": 3.6238948862722246e-06, "loss": 0.7976, "step": 7492 }, { "epoch": 0.6, "grad_norm": 1.7106938853357843, "learning_rate": 3.6226458560603144e-06, "loss": 0.7327, "step": 7493 }, { "epoch": 0.6, "grad_norm": 1.7700988432335054, "learning_rate": 3.621396918850508e-06, "loss": 0.8713, "step": 7494 }, { "epoch": 0.6, "grad_norm": 1.4202617696068167, "learning_rate": 3.6201480747271337e-06, "loss": 0.7336, "step": 7495 }, { "epoch": 0.6, "grad_norm": 1.8300240537283983, "learning_rate": 3.6188993237745163e-06, "loss": 0.7498, "step": 7496 }, { "epoch": 0.6, "grad_norm": 1.476991949259957, "learning_rate": 3.617650666076975e-06, "loss": 0.7295, "step": 7497 }, { "epoch": 0.6, "grad_norm": 1.5483707270079279, "learning_rate": 3.6164021017188223e-06, "loss": 0.7721, "step": 7498 }, { "epoch": 0.6, "grad_norm": 1.7196706532168888, "learning_rate": 3.6151536307843625e-06, "loss": 0.8331, "step": 7499 }, { "epoch": 0.6, "grad_norm": 0.86331977190653, "learning_rate": 3.613905253357895e-06, "loss": 1.0704, "step": 7500 }, { "epoch": 0.6, "grad_norm": 1.5941748816392318, "learning_rate": 3.6126569695237156e-06, "loss": 0.747, "step": 7501 }, { "epoch": 0.6, "grad_norm": 0.8298000897555168, "learning_rate": 3.6114087793661122e-06, "loss": 1.0533, "step": 7502 }, { "epoch": 0.6, "grad_norm": 1.487340547263413, "learning_rate": 3.610160682969359e-06, "loss": 0.7697, "step": 7503 }, { "epoch": 0.6, "grad_norm": 0.7751541403436542, "learning_rate": 3.6089126804177373e-06, "loss": 1.0621, "step": 7504 }, { "epoch": 0.6, "grad_norm": 0.7564094570667504, "learning_rate": 3.6076647717955117e-06, "loss": 1.0527, "step": 7505 }, { "epoch": 0.6, "grad_norm": 2.7933724308586982, "learning_rate": 3.606416957186945e-06, "loss": 0.7564, "step": 7506 }, { "epoch": 0.6, "grad_norm": 0.801611577812531, "learning_rate": 3.605169236676291e-06, "loss": 1.0759, "step": 7507 }, { "epoch": 0.6, "grad_norm": 1.5244166475878427, "learning_rate": 3.6039216103478004e-06, "loss": 0.6991, "step": 7508 }, { "epoch": 0.6, "grad_norm": 1.5649380415732512, "learning_rate": 3.602674078285715e-06, "loss": 0.7252, "step": 7509 }, { "epoch": 0.6, "grad_norm": 1.4831515997071558, "learning_rate": 3.601426640574269e-06, "loss": 0.7413, "step": 7510 }, { "epoch": 0.6, "grad_norm": 1.4287722743220175, "learning_rate": 3.6001792972976957e-06, "loss": 0.6193, "step": 7511 }, { "epoch": 0.6, "grad_norm": 1.3814408405853835, "learning_rate": 3.598932048540218e-06, "loss": 0.7982, "step": 7512 }, { "epoch": 0.6, "grad_norm": 1.699914717364874, "learning_rate": 3.597684894386051e-06, "loss": 0.7369, "step": 7513 }, { "epoch": 0.6, "grad_norm": 1.489058176755613, "learning_rate": 3.5964378349194075e-06, "loss": 0.7966, "step": 7514 }, { "epoch": 0.6, "grad_norm": 1.6180755905238915, "learning_rate": 3.5951908702244904e-06, "loss": 0.7287, "step": 7515 }, { "epoch": 0.6, "grad_norm": 0.8583177767237031, "learning_rate": 3.593944000385498e-06, "loss": 1.0842, "step": 7516 }, { "epoch": 0.6, "grad_norm": 0.8087541550536842, "learning_rate": 3.59269722548662e-06, "loss": 1.0755, "step": 7517 }, { "epoch": 0.6, "grad_norm": 0.7771135720806651, "learning_rate": 3.591450545612047e-06, "loss": 1.0463, "step": 7518 }, { "epoch": 0.6, "grad_norm": 1.5412034124603604, "learning_rate": 3.590203960845952e-06, "loss": 0.7333, "step": 7519 }, { "epoch": 0.6, "grad_norm": 1.6368178900131987, "learning_rate": 3.5889574712725077e-06, "loss": 0.7476, "step": 7520 }, { "epoch": 0.6, "grad_norm": 1.4753898203920943, "learning_rate": 3.587711076975884e-06, "loss": 0.7476, "step": 7521 }, { "epoch": 0.6, "grad_norm": 1.452107939704967, "learning_rate": 3.5864647780402373e-06, "loss": 0.7941, "step": 7522 }, { "epoch": 0.6, "grad_norm": 1.4897885005384408, "learning_rate": 3.5852185745497204e-06, "loss": 0.6941, "step": 7523 }, { "epoch": 0.6, "grad_norm": 1.6473171693395638, "learning_rate": 3.58397246658848e-06, "loss": 0.7303, "step": 7524 }, { "epoch": 0.6, "grad_norm": 1.6604806988330723, "learning_rate": 3.582726454240658e-06, "loss": 0.7696, "step": 7525 }, { "epoch": 0.6, "grad_norm": 1.52036219223496, "learning_rate": 3.581480537590386e-06, "loss": 0.7615, "step": 7526 }, { "epoch": 0.6, "grad_norm": 1.4516889256215708, "learning_rate": 3.58023471672179e-06, "loss": 0.8393, "step": 7527 }, { "epoch": 0.6, "grad_norm": 1.5730056634965792, "learning_rate": 3.5789889917189945e-06, "loss": 0.7963, "step": 7528 }, { "epoch": 0.6, "grad_norm": 1.5374346807371195, "learning_rate": 3.577743362666112e-06, "loss": 0.7616, "step": 7529 }, { "epoch": 0.6, "grad_norm": 1.6524823223745182, "learning_rate": 3.5764978296472484e-06, "loss": 0.8409, "step": 7530 }, { "epoch": 0.6, "grad_norm": 1.568299055258799, "learning_rate": 3.5752523927465066e-06, "loss": 0.8278, "step": 7531 }, { "epoch": 0.6, "grad_norm": 0.8681136861287417, "learning_rate": 3.574007052047982e-06, "loss": 1.0817, "step": 7532 }, { "epoch": 0.6, "grad_norm": 1.5149006600380364, "learning_rate": 3.5727618076357617e-06, "loss": 0.7423, "step": 7533 }, { "epoch": 0.6, "grad_norm": 1.5867275558988427, "learning_rate": 3.5715166595939264e-06, "loss": 0.831, "step": 7534 }, { "epoch": 0.6, "grad_norm": 1.4934625843318252, "learning_rate": 3.5702716080065546e-06, "loss": 0.6631, "step": 7535 }, { "epoch": 0.6, "grad_norm": 1.5241148036864458, "learning_rate": 3.569026652957713e-06, "loss": 0.7055, "step": 7536 }, { "epoch": 0.6, "grad_norm": 1.5989233142591985, "learning_rate": 3.567781794531461e-06, "loss": 0.7209, "step": 7537 }, { "epoch": 0.6, "grad_norm": 1.6289210553927786, "learning_rate": 3.5665370328118596e-06, "loss": 0.8328, "step": 7538 }, { "epoch": 0.6, "grad_norm": 1.6369144709877455, "learning_rate": 3.565292367882956e-06, "loss": 0.6957, "step": 7539 }, { "epoch": 0.6, "grad_norm": 1.484696636696164, "learning_rate": 3.564047799828792e-06, "loss": 0.7923, "step": 7540 }, { "epoch": 0.61, "grad_norm": 1.6015695636127298, "learning_rate": 3.562803328733403e-06, "loss": 0.8145, "step": 7541 }, { "epoch": 0.61, "grad_norm": 0.8061253415311265, "learning_rate": 3.5615589546808204e-06, "loss": 1.0849, "step": 7542 }, { "epoch": 0.61, "grad_norm": 1.8796163731777786, "learning_rate": 3.560314677755067e-06, "loss": 0.7891, "step": 7543 }, { "epoch": 0.61, "grad_norm": 0.7681973021111453, "learning_rate": 3.5590704980401564e-06, "loss": 1.0424, "step": 7544 }, { "epoch": 0.61, "grad_norm": 1.3845478399340694, "learning_rate": 3.5578264156201025e-06, "loss": 0.7625, "step": 7545 }, { "epoch": 0.61, "grad_norm": 1.4734337366465642, "learning_rate": 3.5565824305789076e-06, "loss": 0.7683, "step": 7546 }, { "epoch": 0.61, "grad_norm": 1.457416807484069, "learning_rate": 3.5553385430005673e-06, "loss": 0.714, "step": 7547 }, { "epoch": 0.61, "grad_norm": 1.4870509254038446, "learning_rate": 3.5540947529690697e-06, "loss": 0.7353, "step": 7548 }, { "epoch": 0.61, "grad_norm": 0.8215963627332331, "learning_rate": 3.5528510605684017e-06, "loss": 1.0819, "step": 7549 }, { "epoch": 0.61, "grad_norm": 1.5744157608066103, "learning_rate": 3.55160746588254e-06, "loss": 0.7598, "step": 7550 }, { "epoch": 0.61, "grad_norm": 0.7733327613727949, "learning_rate": 3.5503639689954527e-06, "loss": 1.0753, "step": 7551 }, { "epoch": 0.61, "grad_norm": 1.499491525207169, "learning_rate": 3.549120569991107e-06, "loss": 0.8035, "step": 7552 }, { "epoch": 0.61, "grad_norm": 1.4915282259412823, "learning_rate": 3.5478772689534568e-06, "loss": 0.7681, "step": 7553 }, { "epoch": 0.61, "grad_norm": 0.7989377416883774, "learning_rate": 3.5466340659664526e-06, "loss": 1.093, "step": 7554 }, { "epoch": 0.61, "grad_norm": 1.5185657070878948, "learning_rate": 3.5453909611140412e-06, "loss": 0.7752, "step": 7555 }, { "epoch": 0.61, "grad_norm": 0.7445344716337217, "learning_rate": 3.5441479544801586e-06, "loss": 1.0735, "step": 7556 }, { "epoch": 0.61, "grad_norm": 1.470539343726246, "learning_rate": 3.542905046148735e-06, "loss": 0.7418, "step": 7557 }, { "epoch": 0.61, "grad_norm": 0.774567932419869, "learning_rate": 3.5416622362036938e-06, "loss": 1.061, "step": 7558 }, { "epoch": 0.61, "grad_norm": 0.7544483835583607, "learning_rate": 3.540419524728954e-06, "loss": 1.0558, "step": 7559 }, { "epoch": 0.61, "grad_norm": 1.4029237126279235, "learning_rate": 3.5391769118084253e-06, "loss": 0.7613, "step": 7560 }, { "epoch": 0.61, "grad_norm": 1.4581773524895458, "learning_rate": 3.5379343975260094e-06, "loss": 0.7776, "step": 7561 }, { "epoch": 0.61, "grad_norm": 1.4357724312380797, "learning_rate": 3.53669198196561e-06, "loss": 0.7076, "step": 7562 }, { "epoch": 0.61, "grad_norm": 0.7616847033473215, "learning_rate": 3.5354496652111125e-06, "loss": 1.0399, "step": 7563 }, { "epoch": 0.61, "grad_norm": 1.5453126708220262, "learning_rate": 3.5342074473464026e-06, "loss": 0.8157, "step": 7564 }, { "epoch": 0.61, "grad_norm": 1.509260882523467, "learning_rate": 3.532965328455356e-06, "loss": 0.7569, "step": 7565 }, { "epoch": 0.61, "grad_norm": 1.5486423292601774, "learning_rate": 3.5317233086218474e-06, "loss": 0.7774, "step": 7566 }, { "epoch": 0.61, "grad_norm": 1.7428943575483389, "learning_rate": 3.530481387929737e-06, "loss": 0.7459, "step": 7567 }, { "epoch": 0.61, "grad_norm": 0.7885684013757214, "learning_rate": 3.529239566462883e-06, "loss": 1.0732, "step": 7568 }, { "epoch": 0.61, "grad_norm": 1.454810455713462, "learning_rate": 3.5279978443051383e-06, "loss": 0.7792, "step": 7569 }, { "epoch": 0.61, "grad_norm": 1.570097770452429, "learning_rate": 3.526756221540345e-06, "loss": 0.7032, "step": 7570 }, { "epoch": 0.61, "grad_norm": 1.5880038118390614, "learning_rate": 3.525514698252338e-06, "loss": 0.8371, "step": 7571 }, { "epoch": 0.61, "grad_norm": 1.5446698929215528, "learning_rate": 3.5242732745249517e-06, "loss": 0.6809, "step": 7572 }, { "epoch": 0.61, "grad_norm": 1.6608266398879645, "learning_rate": 3.523031950442009e-06, "loss": 0.7884, "step": 7573 }, { "epoch": 0.61, "grad_norm": 1.675521983055264, "learning_rate": 3.5217907260873265e-06, "loss": 0.7662, "step": 7574 }, { "epoch": 0.61, "grad_norm": 0.7723098713019468, "learning_rate": 3.5205496015447127e-06, "loss": 1.0647, "step": 7575 }, { "epoch": 0.61, "grad_norm": 0.823783610305435, "learning_rate": 3.519308576897974e-06, "loss": 1.065, "step": 7576 }, { "epoch": 0.61, "grad_norm": 1.4836591389929885, "learning_rate": 3.5180676522309065e-06, "loss": 0.8087, "step": 7577 }, { "epoch": 0.61, "grad_norm": 1.430716937095831, "learning_rate": 3.5168268276272977e-06, "loss": 0.7727, "step": 7578 }, { "epoch": 0.61, "grad_norm": 1.4828065664720884, "learning_rate": 3.515586103170935e-06, "loss": 0.7937, "step": 7579 }, { "epoch": 0.61, "grad_norm": 1.566977695215505, "learning_rate": 3.514345478945592e-06, "loss": 0.8541, "step": 7580 }, { "epoch": 0.61, "grad_norm": 1.4828633240398457, "learning_rate": 3.5131049550350406e-06, "loss": 0.7858, "step": 7581 }, { "epoch": 0.61, "grad_norm": 1.5624635503204707, "learning_rate": 3.5118645315230394e-06, "loss": 0.8118, "step": 7582 }, { "epoch": 0.61, "grad_norm": 1.4749047157608806, "learning_rate": 3.5106242084933506e-06, "loss": 0.8176, "step": 7583 }, { "epoch": 0.61, "grad_norm": 1.4650249970158078, "learning_rate": 3.5093839860297206e-06, "loss": 0.7541, "step": 7584 }, { "epoch": 0.61, "grad_norm": 0.7748572106997424, "learning_rate": 3.5081438642158916e-06, "loss": 1.0653, "step": 7585 }, { "epoch": 0.61, "grad_norm": 1.3953204298126096, "learning_rate": 3.506903843135601e-06, "loss": 0.6865, "step": 7586 }, { "epoch": 0.61, "grad_norm": 1.5478324789891318, "learning_rate": 3.5056639228725777e-06, "loss": 0.7739, "step": 7587 }, { "epoch": 0.61, "grad_norm": 0.771658950634736, "learning_rate": 3.5044241035105425e-06, "loss": 1.0609, "step": 7588 }, { "epoch": 0.61, "grad_norm": 1.5795353204911822, "learning_rate": 3.5031843851332105e-06, "loss": 0.8114, "step": 7589 }, { "epoch": 0.61, "grad_norm": 1.5073379427027271, "learning_rate": 3.5019447678242937e-06, "loss": 0.8032, "step": 7590 }, { "epoch": 0.61, "grad_norm": 0.7819841978906235, "learning_rate": 3.500705251667491e-06, "loss": 1.0752, "step": 7591 }, { "epoch": 0.61, "grad_norm": 1.3910005799341711, "learning_rate": 3.4994658367464963e-06, "loss": 0.7564, "step": 7592 }, { "epoch": 0.61, "grad_norm": 1.4503262325501858, "learning_rate": 3.4982265231450006e-06, "loss": 0.7208, "step": 7593 }, { "epoch": 0.61, "grad_norm": 1.4219440564400516, "learning_rate": 3.4969873109466847e-06, "loss": 0.6768, "step": 7594 }, { "epoch": 0.61, "grad_norm": 1.46450191009171, "learning_rate": 3.4957482002352217e-06, "loss": 0.7686, "step": 7595 }, { "epoch": 0.61, "grad_norm": 1.478720707549166, "learning_rate": 3.494509191094281e-06, "loss": 0.8281, "step": 7596 }, { "epoch": 0.61, "grad_norm": 1.4920312028005376, "learning_rate": 3.4932702836075216e-06, "loss": 0.7606, "step": 7597 }, { "epoch": 0.61, "grad_norm": 0.840069302730572, "learning_rate": 3.492031477858598e-06, "loss": 1.1017, "step": 7598 }, { "epoch": 0.61, "grad_norm": 1.4721144333944405, "learning_rate": 3.4907927739311552e-06, "loss": 0.7967, "step": 7599 }, { "epoch": 0.61, "grad_norm": 1.3587453420023408, "learning_rate": 3.489554171908838e-06, "loss": 0.6401, "step": 7600 }, { "epoch": 0.61, "grad_norm": 1.4863406994264252, "learning_rate": 3.4883156718752763e-06, "loss": 0.7542, "step": 7601 }, { "epoch": 0.61, "grad_norm": 1.4844771439142408, "learning_rate": 3.4870772739140956e-06, "loss": 0.8047, "step": 7602 }, { "epoch": 0.61, "grad_norm": 1.5084742039483718, "learning_rate": 3.485838978108919e-06, "loss": 0.7619, "step": 7603 }, { "epoch": 0.61, "grad_norm": 1.585076818293297, "learning_rate": 3.4846007845433568e-06, "loss": 0.7396, "step": 7604 }, { "epoch": 0.61, "grad_norm": 1.5026226174881314, "learning_rate": 3.4833626933010144e-06, "loss": 0.7819, "step": 7605 }, { "epoch": 0.61, "grad_norm": 1.583018447862338, "learning_rate": 3.48212470446549e-06, "loss": 0.7334, "step": 7606 }, { "epoch": 0.61, "grad_norm": 1.5595444385402168, "learning_rate": 3.480886818120377e-06, "loss": 0.7825, "step": 7607 }, { "epoch": 0.61, "grad_norm": 1.4767457732705096, "learning_rate": 3.479649034349261e-06, "loss": 0.6989, "step": 7608 }, { "epoch": 0.61, "grad_norm": 1.5549833543543723, "learning_rate": 3.4784113532357157e-06, "loss": 0.7387, "step": 7609 }, { "epoch": 0.61, "grad_norm": 1.8669489798525798, "learning_rate": 3.477173774863317e-06, "loss": 0.6708, "step": 7610 }, { "epoch": 0.61, "grad_norm": 0.8039718090125483, "learning_rate": 3.4759362993156275e-06, "loss": 1.0201, "step": 7611 }, { "epoch": 0.61, "grad_norm": 1.5288075125817986, "learning_rate": 3.4746989266762034e-06, "loss": 0.7091, "step": 7612 }, { "epoch": 0.61, "grad_norm": 1.6499571452125519, "learning_rate": 3.4734616570285954e-06, "loss": 0.7636, "step": 7613 }, { "epoch": 0.61, "grad_norm": 1.4411691021126618, "learning_rate": 3.472224490456348e-06, "loss": 0.7729, "step": 7614 }, { "epoch": 0.61, "grad_norm": 0.7771783080317571, "learning_rate": 3.4709874270429968e-06, "loss": 1.0379, "step": 7615 }, { "epoch": 0.61, "grad_norm": 1.461151971271799, "learning_rate": 3.4697504668720677e-06, "loss": 0.7222, "step": 7616 }, { "epoch": 0.61, "grad_norm": 1.5926403433595129, "learning_rate": 3.468513610027089e-06, "loss": 0.7963, "step": 7617 }, { "epoch": 0.61, "grad_norm": 1.417366418816824, "learning_rate": 3.4672768565915726e-06, "loss": 0.6927, "step": 7618 }, { "epoch": 0.61, "grad_norm": 0.7590127825287106, "learning_rate": 3.4660402066490274e-06, "loss": 1.0861, "step": 7619 }, { "epoch": 0.61, "grad_norm": 1.5143751247375883, "learning_rate": 3.4648036602829556e-06, "loss": 0.8597, "step": 7620 }, { "epoch": 0.61, "grad_norm": 1.5288457361545285, "learning_rate": 3.4635672175768508e-06, "loss": 0.7764, "step": 7621 }, { "epoch": 0.61, "grad_norm": 1.6079295176480357, "learning_rate": 3.4623308786142017e-06, "loss": 0.7939, "step": 7622 }, { "epoch": 0.61, "grad_norm": 1.3784660733392804, "learning_rate": 3.4610946434784863e-06, "loss": 0.6504, "step": 7623 }, { "epoch": 0.61, "grad_norm": 0.7899033664898677, "learning_rate": 3.4598585122531802e-06, "loss": 1.0849, "step": 7624 }, { "epoch": 0.61, "grad_norm": 1.39961761116411, "learning_rate": 3.4586224850217496e-06, "loss": 0.7904, "step": 7625 }, { "epoch": 0.61, "grad_norm": 0.7799462097533494, "learning_rate": 3.4573865618676506e-06, "loss": 1.0735, "step": 7626 }, { "epoch": 0.61, "grad_norm": 1.4978188186109243, "learning_rate": 3.456150742874341e-06, "loss": 0.8106, "step": 7627 }, { "epoch": 0.61, "grad_norm": 1.5522902485485648, "learning_rate": 3.4549150281252635e-06, "loss": 0.7641, "step": 7628 }, { "epoch": 0.61, "grad_norm": 1.5330431516458938, "learning_rate": 3.4536794177038563e-06, "loss": 0.6971, "step": 7629 }, { "epoch": 0.61, "grad_norm": 1.462077711702339, "learning_rate": 3.4524439116935492e-06, "loss": 0.678, "step": 7630 }, { "epoch": 0.61, "grad_norm": 1.4863998632214435, "learning_rate": 3.451208510177769e-06, "loss": 0.7913, "step": 7631 }, { "epoch": 0.61, "grad_norm": 1.4910919079796032, "learning_rate": 3.4499732132399324e-06, "loss": 0.8358, "step": 7632 }, { "epoch": 0.61, "grad_norm": 1.5962862536776972, "learning_rate": 3.448738020963446e-06, "loss": 0.7617, "step": 7633 }, { "epoch": 0.61, "grad_norm": 1.5972695337144773, "learning_rate": 3.4475029334317195e-06, "loss": 0.8818, "step": 7634 }, { "epoch": 0.61, "grad_norm": 1.6401047156026347, "learning_rate": 3.4462679507281433e-06, "loss": 0.8702, "step": 7635 }, { "epoch": 0.61, "grad_norm": 0.7781267734021727, "learning_rate": 3.445033072936106e-06, "loss": 1.05, "step": 7636 }, { "epoch": 0.61, "grad_norm": 0.7770603340051605, "learning_rate": 3.443798300138993e-06, "loss": 1.0744, "step": 7637 }, { "epoch": 0.61, "grad_norm": 1.4887356590030498, "learning_rate": 3.442563632420178e-06, "loss": 0.7125, "step": 7638 }, { "epoch": 0.61, "grad_norm": 1.4124087756568406, "learning_rate": 3.441329069863027e-06, "loss": 0.6993, "step": 7639 }, { "epoch": 0.61, "grad_norm": 1.5029109686764768, "learning_rate": 3.4400946125509003e-06, "loss": 0.7818, "step": 7640 }, { "epoch": 0.61, "grad_norm": 1.5130376208961214, "learning_rate": 3.438860260567154e-06, "loss": 0.7529, "step": 7641 }, { "epoch": 0.61, "grad_norm": 1.4186527064241272, "learning_rate": 3.437626013995132e-06, "loss": 0.7169, "step": 7642 }, { "epoch": 0.61, "grad_norm": 1.5761395931469055, "learning_rate": 3.4363918729181727e-06, "loss": 0.7378, "step": 7643 }, { "epoch": 0.61, "grad_norm": 1.5207188562832274, "learning_rate": 3.4351578374196117e-06, "loss": 0.8204, "step": 7644 }, { "epoch": 0.61, "grad_norm": 1.502131748277624, "learning_rate": 3.4339239075827712e-06, "loss": 0.8147, "step": 7645 }, { "epoch": 0.61, "grad_norm": 1.4721518005481307, "learning_rate": 3.4326900834909694e-06, "loss": 0.7363, "step": 7646 }, { "epoch": 0.61, "grad_norm": 1.602426479795486, "learning_rate": 3.431456365227516e-06, "loss": 0.7815, "step": 7647 }, { "epoch": 0.61, "grad_norm": 1.594465387620532, "learning_rate": 3.430222752875717e-06, "loss": 0.738, "step": 7648 }, { "epoch": 0.61, "grad_norm": 1.4975553440173384, "learning_rate": 3.428989246518867e-06, "loss": 0.73, "step": 7649 }, { "epoch": 0.61, "grad_norm": 1.4495008242576943, "learning_rate": 3.4277558462402543e-06, "loss": 0.7078, "step": 7650 }, { "epoch": 0.61, "grad_norm": 0.8101999529385817, "learning_rate": 3.426522552123163e-06, "loss": 1.0779, "step": 7651 }, { "epoch": 0.61, "grad_norm": 1.567449252221874, "learning_rate": 3.425289364250868e-06, "loss": 0.8002, "step": 7652 }, { "epoch": 0.61, "grad_norm": 1.5300880920876043, "learning_rate": 3.4240562827066326e-06, "loss": 0.7202, "step": 7653 }, { "epoch": 0.61, "grad_norm": 1.5558439324116484, "learning_rate": 3.4228233075737225e-06, "loss": 0.7656, "step": 7654 }, { "epoch": 0.61, "grad_norm": 1.4499843131434937, "learning_rate": 3.4215904389353897e-06, "loss": 0.7154, "step": 7655 }, { "epoch": 0.61, "grad_norm": 1.4473192865088527, "learning_rate": 3.42035767687488e-06, "loss": 0.7159, "step": 7656 }, { "epoch": 0.61, "grad_norm": 1.4832094540484464, "learning_rate": 3.4191250214754303e-06, "loss": 0.7324, "step": 7657 }, { "epoch": 0.61, "grad_norm": 1.4569206152080438, "learning_rate": 3.4178924728202757e-06, "loss": 0.7097, "step": 7658 }, { "epoch": 0.61, "grad_norm": 1.4958638828901487, "learning_rate": 3.416660030992639e-06, "loss": 0.7303, "step": 7659 }, { "epoch": 0.61, "grad_norm": 0.8042847746718376, "learning_rate": 3.415427696075735e-06, "loss": 1.0928, "step": 7660 }, { "epoch": 0.61, "grad_norm": 1.420237026388972, "learning_rate": 3.414195468152779e-06, "loss": 0.789, "step": 7661 }, { "epoch": 0.61, "grad_norm": 1.4959250470330512, "learning_rate": 3.41296334730697e-06, "loss": 0.8103, "step": 7662 }, { "epoch": 0.61, "grad_norm": 1.5351756794881675, "learning_rate": 3.411731333621507e-06, "loss": 0.7734, "step": 7663 }, { "epoch": 0.61, "grad_norm": 1.5017176534061838, "learning_rate": 3.410499427179572e-06, "loss": 0.7037, "step": 7664 }, { "epoch": 0.61, "grad_norm": 1.4386512294183724, "learning_rate": 3.4092676280643533e-06, "loss": 0.7266, "step": 7665 }, { "epoch": 0.62, "grad_norm": 0.8005861480789737, "learning_rate": 3.4080359363590214e-06, "loss": 1.0715, "step": 7666 }, { "epoch": 0.62, "grad_norm": 0.7951726105276048, "learning_rate": 3.4068043521467424e-06, "loss": 1.0764, "step": 7667 }, { "epoch": 0.62, "grad_norm": 0.7776369426031778, "learning_rate": 3.4055728755106775e-06, "loss": 1.0877, "step": 7668 }, { "epoch": 0.62, "grad_norm": 1.4171632119984605, "learning_rate": 3.404341506533978e-06, "loss": 0.6302, "step": 7669 }, { "epoch": 0.62, "grad_norm": 1.5436326713679167, "learning_rate": 3.4031102452997864e-06, "loss": 0.7769, "step": 7670 }, { "epoch": 0.62, "grad_norm": 1.5267072059182556, "learning_rate": 3.4018790918912447e-06, "loss": 0.7832, "step": 7671 }, { "epoch": 0.62, "grad_norm": 1.660475983200203, "learning_rate": 3.4006480463914814e-06, "loss": 0.7632, "step": 7672 }, { "epoch": 0.62, "grad_norm": 1.4554772779602787, "learning_rate": 3.399417108883619e-06, "loss": 0.8324, "step": 7673 }, { "epoch": 0.62, "grad_norm": 1.5073015954402174, "learning_rate": 3.3981862794507725e-06, "loss": 0.7638, "step": 7674 }, { "epoch": 0.62, "grad_norm": 1.682368460434198, "learning_rate": 3.3969555581760518e-06, "loss": 0.7109, "step": 7675 }, { "epoch": 0.62, "grad_norm": 1.4223391935364513, "learning_rate": 3.395724945142558e-06, "loss": 0.7177, "step": 7676 }, { "epoch": 0.62, "grad_norm": 0.7709082799196232, "learning_rate": 3.3944944404333815e-06, "loss": 1.0832, "step": 7677 }, { "epoch": 0.62, "grad_norm": 1.6134872499856183, "learning_rate": 3.3932640441316135e-06, "loss": 0.8333, "step": 7678 }, { "epoch": 0.62, "grad_norm": 1.567629533635683, "learning_rate": 3.392033756320333e-06, "loss": 0.7478, "step": 7679 }, { "epoch": 0.62, "grad_norm": 0.787993585169824, "learning_rate": 3.3908035770826085e-06, "loss": 1.0631, "step": 7680 }, { "epoch": 0.62, "grad_norm": 1.4355988783278564, "learning_rate": 3.3895735065015044e-06, "loss": 0.6998, "step": 7681 }, { "epoch": 0.62, "grad_norm": 1.5305075968266346, "learning_rate": 3.388343544660082e-06, "loss": 0.6448, "step": 7682 }, { "epoch": 0.62, "grad_norm": 1.538724295682777, "learning_rate": 3.387113691641388e-06, "loss": 0.78, "step": 7683 }, { "epoch": 0.62, "grad_norm": 1.4169611112825142, "learning_rate": 3.385883947528465e-06, "loss": 0.715, "step": 7684 }, { "epoch": 0.62, "grad_norm": 1.5461424102766514, "learning_rate": 3.3846543124043496e-06, "loss": 0.8918, "step": 7685 }, { "epoch": 0.62, "grad_norm": 1.526251471047948, "learning_rate": 3.3834247863520692e-06, "loss": 0.7093, "step": 7686 }, { "epoch": 0.62, "grad_norm": 2.2231663207469103, "learning_rate": 3.3821953694546433e-06, "loss": 0.7586, "step": 7687 }, { "epoch": 0.62, "grad_norm": 1.4796068198311394, "learning_rate": 3.3809660617950835e-06, "loss": 0.6639, "step": 7688 }, { "epoch": 0.62, "grad_norm": 1.387221048383456, "learning_rate": 3.379736863456399e-06, "loss": 0.6734, "step": 7689 }, { "epoch": 0.62, "grad_norm": 1.4544434933735053, "learning_rate": 3.378507774521587e-06, "loss": 0.7909, "step": 7690 }, { "epoch": 0.62, "grad_norm": 0.8139126230276331, "learning_rate": 3.377278795073637e-06, "loss": 1.0782, "step": 7691 }, { "epoch": 0.62, "grad_norm": 0.815805088331301, "learning_rate": 3.376049925195534e-06, "loss": 1.0573, "step": 7692 }, { "epoch": 0.62, "grad_norm": 1.4802041281772744, "learning_rate": 3.3748211649702533e-06, "loss": 0.7688, "step": 7693 }, { "epoch": 0.62, "grad_norm": 1.5368633027442444, "learning_rate": 3.3735925144807623e-06, "loss": 0.7591, "step": 7694 }, { "epoch": 0.62, "grad_norm": 1.5188434840632612, "learning_rate": 3.3723639738100254e-06, "loss": 0.7691, "step": 7695 }, { "epoch": 0.62, "grad_norm": 1.539671563677665, "learning_rate": 3.371135543040995e-06, "loss": 0.743, "step": 7696 }, { "epoch": 0.62, "grad_norm": 0.8183696661805621, "learning_rate": 3.369907222256617e-06, "loss": 1.0306, "step": 7697 }, { "epoch": 0.62, "grad_norm": 1.5890172940816591, "learning_rate": 3.3686790115398287e-06, "loss": 0.8589, "step": 7698 }, { "epoch": 0.62, "grad_norm": 1.6008422251802295, "learning_rate": 3.367450910973566e-06, "loss": 0.8146, "step": 7699 }, { "epoch": 0.62, "grad_norm": 0.7583910451795927, "learning_rate": 3.3662229206407505e-06, "loss": 1.0855, "step": 7700 }, { "epoch": 0.62, "grad_norm": 1.449687015023182, "learning_rate": 3.3649950406242986e-06, "loss": 0.7717, "step": 7701 }, { "epoch": 0.62, "grad_norm": 1.3858920354432247, "learning_rate": 3.3637672710071213e-06, "loss": 0.7001, "step": 7702 }, { "epoch": 0.62, "grad_norm": 1.4828308821146254, "learning_rate": 3.362539611872119e-06, "loss": 0.728, "step": 7703 }, { "epoch": 0.62, "grad_norm": 1.6995548224690664, "learning_rate": 3.3613120633021868e-06, "loss": 0.7982, "step": 7704 }, { "epoch": 0.62, "grad_norm": 1.4315692739849697, "learning_rate": 3.360084625380209e-06, "loss": 0.7399, "step": 7705 }, { "epoch": 0.62, "grad_norm": 0.7906718874783281, "learning_rate": 3.358857298189069e-06, "loss": 1.0645, "step": 7706 }, { "epoch": 0.62, "grad_norm": 1.5135814217164067, "learning_rate": 3.357630081811638e-06, "loss": 0.7795, "step": 7707 }, { "epoch": 0.62, "grad_norm": 1.4453116393666199, "learning_rate": 3.356402976330776e-06, "loss": 0.7766, "step": 7708 }, { "epoch": 0.62, "grad_norm": 1.431605980667538, "learning_rate": 3.355175981829346e-06, "loss": 0.6501, "step": 7709 }, { "epoch": 0.62, "grad_norm": 1.485342674065422, "learning_rate": 3.3539490983901944e-06, "loss": 0.7708, "step": 7710 }, { "epoch": 0.62, "grad_norm": 1.46952762901311, "learning_rate": 3.352722326096163e-06, "loss": 0.6751, "step": 7711 }, { "epoch": 0.62, "grad_norm": 1.518457286606172, "learning_rate": 3.3514956650300877e-06, "loss": 0.7464, "step": 7712 }, { "epoch": 0.62, "grad_norm": 1.5119029407721312, "learning_rate": 3.3502691152747947e-06, "loss": 0.7959, "step": 7713 }, { "epoch": 0.62, "grad_norm": 1.6370664235011787, "learning_rate": 3.3490426769131035e-06, "loss": 0.7822, "step": 7714 }, { "epoch": 0.62, "grad_norm": 1.579172632932745, "learning_rate": 3.347816350027823e-06, "loss": 0.7549, "step": 7715 }, { "epoch": 0.62, "grad_norm": 1.3609181530164969, "learning_rate": 3.3465901347017633e-06, "loss": 0.707, "step": 7716 }, { "epoch": 0.62, "grad_norm": 1.5420055954275518, "learning_rate": 3.345364031017718e-06, "loss": 0.738, "step": 7717 }, { "epoch": 0.62, "grad_norm": 1.4709976958593354, "learning_rate": 3.3441380390584765e-06, "loss": 0.7985, "step": 7718 }, { "epoch": 0.62, "grad_norm": 1.5875290870711958, "learning_rate": 3.3429121589068213e-06, "loss": 0.7934, "step": 7719 }, { "epoch": 0.62, "grad_norm": 0.7841357655265737, "learning_rate": 3.3416863906455264e-06, "loss": 1.0996, "step": 7720 }, { "epoch": 0.62, "grad_norm": 1.5275904119934207, "learning_rate": 3.340460734357359e-06, "loss": 0.765, "step": 7721 }, { "epoch": 0.62, "grad_norm": 1.610217874995973, "learning_rate": 3.339235190125075e-06, "loss": 0.8262, "step": 7722 }, { "epoch": 0.62, "grad_norm": 1.554263477493308, "learning_rate": 3.338009758031432e-06, "loss": 0.8038, "step": 7723 }, { "epoch": 0.62, "grad_norm": 1.4994560689654726, "learning_rate": 3.33678443815917e-06, "loss": 0.85, "step": 7724 }, { "epoch": 0.62, "grad_norm": 1.4973053402150438, "learning_rate": 3.3355592305910223e-06, "loss": 0.6942, "step": 7725 }, { "epoch": 0.62, "grad_norm": 1.485354515757623, "learning_rate": 3.334334135409724e-06, "loss": 0.7558, "step": 7726 }, { "epoch": 0.62, "grad_norm": 1.3770813567425635, "learning_rate": 3.333109152697994e-06, "loss": 0.6486, "step": 7727 }, { "epoch": 0.62, "grad_norm": 1.5722437509053684, "learning_rate": 3.3318842825385454e-06, "loss": 0.7815, "step": 7728 }, { "epoch": 0.62, "grad_norm": 1.5339600119853851, "learning_rate": 3.3306595250140834e-06, "loss": 0.6599, "step": 7729 }, { "epoch": 0.62, "grad_norm": 1.4834201763270622, "learning_rate": 3.3294348802073085e-06, "loss": 0.7609, "step": 7730 }, { "epoch": 0.62, "grad_norm": 1.4978176997527324, "learning_rate": 3.3282103482009103e-06, "loss": 0.6741, "step": 7731 }, { "epoch": 0.62, "grad_norm": 0.824194045892066, "learning_rate": 3.32698592907757e-06, "loss": 1.0197, "step": 7732 }, { "epoch": 0.62, "grad_norm": 1.5273626628165662, "learning_rate": 3.325761622919968e-06, "loss": 0.7249, "step": 7733 }, { "epoch": 0.62, "grad_norm": 1.4828409856405271, "learning_rate": 3.324537429810769e-06, "loss": 0.7917, "step": 7734 }, { "epoch": 0.62, "grad_norm": 1.6592878525449464, "learning_rate": 3.323313349832633e-06, "loss": 0.7266, "step": 7735 }, { "epoch": 0.62, "grad_norm": 1.4822094409757134, "learning_rate": 3.322089383068215e-06, "loss": 0.7565, "step": 7736 }, { "epoch": 0.62, "grad_norm": 0.7681489446935946, "learning_rate": 3.3208655296001585e-06, "loss": 1.087, "step": 7737 }, { "epoch": 0.62, "grad_norm": 1.5285037494250227, "learning_rate": 3.319641789511101e-06, "loss": 0.7539, "step": 7738 }, { "epoch": 0.62, "grad_norm": 1.6149893711683694, "learning_rate": 3.318418162883671e-06, "loss": 0.7737, "step": 7739 }, { "epoch": 0.62, "grad_norm": 1.5224002877201968, "learning_rate": 3.3171946498004925e-06, "loss": 0.7227, "step": 7740 }, { "epoch": 0.62, "grad_norm": 1.584677866764195, "learning_rate": 3.3159712503441798e-06, "loss": 0.8583, "step": 7741 }, { "epoch": 0.62, "grad_norm": 1.4411263673331944, "learning_rate": 3.3147479645973367e-06, "loss": 0.7379, "step": 7742 }, { "epoch": 0.62, "grad_norm": 1.8403717632591616, "learning_rate": 3.3135247926425675e-06, "loss": 0.7616, "step": 7743 }, { "epoch": 0.62, "grad_norm": 1.6356992430942525, "learning_rate": 3.3123017345624597e-06, "loss": 0.7831, "step": 7744 }, { "epoch": 0.62, "grad_norm": 1.4778209267358549, "learning_rate": 3.311078790439598e-06, "loss": 0.7639, "step": 7745 }, { "epoch": 0.62, "grad_norm": 1.5545549543177637, "learning_rate": 3.309855960356557e-06, "loss": 0.7718, "step": 7746 }, { "epoch": 0.62, "grad_norm": 1.5218035112431112, "learning_rate": 3.3086332443959086e-06, "loss": 0.7888, "step": 7747 }, { "epoch": 0.62, "grad_norm": 1.518894853807762, "learning_rate": 3.3074106426402097e-06, "loss": 0.7169, "step": 7748 }, { "epoch": 0.62, "grad_norm": 1.7304711389266134, "learning_rate": 3.306188155172013e-06, "loss": 0.7842, "step": 7749 }, { "epoch": 0.62, "grad_norm": 0.8062458996308914, "learning_rate": 3.304965782073868e-06, "loss": 1.0585, "step": 7750 }, { "epoch": 0.62, "grad_norm": 1.4321074124924713, "learning_rate": 3.30374352342831e-06, "loss": 0.7552, "step": 7751 }, { "epoch": 0.62, "grad_norm": 1.6647044158653015, "learning_rate": 3.3025213793178647e-06, "loss": 0.767, "step": 7752 }, { "epoch": 0.62, "grad_norm": 1.4927534116294736, "learning_rate": 3.301299349825059e-06, "loss": 0.7629, "step": 7753 }, { "epoch": 0.62, "grad_norm": 1.481934945366105, "learning_rate": 3.300077435032406e-06, "loss": 0.8199, "step": 7754 }, { "epoch": 0.62, "grad_norm": 1.7112164574935533, "learning_rate": 3.298855635022411e-06, "loss": 0.7413, "step": 7755 }, { "epoch": 0.62, "grad_norm": 1.4002598564481645, "learning_rate": 3.2976339498775734e-06, "loss": 0.8137, "step": 7756 }, { "epoch": 0.62, "grad_norm": 1.5655197849824238, "learning_rate": 3.2964123796803847e-06, "loss": 0.7574, "step": 7757 }, { "epoch": 0.62, "grad_norm": 0.7819476296236746, "learning_rate": 3.2951909245133277e-06, "loss": 1.0698, "step": 7758 }, { "epoch": 0.62, "grad_norm": 1.59718820577481, "learning_rate": 3.2939695844588758e-06, "loss": 0.7669, "step": 7759 }, { "epoch": 0.62, "grad_norm": 1.474407811989907, "learning_rate": 3.2927483595995003e-06, "loss": 0.6877, "step": 7760 }, { "epoch": 0.62, "grad_norm": 1.5958094987160363, "learning_rate": 3.29152725001766e-06, "loss": 0.7557, "step": 7761 }, { "epoch": 0.62, "grad_norm": 1.5568552557305642, "learning_rate": 3.2903062557958065e-06, "loss": 0.7037, "step": 7762 }, { "epoch": 0.62, "grad_norm": 1.5217694279969063, "learning_rate": 3.2890853770163822e-06, "loss": 0.7809, "step": 7763 }, { "epoch": 0.62, "grad_norm": 1.395850989073227, "learning_rate": 3.2878646137618275e-06, "loss": 0.7711, "step": 7764 }, { "epoch": 0.62, "grad_norm": 0.7582074679438263, "learning_rate": 3.2866439661145684e-06, "loss": 1.0672, "step": 7765 }, { "epoch": 0.62, "grad_norm": 0.7602438570790034, "learning_rate": 3.2854234341570247e-06, "loss": 1.0554, "step": 7766 }, { "epoch": 0.62, "grad_norm": 1.5053707260218123, "learning_rate": 3.2842030179716146e-06, "loss": 0.8551, "step": 7767 }, { "epoch": 0.62, "grad_norm": 1.612079291211773, "learning_rate": 3.282982717640739e-06, "loss": 0.872, "step": 7768 }, { "epoch": 0.62, "grad_norm": 1.68799657215577, "learning_rate": 3.281762533246794e-06, "loss": 0.7998, "step": 7769 }, { "epoch": 0.62, "grad_norm": 1.5926629914845578, "learning_rate": 3.280542464872174e-06, "loss": 0.8295, "step": 7770 }, { "epoch": 0.62, "grad_norm": 1.6075243593945032, "learning_rate": 3.279322512599259e-06, "loss": 0.7991, "step": 7771 }, { "epoch": 0.62, "grad_norm": 1.5687269004277071, "learning_rate": 3.2781026765104224e-06, "loss": 0.7594, "step": 7772 }, { "epoch": 0.62, "grad_norm": 1.5456648903242183, "learning_rate": 3.276882956688029e-06, "loss": 0.7649, "step": 7773 }, { "epoch": 0.62, "grad_norm": 1.4934650332761044, "learning_rate": 3.27566335321444e-06, "loss": 0.7762, "step": 7774 }, { "epoch": 0.62, "grad_norm": 1.380947989710664, "learning_rate": 3.274443866172004e-06, "loss": 0.6833, "step": 7775 }, { "epoch": 0.62, "grad_norm": 1.588025714191186, "learning_rate": 3.273224495643062e-06, "loss": 0.7275, "step": 7776 }, { "epoch": 0.62, "grad_norm": 1.5872474298199568, "learning_rate": 3.2720052417099526e-06, "loss": 0.8538, "step": 7777 }, { "epoch": 0.62, "grad_norm": 0.8018654235931406, "learning_rate": 3.2707861044550003e-06, "loss": 1.1054, "step": 7778 }, { "epoch": 0.62, "grad_norm": 1.5407507488515706, "learning_rate": 3.269567083960525e-06, "loss": 0.7393, "step": 7779 }, { "epoch": 0.62, "grad_norm": 1.4923556461713623, "learning_rate": 3.268348180308836e-06, "loss": 0.7688, "step": 7780 }, { "epoch": 0.62, "grad_norm": 1.8255574979458629, "learning_rate": 3.267129393582238e-06, "loss": 0.7432, "step": 7781 }, { "epoch": 0.62, "grad_norm": 1.5729847288555268, "learning_rate": 3.2659107238630258e-06, "loss": 0.7771, "step": 7782 }, { "epoch": 0.62, "grad_norm": 1.477684613311222, "learning_rate": 3.2646921712334854e-06, "loss": 0.7615, "step": 7783 }, { "epoch": 0.62, "grad_norm": 1.4289188473259566, "learning_rate": 3.2634737357758994e-06, "loss": 0.7907, "step": 7784 }, { "epoch": 0.62, "grad_norm": 1.5324739243135617, "learning_rate": 3.2622554175725376e-06, "loss": 0.7928, "step": 7785 }, { "epoch": 0.62, "grad_norm": 0.8064895378409249, "learning_rate": 3.2610372167056633e-06, "loss": 1.0643, "step": 7786 }, { "epoch": 0.62, "grad_norm": 1.431838820914917, "learning_rate": 3.25981913325753e-06, "loss": 0.8515, "step": 7787 }, { "epoch": 0.62, "grad_norm": 1.5877710159724996, "learning_rate": 3.2586011673103907e-06, "loss": 0.7957, "step": 7788 }, { "epoch": 0.62, "grad_norm": 1.508441333666042, "learning_rate": 3.257383318946482e-06, "loss": 0.7767, "step": 7789 }, { "epoch": 0.62, "grad_norm": 1.5144968139461934, "learning_rate": 3.2561655882480358e-06, "loss": 0.7031, "step": 7790 }, { "epoch": 0.63, "grad_norm": 1.4689640532218047, "learning_rate": 3.254947975297278e-06, "loss": 0.7505, "step": 7791 }, { "epoch": 0.63, "grad_norm": 1.5962760730569854, "learning_rate": 3.2537304801764225e-06, "loss": 0.7876, "step": 7792 }, { "epoch": 0.63, "grad_norm": 1.542190258321352, "learning_rate": 3.252513102967676e-06, "loss": 0.7785, "step": 7793 }, { "epoch": 0.63, "grad_norm": 1.5986558853403114, "learning_rate": 3.2512958437532426e-06, "loss": 0.8383, "step": 7794 }, { "epoch": 0.63, "grad_norm": 0.7611895230340678, "learning_rate": 3.250078702615314e-06, "loss": 1.0514, "step": 7795 }, { "epoch": 0.63, "grad_norm": 0.7459017170071067, "learning_rate": 3.2488616796360717e-06, "loss": 1.0741, "step": 7796 }, { "epoch": 0.63, "grad_norm": 1.4205475691246763, "learning_rate": 3.2476447748976906e-06, "loss": 0.8066, "step": 7797 }, { "epoch": 0.63, "grad_norm": 1.4317242432487347, "learning_rate": 3.2464279884823436e-06, "loss": 0.762, "step": 7798 }, { "epoch": 0.63, "grad_norm": 1.431288608589671, "learning_rate": 3.245211320472189e-06, "loss": 0.7633, "step": 7799 }, { "epoch": 0.63, "grad_norm": 0.745610564594737, "learning_rate": 3.243994770949377e-06, "loss": 1.0468, "step": 7800 }, { "epoch": 0.63, "grad_norm": 1.4392876081289605, "learning_rate": 3.2427783399960544e-06, "loss": 0.6468, "step": 7801 }, { "epoch": 0.63, "grad_norm": 1.4820993634074593, "learning_rate": 3.241562027694357e-06, "loss": 0.7681, "step": 7802 }, { "epoch": 0.63, "grad_norm": 0.7879525761160608, "learning_rate": 3.240345834126412e-06, "loss": 1.088, "step": 7803 }, { "epoch": 0.63, "grad_norm": 1.5406098101999683, "learning_rate": 3.2391297593743374e-06, "loss": 0.7691, "step": 7804 }, { "epoch": 0.63, "grad_norm": 1.5208319660428213, "learning_rate": 3.237913803520251e-06, "loss": 0.7007, "step": 7805 }, { "epoch": 0.63, "grad_norm": 1.515692031791251, "learning_rate": 3.236697966646254e-06, "loss": 0.7572, "step": 7806 }, { "epoch": 0.63, "grad_norm": 1.6037855105784322, "learning_rate": 3.2354822488344407e-06, "loss": 0.7732, "step": 7807 }, { "epoch": 0.63, "grad_norm": 1.6381248309371017, "learning_rate": 3.234266650166901e-06, "loss": 0.7483, "step": 7808 }, { "epoch": 0.63, "grad_norm": 1.6277485437439911, "learning_rate": 3.2330511707257164e-06, "loss": 0.7798, "step": 7809 }, { "epoch": 0.63, "grad_norm": 1.4910964743656239, "learning_rate": 3.2318358105929538e-06, "loss": 0.7823, "step": 7810 }, { "epoch": 0.63, "grad_norm": 1.6390698215695276, "learning_rate": 3.2306205698506832e-06, "loss": 0.7913, "step": 7811 }, { "epoch": 0.63, "grad_norm": 1.49303984132423, "learning_rate": 3.2294054485809577e-06, "loss": 0.7293, "step": 7812 }, { "epoch": 0.63, "grad_norm": 1.4496776496849337, "learning_rate": 3.228190446865824e-06, "loss": 0.6615, "step": 7813 }, { "epoch": 0.63, "grad_norm": 2.6369959696170864, "learning_rate": 3.226975564787322e-06, "loss": 0.7853, "step": 7814 }, { "epoch": 0.63, "grad_norm": 1.4345916948205992, "learning_rate": 3.2257608024274857e-06, "loss": 0.7468, "step": 7815 }, { "epoch": 0.63, "grad_norm": 1.5731383502344427, "learning_rate": 3.224546159868337e-06, "loss": 0.7698, "step": 7816 }, { "epoch": 0.63, "grad_norm": 1.3323647257859619, "learning_rate": 3.22333163719189e-06, "loss": 0.6246, "step": 7817 }, { "epoch": 0.63, "grad_norm": 1.5175248211031584, "learning_rate": 3.2221172344801543e-06, "loss": 0.791, "step": 7818 }, { "epoch": 0.63, "grad_norm": 1.608666180317806, "learning_rate": 3.220902951815129e-06, "loss": 0.7575, "step": 7819 }, { "epoch": 0.63, "grad_norm": 1.45656570559801, "learning_rate": 3.2196887892788044e-06, "loss": 0.7393, "step": 7820 }, { "epoch": 0.63, "grad_norm": 1.5421844317473503, "learning_rate": 3.2184747469531618e-06, "loss": 0.6887, "step": 7821 }, { "epoch": 0.63, "grad_norm": 1.4815147291390154, "learning_rate": 3.2172608249201796e-06, "loss": 0.7419, "step": 7822 }, { "epoch": 0.63, "grad_norm": 1.5464214112353742, "learning_rate": 3.2160470232618228e-06, "loss": 0.7742, "step": 7823 }, { "epoch": 0.63, "grad_norm": 1.5255883546452593, "learning_rate": 3.2148333420600497e-06, "loss": 0.7578, "step": 7824 }, { "epoch": 0.63, "grad_norm": 1.5474463719932279, "learning_rate": 3.213619781396812e-06, "loss": 0.7833, "step": 7825 }, { "epoch": 0.63, "grad_norm": 0.8001016406975772, "learning_rate": 3.2124063413540517e-06, "loss": 1.0612, "step": 7826 }, { "epoch": 0.63, "grad_norm": 1.4345510796415737, "learning_rate": 3.211193022013702e-06, "loss": 0.7405, "step": 7827 }, { "epoch": 0.63, "grad_norm": 1.6746786765458157, "learning_rate": 3.209979823457691e-06, "loss": 0.8003, "step": 7828 }, { "epoch": 0.63, "grad_norm": 1.531881553409104, "learning_rate": 3.208766745767935e-06, "loss": 0.7546, "step": 7829 }, { "epoch": 0.63, "grad_norm": 1.5767603630566982, "learning_rate": 3.207553789026344e-06, "loss": 0.7386, "step": 7830 }, { "epoch": 0.63, "grad_norm": 1.6667129134491754, "learning_rate": 3.2063409533148183e-06, "loss": 0.8291, "step": 7831 }, { "epoch": 0.63, "grad_norm": 1.3602322789040886, "learning_rate": 3.2051282387152547e-06, "loss": 0.7391, "step": 7832 }, { "epoch": 0.63, "grad_norm": 1.6054953762475686, "learning_rate": 3.2039156453095366e-06, "loss": 0.7988, "step": 7833 }, { "epoch": 0.63, "grad_norm": 0.7969563553130192, "learning_rate": 3.2027031731795403e-06, "loss": 1.0536, "step": 7834 }, { "epoch": 0.63, "grad_norm": 1.4265000619854054, "learning_rate": 3.2014908224071367e-06, "loss": 0.7279, "step": 7835 }, { "epoch": 0.63, "grad_norm": 1.5072576293189832, "learning_rate": 3.2002785930741855e-06, "loss": 0.7444, "step": 7836 }, { "epoch": 0.63, "grad_norm": 1.6172904031052406, "learning_rate": 3.199066485262538e-06, "loss": 0.7759, "step": 7837 }, { "epoch": 0.63, "grad_norm": 1.6940830930648774, "learning_rate": 3.1978544990540383e-06, "loss": 0.7719, "step": 7838 }, { "epoch": 0.63, "grad_norm": 1.6121913255937244, "learning_rate": 3.1966426345305263e-06, "loss": 0.7425, "step": 7839 }, { "epoch": 0.63, "grad_norm": 1.4819006913463797, "learning_rate": 3.1954308917738263e-06, "loss": 0.7077, "step": 7840 }, { "epoch": 0.63, "grad_norm": 1.5377942764151096, "learning_rate": 3.194219270865757e-06, "loss": 0.7826, "step": 7841 }, { "epoch": 0.63, "grad_norm": 1.4526125359385222, "learning_rate": 3.1930077718881336e-06, "loss": 0.7534, "step": 7842 }, { "epoch": 0.63, "grad_norm": 1.4226338788066621, "learning_rate": 3.191796394922757e-06, "loss": 0.6863, "step": 7843 }, { "epoch": 0.63, "grad_norm": 0.805818346712067, "learning_rate": 3.1905851400514232e-06, "loss": 1.012, "step": 7844 }, { "epoch": 0.63, "grad_norm": 1.5235302992685509, "learning_rate": 3.189374007355917e-06, "loss": 0.6967, "step": 7845 }, { "epoch": 0.63, "grad_norm": 1.4826994911575055, "learning_rate": 3.1881629969180197e-06, "loss": 0.7567, "step": 7846 }, { "epoch": 0.63, "grad_norm": 1.4138080693804158, "learning_rate": 3.186952108819499e-06, "loss": 0.687, "step": 7847 }, { "epoch": 0.63, "grad_norm": 1.5811347273231366, "learning_rate": 3.1857413431421156e-06, "loss": 0.8498, "step": 7848 }, { "epoch": 0.63, "grad_norm": 1.4763936263369029, "learning_rate": 3.1845306999676274e-06, "loss": 0.7799, "step": 7849 }, { "epoch": 0.63, "grad_norm": 1.5326458985211964, "learning_rate": 3.183320179377778e-06, "loss": 0.8035, "step": 7850 }, { "epoch": 0.63, "grad_norm": 1.4510757324269528, "learning_rate": 3.182109781454303e-06, "loss": 0.7455, "step": 7851 }, { "epoch": 0.63, "grad_norm": 1.7860472443347002, "learning_rate": 3.1808995062789335e-06, "loss": 0.7288, "step": 7852 }, { "epoch": 0.63, "grad_norm": 1.6015356039303732, "learning_rate": 3.1796893539333884e-06, "loss": 0.7434, "step": 7853 }, { "epoch": 0.63, "grad_norm": 1.55957233374763, "learning_rate": 3.178479324499381e-06, "loss": 0.6916, "step": 7854 }, { "epoch": 0.63, "grad_norm": 1.5143179540356113, "learning_rate": 3.1772694180586137e-06, "loss": 0.6709, "step": 7855 }, { "epoch": 0.63, "grad_norm": 1.573744039644747, "learning_rate": 3.1760596346927843e-06, "loss": 0.8341, "step": 7856 }, { "epoch": 0.63, "grad_norm": 1.560791867875191, "learning_rate": 3.174849974483579e-06, "loss": 0.7907, "step": 7857 }, { "epoch": 0.63, "grad_norm": 0.7858211671402192, "learning_rate": 3.173640437512675e-06, "loss": 1.0323, "step": 7858 }, { "epoch": 0.63, "grad_norm": 0.7771064932734836, "learning_rate": 3.1724310238617475e-06, "loss": 1.1031, "step": 7859 }, { "epoch": 0.63, "grad_norm": 1.3921858659723871, "learning_rate": 3.171221733612455e-06, "loss": 0.7436, "step": 7860 }, { "epoch": 0.63, "grad_norm": 1.6202562348252048, "learning_rate": 3.1700125668464534e-06, "loss": 0.7748, "step": 7861 }, { "epoch": 0.63, "grad_norm": 1.5599780995953991, "learning_rate": 3.168803523645387e-06, "loss": 0.8009, "step": 7862 }, { "epoch": 0.63, "grad_norm": 1.6330109483338922, "learning_rate": 3.1675946040908946e-06, "loss": 0.8111, "step": 7863 }, { "epoch": 0.63, "grad_norm": 1.5607665833767694, "learning_rate": 3.1663858082646047e-06, "loss": 0.7864, "step": 7864 }, { "epoch": 0.63, "grad_norm": 1.4558880150071696, "learning_rate": 3.165177136248135e-06, "loss": 0.8103, "step": 7865 }, { "epoch": 0.63, "grad_norm": 1.5558702558662263, "learning_rate": 3.163968588123104e-06, "loss": 0.7726, "step": 7866 }, { "epoch": 0.63, "grad_norm": 1.5040355541588522, "learning_rate": 3.162760163971112e-06, "loss": 0.7358, "step": 7867 }, { "epoch": 0.63, "grad_norm": 1.5592771868012663, "learning_rate": 3.1615518638737534e-06, "loss": 0.7646, "step": 7868 }, { "epoch": 0.63, "grad_norm": 1.5921876318572783, "learning_rate": 3.160343687912618e-06, "loss": 0.7408, "step": 7869 }, { "epoch": 0.63, "grad_norm": 1.5942615996605947, "learning_rate": 3.159135636169284e-06, "loss": 0.789, "step": 7870 }, { "epoch": 0.63, "grad_norm": 1.3807661303790821, "learning_rate": 3.1579277087253202e-06, "loss": 0.7747, "step": 7871 }, { "epoch": 0.63, "grad_norm": 1.5653883352760136, "learning_rate": 3.156719905662289e-06, "loss": 0.7542, "step": 7872 }, { "epoch": 0.63, "grad_norm": 1.6116420343139433, "learning_rate": 3.1555122270617454e-06, "loss": 0.7987, "step": 7873 }, { "epoch": 0.63, "grad_norm": 1.5077281141312564, "learning_rate": 3.154304673005235e-06, "loss": 0.7908, "step": 7874 }, { "epoch": 0.63, "grad_norm": 1.8117271494343843, "learning_rate": 3.1530972435742902e-06, "loss": 0.7985, "step": 7875 }, { "epoch": 0.63, "grad_norm": 1.6668986322358865, "learning_rate": 3.151889938850445e-06, "loss": 0.8366, "step": 7876 }, { "epoch": 0.63, "grad_norm": 1.5163693572502344, "learning_rate": 3.150682758915218e-06, "loss": 0.7823, "step": 7877 }, { "epoch": 0.63, "grad_norm": 1.5055714672942124, "learning_rate": 3.1494757038501197e-06, "loss": 0.7441, "step": 7878 }, { "epoch": 0.63, "grad_norm": 1.4541763259905094, "learning_rate": 3.148268773736651e-06, "loss": 0.6931, "step": 7879 }, { "epoch": 0.63, "grad_norm": 1.526275167849469, "learning_rate": 3.147061968656311e-06, "loss": 0.7477, "step": 7880 }, { "epoch": 0.63, "grad_norm": 1.543715915156076, "learning_rate": 3.145855288690584e-06, "loss": 0.7206, "step": 7881 }, { "epoch": 0.63, "grad_norm": 1.5064686373171996, "learning_rate": 3.1446487339209455e-06, "loss": 0.6337, "step": 7882 }, { "epoch": 0.63, "grad_norm": 1.5321585797141481, "learning_rate": 3.1434423044288697e-06, "loss": 0.7783, "step": 7883 }, { "epoch": 0.63, "grad_norm": 1.4746995412644193, "learning_rate": 3.1422360002958143e-06, "loss": 0.7572, "step": 7884 }, { "epoch": 0.63, "grad_norm": 0.7868770871636039, "learning_rate": 3.141029821603232e-06, "loss": 1.0639, "step": 7885 }, { "epoch": 0.63, "grad_norm": 1.5998398502917683, "learning_rate": 3.1398237684325643e-06, "loss": 0.7507, "step": 7886 }, { "epoch": 0.63, "grad_norm": 0.8283856976092014, "learning_rate": 3.1386178408652524e-06, "loss": 1.0737, "step": 7887 }, { "epoch": 0.63, "grad_norm": 1.5081277011004612, "learning_rate": 3.137412038982719e-06, "loss": 0.7063, "step": 7888 }, { "epoch": 0.63, "grad_norm": 1.357911624773277, "learning_rate": 3.1362063628663836e-06, "loss": 0.7501, "step": 7889 }, { "epoch": 0.63, "grad_norm": 1.5549456230796386, "learning_rate": 3.135000812597657e-06, "loss": 0.7721, "step": 7890 }, { "epoch": 0.63, "grad_norm": 1.5384880021201024, "learning_rate": 3.1337953882579408e-06, "loss": 0.7575, "step": 7891 }, { "epoch": 0.63, "grad_norm": 1.524105626707267, "learning_rate": 3.1325900899286245e-06, "loss": 0.7484, "step": 7892 }, { "epoch": 0.63, "grad_norm": 1.5309776855411286, "learning_rate": 3.131384917691098e-06, "loss": 0.7722, "step": 7893 }, { "epoch": 0.63, "grad_norm": 1.6527576154837786, "learning_rate": 3.130179871626734e-06, "loss": 0.9127, "step": 7894 }, { "epoch": 0.63, "grad_norm": 1.5437672183578086, "learning_rate": 3.128974951816901e-06, "loss": 0.7409, "step": 7895 }, { "epoch": 0.63, "grad_norm": 1.9700821504100932, "learning_rate": 3.127770158342957e-06, "loss": 0.8076, "step": 7896 }, { "epoch": 0.63, "grad_norm": 1.5465587189969936, "learning_rate": 3.126565491286254e-06, "loss": 0.7507, "step": 7897 }, { "epoch": 0.63, "grad_norm": 1.4487049607015645, "learning_rate": 3.1253609507281326e-06, "loss": 0.7157, "step": 7898 }, { "epoch": 0.63, "grad_norm": 0.7926514709129854, "learning_rate": 3.1241565367499257e-06, "loss": 1.065, "step": 7899 }, { "epoch": 0.63, "grad_norm": 1.5782621971278104, "learning_rate": 3.122952249432959e-06, "loss": 0.7845, "step": 7900 }, { "epoch": 0.63, "grad_norm": 1.471358667778139, "learning_rate": 3.1217480888585493e-06, "loss": 0.7428, "step": 7901 }, { "epoch": 0.63, "grad_norm": 0.7677107957170246, "learning_rate": 3.1205440551080033e-06, "loss": 1.0651, "step": 7902 }, { "epoch": 0.63, "grad_norm": 1.463420217395165, "learning_rate": 3.1193401482626186e-06, "loss": 0.8024, "step": 7903 }, { "epoch": 0.63, "grad_norm": 1.7944412764710764, "learning_rate": 3.118136368403689e-06, "loss": 0.7416, "step": 7904 }, { "epoch": 0.63, "grad_norm": 1.5235932758352553, "learning_rate": 3.116932715612495e-06, "loss": 0.7429, "step": 7905 }, { "epoch": 0.63, "grad_norm": 1.5985752596012477, "learning_rate": 3.1157291899703097e-06, "loss": 0.7441, "step": 7906 }, { "epoch": 0.63, "grad_norm": 1.526484283278482, "learning_rate": 3.114525791558398e-06, "loss": 0.6608, "step": 7907 }, { "epoch": 0.63, "grad_norm": 1.412562300352828, "learning_rate": 3.1133225204580177e-06, "loss": 0.7673, "step": 7908 }, { "epoch": 0.63, "grad_norm": 1.5060136241277517, "learning_rate": 3.1121193767504117e-06, "loss": 0.8455, "step": 7909 }, { "epoch": 0.63, "grad_norm": 1.3517749979544447, "learning_rate": 3.1109163605168246e-06, "loss": 0.6647, "step": 7910 }, { "epoch": 0.63, "grad_norm": 1.6161456382372785, "learning_rate": 3.1097134718384846e-06, "loss": 0.7827, "step": 7911 }, { "epoch": 0.63, "grad_norm": 1.6266915093773635, "learning_rate": 3.1085107107966146e-06, "loss": 0.8048, "step": 7912 }, { "epoch": 0.63, "grad_norm": 1.54981550185417, "learning_rate": 3.1073080774724227e-06, "loss": 0.7779, "step": 7913 }, { "epoch": 0.63, "grad_norm": 1.5224881589053962, "learning_rate": 3.10610557194712e-06, "loss": 0.7119, "step": 7914 }, { "epoch": 0.64, "grad_norm": 1.5319787841857366, "learning_rate": 3.1049031943019004e-06, "loss": 0.7334, "step": 7915 }, { "epoch": 0.64, "grad_norm": 1.5283507870583308, "learning_rate": 3.1037009446179483e-06, "loss": 0.8448, "step": 7916 }, { "epoch": 0.64, "grad_norm": 1.460622164274087, "learning_rate": 3.102498822976446e-06, "loss": 0.7195, "step": 7917 }, { "epoch": 0.64, "grad_norm": 1.4613912704618879, "learning_rate": 3.101296829458562e-06, "loss": 0.7924, "step": 7918 }, { "epoch": 0.64, "grad_norm": 1.5049396548718912, "learning_rate": 3.100094964145458e-06, "loss": 0.8111, "step": 7919 }, { "epoch": 0.64, "grad_norm": 1.4902887471820265, "learning_rate": 3.098893227118285e-06, "loss": 0.7943, "step": 7920 }, { "epoch": 0.64, "grad_norm": 1.5499880363518441, "learning_rate": 3.097691618458189e-06, "loss": 0.7797, "step": 7921 }, { "epoch": 0.64, "grad_norm": 1.5118413602558276, "learning_rate": 3.0964901382463052e-06, "loss": 0.8541, "step": 7922 }, { "epoch": 0.64, "grad_norm": 1.552292552176281, "learning_rate": 3.0952887865637593e-06, "loss": 0.6696, "step": 7923 }, { "epoch": 0.64, "grad_norm": 2.332166227487138, "learning_rate": 3.0940875634916713e-06, "loss": 0.7338, "step": 7924 }, { "epoch": 0.64, "grad_norm": 1.573430330136126, "learning_rate": 3.092886469111149e-06, "loss": 0.7506, "step": 7925 }, { "epoch": 0.64, "grad_norm": 1.546445582200416, "learning_rate": 3.0916855035032905e-06, "loss": 0.7046, "step": 7926 }, { "epoch": 0.64, "grad_norm": 1.4407201801834755, "learning_rate": 3.090484666749193e-06, "loss": 0.7563, "step": 7927 }, { "epoch": 0.64, "grad_norm": 1.426430457258376, "learning_rate": 3.089283958929938e-06, "loss": 0.7309, "step": 7928 }, { "epoch": 0.64, "grad_norm": 1.4707885951986526, "learning_rate": 3.088083380126598e-06, "loss": 0.7737, "step": 7929 }, { "epoch": 0.64, "grad_norm": 1.448299746735942, "learning_rate": 3.0868829304202386e-06, "loss": 0.8123, "step": 7930 }, { "epoch": 0.64, "grad_norm": 1.495935184487191, "learning_rate": 3.0856826098919196e-06, "loss": 0.7502, "step": 7931 }, { "epoch": 0.64, "grad_norm": 1.522750990225869, "learning_rate": 3.0844824186226885e-06, "loss": 0.7328, "step": 7932 }, { "epoch": 0.64, "grad_norm": 1.579226713579999, "learning_rate": 3.0832823566935833e-06, "loss": 0.7387, "step": 7933 }, { "epoch": 0.64, "grad_norm": 1.4672088725375034, "learning_rate": 3.0820824241856377e-06, "loss": 0.7566, "step": 7934 }, { "epoch": 0.64, "grad_norm": 1.5964219563312856, "learning_rate": 3.0808826211798725e-06, "loss": 0.7963, "step": 7935 }, { "epoch": 0.64, "grad_norm": 1.646035001421369, "learning_rate": 3.0796829477573004e-06, "loss": 0.7293, "step": 7936 }, { "epoch": 0.64, "grad_norm": 1.502327853053212, "learning_rate": 3.0784834039989253e-06, "loss": 0.7537, "step": 7937 }, { "epoch": 0.64, "grad_norm": 1.4393912934746202, "learning_rate": 3.0772839899857465e-06, "loss": 0.6871, "step": 7938 }, { "epoch": 0.64, "grad_norm": 1.4833373647805088, "learning_rate": 3.0760847057987486e-06, "loss": 0.7798, "step": 7939 }, { "epoch": 0.64, "grad_norm": 1.5492116933048001, "learning_rate": 3.0748855515189104e-06, "loss": 0.7394, "step": 7940 }, { "epoch": 0.64, "grad_norm": 1.659942606350061, "learning_rate": 3.0736865272272024e-06, "loss": 0.7638, "step": 7941 }, { "epoch": 0.64, "grad_norm": 1.507731271327927, "learning_rate": 3.072487633004585e-06, "loss": 0.6941, "step": 7942 }, { "epoch": 0.64, "grad_norm": 1.6236872096873152, "learning_rate": 3.0712888689320107e-06, "loss": 0.804, "step": 7943 }, { "epoch": 0.64, "grad_norm": 1.4744118491107836, "learning_rate": 3.0700902350904207e-06, "loss": 0.6945, "step": 7944 }, { "epoch": 0.64, "grad_norm": 1.525229554007588, "learning_rate": 3.068891731560751e-06, "loss": 0.7221, "step": 7945 }, { "epoch": 0.64, "grad_norm": 1.3962622446850879, "learning_rate": 3.0676933584239287e-06, "loss": 0.7005, "step": 7946 }, { "epoch": 0.64, "grad_norm": 0.8230611332684528, "learning_rate": 3.0664951157608676e-06, "loss": 1.0604, "step": 7947 }, { "epoch": 0.64, "grad_norm": 0.7833110891588118, "learning_rate": 3.0652970036524787e-06, "loss": 1.0987, "step": 7948 }, { "epoch": 0.64, "grad_norm": 1.4116601381691856, "learning_rate": 3.064099022179661e-06, "loss": 0.7037, "step": 7949 }, { "epoch": 0.64, "grad_norm": 1.5727377663560629, "learning_rate": 3.0629011714233014e-06, "loss": 0.7379, "step": 7950 }, { "epoch": 0.64, "grad_norm": 1.4384872899127135, "learning_rate": 3.0617034514642865e-06, "loss": 0.6701, "step": 7951 }, { "epoch": 0.64, "grad_norm": 1.5428286051017324, "learning_rate": 3.060505862383486e-06, "loss": 0.7751, "step": 7952 }, { "epoch": 0.64, "grad_norm": 0.7745107110449517, "learning_rate": 3.059308404261765e-06, "loss": 1.0884, "step": 7953 }, { "epoch": 0.64, "grad_norm": 1.553500752873653, "learning_rate": 3.058111077179976e-06, "loss": 0.7585, "step": 7954 }, { "epoch": 0.64, "grad_norm": 1.4710626893173846, "learning_rate": 3.0569138812189696e-06, "loss": 0.7564, "step": 7955 }, { "epoch": 0.64, "grad_norm": 1.640461977181579, "learning_rate": 3.0557168164595817e-06, "loss": 0.796, "step": 7956 }, { "epoch": 0.64, "grad_norm": 0.7857773527466386, "learning_rate": 3.0545198829826383e-06, "loss": 1.0679, "step": 7957 }, { "epoch": 0.64, "grad_norm": 1.4531885836931626, "learning_rate": 3.0533230808689617e-06, "loss": 0.7679, "step": 7958 }, { "epoch": 0.64, "grad_norm": 1.5562255501840483, "learning_rate": 3.052126410199363e-06, "loss": 0.8269, "step": 7959 }, { "epoch": 0.64, "grad_norm": 1.4454588087544018, "learning_rate": 3.0509298710546433e-06, "loss": 0.7381, "step": 7960 }, { "epoch": 0.64, "grad_norm": 1.5665970027994078, "learning_rate": 3.0497334635155933e-06, "loss": 0.7586, "step": 7961 }, { "epoch": 0.64, "grad_norm": 1.501899125504199, "learning_rate": 3.048537187663001e-06, "loss": 0.8081, "step": 7962 }, { "epoch": 0.64, "grad_norm": 1.446606358272631, "learning_rate": 3.047341043577641e-06, "loss": 0.7886, "step": 7963 }, { "epoch": 0.64, "grad_norm": 1.4067109821814765, "learning_rate": 3.046145031340275e-06, "loss": 0.8269, "step": 7964 }, { "epoch": 0.64, "grad_norm": 1.5882473964709063, "learning_rate": 3.0449491510316675e-06, "loss": 0.8358, "step": 7965 }, { "epoch": 0.64, "grad_norm": 1.576854360524838, "learning_rate": 3.0437534027325634e-06, "loss": 0.7708, "step": 7966 }, { "epoch": 0.64, "grad_norm": 1.6486312235370708, "learning_rate": 3.042557786523702e-06, "loss": 0.7835, "step": 7967 }, { "epoch": 0.64, "grad_norm": 1.4881331326129894, "learning_rate": 3.041362302485816e-06, "loss": 0.7252, "step": 7968 }, { "epoch": 0.64, "grad_norm": 1.4472713096002712, "learning_rate": 3.040166950699626e-06, "loss": 0.7317, "step": 7969 }, { "epoch": 0.64, "grad_norm": 0.7757253603079295, "learning_rate": 3.0389717312458446e-06, "loss": 1.0743, "step": 7970 }, { "epoch": 0.64, "grad_norm": 1.6038455468368156, "learning_rate": 3.0377766442051738e-06, "loss": 0.8285, "step": 7971 }, { "epoch": 0.64, "grad_norm": 1.4448625431881938, "learning_rate": 3.036581689658314e-06, "loss": 0.8025, "step": 7972 }, { "epoch": 0.64, "grad_norm": 0.810402769205492, "learning_rate": 3.0353868676859477e-06, "loss": 1.07, "step": 7973 }, { "epoch": 0.64, "grad_norm": 0.7733993936148941, "learning_rate": 3.03419217836875e-06, "loss": 1.0657, "step": 7974 }, { "epoch": 0.64, "grad_norm": 1.6119926668131304, "learning_rate": 3.0329976217873935e-06, "loss": 0.7354, "step": 7975 }, { "epoch": 0.64, "grad_norm": 1.799427800416381, "learning_rate": 3.0318031980225348e-06, "loss": 0.6981, "step": 7976 }, { "epoch": 0.64, "grad_norm": 1.5165070043751696, "learning_rate": 3.0306089071548263e-06, "loss": 0.7207, "step": 7977 }, { "epoch": 0.64, "grad_norm": 1.668444328082406, "learning_rate": 3.029414749264905e-06, "loss": 0.7737, "step": 7978 }, { "epoch": 0.64, "grad_norm": 0.760588730356039, "learning_rate": 3.0282207244334084e-06, "loss": 1.0687, "step": 7979 }, { "epoch": 0.64, "grad_norm": 1.5720996746761045, "learning_rate": 3.027026832740956e-06, "loss": 0.6732, "step": 7980 }, { "epoch": 0.64, "grad_norm": 1.4199344182943525, "learning_rate": 3.025833074268162e-06, "loss": 0.7457, "step": 7981 }, { "epoch": 0.64, "grad_norm": 1.54652241029616, "learning_rate": 3.0246394490956343e-06, "loss": 0.7702, "step": 7982 }, { "epoch": 0.64, "grad_norm": 1.588834779800411, "learning_rate": 3.0234459573039687e-06, "loss": 0.7424, "step": 7983 }, { "epoch": 0.64, "grad_norm": 1.6041640573449811, "learning_rate": 3.0222525989737517e-06, "loss": 0.7297, "step": 7984 }, { "epoch": 0.64, "grad_norm": 1.5020243379532323, "learning_rate": 3.02105937418556e-06, "loss": 0.7073, "step": 7985 }, { "epoch": 0.64, "grad_norm": 1.415189778987335, "learning_rate": 3.019866283019966e-06, "loss": 0.7708, "step": 7986 }, { "epoch": 0.64, "grad_norm": 1.5287471209405323, "learning_rate": 3.0186733255575286e-06, "loss": 0.7631, "step": 7987 }, { "epoch": 0.64, "grad_norm": 1.562657859394054, "learning_rate": 3.0174805018787973e-06, "loss": 0.7513, "step": 7988 }, { "epoch": 0.64, "grad_norm": 1.5489141501117913, "learning_rate": 3.016287812064317e-06, "loss": 0.8214, "step": 7989 }, { "epoch": 0.64, "grad_norm": 1.4745590905766002, "learning_rate": 3.01509525619462e-06, "loss": 0.7385, "step": 7990 }, { "epoch": 0.64, "grad_norm": 2.0232713081072506, "learning_rate": 3.0139028343502273e-06, "loss": 0.7447, "step": 7991 }, { "epoch": 0.64, "grad_norm": 1.588019227384956, "learning_rate": 3.012710546611659e-06, "loss": 0.749, "step": 7992 }, { "epoch": 0.64, "grad_norm": 1.5036644984630219, "learning_rate": 3.0115183930594194e-06, "loss": 0.7523, "step": 7993 }, { "epoch": 0.64, "grad_norm": 1.5591825829868644, "learning_rate": 3.010326373774004e-06, "loss": 0.7508, "step": 7994 }, { "epoch": 0.64, "grad_norm": 1.7143242545343365, "learning_rate": 3.0091344888359015e-06, "loss": 0.8813, "step": 7995 }, { "epoch": 0.64, "grad_norm": 1.4488289057126014, "learning_rate": 3.007942738325591e-06, "loss": 0.8276, "step": 7996 }, { "epoch": 0.64, "grad_norm": 1.589684805111971, "learning_rate": 3.0067511223235425e-06, "loss": 0.7971, "step": 7997 }, { "epoch": 0.64, "grad_norm": 0.8131730114975364, "learning_rate": 3.005559640910213e-06, "loss": 1.0739, "step": 7998 }, { "epoch": 0.64, "grad_norm": 1.5280893490062593, "learning_rate": 3.0043682941660603e-06, "loss": 0.7926, "step": 7999 }, { "epoch": 0.64, "grad_norm": 1.5108552453737845, "learning_rate": 3.0031770821715233e-06, "loss": 0.7158, "step": 8000 }, { "epoch": 0.64, "grad_norm": 1.623882738392162, "learning_rate": 3.001986005007036e-06, "loss": 0.8142, "step": 8001 }, { "epoch": 0.64, "grad_norm": 1.478763871069111, "learning_rate": 3.0007950627530197e-06, "loss": 0.7055, "step": 8002 }, { "epoch": 0.64, "grad_norm": 1.5222860936784044, "learning_rate": 2.999604255489894e-06, "loss": 0.7067, "step": 8003 }, { "epoch": 0.64, "grad_norm": 1.4276697848398863, "learning_rate": 2.9984135832980643e-06, "loss": 0.714, "step": 8004 }, { "epoch": 0.64, "grad_norm": 0.7910228466134466, "learning_rate": 2.9972230462579243e-06, "loss": 1.0845, "step": 8005 }, { "epoch": 0.64, "grad_norm": 1.618445048701098, "learning_rate": 2.996032644449865e-06, "loss": 0.7991, "step": 8006 }, { "epoch": 0.64, "grad_norm": 0.7931163516780105, "learning_rate": 2.994842377954264e-06, "loss": 1.0902, "step": 8007 }, { "epoch": 0.64, "grad_norm": 0.764687241304576, "learning_rate": 2.9936522468514888e-06, "loss": 1.0421, "step": 8008 }, { "epoch": 0.64, "grad_norm": 1.6089082815114828, "learning_rate": 2.9924622512219037e-06, "loss": 0.7605, "step": 8009 }, { "epoch": 0.64, "grad_norm": 1.4993213005400576, "learning_rate": 2.991272391145858e-06, "loss": 0.7367, "step": 8010 }, { "epoch": 0.64, "grad_norm": 0.7604130166650378, "learning_rate": 2.990082666703693e-06, "loss": 1.0726, "step": 8011 }, { "epoch": 0.64, "grad_norm": 1.5490166088527242, "learning_rate": 2.988893077975742e-06, "loss": 0.7473, "step": 8012 }, { "epoch": 0.64, "grad_norm": 1.555747147864406, "learning_rate": 2.98770362504233e-06, "loss": 0.8198, "step": 8013 }, { "epoch": 0.64, "grad_norm": 1.5837257708041734, "learning_rate": 2.986514307983771e-06, "loss": 0.7899, "step": 8014 }, { "epoch": 0.64, "grad_norm": 1.4624883457253859, "learning_rate": 2.9853251268803674e-06, "loss": 0.7189, "step": 8015 }, { "epoch": 0.64, "grad_norm": 0.7786957789800643, "learning_rate": 2.984136081812421e-06, "loss": 1.0359, "step": 8016 }, { "epoch": 0.64, "grad_norm": 1.4434296628097525, "learning_rate": 2.9829471728602156e-06, "loss": 0.7572, "step": 8017 }, { "epoch": 0.64, "grad_norm": 1.7852741949858848, "learning_rate": 2.981758400104028e-06, "loss": 0.7421, "step": 8018 }, { "epoch": 0.64, "grad_norm": 1.5922236010076183, "learning_rate": 2.9805697636241278e-06, "loss": 0.7934, "step": 8019 }, { "epoch": 0.64, "grad_norm": 1.5705563145882944, "learning_rate": 2.9793812635007757e-06, "loss": 0.7692, "step": 8020 }, { "epoch": 0.64, "grad_norm": 0.7982703978316215, "learning_rate": 2.9781928998142217e-06, "loss": 1.0826, "step": 8021 }, { "epoch": 0.64, "grad_norm": 1.4928965551209985, "learning_rate": 2.9770046726447056e-06, "loss": 0.7806, "step": 8022 }, { "epoch": 0.64, "grad_norm": 1.6679567521782042, "learning_rate": 2.97581658207246e-06, "loss": 0.7589, "step": 8023 }, { "epoch": 0.64, "grad_norm": 1.525837198328867, "learning_rate": 2.9746286281777075e-06, "loss": 0.7176, "step": 8024 }, { "epoch": 0.64, "grad_norm": 1.5255339415244606, "learning_rate": 2.97344081104066e-06, "loss": 0.7721, "step": 8025 }, { "epoch": 0.64, "grad_norm": 1.5164366359408274, "learning_rate": 2.9722531307415243e-06, "loss": 0.7384, "step": 8026 }, { "epoch": 0.64, "grad_norm": 1.4379423010428263, "learning_rate": 2.9710655873604943e-06, "loss": 0.7035, "step": 8027 }, { "epoch": 0.64, "grad_norm": 1.4489583842644578, "learning_rate": 2.969878180977755e-06, "loss": 0.7461, "step": 8028 }, { "epoch": 0.64, "grad_norm": 1.463566301609115, "learning_rate": 2.968690911673482e-06, "loss": 0.7532, "step": 8029 }, { "epoch": 0.64, "grad_norm": 1.3844194298239825, "learning_rate": 2.967503779527845e-06, "loss": 0.812, "step": 8030 }, { "epoch": 0.64, "grad_norm": 1.5572438051693611, "learning_rate": 2.966316784621e-06, "loss": 0.7314, "step": 8031 }, { "epoch": 0.64, "grad_norm": 1.556167802016975, "learning_rate": 2.9651299270330945e-06, "loss": 0.8426, "step": 8032 }, { "epoch": 0.64, "grad_norm": 1.6628684489413896, "learning_rate": 2.9639432068442716e-06, "loss": 0.7973, "step": 8033 }, { "epoch": 0.64, "grad_norm": 1.4421542175244941, "learning_rate": 2.9627566241346584e-06, "loss": 0.7239, "step": 8034 }, { "epoch": 0.64, "grad_norm": 1.5037599998621216, "learning_rate": 2.9615701789843766e-06, "loss": 0.7942, "step": 8035 }, { "epoch": 0.64, "grad_norm": 1.5272042101112238, "learning_rate": 2.960383871473535e-06, "loss": 0.7681, "step": 8036 }, { "epoch": 0.64, "grad_norm": 1.6937994697015775, "learning_rate": 2.9591977016822406e-06, "loss": 0.7941, "step": 8037 }, { "epoch": 0.64, "grad_norm": 0.7828494940349162, "learning_rate": 2.9580116696905836e-06, "loss": 1.0648, "step": 8038 }, { "epoch": 0.64, "grad_norm": 1.6702173253719916, "learning_rate": 2.9568257755786474e-06, "loss": 0.8345, "step": 8039 }, { "epoch": 0.65, "grad_norm": 0.7880414144463519, "learning_rate": 2.955640019426508e-06, "loss": 1.0751, "step": 8040 }, { "epoch": 0.65, "grad_norm": 1.4751484888204602, "learning_rate": 2.9544544013142284e-06, "loss": 0.7138, "step": 8041 }, { "epoch": 0.65, "grad_norm": 0.7655111271691917, "learning_rate": 2.9532689213218657e-06, "loss": 1.0534, "step": 8042 }, { "epoch": 0.65, "grad_norm": 1.6527153357583906, "learning_rate": 2.9520835795294633e-06, "loss": 0.7601, "step": 8043 }, { "epoch": 0.65, "grad_norm": 1.408934616040001, "learning_rate": 2.950898376017064e-06, "loss": 0.7772, "step": 8044 }, { "epoch": 0.65, "grad_norm": 1.6057430189054687, "learning_rate": 2.9497133108646903e-06, "loss": 0.7291, "step": 8045 }, { "epoch": 0.65, "grad_norm": 1.5391777956132795, "learning_rate": 2.94852838415236e-06, "loss": 0.7751, "step": 8046 }, { "epoch": 0.65, "grad_norm": 1.482991698080346, "learning_rate": 2.9473435959600864e-06, "loss": 0.7328, "step": 8047 }, { "epoch": 0.65, "grad_norm": 1.424944085867061, "learning_rate": 2.946158946367867e-06, "loss": 0.7612, "step": 8048 }, { "epoch": 0.65, "grad_norm": 1.471935064043554, "learning_rate": 2.944974435455691e-06, "loss": 0.8058, "step": 8049 }, { "epoch": 0.65, "grad_norm": 1.3962850596539222, "learning_rate": 2.943790063303541e-06, "loss": 0.721, "step": 8050 }, { "epoch": 0.65, "grad_norm": 1.5386898018583883, "learning_rate": 2.942605829991387e-06, "loss": 0.7419, "step": 8051 }, { "epoch": 0.65, "grad_norm": 2.024697776802329, "learning_rate": 2.9414217355991937e-06, "loss": 0.759, "step": 8052 }, { "epoch": 0.65, "grad_norm": 1.6754737566713902, "learning_rate": 2.9402377802069086e-06, "loss": 0.7472, "step": 8053 }, { "epoch": 0.65, "grad_norm": 1.6139298978485828, "learning_rate": 2.939053963894481e-06, "loss": 0.735, "step": 8054 }, { "epoch": 0.65, "grad_norm": 1.4135510875996606, "learning_rate": 2.9378702867418423e-06, "loss": 0.7687, "step": 8055 }, { "epoch": 0.65, "grad_norm": 0.8389571024014597, "learning_rate": 2.936686748828916e-06, "loss": 1.0542, "step": 8056 }, { "epoch": 0.65, "grad_norm": 1.4783089638901947, "learning_rate": 2.93550335023562e-06, "loss": 0.7758, "step": 8057 }, { "epoch": 0.65, "grad_norm": 1.4453713985943246, "learning_rate": 2.934320091041858e-06, "loss": 0.6687, "step": 8058 }, { "epoch": 0.65, "grad_norm": 1.650752634724723, "learning_rate": 2.933136971327527e-06, "loss": 0.8179, "step": 8059 }, { "epoch": 0.65, "grad_norm": 1.4788610607120913, "learning_rate": 2.9319539911725136e-06, "loss": 0.7814, "step": 8060 }, { "epoch": 0.65, "grad_norm": 0.8018156660344682, "learning_rate": 2.930771150656696e-06, "loss": 1.0852, "step": 8061 }, { "epoch": 0.65, "grad_norm": 0.7635886580720078, "learning_rate": 2.9295884498599415e-06, "loss": 1.098, "step": 8062 }, { "epoch": 0.65, "grad_norm": 1.5338503050890298, "learning_rate": 2.9284058888621076e-06, "loss": 0.8019, "step": 8063 }, { "epoch": 0.65, "grad_norm": 1.4947889838382138, "learning_rate": 2.9272234677430467e-06, "loss": 0.7963, "step": 8064 }, { "epoch": 0.65, "grad_norm": 0.817480720275671, "learning_rate": 2.926041186582598e-06, "loss": 1.0913, "step": 8065 }, { "epoch": 0.65, "grad_norm": 1.5759143418654924, "learning_rate": 2.9248590454605887e-06, "loss": 0.7313, "step": 8066 }, { "epoch": 0.65, "grad_norm": 1.5047420914529652, "learning_rate": 2.9236770444568428e-06, "loss": 0.7023, "step": 8067 }, { "epoch": 0.65, "grad_norm": 1.4161396496439687, "learning_rate": 2.922495183651171e-06, "loss": 0.6961, "step": 8068 }, { "epoch": 0.65, "grad_norm": 1.6218572137866627, "learning_rate": 2.921313463123375e-06, "loss": 0.706, "step": 8069 }, { "epoch": 0.65, "grad_norm": 1.5159348018194834, "learning_rate": 2.920131882953245e-06, "loss": 0.8616, "step": 8070 }, { "epoch": 0.65, "grad_norm": 1.573608024532517, "learning_rate": 2.9189504432205685e-06, "loss": 0.7217, "step": 8071 }, { "epoch": 0.65, "grad_norm": 1.4657065140139776, "learning_rate": 2.9177691440051158e-06, "loss": 0.8178, "step": 8072 }, { "epoch": 0.65, "grad_norm": 1.525018251852873, "learning_rate": 2.9165879853866507e-06, "loss": 0.8171, "step": 8073 }, { "epoch": 0.65, "grad_norm": 0.8025038659058368, "learning_rate": 2.9154069674449325e-06, "loss": 1.0434, "step": 8074 }, { "epoch": 0.65, "grad_norm": 1.4337853837503074, "learning_rate": 2.9142260902597003e-06, "loss": 0.7809, "step": 8075 }, { "epoch": 0.65, "grad_norm": 1.3672115015814426, "learning_rate": 2.9130453539106917e-06, "loss": 0.7298, "step": 8076 }, { "epoch": 0.65, "grad_norm": 1.4573418958682203, "learning_rate": 2.9118647584776316e-06, "loss": 0.7523, "step": 8077 }, { "epoch": 0.65, "grad_norm": 0.762183195343577, "learning_rate": 2.9106843040402397e-06, "loss": 1.0946, "step": 8078 }, { "epoch": 0.65, "grad_norm": 1.600462033640718, "learning_rate": 2.9095039906782207e-06, "loss": 0.8476, "step": 8079 }, { "epoch": 0.65, "grad_norm": 1.5349543602300637, "learning_rate": 2.9083238184712713e-06, "loss": 0.7075, "step": 8080 }, { "epoch": 0.65, "grad_norm": 1.4421728261897893, "learning_rate": 2.9071437874990813e-06, "loss": 0.7853, "step": 8081 }, { "epoch": 0.65, "grad_norm": 1.4375242494956446, "learning_rate": 2.9059638978413295e-06, "loss": 0.7456, "step": 8082 }, { "epoch": 0.65, "grad_norm": 1.4579617172665835, "learning_rate": 2.9047841495776812e-06, "loss": 0.7253, "step": 8083 }, { "epoch": 0.65, "grad_norm": 1.58434784649837, "learning_rate": 2.9036045427878e-06, "loss": 0.8164, "step": 8084 }, { "epoch": 0.65, "grad_norm": 1.4823521380097855, "learning_rate": 2.902425077551334e-06, "loss": 0.7978, "step": 8085 }, { "epoch": 0.65, "grad_norm": 0.7937375509882071, "learning_rate": 2.901245753947923e-06, "loss": 1.0667, "step": 8086 }, { "epoch": 0.65, "grad_norm": 0.7805547908756333, "learning_rate": 2.9000665720571987e-06, "loss": 1.0998, "step": 8087 }, { "epoch": 0.65, "grad_norm": 1.4352967288936975, "learning_rate": 2.8988875319587795e-06, "loss": 0.7354, "step": 8088 }, { "epoch": 0.65, "grad_norm": 1.4730829019922882, "learning_rate": 2.89770863373228e-06, "loss": 0.8379, "step": 8089 }, { "epoch": 0.65, "grad_norm": 1.5082691511259805, "learning_rate": 2.8965298774572983e-06, "loss": 0.8283, "step": 8090 }, { "epoch": 0.65, "grad_norm": 0.7936058361337938, "learning_rate": 2.8953512632134305e-06, "loss": 1.0916, "step": 8091 }, { "epoch": 0.65, "grad_norm": 1.452459275453735, "learning_rate": 2.8941727910802587e-06, "loss": 0.7518, "step": 8092 }, { "epoch": 0.65, "grad_norm": 1.5287095106052275, "learning_rate": 2.8929944611373555e-06, "loss": 0.8416, "step": 8093 }, { "epoch": 0.65, "grad_norm": 1.5522357219259648, "learning_rate": 2.8918162734642817e-06, "loss": 0.7175, "step": 8094 }, { "epoch": 0.65, "grad_norm": 1.5609251686000156, "learning_rate": 2.8906382281405965e-06, "loss": 0.6848, "step": 8095 }, { "epoch": 0.65, "grad_norm": 1.5483557436464859, "learning_rate": 2.8894603252458407e-06, "loss": 0.7585, "step": 8096 }, { "epoch": 0.65, "grad_norm": 1.5077653530175126, "learning_rate": 2.88828256485955e-06, "loss": 0.7659, "step": 8097 }, { "epoch": 0.65, "grad_norm": 0.8050413001609112, "learning_rate": 2.8871049470612495e-06, "loss": 1.0958, "step": 8098 }, { "epoch": 0.65, "grad_norm": 1.4179232028681084, "learning_rate": 2.8859274719304543e-06, "loss": 0.7778, "step": 8099 }, { "epoch": 0.65, "grad_norm": 1.522486378997395, "learning_rate": 2.8847501395466704e-06, "loss": 0.6731, "step": 8100 }, { "epoch": 0.65, "grad_norm": 1.5360806555064173, "learning_rate": 2.8835729499893915e-06, "loss": 0.8208, "step": 8101 }, { "epoch": 0.65, "grad_norm": 1.5643302481905936, "learning_rate": 2.8823959033381086e-06, "loss": 0.8299, "step": 8102 }, { "epoch": 0.65, "grad_norm": 0.8029753036731989, "learning_rate": 2.881218999672297e-06, "loss": 1.0563, "step": 8103 }, { "epoch": 0.65, "grad_norm": 1.639315037754315, "learning_rate": 2.880042239071421e-06, "loss": 0.792, "step": 8104 }, { "epoch": 0.65, "grad_norm": 1.4848823908975823, "learning_rate": 2.8788656216149423e-06, "loss": 0.8082, "step": 8105 }, { "epoch": 0.65, "grad_norm": 1.5934031461124696, "learning_rate": 2.8776891473823076e-06, "loss": 0.7657, "step": 8106 }, { "epoch": 0.65, "grad_norm": 1.4339192201132445, "learning_rate": 2.8765128164529545e-06, "loss": 0.7581, "step": 8107 }, { "epoch": 0.65, "grad_norm": 1.4744087445007026, "learning_rate": 2.875336628906312e-06, "loss": 0.7651, "step": 8108 }, { "epoch": 0.65, "grad_norm": 1.5704843448597468, "learning_rate": 2.874160584821798e-06, "loss": 0.697, "step": 8109 }, { "epoch": 0.65, "grad_norm": 1.561811262162164, "learning_rate": 2.8729846842788223e-06, "loss": 0.8051, "step": 8110 }, { "epoch": 0.65, "grad_norm": 1.4279309363445127, "learning_rate": 2.871808927356783e-06, "loss": 0.7676, "step": 8111 }, { "epoch": 0.65, "grad_norm": 1.3665414775725098, "learning_rate": 2.870633314135073e-06, "loss": 0.7168, "step": 8112 }, { "epoch": 0.65, "grad_norm": 1.5455771227612614, "learning_rate": 2.869457844693071e-06, "loss": 0.817, "step": 8113 }, { "epoch": 0.65, "grad_norm": 1.4362621043623574, "learning_rate": 2.8682825191101447e-06, "loss": 0.7459, "step": 8114 }, { "epoch": 0.65, "grad_norm": 1.3286159645409736, "learning_rate": 2.86710733746566e-06, "loss": 0.7069, "step": 8115 }, { "epoch": 0.65, "grad_norm": 1.5059779276284988, "learning_rate": 2.865932299838964e-06, "loss": 0.745, "step": 8116 }, { "epoch": 0.65, "grad_norm": 1.4986009951954842, "learning_rate": 2.8647574063093995e-06, "loss": 0.7604, "step": 8117 }, { "epoch": 0.65, "grad_norm": 1.547989284907312, "learning_rate": 2.8635826569562974e-06, "loss": 0.7855, "step": 8118 }, { "epoch": 0.65, "grad_norm": 1.531523906634903, "learning_rate": 2.862408051858979e-06, "loss": 0.745, "step": 8119 }, { "epoch": 0.65, "grad_norm": 1.5414222354532996, "learning_rate": 2.861233591096758e-06, "loss": 0.8215, "step": 8120 }, { "epoch": 0.65, "grad_norm": 1.4633093822555236, "learning_rate": 2.860059274748933e-06, "loss": 0.7695, "step": 8121 }, { "epoch": 0.65, "grad_norm": 1.5445081819621311, "learning_rate": 2.8588851028948008e-06, "loss": 0.7854, "step": 8122 }, { "epoch": 0.65, "grad_norm": 1.5828666202552446, "learning_rate": 2.857711075613642e-06, "loss": 0.8145, "step": 8123 }, { "epoch": 0.65, "grad_norm": 1.4269386679196387, "learning_rate": 2.8565371929847286e-06, "loss": 0.7455, "step": 8124 }, { "epoch": 0.65, "grad_norm": 1.6206372828326947, "learning_rate": 2.8553634550873273e-06, "loss": 0.8202, "step": 8125 }, { "epoch": 0.65, "grad_norm": 1.5694970214797648, "learning_rate": 2.854189862000689e-06, "loss": 0.7771, "step": 8126 }, { "epoch": 0.65, "grad_norm": 0.7997892857045377, "learning_rate": 2.8530164138040585e-06, "loss": 1.0341, "step": 8127 }, { "epoch": 0.65, "grad_norm": 1.5309724420575004, "learning_rate": 2.851843110576667e-06, "loss": 0.8447, "step": 8128 }, { "epoch": 0.65, "grad_norm": 0.7750066171906114, "learning_rate": 2.850669952397743e-06, "loss": 1.0629, "step": 8129 }, { "epoch": 0.65, "grad_norm": 1.581510627470217, "learning_rate": 2.849496939346498e-06, "loss": 0.7296, "step": 8130 }, { "epoch": 0.65, "grad_norm": 0.7801189952109374, "learning_rate": 2.848324071502137e-06, "loss": 1.0627, "step": 8131 }, { "epoch": 0.65, "grad_norm": 1.5320594118233466, "learning_rate": 2.8471513489438553e-06, "loss": 0.742, "step": 8132 }, { "epoch": 0.65, "grad_norm": 1.6318660681902477, "learning_rate": 2.845978771750837e-06, "loss": 0.7914, "step": 8133 }, { "epoch": 0.65, "grad_norm": 1.3414471829840864, "learning_rate": 2.8448063400022573e-06, "loss": 0.6917, "step": 8134 }, { "epoch": 0.65, "grad_norm": 1.652515575054662, "learning_rate": 2.84363405377728e-06, "loss": 0.7461, "step": 8135 }, { "epoch": 0.65, "grad_norm": 1.4431869706906255, "learning_rate": 2.842461913155064e-06, "loss": 0.716, "step": 8136 }, { "epoch": 0.65, "grad_norm": 1.4804634680711033, "learning_rate": 2.8412899182147536e-06, "loss": 0.7168, "step": 8137 }, { "epoch": 0.65, "grad_norm": 1.5535544693176144, "learning_rate": 2.8401180690354813e-06, "loss": 0.7874, "step": 8138 }, { "epoch": 0.65, "grad_norm": 1.516954098185473, "learning_rate": 2.838946365696379e-06, "loss": 0.8297, "step": 8139 }, { "epoch": 0.65, "grad_norm": 1.6773037317377901, "learning_rate": 2.8377748082765586e-06, "loss": 0.7457, "step": 8140 }, { "epoch": 0.65, "grad_norm": 1.5923397610331045, "learning_rate": 2.8366033968551277e-06, "loss": 0.7579, "step": 8141 }, { "epoch": 0.65, "grad_norm": 1.4632349734107177, "learning_rate": 2.835432131511182e-06, "loss": 0.7724, "step": 8142 }, { "epoch": 0.65, "grad_norm": 1.5635097162607379, "learning_rate": 2.834261012323809e-06, "loss": 0.7651, "step": 8143 }, { "epoch": 0.65, "grad_norm": 1.4533880137645105, "learning_rate": 2.8330900393720846e-06, "loss": 0.6694, "step": 8144 }, { "epoch": 0.65, "grad_norm": 1.6061444499352628, "learning_rate": 2.8319192127350736e-06, "loss": 0.754, "step": 8145 }, { "epoch": 0.65, "grad_norm": 1.5834244901299899, "learning_rate": 2.830748532491837e-06, "loss": 0.7604, "step": 8146 }, { "epoch": 0.65, "grad_norm": 1.4735489759314258, "learning_rate": 2.8295779987214197e-06, "loss": 0.7889, "step": 8147 }, { "epoch": 0.65, "grad_norm": 0.8350176600955049, "learning_rate": 2.828407611502857e-06, "loss": 1.0814, "step": 8148 }, { "epoch": 0.65, "grad_norm": 1.5064134109599048, "learning_rate": 2.8272373709151798e-06, "loss": 0.7453, "step": 8149 }, { "epoch": 0.65, "grad_norm": 1.4548728880486381, "learning_rate": 2.826067277037403e-06, "loss": 0.741, "step": 8150 }, { "epoch": 0.65, "grad_norm": 1.6456121299734578, "learning_rate": 2.824897329948536e-06, "loss": 0.7684, "step": 8151 }, { "epoch": 0.65, "grad_norm": 1.373144213000034, "learning_rate": 2.8237275297275746e-06, "loss": 0.786, "step": 8152 }, { "epoch": 0.65, "grad_norm": 1.6555856073927024, "learning_rate": 2.822557876453506e-06, "loss": 0.7969, "step": 8153 }, { "epoch": 0.65, "grad_norm": 1.7599400883165328, "learning_rate": 2.821388370205309e-06, "loss": 0.8412, "step": 8154 }, { "epoch": 0.65, "grad_norm": 1.4681089543707215, "learning_rate": 2.820219011061949e-06, "loss": 0.6645, "step": 8155 }, { "epoch": 0.65, "grad_norm": 1.5701251199588686, "learning_rate": 2.819049799102388e-06, "loss": 0.777, "step": 8156 }, { "epoch": 0.65, "grad_norm": 1.589331090769787, "learning_rate": 2.8178807344055716e-06, "loss": 0.7236, "step": 8157 }, { "epoch": 0.65, "grad_norm": 1.5769274221748837, "learning_rate": 2.816711817050437e-06, "loss": 0.8081, "step": 8158 }, { "epoch": 0.65, "grad_norm": 1.729006588353903, "learning_rate": 2.8155430471159118e-06, "loss": 0.7298, "step": 8159 }, { "epoch": 0.65, "grad_norm": 0.7797051702835283, "learning_rate": 2.8143744246809167e-06, "loss": 1.0931, "step": 8160 }, { "epoch": 0.65, "grad_norm": 1.3948132420441235, "learning_rate": 2.813205949824358e-06, "loss": 0.6949, "step": 8161 }, { "epoch": 0.65, "grad_norm": 1.550744973865652, "learning_rate": 2.8120376226251343e-06, "loss": 0.7048, "step": 8162 }, { "epoch": 0.65, "grad_norm": 0.748194246072263, "learning_rate": 2.810869443162133e-06, "loss": 1.0665, "step": 8163 }, { "epoch": 0.66, "grad_norm": 1.4629171201849687, "learning_rate": 2.809701411514233e-06, "loss": 0.7515, "step": 8164 }, { "epoch": 0.66, "grad_norm": 1.5566750323722136, "learning_rate": 2.8085335277603002e-06, "loss": 0.7624, "step": 8165 }, { "epoch": 0.66, "grad_norm": 1.5411964772090687, "learning_rate": 2.8073657919791965e-06, "loss": 0.7147, "step": 8166 }, { "epoch": 0.66, "grad_norm": 1.517073434070002, "learning_rate": 2.806198204249768e-06, "loss": 0.7565, "step": 8167 }, { "epoch": 0.66, "grad_norm": 1.5043865856503862, "learning_rate": 2.805030764650854e-06, "loss": 0.6991, "step": 8168 }, { "epoch": 0.66, "grad_norm": 1.5912143755828456, "learning_rate": 2.803863473261279e-06, "loss": 0.7629, "step": 8169 }, { "epoch": 0.66, "grad_norm": 1.6921732943539733, "learning_rate": 2.8026963301598668e-06, "loss": 0.809, "step": 8170 }, { "epoch": 0.66, "grad_norm": 1.4296992674564826, "learning_rate": 2.8015293354254223e-06, "loss": 0.7495, "step": 8171 }, { "epoch": 0.66, "grad_norm": 1.4711277829924374, "learning_rate": 2.8003624891367426e-06, "loss": 0.7512, "step": 8172 }, { "epoch": 0.66, "grad_norm": 1.5898781228872214, "learning_rate": 2.799195791372619e-06, "loss": 0.6506, "step": 8173 }, { "epoch": 0.66, "grad_norm": 1.4785180225336976, "learning_rate": 2.7980292422118282e-06, "loss": 0.7595, "step": 8174 }, { "epoch": 0.66, "grad_norm": 1.528239024334879, "learning_rate": 2.79686284173314e-06, "loss": 0.7773, "step": 8175 }, { "epoch": 0.66, "grad_norm": 1.567854146231779, "learning_rate": 2.7956965900153066e-06, "loss": 0.8329, "step": 8176 }, { "epoch": 0.66, "grad_norm": 1.3674798886336972, "learning_rate": 2.794530487137082e-06, "loss": 0.7583, "step": 8177 }, { "epoch": 0.66, "grad_norm": 1.6200146557528319, "learning_rate": 2.793364533177202e-06, "loss": 0.8351, "step": 8178 }, { "epoch": 0.66, "grad_norm": 1.4492268225483043, "learning_rate": 2.7921987282143927e-06, "loss": 0.7118, "step": 8179 }, { "epoch": 0.66, "grad_norm": 1.5704595514648871, "learning_rate": 2.791033072327375e-06, "loss": 0.7619, "step": 8180 }, { "epoch": 0.66, "grad_norm": 1.6974430393766118, "learning_rate": 2.789867565594856e-06, "loss": 0.7995, "step": 8181 }, { "epoch": 0.66, "grad_norm": 1.5076286464717512, "learning_rate": 2.78870220809553e-06, "loss": 0.8573, "step": 8182 }, { "epoch": 0.66, "grad_norm": 1.4714622644339659, "learning_rate": 2.7875369999080897e-06, "loss": 0.6753, "step": 8183 }, { "epoch": 0.66, "grad_norm": 1.5491104866815282, "learning_rate": 2.7863719411112106e-06, "loss": 0.8074, "step": 8184 }, { "epoch": 0.66, "grad_norm": 1.492394774902329, "learning_rate": 2.7852070317835595e-06, "loss": 0.7583, "step": 8185 }, { "epoch": 0.66, "grad_norm": 0.8194717418262882, "learning_rate": 2.7840422720037943e-06, "loss": 1.0918, "step": 8186 }, { "epoch": 0.66, "grad_norm": 1.8445097018164702, "learning_rate": 2.7828776618505615e-06, "loss": 0.7856, "step": 8187 }, { "epoch": 0.66, "grad_norm": 1.4761793697113752, "learning_rate": 2.7817132014024994e-06, "loss": 0.6992, "step": 8188 }, { "epoch": 0.66, "grad_norm": 0.8444929918028405, "learning_rate": 2.7805488907382316e-06, "loss": 1.0597, "step": 8189 }, { "epoch": 0.66, "grad_norm": 1.5522741225368732, "learning_rate": 2.779384729936381e-06, "loss": 0.7138, "step": 8190 }, { "epoch": 0.66, "grad_norm": 0.7891231845127261, "learning_rate": 2.7782207190755496e-06, "loss": 1.05, "step": 8191 }, { "epoch": 0.66, "grad_norm": 1.5305681099269213, "learning_rate": 2.7770568582343364e-06, "loss": 0.7289, "step": 8192 }, { "epoch": 0.66, "grad_norm": 1.6186652438868623, "learning_rate": 2.7758931474913255e-06, "loss": 0.8066, "step": 8193 }, { "epoch": 0.66, "grad_norm": 1.5751068155615215, "learning_rate": 2.7747295869250966e-06, "loss": 0.7327, "step": 8194 }, { "epoch": 0.66, "grad_norm": 0.7876555822266667, "learning_rate": 2.7735661766142142e-06, "loss": 1.0398, "step": 8195 }, { "epoch": 0.66, "grad_norm": 1.515210614293708, "learning_rate": 2.772402916637235e-06, "loss": 0.7409, "step": 8196 }, { "epoch": 0.66, "grad_norm": 1.4587790196551986, "learning_rate": 2.771239807072705e-06, "loss": 0.7372, "step": 8197 }, { "epoch": 0.66, "grad_norm": 1.6144380738845585, "learning_rate": 2.770076847999159e-06, "loss": 0.7284, "step": 8198 }, { "epoch": 0.66, "grad_norm": 1.6076516536520802, "learning_rate": 2.768914039495123e-06, "loss": 0.7184, "step": 8199 }, { "epoch": 0.66, "grad_norm": 1.5285417852154481, "learning_rate": 2.767751381639111e-06, "loss": 0.7458, "step": 8200 }, { "epoch": 0.66, "grad_norm": 1.4840940735577408, "learning_rate": 2.7665888745096326e-06, "loss": 0.7436, "step": 8201 }, { "epoch": 0.66, "grad_norm": 0.7960642964042137, "learning_rate": 2.7654265181851797e-06, "loss": 1.0708, "step": 8202 }, { "epoch": 0.66, "grad_norm": 1.5564767371495245, "learning_rate": 2.764264312744236e-06, "loss": 0.6758, "step": 8203 }, { "epoch": 0.66, "grad_norm": 1.683058950135786, "learning_rate": 2.7631022582652808e-06, "loss": 0.7353, "step": 8204 }, { "epoch": 0.66, "grad_norm": 1.5262474585942547, "learning_rate": 2.7619403548267756e-06, "loss": 0.8451, "step": 8205 }, { "epoch": 0.66, "grad_norm": 1.5811241553684552, "learning_rate": 2.7607786025071754e-06, "loss": 0.7695, "step": 8206 }, { "epoch": 0.66, "grad_norm": 1.4453932074438092, "learning_rate": 2.7596170013849243e-06, "loss": 0.7489, "step": 8207 }, { "epoch": 0.66, "grad_norm": 1.4866377333543155, "learning_rate": 2.758455551538456e-06, "loss": 0.7872, "step": 8208 }, { "epoch": 0.66, "grad_norm": 1.504072959743171, "learning_rate": 2.7572942530461943e-06, "loss": 0.7345, "step": 8209 }, { "epoch": 0.66, "grad_norm": 1.5292456581310776, "learning_rate": 2.7561331059865514e-06, "loss": 0.7003, "step": 8210 }, { "epoch": 0.66, "grad_norm": 1.5616341976701913, "learning_rate": 2.7549721104379335e-06, "loss": 0.779, "step": 8211 }, { "epoch": 0.66, "grad_norm": 1.5180479774073772, "learning_rate": 2.753811266478733e-06, "loss": 0.72, "step": 8212 }, { "epoch": 0.66, "grad_norm": 1.4798054285219076, "learning_rate": 2.75265057418733e-06, "loss": 0.7105, "step": 8213 }, { "epoch": 0.66, "grad_norm": 0.7979725167265882, "learning_rate": 2.7514900336421e-06, "loss": 1.0623, "step": 8214 }, { "epoch": 0.66, "grad_norm": 1.601102345497246, "learning_rate": 2.7503296449214055e-06, "loss": 0.8346, "step": 8215 }, { "epoch": 0.66, "grad_norm": 0.7980074352859905, "learning_rate": 2.7491694081035975e-06, "loss": 1.0591, "step": 8216 }, { "epoch": 0.66, "grad_norm": 1.457221782327802, "learning_rate": 2.748009323267016e-06, "loss": 0.6734, "step": 8217 }, { "epoch": 0.66, "grad_norm": 0.7886086346592107, "learning_rate": 2.7468493904899958e-06, "loss": 1.0976, "step": 8218 }, { "epoch": 0.66, "grad_norm": 1.4469181252921868, "learning_rate": 2.745689609850859e-06, "loss": 0.8045, "step": 8219 }, { "epoch": 0.66, "grad_norm": 1.6908345128966462, "learning_rate": 2.744529981427911e-06, "loss": 0.8419, "step": 8220 }, { "epoch": 0.66, "grad_norm": 1.5720855433326222, "learning_rate": 2.7433705052994574e-06, "loss": 0.771, "step": 8221 }, { "epoch": 0.66, "grad_norm": 1.555070697716995, "learning_rate": 2.742211181543788e-06, "loss": 0.7575, "step": 8222 }, { "epoch": 0.66, "grad_norm": 1.423684606578331, "learning_rate": 2.74105201023918e-06, "loss": 0.6689, "step": 8223 }, { "epoch": 0.66, "grad_norm": 1.5883049216829839, "learning_rate": 2.7398929914639084e-06, "loss": 0.8218, "step": 8224 }, { "epoch": 0.66, "grad_norm": 1.5323578029369596, "learning_rate": 2.7387341252962296e-06, "loss": 0.7153, "step": 8225 }, { "epoch": 0.66, "grad_norm": 1.4825194585710566, "learning_rate": 2.737575411814393e-06, "loss": 0.7379, "step": 8226 }, { "epoch": 0.66, "grad_norm": 0.7805187715625739, "learning_rate": 2.7364168510966367e-06, "loss": 1.0679, "step": 8227 }, { "epoch": 0.66, "grad_norm": 1.7444426229544738, "learning_rate": 2.735258443221192e-06, "loss": 0.7455, "step": 8228 }, { "epoch": 0.66, "grad_norm": 1.5966553775158707, "learning_rate": 2.734100188266276e-06, "loss": 0.791, "step": 8229 }, { "epoch": 0.66, "grad_norm": 1.8784727080919141, "learning_rate": 2.7329420863100963e-06, "loss": 0.8027, "step": 8230 }, { "epoch": 0.66, "grad_norm": 1.544888434203259, "learning_rate": 2.731784137430852e-06, "loss": 0.7925, "step": 8231 }, { "epoch": 0.66, "grad_norm": 1.5870905087155576, "learning_rate": 2.730626341706728e-06, "loss": 0.8558, "step": 8232 }, { "epoch": 0.66, "grad_norm": 0.7658922329321347, "learning_rate": 2.729468699215903e-06, "loss": 1.0666, "step": 8233 }, { "epoch": 0.66, "grad_norm": 1.5184427447612903, "learning_rate": 2.728311210036542e-06, "loss": 0.7758, "step": 8234 }, { "epoch": 0.66, "grad_norm": 1.563971372833978, "learning_rate": 2.727153874246804e-06, "loss": 0.696, "step": 8235 }, { "epoch": 0.66, "grad_norm": 1.5323333595400572, "learning_rate": 2.7259966919248336e-06, "loss": 0.756, "step": 8236 }, { "epoch": 0.66, "grad_norm": 1.5050093458549125, "learning_rate": 2.724839663148764e-06, "loss": 0.6553, "step": 8237 }, { "epoch": 0.66, "grad_norm": 1.5745528401866362, "learning_rate": 2.7236827879967255e-06, "loss": 0.7865, "step": 8238 }, { "epoch": 0.66, "grad_norm": 1.4632618788397818, "learning_rate": 2.72252606654683e-06, "loss": 0.7203, "step": 8239 }, { "epoch": 0.66, "grad_norm": 1.6619870972870103, "learning_rate": 2.7213694988771822e-06, "loss": 0.8399, "step": 8240 }, { "epoch": 0.66, "grad_norm": 1.5055542259564336, "learning_rate": 2.7202130850658765e-06, "loss": 0.7986, "step": 8241 }, { "epoch": 0.66, "grad_norm": 1.53580668132139, "learning_rate": 2.7190568251909965e-06, "loss": 0.7862, "step": 8242 }, { "epoch": 0.66, "grad_norm": 1.5916724700827931, "learning_rate": 2.717900719330615e-06, "loss": 0.7714, "step": 8243 }, { "epoch": 0.66, "grad_norm": 1.5399004486392494, "learning_rate": 2.7167447675627933e-06, "loss": 0.8145, "step": 8244 }, { "epoch": 0.66, "grad_norm": 1.5192641179967277, "learning_rate": 2.715588969965588e-06, "loss": 0.6827, "step": 8245 }, { "epoch": 0.66, "grad_norm": 1.5963066457127921, "learning_rate": 2.7144333266170387e-06, "loss": 0.7688, "step": 8246 }, { "epoch": 0.66, "grad_norm": 1.6091906856430578, "learning_rate": 2.7132778375951752e-06, "loss": 0.7697, "step": 8247 }, { "epoch": 0.66, "grad_norm": 1.577973245021502, "learning_rate": 2.712122502978024e-06, "loss": 0.6942, "step": 8248 }, { "epoch": 0.66, "grad_norm": 1.534652578689979, "learning_rate": 2.7109673228435925e-06, "loss": 0.7409, "step": 8249 }, { "epoch": 0.66, "grad_norm": 1.4787204655968955, "learning_rate": 2.7098122972698815e-06, "loss": 0.7318, "step": 8250 }, { "epoch": 0.66, "grad_norm": 1.5265301727902316, "learning_rate": 2.7086574263348808e-06, "loss": 0.7398, "step": 8251 }, { "epoch": 0.66, "grad_norm": 1.4177665504940071, "learning_rate": 2.7075027101165706e-06, "loss": 0.78, "step": 8252 }, { "epoch": 0.66, "grad_norm": 1.4927707994002868, "learning_rate": 2.7063481486929187e-06, "loss": 0.7381, "step": 8253 }, { "epoch": 0.66, "grad_norm": 1.4796579131683236, "learning_rate": 2.7051937421418834e-06, "loss": 0.7176, "step": 8254 }, { "epoch": 0.66, "grad_norm": 1.5919576528685537, "learning_rate": 2.7040394905414156e-06, "loss": 0.7548, "step": 8255 }, { "epoch": 0.66, "grad_norm": 1.6342513141230028, "learning_rate": 2.7028853939694523e-06, "loss": 0.7374, "step": 8256 }, { "epoch": 0.66, "grad_norm": 2.016111651371093, "learning_rate": 2.7017314525039186e-06, "loss": 0.8017, "step": 8257 }, { "epoch": 0.66, "grad_norm": 1.5204705836421444, "learning_rate": 2.7005776662227312e-06, "loss": 0.7902, "step": 8258 }, { "epoch": 0.66, "grad_norm": 1.5035135609806296, "learning_rate": 2.699424035203799e-06, "loss": 0.77, "step": 8259 }, { "epoch": 0.66, "grad_norm": 1.6395677048161992, "learning_rate": 2.6982705595250182e-06, "loss": 0.7349, "step": 8260 }, { "epoch": 0.66, "grad_norm": 1.445221524756905, "learning_rate": 2.6971172392642687e-06, "loss": 0.7155, "step": 8261 }, { "epoch": 0.66, "grad_norm": 1.450805480445973, "learning_rate": 2.695964074499432e-06, "loss": 0.7328, "step": 8262 }, { "epoch": 0.66, "grad_norm": 1.5278579527763616, "learning_rate": 2.6948110653083715e-06, "loss": 0.7313, "step": 8263 }, { "epoch": 0.66, "grad_norm": 1.4622310593685828, "learning_rate": 2.6936582117689347e-06, "loss": 0.7346, "step": 8264 }, { "epoch": 0.66, "grad_norm": 1.5642189316506692, "learning_rate": 2.6925055139589705e-06, "loss": 0.7728, "step": 8265 }, { "epoch": 0.66, "grad_norm": 1.5571979856865035, "learning_rate": 2.6913529719563116e-06, "loss": 0.7956, "step": 8266 }, { "epoch": 0.66, "grad_norm": 1.510178467134776, "learning_rate": 2.6902005858387786e-06, "loss": 0.7957, "step": 8267 }, { "epoch": 0.66, "grad_norm": 1.5836062330763, "learning_rate": 2.6890483556841817e-06, "loss": 0.7863, "step": 8268 }, { "epoch": 0.66, "grad_norm": 1.6909240733681785, "learning_rate": 2.6878962815703264e-06, "loss": 0.8109, "step": 8269 }, { "epoch": 0.66, "grad_norm": 0.7967985927418983, "learning_rate": 2.6867443635750013e-06, "loss": 1.0858, "step": 8270 }, { "epoch": 0.66, "grad_norm": 1.4660835305500082, "learning_rate": 2.6855926017759837e-06, "loss": 0.7128, "step": 8271 }, { "epoch": 0.66, "grad_norm": 1.4920862461130173, "learning_rate": 2.6844409962510476e-06, "loss": 0.6411, "step": 8272 }, { "epoch": 0.66, "grad_norm": 1.5219091682879402, "learning_rate": 2.683289547077951e-06, "loss": 0.7551, "step": 8273 }, { "epoch": 0.66, "grad_norm": 1.579719795126976, "learning_rate": 2.6821382543344414e-06, "loss": 0.7064, "step": 8274 }, { "epoch": 0.66, "grad_norm": 1.6479014830592709, "learning_rate": 2.680987118098257e-06, "loss": 0.7871, "step": 8275 }, { "epoch": 0.66, "grad_norm": 1.6253991839981268, "learning_rate": 2.679836138447125e-06, "loss": 0.7555, "step": 8276 }, { "epoch": 0.66, "grad_norm": 1.4378880832289318, "learning_rate": 2.678685315458763e-06, "loss": 0.7123, "step": 8277 }, { "epoch": 0.66, "grad_norm": 1.6588005805879444, "learning_rate": 2.6775346492108735e-06, "loss": 0.7232, "step": 8278 }, { "epoch": 0.66, "grad_norm": 1.4767391863037627, "learning_rate": 2.6763841397811576e-06, "loss": 0.7918, "step": 8279 }, { "epoch": 0.66, "grad_norm": 1.4449676568939362, "learning_rate": 2.6752337872472977e-06, "loss": 0.7551, "step": 8280 }, { "epoch": 0.66, "grad_norm": 1.5094335025251526, "learning_rate": 2.674083591686967e-06, "loss": 0.7847, "step": 8281 }, { "epoch": 0.66, "grad_norm": 1.5681406851517012, "learning_rate": 2.6729335531778324e-06, "loss": 0.7206, "step": 8282 }, { "epoch": 0.66, "grad_norm": 1.5116764038853105, "learning_rate": 2.6717836717975448e-06, "loss": 0.8246, "step": 8283 }, { "epoch": 0.66, "grad_norm": 0.7928496526314607, "learning_rate": 2.670633947623748e-06, "loss": 1.1011, "step": 8284 }, { "epoch": 0.66, "grad_norm": 1.4471055535164516, "learning_rate": 2.669484380734073e-06, "loss": 0.6978, "step": 8285 }, { "epoch": 0.66, "grad_norm": 1.5198811313458036, "learning_rate": 2.6683349712061422e-06, "loss": 0.7794, "step": 8286 }, { "epoch": 0.66, "grad_norm": 1.5610014070753564, "learning_rate": 2.667185719117566e-06, "loss": 0.764, "step": 8287 }, { "epoch": 0.66, "grad_norm": 1.5042677962157927, "learning_rate": 2.6660366245459422e-06, "loss": 0.7353, "step": 8288 }, { "epoch": 0.67, "grad_norm": 0.7565998165719525, "learning_rate": 2.664887687568864e-06, "loss": 1.0693, "step": 8289 }, { "epoch": 0.67, "grad_norm": 1.5589997059017977, "learning_rate": 2.6637389082639085e-06, "loss": 0.7478, "step": 8290 }, { "epoch": 0.67, "grad_norm": 1.56216535414835, "learning_rate": 2.662590286708645e-06, "loss": 0.7665, "step": 8291 }, { "epoch": 0.67, "grad_norm": 1.5960946587269664, "learning_rate": 2.661441822980628e-06, "loss": 0.7657, "step": 8292 }, { "epoch": 0.67, "grad_norm": 1.4281754044600463, "learning_rate": 2.6602935171574086e-06, "loss": 0.7204, "step": 8293 }, { "epoch": 0.67, "grad_norm": 1.8872982471342359, "learning_rate": 2.6591453693165204e-06, "loss": 0.7338, "step": 8294 }, { "epoch": 0.67, "grad_norm": 1.5625320189673453, "learning_rate": 2.6579973795354897e-06, "loss": 0.7525, "step": 8295 }, { "epoch": 0.67, "grad_norm": 0.784255830926654, "learning_rate": 2.6568495478918312e-06, "loss": 1.0623, "step": 8296 }, { "epoch": 0.67, "grad_norm": 1.6501774851672413, "learning_rate": 2.6557018744630493e-06, "loss": 0.7608, "step": 8297 }, { "epoch": 0.67, "grad_norm": 1.5589551295589876, "learning_rate": 2.6545543593266376e-06, "loss": 0.6947, "step": 8298 }, { "epoch": 0.67, "grad_norm": 1.832161967403847, "learning_rate": 2.6534070025600765e-06, "loss": 0.7137, "step": 8299 }, { "epoch": 0.67, "grad_norm": 1.5870021199922453, "learning_rate": 2.6522598042408423e-06, "loss": 0.8202, "step": 8300 }, { "epoch": 0.67, "grad_norm": 1.490317885698625, "learning_rate": 2.6511127644463945e-06, "loss": 0.7318, "step": 8301 }, { "epoch": 0.67, "grad_norm": 1.4310934536967337, "learning_rate": 2.6499658832541824e-06, "loss": 0.739, "step": 8302 }, { "epoch": 0.67, "grad_norm": 1.5615871903691583, "learning_rate": 2.6488191607416493e-06, "loss": 0.8113, "step": 8303 }, { "epoch": 0.67, "grad_norm": 0.7743913799613471, "learning_rate": 2.6476725969862227e-06, "loss": 1.0903, "step": 8304 }, { "epoch": 0.67, "grad_norm": 0.7677416170653298, "learning_rate": 2.6465261920653195e-06, "loss": 1.0456, "step": 8305 }, { "epoch": 0.67, "grad_norm": 1.4817362529164761, "learning_rate": 2.645379946056351e-06, "loss": 0.7281, "step": 8306 }, { "epoch": 0.67, "grad_norm": 0.7509586091654735, "learning_rate": 2.6442338590367144e-06, "loss": 1.0464, "step": 8307 }, { "epoch": 0.67, "grad_norm": 1.668289510873252, "learning_rate": 2.6430879310837933e-06, "loss": 0.7898, "step": 8308 }, { "epoch": 0.67, "grad_norm": 1.5298336616719217, "learning_rate": 2.641942162274962e-06, "loss": 0.7664, "step": 8309 }, { "epoch": 0.67, "grad_norm": 1.4842557853779859, "learning_rate": 2.64079655268759e-06, "loss": 0.8014, "step": 8310 }, { "epoch": 0.67, "grad_norm": 1.5453234378926828, "learning_rate": 2.639651102399029e-06, "loss": 0.7824, "step": 8311 }, { "epoch": 0.67, "grad_norm": 1.5036011642971612, "learning_rate": 2.6385058114866215e-06, "loss": 0.8458, "step": 8312 }, { "epoch": 0.67, "grad_norm": 0.7660021211901055, "learning_rate": 2.637360680027703e-06, "loss": 1.0263, "step": 8313 }, { "epoch": 0.67, "grad_norm": 1.5411658763867058, "learning_rate": 2.6362157080995942e-06, "loss": 0.8327, "step": 8314 }, { "epoch": 0.67, "grad_norm": 1.673054584840367, "learning_rate": 2.6350708957796057e-06, "loss": 0.6909, "step": 8315 }, { "epoch": 0.67, "grad_norm": 1.4596427912866035, "learning_rate": 2.6339262431450365e-06, "loss": 0.7661, "step": 8316 }, { "epoch": 0.67, "grad_norm": 1.5173635836734376, "learning_rate": 2.632781750273179e-06, "loss": 0.7384, "step": 8317 }, { "epoch": 0.67, "grad_norm": 1.5244123972029278, "learning_rate": 2.6316374172413117e-06, "loss": 0.7634, "step": 8318 }, { "epoch": 0.67, "grad_norm": 0.8069214500790183, "learning_rate": 2.6304932441267006e-06, "loss": 1.0546, "step": 8319 }, { "epoch": 0.67, "grad_norm": 1.4540523948550046, "learning_rate": 2.6293492310066042e-06, "loss": 0.7539, "step": 8320 }, { "epoch": 0.67, "grad_norm": 1.5434795724811048, "learning_rate": 2.6282053779582683e-06, "loss": 0.8189, "step": 8321 }, { "epoch": 0.67, "grad_norm": 1.5260802143096122, "learning_rate": 2.627061685058927e-06, "loss": 0.815, "step": 8322 }, { "epoch": 0.67, "grad_norm": 1.5315500003669564, "learning_rate": 2.625918152385809e-06, "loss": 0.7225, "step": 8323 }, { "epoch": 0.67, "grad_norm": 1.5458235969596263, "learning_rate": 2.624774780016126e-06, "loss": 0.8488, "step": 8324 }, { "epoch": 0.67, "grad_norm": 1.4135513204108237, "learning_rate": 2.623631568027081e-06, "loss": 0.7162, "step": 8325 }, { "epoch": 0.67, "grad_norm": 0.768973723695802, "learning_rate": 2.6224885164958642e-06, "loss": 1.0803, "step": 8326 }, { "epoch": 0.67, "grad_norm": 1.5270364210756986, "learning_rate": 2.6213456254996618e-06, "loss": 0.8194, "step": 8327 }, { "epoch": 0.67, "grad_norm": 1.5215604022027092, "learning_rate": 2.620202895115641e-06, "loss": 0.7598, "step": 8328 }, { "epoch": 0.67, "grad_norm": 1.547442459952091, "learning_rate": 2.6190603254209627e-06, "loss": 0.7657, "step": 8329 }, { "epoch": 0.67, "grad_norm": 1.4856411745327227, "learning_rate": 2.617917916492776e-06, "loss": 0.8341, "step": 8330 }, { "epoch": 0.67, "grad_norm": 1.4964497773145318, "learning_rate": 2.6167756684082185e-06, "loss": 0.7102, "step": 8331 }, { "epoch": 0.67, "grad_norm": 1.4885449564797126, "learning_rate": 2.6156335812444174e-06, "loss": 0.7541, "step": 8332 }, { "epoch": 0.67, "grad_norm": 1.5400550427616044, "learning_rate": 2.614491655078487e-06, "loss": 0.7547, "step": 8333 }, { "epoch": 0.67, "grad_norm": 1.5937521163858108, "learning_rate": 2.6133498899875363e-06, "loss": 0.7975, "step": 8334 }, { "epoch": 0.67, "grad_norm": 1.5271851735152815, "learning_rate": 2.612208286048659e-06, "loss": 0.7509, "step": 8335 }, { "epoch": 0.67, "grad_norm": 1.469116636493714, "learning_rate": 2.6110668433389364e-06, "loss": 0.7568, "step": 8336 }, { "epoch": 0.67, "grad_norm": 1.4835749509932519, "learning_rate": 2.6099255619354446e-06, "loss": 0.6861, "step": 8337 }, { "epoch": 0.67, "grad_norm": 1.4571790735830914, "learning_rate": 2.6087844419152455e-06, "loss": 0.7816, "step": 8338 }, { "epoch": 0.67, "grad_norm": 0.794043100723083, "learning_rate": 2.607643483355387e-06, "loss": 1.1015, "step": 8339 }, { "epoch": 0.67, "grad_norm": 1.363931779289655, "learning_rate": 2.6065026863329112e-06, "loss": 0.8058, "step": 8340 }, { "epoch": 0.67, "grad_norm": 1.5618708578991791, "learning_rate": 2.605362050924848e-06, "loss": 0.6616, "step": 8341 }, { "epoch": 0.67, "grad_norm": 1.5730651032797092, "learning_rate": 2.6042215772082145e-06, "loss": 0.7755, "step": 8342 }, { "epoch": 0.67, "grad_norm": 1.529371549827531, "learning_rate": 2.6030812652600156e-06, "loss": 0.7998, "step": 8343 }, { "epoch": 0.67, "grad_norm": 1.512514379994665, "learning_rate": 2.601941115157254e-06, "loss": 0.8056, "step": 8344 }, { "epoch": 0.67, "grad_norm": 1.5051310189785445, "learning_rate": 2.600801126976911e-06, "loss": 0.699, "step": 8345 }, { "epoch": 0.67, "grad_norm": 1.4986973335703448, "learning_rate": 2.599661300795959e-06, "loss": 0.7923, "step": 8346 }, { "epoch": 0.67, "grad_norm": 1.471068322102205, "learning_rate": 2.598521636691368e-06, "loss": 0.6641, "step": 8347 }, { "epoch": 0.67, "grad_norm": 1.5932328871375554, "learning_rate": 2.5973821347400875e-06, "loss": 0.7619, "step": 8348 }, { "epoch": 0.67, "grad_norm": 1.6251156219109892, "learning_rate": 2.5962427950190584e-06, "loss": 0.7123, "step": 8349 }, { "epoch": 0.67, "grad_norm": 1.6410363656149476, "learning_rate": 2.5951036176052104e-06, "loss": 0.7657, "step": 8350 }, { "epoch": 0.67, "grad_norm": 1.4875845499246276, "learning_rate": 2.593964602575467e-06, "loss": 0.7522, "step": 8351 }, { "epoch": 0.67, "grad_norm": 1.5239718470964292, "learning_rate": 2.592825750006738e-06, "loss": 0.7924, "step": 8352 }, { "epoch": 0.67, "grad_norm": 1.4812332997612203, "learning_rate": 2.591687059975915e-06, "loss": 0.8158, "step": 8353 }, { "epoch": 0.67, "grad_norm": 1.5218891112743624, "learning_rate": 2.59054853255989e-06, "loss": 0.7757, "step": 8354 }, { "epoch": 0.67, "grad_norm": 1.507942353211598, "learning_rate": 2.5894101678355377e-06, "loss": 0.7009, "step": 8355 }, { "epoch": 0.67, "grad_norm": 1.5377710764614008, "learning_rate": 2.5882719658797235e-06, "loss": 0.8184, "step": 8356 }, { "epoch": 0.67, "grad_norm": 1.5660362429775845, "learning_rate": 2.5871339267692984e-06, "loss": 0.7999, "step": 8357 }, { "epoch": 0.67, "grad_norm": 1.4624270774768444, "learning_rate": 2.58599605058111e-06, "loss": 0.6185, "step": 8358 }, { "epoch": 0.67, "grad_norm": 1.5637179607164189, "learning_rate": 2.584858337391988e-06, "loss": 0.8311, "step": 8359 }, { "epoch": 0.67, "grad_norm": 0.7987328072619645, "learning_rate": 2.5837207872787522e-06, "loss": 1.0684, "step": 8360 }, { "epoch": 0.67, "grad_norm": 1.6515606773989207, "learning_rate": 2.582583400318216e-06, "loss": 0.7655, "step": 8361 }, { "epoch": 0.67, "grad_norm": 1.4827644290515394, "learning_rate": 2.5814461765871757e-06, "loss": 0.679, "step": 8362 }, { "epoch": 0.67, "grad_norm": 0.8069585849875317, "learning_rate": 2.5803091161624204e-06, "loss": 1.085, "step": 8363 }, { "epoch": 0.67, "grad_norm": 1.4759057158668927, "learning_rate": 2.579172219120727e-06, "loss": 0.7559, "step": 8364 }, { "epoch": 0.67, "grad_norm": 1.5548702280416802, "learning_rate": 2.57803548553886e-06, "loss": 0.7488, "step": 8365 }, { "epoch": 0.67, "grad_norm": 1.7248796059739278, "learning_rate": 2.5768989154935752e-06, "loss": 0.7772, "step": 8366 }, { "epoch": 0.67, "grad_norm": 0.7501609229173506, "learning_rate": 2.5757625090616147e-06, "loss": 1.09, "step": 8367 }, { "epoch": 0.67, "grad_norm": 1.4836467342786714, "learning_rate": 2.574626266319715e-06, "loss": 0.7626, "step": 8368 }, { "epoch": 0.67, "grad_norm": 1.4901006389981988, "learning_rate": 2.573490187344596e-06, "loss": 0.7397, "step": 8369 }, { "epoch": 0.67, "grad_norm": 1.435554522145888, "learning_rate": 2.5723542722129655e-06, "loss": 0.7614, "step": 8370 }, { "epoch": 0.67, "grad_norm": 0.763561570946773, "learning_rate": 2.5712185210015283e-06, "loss": 1.0629, "step": 8371 }, { "epoch": 0.67, "grad_norm": 1.538196871892569, "learning_rate": 2.57008293378697e-06, "loss": 0.8044, "step": 8372 }, { "epoch": 0.67, "grad_norm": 1.7079994563724208, "learning_rate": 2.5689475106459683e-06, "loss": 0.7993, "step": 8373 }, { "epoch": 0.67, "grad_norm": 0.7770459303180841, "learning_rate": 2.5678122516551896e-06, "loss": 1.0517, "step": 8374 }, { "epoch": 0.67, "grad_norm": 0.8001471786354353, "learning_rate": 2.5666771568912892e-06, "loss": 1.0881, "step": 8375 }, { "epoch": 0.67, "grad_norm": 1.4127718845825796, "learning_rate": 2.565542226430911e-06, "loss": 0.7723, "step": 8376 }, { "epoch": 0.67, "grad_norm": 0.742090361151367, "learning_rate": 2.564407460350687e-06, "loss": 1.0694, "step": 8377 }, { "epoch": 0.67, "grad_norm": 2.14888985342812, "learning_rate": 2.5632728587272427e-06, "loss": 0.7065, "step": 8378 }, { "epoch": 0.67, "grad_norm": 1.5540163599327135, "learning_rate": 2.562138421637186e-06, "loss": 0.7356, "step": 8379 }, { "epoch": 0.67, "grad_norm": 1.4382144428626762, "learning_rate": 2.561004149157116e-06, "loss": 0.7309, "step": 8380 }, { "epoch": 0.67, "grad_norm": 1.425419481907605, "learning_rate": 2.559870041363625e-06, "loss": 0.7611, "step": 8381 }, { "epoch": 0.67, "grad_norm": 1.584008873824714, "learning_rate": 2.558736098333289e-06, "loss": 0.7838, "step": 8382 }, { "epoch": 0.67, "grad_norm": 1.6325081216459987, "learning_rate": 2.5576023201426736e-06, "loss": 0.79, "step": 8383 }, { "epoch": 0.67, "grad_norm": 1.4054487823566333, "learning_rate": 2.5564687068683335e-06, "loss": 0.6888, "step": 8384 }, { "epoch": 0.67, "grad_norm": 1.594617861872597, "learning_rate": 2.5553352585868152e-06, "loss": 0.7637, "step": 8385 }, { "epoch": 0.67, "grad_norm": 1.7350851279410724, "learning_rate": 2.5542019753746496e-06, "loss": 0.8218, "step": 8386 }, { "epoch": 0.67, "grad_norm": 0.7866939086364111, "learning_rate": 2.5530688573083574e-06, "loss": 1.0623, "step": 8387 }, { "epoch": 0.67, "grad_norm": 1.507736134345464, "learning_rate": 2.551935904464453e-06, "loss": 0.6908, "step": 8388 }, { "epoch": 0.67, "grad_norm": 0.7910625718056159, "learning_rate": 2.550803116919435e-06, "loss": 1.0803, "step": 8389 }, { "epoch": 0.67, "grad_norm": 1.4207413857699362, "learning_rate": 2.5496704947497896e-06, "loss": 0.726, "step": 8390 }, { "epoch": 0.67, "grad_norm": 1.564481164980497, "learning_rate": 2.5485380380319945e-06, "loss": 0.6906, "step": 8391 }, { "epoch": 0.67, "grad_norm": 1.4641500491589405, "learning_rate": 2.5474057468425185e-06, "loss": 0.7141, "step": 8392 }, { "epoch": 0.67, "grad_norm": 1.5793549776642106, "learning_rate": 2.5462736212578144e-06, "loss": 0.7888, "step": 8393 }, { "epoch": 0.67, "grad_norm": 1.4445054521993803, "learning_rate": 2.545141661354324e-06, "loss": 0.7213, "step": 8394 }, { "epoch": 0.67, "grad_norm": 1.6413333100615628, "learning_rate": 2.5440098672084845e-06, "loss": 0.7825, "step": 8395 }, { "epoch": 0.67, "grad_norm": 0.7818529195443648, "learning_rate": 2.542878238896716e-06, "loss": 1.078, "step": 8396 }, { "epoch": 0.67, "grad_norm": 1.5310508027526801, "learning_rate": 2.541746776495426e-06, "loss": 0.7304, "step": 8397 }, { "epoch": 0.67, "grad_norm": 1.4288168572009228, "learning_rate": 2.5406154800810125e-06, "loss": 0.7502, "step": 8398 }, { "epoch": 0.67, "grad_norm": 1.6019856997365618, "learning_rate": 2.539484349729868e-06, "loss": 0.7558, "step": 8399 }, { "epoch": 0.67, "grad_norm": 1.4297650128586952, "learning_rate": 2.5383533855183663e-06, "loss": 0.7622, "step": 8400 }, { "epoch": 0.67, "grad_norm": 1.6517568364555275, "learning_rate": 2.537222587522871e-06, "loss": 0.7381, "step": 8401 }, { "epoch": 0.67, "grad_norm": 1.4332846174225153, "learning_rate": 2.5360919558197393e-06, "loss": 0.7178, "step": 8402 }, { "epoch": 0.67, "grad_norm": 1.5532658509861086, "learning_rate": 2.534961490485313e-06, "loss": 0.8291, "step": 8403 }, { "epoch": 0.67, "grad_norm": 1.6270131420271687, "learning_rate": 2.5338311915959224e-06, "loss": 0.7386, "step": 8404 }, { "epoch": 0.67, "grad_norm": 1.6435164707318857, "learning_rate": 2.53270105922789e-06, "loss": 0.8023, "step": 8405 }, { "epoch": 0.67, "grad_norm": 1.5189289764570817, "learning_rate": 2.5315710934575245e-06, "loss": 0.7635, "step": 8406 }, { "epoch": 0.67, "grad_norm": 1.5419323268118992, "learning_rate": 2.5304412943611228e-06, "loss": 0.8071, "step": 8407 }, { "epoch": 0.67, "grad_norm": 1.6015092877218355, "learning_rate": 2.529311662014972e-06, "loss": 0.7202, "step": 8408 }, { "epoch": 0.67, "grad_norm": 1.5206682794901463, "learning_rate": 2.528182196495348e-06, "loss": 0.8411, "step": 8409 }, { "epoch": 0.67, "grad_norm": 0.7842379052895506, "learning_rate": 2.5270528978785134e-06, "loss": 1.0923, "step": 8410 }, { "epoch": 0.67, "grad_norm": 1.4502227496408504, "learning_rate": 2.52592376624072e-06, "loss": 0.7247, "step": 8411 }, { "epoch": 0.67, "grad_norm": 0.7753518784200064, "learning_rate": 2.5247948016582137e-06, "loss": 1.0518, "step": 8412 }, { "epoch": 0.67, "grad_norm": 1.466033938988233, "learning_rate": 2.5236660042072215e-06, "loss": 0.7485, "step": 8413 }, { "epoch": 0.68, "grad_norm": 1.5518280510084257, "learning_rate": 2.5225373739639637e-06, "loss": 0.6924, "step": 8414 }, { "epoch": 0.68, "grad_norm": 1.4484107891099296, "learning_rate": 2.521408911004646e-06, "loss": 0.7769, "step": 8415 }, { "epoch": 0.68, "grad_norm": 1.5310596627334216, "learning_rate": 2.520280615405467e-06, "loss": 0.7522, "step": 8416 }, { "epoch": 0.68, "grad_norm": 0.7393061497137441, "learning_rate": 2.519152487242612e-06, "loss": 1.0715, "step": 8417 }, { "epoch": 0.68, "grad_norm": 1.3623725067960104, "learning_rate": 2.518024526592253e-06, "loss": 0.6117, "step": 8418 }, { "epoch": 0.68, "grad_norm": 1.5127601497908967, "learning_rate": 2.5168967335305542e-06, "loss": 0.7331, "step": 8419 }, { "epoch": 0.68, "grad_norm": 1.5497545074950307, "learning_rate": 2.515769108133666e-06, "loss": 0.7408, "step": 8420 }, { "epoch": 0.68, "grad_norm": 1.4343647107884339, "learning_rate": 2.514641650477726e-06, "loss": 0.7089, "step": 8421 }, { "epoch": 0.68, "grad_norm": 1.549562251320844, "learning_rate": 2.5135143606388667e-06, "loss": 0.7641, "step": 8422 }, { "epoch": 0.68, "grad_norm": 0.7720925996545489, "learning_rate": 2.5123872386932037e-06, "loss": 1.0545, "step": 8423 }, { "epoch": 0.68, "grad_norm": 1.4923482491361153, "learning_rate": 2.511260284716842e-06, "loss": 0.7875, "step": 8424 }, { "epoch": 0.68, "grad_norm": 1.5739825655294877, "learning_rate": 2.510133498785875e-06, "loss": 0.7532, "step": 8425 }, { "epoch": 0.68, "grad_norm": 1.5521360627895644, "learning_rate": 2.50900688097639e-06, "loss": 0.7916, "step": 8426 }, { "epoch": 0.68, "grad_norm": 0.7674188205702864, "learning_rate": 2.5078804313644554e-06, "loss": 1.0683, "step": 8427 }, { "epoch": 0.68, "grad_norm": 1.442193330694279, "learning_rate": 2.5067541500261337e-06, "loss": 0.768, "step": 8428 }, { "epoch": 0.68, "grad_norm": 1.6876083903551493, "learning_rate": 2.5056280370374725e-06, "loss": 0.7229, "step": 8429 }, { "epoch": 0.68, "grad_norm": 0.7419561805872419, "learning_rate": 2.50450209247451e-06, "loss": 1.0312, "step": 8430 }, { "epoch": 0.68, "grad_norm": 1.4555961366230348, "learning_rate": 2.503376316413273e-06, "loss": 0.7458, "step": 8431 }, { "epoch": 0.68, "grad_norm": 1.5293103665401389, "learning_rate": 2.5022507089297733e-06, "loss": 0.7573, "step": 8432 }, { "epoch": 0.68, "grad_norm": 0.8012811685674973, "learning_rate": 2.5011252701000194e-06, "loss": 1.0735, "step": 8433 }, { "epoch": 0.68, "grad_norm": 1.5545441177260568, "learning_rate": 2.5000000000000015e-06, "loss": 0.7967, "step": 8434 }, { "epoch": 0.68, "grad_norm": 1.6674759522104003, "learning_rate": 2.4988748987056976e-06, "loss": 0.7803, "step": 8435 }, { "epoch": 0.68, "grad_norm": 1.6078973580268119, "learning_rate": 2.497749966293082e-06, "loss": 0.8005, "step": 8436 }, { "epoch": 0.68, "grad_norm": 0.7703211429283107, "learning_rate": 2.4966252028381113e-06, "loss": 1.1068, "step": 8437 }, { "epoch": 0.68, "grad_norm": 1.6340983282383685, "learning_rate": 2.495500608416728e-06, "loss": 0.7466, "step": 8438 }, { "epoch": 0.68, "grad_norm": 0.7686987341424951, "learning_rate": 2.494376183104873e-06, "loss": 1.0497, "step": 8439 }, { "epoch": 0.68, "grad_norm": 1.478923287697896, "learning_rate": 2.4932519269784694e-06, "loss": 0.7581, "step": 8440 }, { "epoch": 0.68, "grad_norm": 1.560121133250179, "learning_rate": 2.4921278401134258e-06, "loss": 0.8781, "step": 8441 }, { "epoch": 0.68, "grad_norm": 1.5172347211836246, "learning_rate": 2.4910039225856432e-06, "loss": 0.8111, "step": 8442 }, { "epoch": 0.68, "grad_norm": 1.5686488635766143, "learning_rate": 2.489880174471015e-06, "loss": 0.7695, "step": 8443 }, { "epoch": 0.68, "grad_norm": 1.5871365895248375, "learning_rate": 2.488756595845417e-06, "loss": 0.7528, "step": 8444 }, { "epoch": 0.68, "grad_norm": 0.7711887370630394, "learning_rate": 2.4876331867847138e-06, "loss": 1.0517, "step": 8445 }, { "epoch": 0.68, "grad_norm": 1.2972816748496643, "learning_rate": 2.4865099473647646e-06, "loss": 0.7006, "step": 8446 }, { "epoch": 0.68, "grad_norm": 1.5126279923415573, "learning_rate": 2.4853868776614117e-06, "loss": 0.7117, "step": 8447 }, { "epoch": 0.68, "grad_norm": 1.44772968116204, "learning_rate": 2.484263977750486e-06, "loss": 0.6764, "step": 8448 }, { "epoch": 0.68, "grad_norm": 1.4427981519607018, "learning_rate": 2.4831412477078076e-06, "loss": 0.7628, "step": 8449 }, { "epoch": 0.68, "grad_norm": 1.4914825083414722, "learning_rate": 2.4820186876091893e-06, "loss": 0.8245, "step": 8450 }, { "epoch": 0.68, "grad_norm": 1.4847909841052833, "learning_rate": 2.4808962975304275e-06, "loss": 0.8007, "step": 8451 }, { "epoch": 0.68, "grad_norm": 1.4076458706047503, "learning_rate": 2.479774077547307e-06, "loss": 0.6963, "step": 8452 }, { "epoch": 0.68, "grad_norm": 0.783742448255017, "learning_rate": 2.4786520277356043e-06, "loss": 1.0712, "step": 8453 }, { "epoch": 0.68, "grad_norm": 1.5460566139428884, "learning_rate": 2.4775301481710817e-06, "loss": 0.7643, "step": 8454 }, { "epoch": 0.68, "grad_norm": 0.7464646646832671, "learning_rate": 2.476408438929491e-06, "loss": 1.0744, "step": 8455 }, { "epoch": 0.68, "grad_norm": 1.5901321319181445, "learning_rate": 2.475286900086572e-06, "loss": 0.7829, "step": 8456 }, { "epoch": 0.68, "grad_norm": 1.9767168129808872, "learning_rate": 2.4741655317180565e-06, "loss": 0.7345, "step": 8457 }, { "epoch": 0.68, "grad_norm": 0.7570698920037929, "learning_rate": 2.4730443338996596e-06, "loss": 1.082, "step": 8458 }, { "epoch": 0.68, "grad_norm": 1.6963894154363808, "learning_rate": 2.471923306707086e-06, "loss": 0.7574, "step": 8459 }, { "epoch": 0.68, "grad_norm": 1.5834973729346815, "learning_rate": 2.4708024502160327e-06, "loss": 0.854, "step": 8460 }, { "epoch": 0.68, "grad_norm": 1.6311786190052766, "learning_rate": 2.469681764502182e-06, "loss": 0.7323, "step": 8461 }, { "epoch": 0.68, "grad_norm": 0.7849084836681917, "learning_rate": 2.4685612496412043e-06, "loss": 1.1033, "step": 8462 }, { "epoch": 0.68, "grad_norm": 0.7467702167359213, "learning_rate": 2.4674409057087594e-06, "loss": 1.0447, "step": 8463 }, { "epoch": 0.68, "grad_norm": 1.576550436595146, "learning_rate": 2.4663207327804954e-06, "loss": 0.7911, "step": 8464 }, { "epoch": 0.68, "grad_norm": 1.5310382323759892, "learning_rate": 2.4652007309320497e-06, "loss": 0.7868, "step": 8465 }, { "epoch": 0.68, "grad_norm": 1.5774340045688249, "learning_rate": 2.464080900239045e-06, "loss": 0.7546, "step": 8466 }, { "epoch": 0.68, "grad_norm": 1.4181515965360847, "learning_rate": 2.4629612407770976e-06, "loss": 0.7908, "step": 8467 }, { "epoch": 0.68, "grad_norm": 1.7262651524596806, "learning_rate": 2.461841752621809e-06, "loss": 0.7395, "step": 8468 }, { "epoch": 0.68, "grad_norm": 1.535478838648945, "learning_rate": 2.4607224358487674e-06, "loss": 0.6548, "step": 8469 }, { "epoch": 0.68, "grad_norm": 1.4550847672524254, "learning_rate": 2.4596032905335554e-06, "loss": 0.7865, "step": 8470 }, { "epoch": 0.68, "grad_norm": 1.5443102339033274, "learning_rate": 2.4584843167517376e-06, "loss": 0.6896, "step": 8471 }, { "epoch": 0.68, "grad_norm": 1.5877231578804025, "learning_rate": 2.4573655145788704e-06, "loss": 0.8777, "step": 8472 }, { "epoch": 0.68, "grad_norm": 1.5326015925966847, "learning_rate": 2.456246884090498e-06, "loss": 0.7193, "step": 8473 }, { "epoch": 0.68, "grad_norm": 1.5172982366004646, "learning_rate": 2.455128425362153e-06, "loss": 0.7708, "step": 8474 }, { "epoch": 0.68, "grad_norm": 1.4988523396961726, "learning_rate": 2.4540101384693556e-06, "loss": 0.7694, "step": 8475 }, { "epoch": 0.68, "grad_norm": 1.5581546277659775, "learning_rate": 2.452892023487613e-06, "loss": 0.7658, "step": 8476 }, { "epoch": 0.68, "grad_norm": 1.5210484881795254, "learning_rate": 2.4517740804924272e-06, "loss": 0.7288, "step": 8477 }, { "epoch": 0.68, "grad_norm": 1.4332585539019278, "learning_rate": 2.4506563095592826e-06, "loss": 0.7656, "step": 8478 }, { "epoch": 0.68, "grad_norm": 1.4626670396198622, "learning_rate": 2.449538710763652e-06, "loss": 0.7722, "step": 8479 }, { "epoch": 0.68, "grad_norm": 1.5849148088265514, "learning_rate": 2.448421284181001e-06, "loss": 0.7434, "step": 8480 }, { "epoch": 0.68, "grad_norm": 1.5725567050276974, "learning_rate": 2.4473040298867795e-06, "loss": 0.7442, "step": 8481 }, { "epoch": 0.68, "grad_norm": 1.595503875591317, "learning_rate": 2.446186947956427e-06, "loss": 0.7882, "step": 8482 }, { "epoch": 0.68, "grad_norm": 1.5985716994997807, "learning_rate": 2.4450700384653697e-06, "loss": 0.6912, "step": 8483 }, { "epoch": 0.68, "grad_norm": 0.7577981979995358, "learning_rate": 2.4439533014890295e-06, "loss": 1.0222, "step": 8484 }, { "epoch": 0.68, "grad_norm": 1.4589461705886189, "learning_rate": 2.442836737102805e-06, "loss": 0.757, "step": 8485 }, { "epoch": 0.68, "grad_norm": 1.507856684852319, "learning_rate": 2.4417203453820892e-06, "loss": 0.7563, "step": 8486 }, { "epoch": 0.68, "grad_norm": 1.4954645062697478, "learning_rate": 2.4406041264022677e-06, "loss": 0.7851, "step": 8487 }, { "epoch": 0.68, "grad_norm": 0.8032693927492929, "learning_rate": 2.4394880802387083e-06, "loss": 1.076, "step": 8488 }, { "epoch": 0.68, "grad_norm": 0.80698150158232, "learning_rate": 2.4383722069667683e-06, "loss": 1.0789, "step": 8489 }, { "epoch": 0.68, "grad_norm": 1.5467302227353021, "learning_rate": 2.4372565066617927e-06, "loss": 0.7454, "step": 8490 }, { "epoch": 0.68, "grad_norm": 0.7707819615026256, "learning_rate": 2.4361409793991193e-06, "loss": 1.0569, "step": 8491 }, { "epoch": 0.68, "grad_norm": 1.4413195392362863, "learning_rate": 2.4350256252540697e-06, "loss": 0.6664, "step": 8492 }, { "epoch": 0.68, "grad_norm": 1.5010233227479461, "learning_rate": 2.4339104443019536e-06, "loss": 0.7475, "step": 8493 }, { "epoch": 0.68, "grad_norm": 1.5756744878684474, "learning_rate": 2.4327954366180738e-06, "loss": 0.7606, "step": 8494 }, { "epoch": 0.68, "grad_norm": 1.5741863143146801, "learning_rate": 2.4316806022777164e-06, "loss": 0.7262, "step": 8495 }, { "epoch": 0.68, "grad_norm": 1.5548658126181871, "learning_rate": 2.430565941356157e-06, "loss": 0.8298, "step": 8496 }, { "epoch": 0.68, "grad_norm": 0.807816477028093, "learning_rate": 2.4294514539286613e-06, "loss": 1.0734, "step": 8497 }, { "epoch": 0.68, "grad_norm": 0.8116416036386969, "learning_rate": 2.428337140070481e-06, "loss": 1.0402, "step": 8498 }, { "epoch": 0.68, "grad_norm": 1.5745969821687225, "learning_rate": 2.4272229998568576e-06, "loss": 0.7317, "step": 8499 }, { "epoch": 0.68, "grad_norm": 1.5726699494942498, "learning_rate": 2.4261090333630184e-06, "loss": 0.7482, "step": 8500 }, { "epoch": 0.68, "grad_norm": 1.451988917359058, "learning_rate": 2.424995240664184e-06, "loss": 0.7034, "step": 8501 }, { "epoch": 0.68, "grad_norm": 1.5500404411131714, "learning_rate": 2.42388162183556e-06, "loss": 0.8019, "step": 8502 }, { "epoch": 0.68, "grad_norm": 0.7518438921317464, "learning_rate": 2.4227681769523374e-06, "loss": 1.0785, "step": 8503 }, { "epoch": 0.68, "grad_norm": 2.333959303166974, "learning_rate": 2.4216549060897026e-06, "loss": 0.7951, "step": 8504 }, { "epoch": 0.68, "grad_norm": 1.5058553984262726, "learning_rate": 2.420541809322824e-06, "loss": 0.6886, "step": 8505 }, { "epoch": 0.68, "grad_norm": 1.492355377308934, "learning_rate": 2.419428886726861e-06, "loss": 0.6907, "step": 8506 }, { "epoch": 0.68, "grad_norm": 1.4729260067494232, "learning_rate": 2.4183161383769602e-06, "loss": 0.8364, "step": 8507 }, { "epoch": 0.68, "grad_norm": 1.5556655955544163, "learning_rate": 2.4172035643482573e-06, "loss": 0.8206, "step": 8508 }, { "epoch": 0.68, "grad_norm": 1.520560836781945, "learning_rate": 2.416091164715876e-06, "loss": 0.7718, "step": 8509 }, { "epoch": 0.68, "grad_norm": 1.5774677017767569, "learning_rate": 2.414978939554925e-06, "loss": 0.7269, "step": 8510 }, { "epoch": 0.68, "grad_norm": 1.5159135222432951, "learning_rate": 2.4138668889405094e-06, "loss": 0.745, "step": 8511 }, { "epoch": 0.68, "grad_norm": 1.561068160345222, "learning_rate": 2.4127550129477145e-06, "loss": 0.7685, "step": 8512 }, { "epoch": 0.68, "grad_norm": 0.7624393027756743, "learning_rate": 2.4116433116516182e-06, "loss": 1.0559, "step": 8513 }, { "epoch": 0.68, "grad_norm": 1.5567468937949895, "learning_rate": 2.4105317851272816e-06, "loss": 0.7871, "step": 8514 }, { "epoch": 0.68, "grad_norm": 1.521450805729144, "learning_rate": 2.409420433449762e-06, "loss": 0.7392, "step": 8515 }, { "epoch": 0.68, "grad_norm": 1.5159581843145464, "learning_rate": 2.408309256694098e-06, "loss": 0.8021, "step": 8516 }, { "epoch": 0.68, "grad_norm": 1.5711501502926193, "learning_rate": 2.4071982549353203e-06, "loss": 0.7509, "step": 8517 }, { "epoch": 0.68, "grad_norm": 1.5086312513511886, "learning_rate": 2.4060874282484444e-06, "loss": 0.7463, "step": 8518 }, { "epoch": 0.68, "grad_norm": 1.6272051682731492, "learning_rate": 2.404976776708477e-06, "loss": 0.7636, "step": 8519 }, { "epoch": 0.68, "grad_norm": 1.4469203522673066, "learning_rate": 2.4038663003904095e-06, "loss": 0.8105, "step": 8520 }, { "epoch": 0.68, "grad_norm": 1.6216967759518894, "learning_rate": 2.4027559993692274e-06, "loss": 0.7231, "step": 8521 }, { "epoch": 0.68, "grad_norm": 0.7651362467872992, "learning_rate": 2.4016458737198995e-06, "loss": 1.0808, "step": 8522 }, { "epoch": 0.68, "grad_norm": 1.5838897908715523, "learning_rate": 2.4005359235173835e-06, "loss": 0.9136, "step": 8523 }, { "epoch": 0.68, "grad_norm": 1.4381843956332023, "learning_rate": 2.399426148836625e-06, "loss": 0.7306, "step": 8524 }, { "epoch": 0.68, "grad_norm": 1.510335055545418, "learning_rate": 2.39831654975256e-06, "loss": 0.6853, "step": 8525 }, { "epoch": 0.68, "grad_norm": 1.9490788388663494, "learning_rate": 2.397207126340112e-06, "loss": 0.7745, "step": 8526 }, { "epoch": 0.68, "grad_norm": 0.7754369919880774, "learning_rate": 2.3960978786741878e-06, "loss": 1.0842, "step": 8527 }, { "epoch": 0.68, "grad_norm": 1.5257053994226537, "learning_rate": 2.3949888068296927e-06, "loss": 0.7792, "step": 8528 }, { "epoch": 0.68, "grad_norm": 1.4990816262870552, "learning_rate": 2.3938799108815087e-06, "loss": 0.7759, "step": 8529 }, { "epoch": 0.68, "grad_norm": 1.5531678195467715, "learning_rate": 2.392771190904512e-06, "loss": 0.7446, "step": 8530 }, { "epoch": 0.68, "grad_norm": 1.4608258718285658, "learning_rate": 2.391662646973564e-06, "loss": 0.816, "step": 8531 }, { "epoch": 0.68, "grad_norm": 0.7706627486168927, "learning_rate": 2.3905542791635213e-06, "loss": 1.0733, "step": 8532 }, { "epoch": 0.68, "grad_norm": 1.510353106434897, "learning_rate": 2.38944608754922e-06, "loss": 0.7105, "step": 8533 }, { "epoch": 0.68, "grad_norm": 0.7805814581971641, "learning_rate": 2.3883380722054865e-06, "loss": 1.0722, "step": 8534 }, { "epoch": 0.68, "grad_norm": 0.7294263125134057, "learning_rate": 2.3872302332071403e-06, "loss": 1.0666, "step": 8535 }, { "epoch": 0.68, "grad_norm": 1.4851676502927478, "learning_rate": 2.3861225706289824e-06, "loss": 0.7353, "step": 8536 }, { "epoch": 0.68, "grad_norm": 1.523344766286888, "learning_rate": 2.385015084545805e-06, "loss": 0.7297, "step": 8537 }, { "epoch": 0.69, "grad_norm": 1.518049648251476, "learning_rate": 2.383907775032389e-06, "loss": 0.6912, "step": 8538 }, { "epoch": 0.69, "grad_norm": 1.484549568057692, "learning_rate": 2.382800642163503e-06, "loss": 0.7738, "step": 8539 }, { "epoch": 0.69, "grad_norm": 1.5126950228592801, "learning_rate": 2.381693686013902e-06, "loss": 0.7309, "step": 8540 }, { "epoch": 0.69, "grad_norm": 1.4383117943177546, "learning_rate": 2.3805869066583304e-06, "loss": 0.8075, "step": 8541 }, { "epoch": 0.69, "grad_norm": 1.5057406824823547, "learning_rate": 2.3794803041715207e-06, "loss": 0.791, "step": 8542 }, { "epoch": 0.69, "grad_norm": 0.7653343647091818, "learning_rate": 2.378373878628193e-06, "loss": 1.064, "step": 8543 }, { "epoch": 0.69, "grad_norm": 1.451632365771509, "learning_rate": 2.377267630103054e-06, "loss": 0.7748, "step": 8544 }, { "epoch": 0.69, "grad_norm": 0.7599709411696033, "learning_rate": 2.376161558670803e-06, "loss": 1.0372, "step": 8545 }, { "epoch": 0.69, "grad_norm": 0.7783147970953287, "learning_rate": 2.375055664406124e-06, "loss": 1.0484, "step": 8546 }, { "epoch": 0.69, "grad_norm": 0.7441896809449883, "learning_rate": 2.3739499473836893e-06, "loss": 1.0387, "step": 8547 }, { "epoch": 0.69, "grad_norm": 1.645004072926352, "learning_rate": 2.372844407678156e-06, "loss": 0.8536, "step": 8548 }, { "epoch": 0.69, "grad_norm": 1.5035448161190417, "learning_rate": 2.371739045364178e-06, "loss": 0.7861, "step": 8549 }, { "epoch": 0.69, "grad_norm": 1.6414008265281586, "learning_rate": 2.3706338605163896e-06, "loss": 0.769, "step": 8550 }, { "epoch": 0.69, "grad_norm": 1.4128759869314864, "learning_rate": 2.3695288532094152e-06, "loss": 0.6503, "step": 8551 }, { "epoch": 0.69, "grad_norm": 1.5156163767241455, "learning_rate": 2.368424023517868e-06, "loss": 0.7412, "step": 8552 }, { "epoch": 0.69, "grad_norm": 1.4267419676716042, "learning_rate": 2.3673193715163477e-06, "loss": 0.6289, "step": 8553 }, { "epoch": 0.69, "grad_norm": 0.7858884829867728, "learning_rate": 2.3662148972794434e-06, "loss": 1.0688, "step": 8554 }, { "epoch": 0.69, "grad_norm": 1.4979514006745351, "learning_rate": 2.3651106008817303e-06, "loss": 0.7386, "step": 8555 }, { "epoch": 0.69, "grad_norm": 1.4726667690896118, "learning_rate": 2.364006482397776e-06, "loss": 0.7402, "step": 8556 }, { "epoch": 0.69, "grad_norm": 1.4044393374901258, "learning_rate": 2.3629025419021317e-06, "loss": 0.747, "step": 8557 }, { "epoch": 0.69, "grad_norm": 0.7798921104018887, "learning_rate": 2.3617987794693358e-06, "loss": 1.0577, "step": 8558 }, { "epoch": 0.69, "grad_norm": 1.552262686278092, "learning_rate": 2.360695195173921e-06, "loss": 0.6995, "step": 8559 }, { "epoch": 0.69, "grad_norm": 1.663446038552533, "learning_rate": 2.3595917890904017e-06, "loss": 0.771, "step": 8560 }, { "epoch": 0.69, "grad_norm": 1.577542980473154, "learning_rate": 2.3584885612932825e-06, "loss": 0.7385, "step": 8561 }, { "epoch": 0.69, "grad_norm": 1.4474310613301555, "learning_rate": 2.357385511857056e-06, "loss": 0.6195, "step": 8562 }, { "epoch": 0.69, "grad_norm": 1.585787914130582, "learning_rate": 2.3562826408562016e-06, "loss": 0.8609, "step": 8563 }, { "epoch": 0.69, "grad_norm": 0.7836028551372024, "learning_rate": 2.3551799483651894e-06, "loss": 1.0607, "step": 8564 }, { "epoch": 0.69, "grad_norm": 1.46909051260179, "learning_rate": 2.354077434458473e-06, "loss": 0.8553, "step": 8565 }, { "epoch": 0.69, "grad_norm": 1.4725081174800165, "learning_rate": 2.3529750992105e-06, "loss": 0.7756, "step": 8566 }, { "epoch": 0.69, "grad_norm": 1.573305977910874, "learning_rate": 2.351872942695701e-06, "loss": 0.7804, "step": 8567 }, { "epoch": 0.69, "grad_norm": 1.5376476203816676, "learning_rate": 2.3507709649884948e-06, "loss": 0.725, "step": 8568 }, { "epoch": 0.69, "grad_norm": 1.7854213151302347, "learning_rate": 2.349669166163292e-06, "loss": 0.773, "step": 8569 }, { "epoch": 0.69, "grad_norm": 1.4669408168907234, "learning_rate": 2.348567546294488e-06, "loss": 0.7704, "step": 8570 }, { "epoch": 0.69, "grad_norm": 0.7675823657644338, "learning_rate": 2.347466105456466e-06, "loss": 1.088, "step": 8571 }, { "epoch": 0.69, "grad_norm": 0.7616015838858193, "learning_rate": 2.346364843723598e-06, "loss": 1.0782, "step": 8572 }, { "epoch": 0.69, "grad_norm": 1.5180395633243218, "learning_rate": 2.345263761170244e-06, "loss": 0.851, "step": 8573 }, { "epoch": 0.69, "grad_norm": 1.5731372906336154, "learning_rate": 2.3441628578707505e-06, "loss": 0.7003, "step": 8574 }, { "epoch": 0.69, "grad_norm": 1.6382223482010612, "learning_rate": 2.3430621338994527e-06, "loss": 0.8144, "step": 8575 }, { "epoch": 0.69, "grad_norm": 1.5794018091609778, "learning_rate": 2.3419615893306762e-06, "loss": 0.7971, "step": 8576 }, { "epoch": 0.69, "grad_norm": 1.5751730128656825, "learning_rate": 2.340861224238732e-06, "loss": 0.6948, "step": 8577 }, { "epoch": 0.69, "grad_norm": 1.6134195032326573, "learning_rate": 2.3397610386979157e-06, "loss": 0.8071, "step": 8578 }, { "epoch": 0.69, "grad_norm": 1.5231983755967886, "learning_rate": 2.3386610327825194e-06, "loss": 0.7975, "step": 8579 }, { "epoch": 0.69, "grad_norm": 1.5725651302900405, "learning_rate": 2.3375612065668158e-06, "loss": 0.7585, "step": 8580 }, { "epoch": 0.69, "grad_norm": 1.4508243854844547, "learning_rate": 2.3364615601250673e-06, "loss": 0.7596, "step": 8581 }, { "epoch": 0.69, "grad_norm": 1.5192156165105057, "learning_rate": 2.335362093531523e-06, "loss": 0.7549, "step": 8582 }, { "epoch": 0.69, "grad_norm": 1.6163421518684031, "learning_rate": 2.334262806860425e-06, "loss": 0.7196, "step": 8583 }, { "epoch": 0.69, "grad_norm": 1.5819450021124593, "learning_rate": 2.3331637001859974e-06, "loss": 0.8335, "step": 8584 }, { "epoch": 0.69, "grad_norm": 1.5503334643627318, "learning_rate": 2.332064773582456e-06, "loss": 0.7745, "step": 8585 }, { "epoch": 0.69, "grad_norm": 0.7818150292827802, "learning_rate": 2.330966027124001e-06, "loss": 1.0551, "step": 8586 }, { "epoch": 0.69, "grad_norm": 1.5255440246370735, "learning_rate": 2.3298674608848225e-06, "loss": 0.7964, "step": 8587 }, { "epoch": 0.69, "grad_norm": 1.5875659153423067, "learning_rate": 2.3287690749390994e-06, "loss": 0.742, "step": 8588 }, { "epoch": 0.69, "grad_norm": 1.5950011596890226, "learning_rate": 2.3276708693609947e-06, "loss": 0.7255, "step": 8589 }, { "epoch": 0.69, "grad_norm": 1.4430389465991293, "learning_rate": 2.326572844224665e-06, "loss": 0.6812, "step": 8590 }, { "epoch": 0.69, "grad_norm": 0.7694401188895588, "learning_rate": 2.32547499960425e-06, "loss": 1.0649, "step": 8591 }, { "epoch": 0.69, "grad_norm": 1.5094451563851234, "learning_rate": 2.3243773355738773e-06, "loss": 0.7778, "step": 8592 }, { "epoch": 0.69, "grad_norm": 1.4593433639114457, "learning_rate": 2.3232798522076667e-06, "loss": 0.8028, "step": 8593 }, { "epoch": 0.69, "grad_norm": 1.49924647535026, "learning_rate": 2.3221825495797213e-06, "loss": 0.7652, "step": 8594 }, { "epoch": 0.69, "grad_norm": 1.8780161741091508, "learning_rate": 2.3210854277641333e-06, "loss": 0.8135, "step": 8595 }, { "epoch": 0.69, "grad_norm": 1.7236993134868128, "learning_rate": 2.3199884868349824e-06, "loss": 0.7241, "step": 8596 }, { "epoch": 0.69, "grad_norm": 1.5370851161782546, "learning_rate": 2.3188917268663375e-06, "loss": 0.7999, "step": 8597 }, { "epoch": 0.69, "grad_norm": 1.5803946047023787, "learning_rate": 2.317795147932254e-06, "loss": 0.7474, "step": 8598 }, { "epoch": 0.69, "grad_norm": 1.487836329676063, "learning_rate": 2.3166987501067733e-06, "loss": 0.6791, "step": 8599 }, { "epoch": 0.69, "grad_norm": 1.535060983460703, "learning_rate": 2.315602533463931e-06, "loss": 0.812, "step": 8600 }, { "epoch": 0.69, "grad_norm": 1.506178315385878, "learning_rate": 2.3145064980777433e-06, "loss": 0.7472, "step": 8601 }, { "epoch": 0.69, "grad_norm": 1.9120837916487803, "learning_rate": 2.313410644022216e-06, "loss": 0.738, "step": 8602 }, { "epoch": 0.69, "grad_norm": 1.6006623984807768, "learning_rate": 2.3123149713713474e-06, "loss": 0.7526, "step": 8603 }, { "epoch": 0.69, "grad_norm": 1.62526878726048, "learning_rate": 2.311219480199117e-06, "loss": 0.7542, "step": 8604 }, { "epoch": 0.69, "grad_norm": 0.7879553572683522, "learning_rate": 2.3101241705794962e-06, "loss": 1.0662, "step": 8605 }, { "epoch": 0.69, "grad_norm": 1.571713907593411, "learning_rate": 2.309029042586442e-06, "loss": 0.8138, "step": 8606 }, { "epoch": 0.69, "grad_norm": 1.5488867198100023, "learning_rate": 2.3079340962939e-06, "loss": 0.7938, "step": 8607 }, { "epoch": 0.69, "grad_norm": 1.522421785501238, "learning_rate": 2.3068393317758035e-06, "loss": 0.6416, "step": 8608 }, { "epoch": 0.69, "grad_norm": 0.7612261063493391, "learning_rate": 2.3057447491060725e-06, "loss": 1.0784, "step": 8609 }, { "epoch": 0.69, "grad_norm": 1.5061028192070636, "learning_rate": 2.304650348358618e-06, "loss": 0.7964, "step": 8610 }, { "epoch": 0.69, "grad_norm": 1.4581792668080857, "learning_rate": 2.3035561296073356e-06, "loss": 0.7471, "step": 8611 }, { "epoch": 0.69, "grad_norm": 1.4980249371701648, "learning_rate": 2.3024620929261087e-06, "loss": 0.7967, "step": 8612 }, { "epoch": 0.69, "grad_norm": 1.482371634552179, "learning_rate": 2.301368238388808e-06, "loss": 0.719, "step": 8613 }, { "epoch": 0.69, "grad_norm": 1.4995025589232869, "learning_rate": 2.3002745660692967e-06, "loss": 0.7229, "step": 8614 }, { "epoch": 0.69, "grad_norm": 1.46628633950014, "learning_rate": 2.2991810760414194e-06, "loss": 0.7206, "step": 8615 }, { "epoch": 0.69, "grad_norm": 1.495994099328573, "learning_rate": 2.298087768379012e-06, "loss": 0.816, "step": 8616 }, { "epoch": 0.69, "grad_norm": 1.4647126002887028, "learning_rate": 2.2969946431558963e-06, "loss": 0.6667, "step": 8617 }, { "epoch": 0.69, "grad_norm": 1.4910649822541095, "learning_rate": 2.295901700445884e-06, "loss": 0.7388, "step": 8618 }, { "epoch": 0.69, "grad_norm": 1.6416495754369, "learning_rate": 2.29480894032277e-06, "loss": 0.7862, "step": 8619 }, { "epoch": 0.69, "grad_norm": 1.5680871584305436, "learning_rate": 2.2937163628603437e-06, "loss": 0.7347, "step": 8620 }, { "epoch": 0.69, "grad_norm": 1.5407044245995107, "learning_rate": 2.292623968132377e-06, "loss": 0.6914, "step": 8621 }, { "epoch": 0.69, "grad_norm": 2.7355525773197535, "learning_rate": 2.291531756212631e-06, "loss": 0.7152, "step": 8622 }, { "epoch": 0.69, "grad_norm": 1.5492937108570797, "learning_rate": 2.2904397271748517e-06, "loss": 0.7641, "step": 8623 }, { "epoch": 0.69, "grad_norm": 1.4803060155533772, "learning_rate": 2.28934788109278e-06, "loss": 0.7228, "step": 8624 }, { "epoch": 0.69, "grad_norm": 1.483808228638164, "learning_rate": 2.288256218040138e-06, "loss": 0.8094, "step": 8625 }, { "epoch": 0.69, "grad_norm": 0.7952761861086189, "learning_rate": 2.2871647380906347e-06, "loss": 1.0594, "step": 8626 }, { "epoch": 0.69, "grad_norm": 0.7740590856570554, "learning_rate": 2.286073441317973e-06, "loss": 1.0522, "step": 8627 }, { "epoch": 0.69, "grad_norm": 0.8224992494425446, "learning_rate": 2.284982327795839e-06, "loss": 1.0322, "step": 8628 }, { "epoch": 0.69, "grad_norm": 1.4789466627889427, "learning_rate": 2.283891397597908e-06, "loss": 0.8271, "step": 8629 }, { "epoch": 0.69, "grad_norm": 1.4021952817961376, "learning_rate": 2.2828006507978373e-06, "loss": 0.816, "step": 8630 }, { "epoch": 0.69, "grad_norm": 0.7678737425863551, "learning_rate": 2.2817100874692816e-06, "loss": 1.0701, "step": 8631 }, { "epoch": 0.69, "grad_norm": 1.5598480149063723, "learning_rate": 2.280619707685876e-06, "loss": 0.8061, "step": 8632 }, { "epoch": 0.69, "grad_norm": 1.5175424481368072, "learning_rate": 2.2795295115212445e-06, "loss": 0.7825, "step": 8633 }, { "epoch": 0.69, "grad_norm": 0.7343383898610367, "learning_rate": 2.2784394990490033e-06, "loss": 1.0189, "step": 8634 }, { "epoch": 0.69, "grad_norm": 1.5529729314234861, "learning_rate": 2.27734967034275e-06, "loss": 0.6564, "step": 8635 }, { "epoch": 0.69, "grad_norm": 1.5346131061002133, "learning_rate": 2.276260025476071e-06, "loss": 0.8468, "step": 8636 }, { "epoch": 0.69, "grad_norm": 1.6775762162165697, "learning_rate": 2.2751705645225446e-06, "loss": 0.7595, "step": 8637 }, { "epoch": 0.69, "grad_norm": 1.4837832078920317, "learning_rate": 2.2740812875557328e-06, "loss": 0.8157, "step": 8638 }, { "epoch": 0.69, "grad_norm": 1.5451625611076296, "learning_rate": 2.2729921946491855e-06, "loss": 0.721, "step": 8639 }, { "epoch": 0.69, "grad_norm": 1.586110107963762, "learning_rate": 2.2719032858764413e-06, "loss": 0.7829, "step": 8640 }, { "epoch": 0.69, "grad_norm": 1.5121222727800199, "learning_rate": 2.270814561311025e-06, "loss": 0.8162, "step": 8641 }, { "epoch": 0.69, "grad_norm": 1.5511901982674645, "learning_rate": 2.2697260210264506e-06, "loss": 0.7904, "step": 8642 }, { "epoch": 0.69, "grad_norm": 0.8033024678419775, "learning_rate": 2.268637665096216e-06, "loss": 1.0598, "step": 8643 }, { "epoch": 0.69, "grad_norm": 1.4340619245178114, "learning_rate": 2.2675494935938143e-06, "loss": 0.8054, "step": 8644 }, { "epoch": 0.69, "grad_norm": 1.371684650048675, "learning_rate": 2.2664615065927182e-06, "loss": 0.6732, "step": 8645 }, { "epoch": 0.69, "grad_norm": 1.522766385738045, "learning_rate": 2.2653737041663927e-06, "loss": 0.7371, "step": 8646 }, { "epoch": 0.69, "grad_norm": 1.5804006475853332, "learning_rate": 2.264286086388285e-06, "loss": 0.7882, "step": 8647 }, { "epoch": 0.69, "grad_norm": 1.5839410030756629, "learning_rate": 2.263198653331839e-06, "loss": 0.7421, "step": 8648 }, { "epoch": 0.69, "grad_norm": 1.4499294089114947, "learning_rate": 2.2621114050704775e-06, "loss": 0.7408, "step": 8649 }, { "epoch": 0.69, "grad_norm": 1.488134664956892, "learning_rate": 2.2610243416776146e-06, "loss": 0.7057, "step": 8650 }, { "epoch": 0.69, "grad_norm": 1.426602145557958, "learning_rate": 2.2599374632266514e-06, "loss": 0.6487, "step": 8651 }, { "epoch": 0.69, "grad_norm": 1.5076053867522965, "learning_rate": 2.2588507697909756e-06, "loss": 0.7569, "step": 8652 }, { "epoch": 0.69, "grad_norm": 1.409978766927716, "learning_rate": 2.2577642614439643e-06, "loss": 0.7761, "step": 8653 }, { "epoch": 0.69, "grad_norm": 1.5320528891165686, "learning_rate": 2.2566779382589788e-06, "loss": 0.7583, "step": 8654 }, { "epoch": 0.69, "grad_norm": 1.385756744619177, "learning_rate": 2.2555918003093737e-06, "loss": 0.8209, "step": 8655 }, { "epoch": 0.69, "grad_norm": 1.5666375314583019, "learning_rate": 2.2545058476684855e-06, "loss": 0.8072, "step": 8656 }, { "epoch": 0.69, "grad_norm": 0.7698046438196599, "learning_rate": 2.253420080409639e-06, "loss": 1.0553, "step": 8657 }, { "epoch": 0.69, "grad_norm": 1.5774035130645043, "learning_rate": 2.2523344986061508e-06, "loss": 0.7389, "step": 8658 }, { "epoch": 0.69, "grad_norm": 1.546727535339064, "learning_rate": 2.25124910233132e-06, "loss": 0.7529, "step": 8659 }, { "epoch": 0.69, "grad_norm": 1.5232079451545817, "learning_rate": 2.250163891658435e-06, "loss": 0.7301, "step": 8660 }, { "epoch": 0.69, "grad_norm": 1.5633243149813447, "learning_rate": 2.249078866660772e-06, "loss": 0.6435, "step": 8661 }, { "epoch": 0.69, "grad_norm": 1.5481640158758458, "learning_rate": 2.247994027411595e-06, "loss": 0.7114, "step": 8662 }, { "epoch": 0.7, "grad_norm": 1.5827406182772803, "learning_rate": 2.2469093739841537e-06, "loss": 0.7733, "step": 8663 }, { "epoch": 0.7, "grad_norm": 1.4997896498638703, "learning_rate": 2.2458249064516846e-06, "loss": 0.766, "step": 8664 }, { "epoch": 0.7, "grad_norm": 0.7809964236684456, "learning_rate": 2.2447406248874176e-06, "loss": 1.0456, "step": 8665 }, { "epoch": 0.7, "grad_norm": 1.439786395442257, "learning_rate": 2.2436565293645642e-06, "loss": 0.6385, "step": 8666 }, { "epoch": 0.7, "grad_norm": 1.4428531723469047, "learning_rate": 2.2425726199563225e-06, "loss": 0.7401, "step": 8667 }, { "epoch": 0.7, "grad_norm": 1.4850846910220885, "learning_rate": 2.2414888967358844e-06, "loss": 0.7998, "step": 8668 }, { "epoch": 0.7, "grad_norm": 1.6462869377376586, "learning_rate": 2.240405359776424e-06, "loss": 0.7903, "step": 8669 }, { "epoch": 0.7, "grad_norm": 0.7539260421140899, "learning_rate": 2.2393220091511043e-06, "loss": 1.0678, "step": 8670 }, { "epoch": 0.7, "grad_norm": 1.5388870015743326, "learning_rate": 2.2382388449330728e-06, "loss": 0.7052, "step": 8671 }, { "epoch": 0.7, "grad_norm": 1.4710947130064311, "learning_rate": 2.237155867195472e-06, "loss": 0.706, "step": 8672 }, { "epoch": 0.7, "grad_norm": 1.662251585820311, "learning_rate": 2.236073076011426e-06, "loss": 0.7615, "step": 8673 }, { "epoch": 0.7, "grad_norm": 1.4370815809968653, "learning_rate": 2.2349904714540427e-06, "loss": 0.7556, "step": 8674 }, { "epoch": 0.7, "grad_norm": 1.7350910090338045, "learning_rate": 2.233908053596427e-06, "loss": 0.7963, "step": 8675 }, { "epoch": 0.7, "grad_norm": 1.6524540828366223, "learning_rate": 2.2328258225116645e-06, "loss": 0.8183, "step": 8676 }, { "epoch": 0.7, "grad_norm": 1.4134806136348532, "learning_rate": 2.231743778272828e-06, "loss": 0.7979, "step": 8677 }, { "epoch": 0.7, "grad_norm": 1.6477819589473648, "learning_rate": 2.2306619209529832e-06, "loss": 0.7172, "step": 8678 }, { "epoch": 0.7, "grad_norm": 0.8223264867596763, "learning_rate": 2.229580250625179e-06, "loss": 1.0622, "step": 8679 }, { "epoch": 0.7, "grad_norm": 1.4890441484782917, "learning_rate": 2.2284987673624503e-06, "loss": 0.7964, "step": 8680 }, { "epoch": 0.7, "grad_norm": 1.4974516464949201, "learning_rate": 2.227417471237821e-06, "loss": 0.732, "step": 8681 }, { "epoch": 0.7, "grad_norm": 1.570670630902365, "learning_rate": 2.2263363623243058e-06, "loss": 0.7876, "step": 8682 }, { "epoch": 0.7, "grad_norm": 1.530099518427506, "learning_rate": 2.225255440694901e-06, "loss": 0.7752, "step": 8683 }, { "epoch": 0.7, "grad_norm": 1.5858424242807503, "learning_rate": 2.2241747064225942e-06, "loss": 0.7705, "step": 8684 }, { "epoch": 0.7, "grad_norm": 1.7012934826309962, "learning_rate": 2.223094159580359e-06, "loss": 0.778, "step": 8685 }, { "epoch": 0.7, "grad_norm": 1.6200109715508053, "learning_rate": 2.222013800241155e-06, "loss": 0.8019, "step": 8686 }, { "epoch": 0.7, "grad_norm": 1.5512513071049818, "learning_rate": 2.220933628477932e-06, "loss": 0.6898, "step": 8687 }, { "epoch": 0.7, "grad_norm": 1.5762209875570257, "learning_rate": 2.2198536443636233e-06, "loss": 0.676, "step": 8688 }, { "epoch": 0.7, "grad_norm": 1.500662201343916, "learning_rate": 2.218773847971156e-06, "loss": 0.7079, "step": 8689 }, { "epoch": 0.7, "grad_norm": 1.392552909836651, "learning_rate": 2.217694239373437e-06, "loss": 0.7673, "step": 8690 }, { "epoch": 0.7, "grad_norm": 1.4703357120974991, "learning_rate": 2.2166148186433637e-06, "loss": 0.7296, "step": 8691 }, { "epoch": 0.7, "grad_norm": 1.5283778289806405, "learning_rate": 2.2155355858538245e-06, "loss": 0.7808, "step": 8692 }, { "epoch": 0.7, "grad_norm": 0.8013391890255379, "learning_rate": 2.21445654107769e-06, "loss": 1.097, "step": 8693 }, { "epoch": 0.7, "grad_norm": 1.4529341386148438, "learning_rate": 2.2133776843878185e-06, "loss": 0.7018, "step": 8694 }, { "epoch": 0.7, "grad_norm": 1.664215310240996, "learning_rate": 2.2122990158570583e-06, "loss": 0.7472, "step": 8695 }, { "epoch": 0.7, "grad_norm": 1.7118800252255761, "learning_rate": 2.2112205355582427e-06, "loss": 0.7331, "step": 8696 }, { "epoch": 0.7, "grad_norm": 1.4670626078750053, "learning_rate": 2.2101422435641932e-06, "loss": 0.7375, "step": 8697 }, { "epoch": 0.7, "grad_norm": 1.59443283879915, "learning_rate": 2.2090641399477183e-06, "loss": 0.7329, "step": 8698 }, { "epoch": 0.7, "grad_norm": 1.569408374075968, "learning_rate": 2.2079862247816148e-06, "loss": 0.7468, "step": 8699 }, { "epoch": 0.7, "grad_norm": 1.547386084759423, "learning_rate": 2.2069084981386666e-06, "loss": 0.7731, "step": 8700 }, { "epoch": 0.7, "grad_norm": 1.6227734076966431, "learning_rate": 2.205830960091641e-06, "loss": 0.6863, "step": 8701 }, { "epoch": 0.7, "grad_norm": 1.6186360633485697, "learning_rate": 2.2047536107133005e-06, "loss": 0.802, "step": 8702 }, { "epoch": 0.7, "grad_norm": 1.4552418147882455, "learning_rate": 2.203676450076388e-06, "loss": 0.7622, "step": 8703 }, { "epoch": 0.7, "grad_norm": 1.6257024343218032, "learning_rate": 2.202599478253636e-06, "loss": 0.7377, "step": 8704 }, { "epoch": 0.7, "grad_norm": 1.5782620491213968, "learning_rate": 2.201522695317763e-06, "loss": 0.704, "step": 8705 }, { "epoch": 0.7, "grad_norm": 0.7628979110573563, "learning_rate": 2.2004461013414775e-06, "loss": 1.0508, "step": 8706 }, { "epoch": 0.7, "grad_norm": 1.605968577314302, "learning_rate": 2.1993696963974726e-06, "loss": 0.8036, "step": 8707 }, { "epoch": 0.7, "grad_norm": 1.5268399814401854, "learning_rate": 2.198293480558428e-06, "loss": 0.7783, "step": 8708 }, { "epoch": 0.7, "grad_norm": 1.5687472235736113, "learning_rate": 2.1972174538970155e-06, "loss": 0.7484, "step": 8709 }, { "epoch": 0.7, "grad_norm": 1.4264323556812994, "learning_rate": 2.1961416164858897e-06, "loss": 0.7229, "step": 8710 }, { "epoch": 0.7, "grad_norm": 1.4414716023425886, "learning_rate": 2.195065968397693e-06, "loss": 0.8024, "step": 8711 }, { "epoch": 0.7, "grad_norm": 1.5312019580517486, "learning_rate": 2.1939905097050553e-06, "loss": 0.8013, "step": 8712 }, { "epoch": 0.7, "grad_norm": 1.4907439810758152, "learning_rate": 2.192915240480596e-06, "loss": 0.7109, "step": 8713 }, { "epoch": 0.7, "grad_norm": 1.516001962214674, "learning_rate": 2.191840160796918e-06, "loss": 0.6578, "step": 8714 }, { "epoch": 0.7, "grad_norm": 1.6507993219831034, "learning_rate": 2.190765270726612e-06, "loss": 0.7416, "step": 8715 }, { "epoch": 0.7, "grad_norm": 1.5015230739968666, "learning_rate": 2.1896905703422605e-06, "loss": 0.7275, "step": 8716 }, { "epoch": 0.7, "grad_norm": 1.4984065036360317, "learning_rate": 2.18861605971643e-06, "loss": 0.7625, "step": 8717 }, { "epoch": 0.7, "grad_norm": 1.6220732720331674, "learning_rate": 2.1875417389216674e-06, "loss": 0.7938, "step": 8718 }, { "epoch": 0.7, "grad_norm": 0.7680073127187883, "learning_rate": 2.18646760803052e-06, "loss": 1.0531, "step": 8719 }, { "epoch": 0.7, "grad_norm": 1.490722018935445, "learning_rate": 2.185393667115513e-06, "loss": 0.7998, "step": 8720 }, { "epoch": 0.7, "grad_norm": 1.4816061915696637, "learning_rate": 2.1843199162491618e-06, "loss": 0.8063, "step": 8721 }, { "epoch": 0.7, "grad_norm": 1.5152822666832892, "learning_rate": 2.1832463555039662e-06, "loss": 0.7548, "step": 8722 }, { "epoch": 0.7, "grad_norm": 1.7737138597227453, "learning_rate": 2.18217298495242e-06, "loss": 0.8539, "step": 8723 }, { "epoch": 0.7, "grad_norm": 1.6382195545787797, "learning_rate": 2.1810998046669964e-06, "loss": 0.754, "step": 8724 }, { "epoch": 0.7, "grad_norm": 1.4851896842342254, "learning_rate": 2.180026814720158e-06, "loss": 0.7796, "step": 8725 }, { "epoch": 0.7, "grad_norm": 1.516366262044474, "learning_rate": 2.1789540151843597e-06, "loss": 0.7164, "step": 8726 }, { "epoch": 0.7, "grad_norm": 1.4800615733207947, "learning_rate": 2.177881406132037e-06, "loss": 0.7214, "step": 8727 }, { "epoch": 0.7, "grad_norm": 1.464973813729646, "learning_rate": 2.1768089876356145e-06, "loss": 0.76, "step": 8728 }, { "epoch": 0.7, "grad_norm": 0.7866820089074741, "learning_rate": 2.175736759767505e-06, "loss": 1.0374, "step": 8729 }, { "epoch": 0.7, "grad_norm": 1.4589495021951695, "learning_rate": 2.174664722600108e-06, "loss": 0.7178, "step": 8730 }, { "epoch": 0.7, "grad_norm": 1.4146259021648806, "learning_rate": 2.1735928762058085e-06, "loss": 0.7076, "step": 8731 }, { "epoch": 0.7, "grad_norm": 1.3748388644709217, "learning_rate": 2.1725212206569798e-06, "loss": 0.8053, "step": 8732 }, { "epoch": 0.7, "grad_norm": 1.7029016608443437, "learning_rate": 2.1714497560259854e-06, "loss": 0.7716, "step": 8733 }, { "epoch": 0.7, "grad_norm": 1.8493430153187262, "learning_rate": 2.1703784823851714e-06, "loss": 0.8261, "step": 8734 }, { "epoch": 0.7, "grad_norm": 1.6028716591424468, "learning_rate": 2.1693073998068704e-06, "loss": 0.7805, "step": 8735 }, { "epoch": 0.7, "grad_norm": 1.5574437686171727, "learning_rate": 2.1682365083634086e-06, "loss": 0.7258, "step": 8736 }, { "epoch": 0.7, "grad_norm": 1.517299510637743, "learning_rate": 2.167165808127093e-06, "loss": 0.7681, "step": 8737 }, { "epoch": 0.7, "grad_norm": 0.7765050532695559, "learning_rate": 2.16609529917022e-06, "loss": 1.0553, "step": 8738 }, { "epoch": 0.7, "grad_norm": 1.45362117958091, "learning_rate": 2.165024981565072e-06, "loss": 0.7831, "step": 8739 }, { "epoch": 0.7, "grad_norm": 1.5615396010063949, "learning_rate": 2.16395485538392e-06, "loss": 0.7724, "step": 8740 }, { "epoch": 0.7, "grad_norm": 1.5359974451549714, "learning_rate": 2.1628849206990216e-06, "loss": 0.6934, "step": 8741 }, { "epoch": 0.7, "grad_norm": 1.3160820506920023, "learning_rate": 2.1618151775826186e-06, "loss": 0.6857, "step": 8742 }, { "epoch": 0.7, "grad_norm": 1.6091376974731562, "learning_rate": 2.160745626106946e-06, "loss": 0.7174, "step": 8743 }, { "epoch": 0.7, "grad_norm": 1.4913619676924215, "learning_rate": 2.159676266344222e-06, "loss": 0.762, "step": 8744 }, { "epoch": 0.7, "grad_norm": 0.7757825929527788, "learning_rate": 2.1586070983666506e-06, "loss": 1.048, "step": 8745 }, { "epoch": 0.7, "grad_norm": 1.5338755377392468, "learning_rate": 2.1575381222464236e-06, "loss": 0.7809, "step": 8746 }, { "epoch": 0.7, "grad_norm": 1.421698220223875, "learning_rate": 2.1564693380557246e-06, "loss": 0.6771, "step": 8747 }, { "epoch": 0.7, "grad_norm": 0.7808947990710995, "learning_rate": 2.155400745866717e-06, "loss": 1.0791, "step": 8748 }, { "epoch": 0.7, "grad_norm": 1.5420577392146768, "learning_rate": 2.1543323457515564e-06, "loss": 0.8443, "step": 8749 }, { "epoch": 0.7, "grad_norm": 1.5181520504834694, "learning_rate": 2.1532641377823828e-06, "loss": 0.8008, "step": 8750 }, { "epoch": 0.7, "grad_norm": 1.5009966705422029, "learning_rate": 2.1521961220313237e-06, "loss": 0.7796, "step": 8751 }, { "epoch": 0.7, "grad_norm": 1.5178397709987348, "learning_rate": 2.151128298570494e-06, "loss": 0.8291, "step": 8752 }, { "epoch": 0.7, "grad_norm": 1.4315331585527435, "learning_rate": 2.150060667471995e-06, "loss": 0.7961, "step": 8753 }, { "epoch": 0.7, "grad_norm": 1.735276506523688, "learning_rate": 2.148993228807918e-06, "loss": 0.7672, "step": 8754 }, { "epoch": 0.7, "grad_norm": 1.4012177813206146, "learning_rate": 2.147925982650337e-06, "loss": 0.6856, "step": 8755 }, { "epoch": 0.7, "grad_norm": 2.298923294117518, "learning_rate": 2.146858929071314e-06, "loss": 0.7332, "step": 8756 }, { "epoch": 0.7, "grad_norm": 1.449043455320584, "learning_rate": 2.145792068142902e-06, "loss": 0.7361, "step": 8757 }, { "epoch": 0.7, "grad_norm": 1.5426795793525792, "learning_rate": 2.1447253999371355e-06, "loss": 0.7398, "step": 8758 }, { "epoch": 0.7, "grad_norm": 1.5119004887439558, "learning_rate": 2.1436589245260375e-06, "loss": 0.7003, "step": 8759 }, { "epoch": 0.7, "grad_norm": 1.5449871845477379, "learning_rate": 2.142592641981622e-06, "loss": 0.7512, "step": 8760 }, { "epoch": 0.7, "grad_norm": 0.7790416420168379, "learning_rate": 2.1415265523758877e-06, "loss": 1.0614, "step": 8761 }, { "epoch": 0.7, "grad_norm": 1.4972148176634292, "learning_rate": 2.1404606557808142e-06, "loss": 0.7455, "step": 8762 }, { "epoch": 0.7, "grad_norm": 1.5248502510257462, "learning_rate": 2.139394952268375e-06, "loss": 0.8198, "step": 8763 }, { "epoch": 0.7, "grad_norm": 0.7788280596267059, "learning_rate": 2.138329441910531e-06, "loss": 1.0648, "step": 8764 }, { "epoch": 0.7, "grad_norm": 1.4734609676612733, "learning_rate": 2.137264124779227e-06, "loss": 0.7901, "step": 8765 }, { "epoch": 0.7, "grad_norm": 1.4708876216899602, "learning_rate": 2.1361990009463935e-06, "loss": 0.8017, "step": 8766 }, { "epoch": 0.7, "grad_norm": 0.7720520557917335, "learning_rate": 2.1351340704839534e-06, "loss": 1.0752, "step": 8767 }, { "epoch": 0.7, "grad_norm": 1.4861680279470364, "learning_rate": 2.134069333463812e-06, "loss": 0.7073, "step": 8768 }, { "epoch": 0.7, "grad_norm": 1.5345241969635814, "learning_rate": 2.133004789957862e-06, "loss": 0.876, "step": 8769 }, { "epoch": 0.7, "grad_norm": 1.9803511984491948, "learning_rate": 2.1319404400379835e-06, "loss": 0.752, "step": 8770 }, { "epoch": 0.7, "grad_norm": 1.5250020399165836, "learning_rate": 2.130876283776046e-06, "loss": 0.7938, "step": 8771 }, { "epoch": 0.7, "grad_norm": 1.4597890077065672, "learning_rate": 2.1298123212439028e-06, "loss": 0.7364, "step": 8772 }, { "epoch": 0.7, "grad_norm": 1.503572508939869, "learning_rate": 2.1287485525133943e-06, "loss": 0.7306, "step": 8773 }, { "epoch": 0.7, "grad_norm": 1.6003525237162908, "learning_rate": 2.127684977656349e-06, "loss": 0.7848, "step": 8774 }, { "epoch": 0.7, "grad_norm": 1.5345098079410684, "learning_rate": 2.1266215967445823e-06, "loss": 0.8023, "step": 8775 }, { "epoch": 0.7, "grad_norm": 1.4963824096299543, "learning_rate": 2.1255584098498936e-06, "loss": 0.7944, "step": 8776 }, { "epoch": 0.7, "grad_norm": 1.4839727032360568, "learning_rate": 2.124495417044076e-06, "loss": 0.7847, "step": 8777 }, { "epoch": 0.7, "grad_norm": 1.6183813996119811, "learning_rate": 2.1234326183989036e-06, "loss": 0.8142, "step": 8778 }, { "epoch": 0.7, "grad_norm": 1.7569152448232452, "learning_rate": 2.1223700139861375e-06, "loss": 0.7195, "step": 8779 }, { "epoch": 0.7, "grad_norm": 0.7772811569630824, "learning_rate": 2.121307603877527e-06, "loss": 1.033, "step": 8780 }, { "epoch": 0.7, "grad_norm": 1.5786536348925817, "learning_rate": 2.120245388144811e-06, "loss": 0.8001, "step": 8781 }, { "epoch": 0.7, "grad_norm": 1.5547451778595185, "learning_rate": 2.1191833668597117e-06, "loss": 0.805, "step": 8782 }, { "epoch": 0.7, "grad_norm": 1.4387759070522135, "learning_rate": 2.118121540093939e-06, "loss": 0.6686, "step": 8783 }, { "epoch": 0.7, "grad_norm": 1.4677472818688722, "learning_rate": 2.117059907919189e-06, "loss": 0.6988, "step": 8784 }, { "epoch": 0.7, "grad_norm": 2.384862904762936, "learning_rate": 2.1159984704071466e-06, "loss": 0.7999, "step": 8785 }, { "epoch": 0.7, "grad_norm": 1.480209987129238, "learning_rate": 2.114937227629482e-06, "loss": 0.7289, "step": 8786 }, { "epoch": 0.7, "grad_norm": 0.7506650489713488, "learning_rate": 2.113876179657852e-06, "loss": 1.046, "step": 8787 }, { "epoch": 0.71, "grad_norm": 1.4422946778876335, "learning_rate": 2.1128153265639023e-06, "loss": 0.7752, "step": 8788 }, { "epoch": 0.71, "grad_norm": 1.535325985746534, "learning_rate": 2.111754668419264e-06, "loss": 0.6872, "step": 8789 }, { "epoch": 0.71, "grad_norm": 1.4779460719736055, "learning_rate": 2.1106942052955535e-06, "loss": 0.7725, "step": 8790 }, { "epoch": 0.71, "grad_norm": 1.472878570430273, "learning_rate": 2.109633937264379e-06, "loss": 0.6803, "step": 8791 }, { "epoch": 0.71, "grad_norm": 1.593265051125879, "learning_rate": 2.10857386439733e-06, "loss": 0.708, "step": 8792 }, { "epoch": 0.71, "grad_norm": 1.8504507446977863, "learning_rate": 2.1075139867659853e-06, "loss": 0.6844, "step": 8793 }, { "epoch": 0.71, "grad_norm": 1.5978848470688238, "learning_rate": 2.1064543044419105e-06, "loss": 0.7854, "step": 8794 }, { "epoch": 0.71, "grad_norm": 1.5781568863572366, "learning_rate": 2.1053948174966577e-06, "loss": 0.7565, "step": 8795 }, { "epoch": 0.71, "grad_norm": 1.4872738688549465, "learning_rate": 2.104335526001765e-06, "loss": 0.6916, "step": 8796 }, { "epoch": 0.71, "grad_norm": 1.4737916326230112, "learning_rate": 2.1032764300287577e-06, "loss": 0.7765, "step": 8797 }, { "epoch": 0.71, "grad_norm": 1.5909553283594644, "learning_rate": 2.1022175296491516e-06, "loss": 0.812, "step": 8798 }, { "epoch": 0.71, "grad_norm": 0.7552444274646167, "learning_rate": 2.1011588249344434e-06, "loss": 1.0449, "step": 8799 }, { "epoch": 0.71, "grad_norm": 1.587558767961101, "learning_rate": 2.1001003159561185e-06, "loss": 0.7619, "step": 8800 }, { "epoch": 0.71, "grad_norm": 1.5648761358184804, "learning_rate": 2.099042002785653e-06, "loss": 0.7661, "step": 8801 }, { "epoch": 0.71, "grad_norm": 1.550907305487096, "learning_rate": 2.097983885494505e-06, "loss": 0.7938, "step": 8802 }, { "epoch": 0.71, "grad_norm": 1.4090413484310849, "learning_rate": 2.0969259641541214e-06, "loss": 0.7336, "step": 8803 }, { "epoch": 0.71, "grad_norm": 1.45565093996669, "learning_rate": 2.095868238835932e-06, "loss": 0.6237, "step": 8804 }, { "epoch": 0.71, "grad_norm": 1.4289412223810323, "learning_rate": 2.0948107096113647e-06, "loss": 0.8044, "step": 8805 }, { "epoch": 0.71, "grad_norm": 1.5335832421646052, "learning_rate": 2.0937533765518187e-06, "loss": 0.7636, "step": 8806 }, { "epoch": 0.71, "grad_norm": 0.7776481206586978, "learning_rate": 2.092696239728689e-06, "loss": 1.0729, "step": 8807 }, { "epoch": 0.71, "grad_norm": 1.5242965799380574, "learning_rate": 2.0916392992133582e-06, "loss": 0.6971, "step": 8808 }, { "epoch": 0.71, "grad_norm": 1.5990687728589879, "learning_rate": 2.090582555077193e-06, "loss": 0.8725, "step": 8809 }, { "epoch": 0.71, "grad_norm": 1.6627716823275616, "learning_rate": 2.089526007391546e-06, "loss": 0.8234, "step": 8810 }, { "epoch": 0.71, "grad_norm": 1.5435158280245527, "learning_rate": 2.0884696562277566e-06, "loss": 0.7634, "step": 8811 }, { "epoch": 0.71, "grad_norm": 1.5700281647383987, "learning_rate": 2.0874135016571556e-06, "loss": 0.7314, "step": 8812 }, { "epoch": 0.71, "grad_norm": 0.7575366868254246, "learning_rate": 2.0863575437510547e-06, "loss": 1.0786, "step": 8813 }, { "epoch": 0.71, "grad_norm": 1.578444607652206, "learning_rate": 2.085301782580753e-06, "loss": 0.8237, "step": 8814 }, { "epoch": 0.71, "grad_norm": 1.4535378818726827, "learning_rate": 2.0842462182175422e-06, "loss": 0.6828, "step": 8815 }, { "epoch": 0.71, "grad_norm": 1.442577480649827, "learning_rate": 2.0831908507326935e-06, "loss": 0.7081, "step": 8816 }, { "epoch": 0.71, "grad_norm": 1.8041916999530423, "learning_rate": 2.0821356801974686e-06, "loss": 0.7357, "step": 8817 }, { "epoch": 0.71, "grad_norm": 1.6420314213733418, "learning_rate": 2.0810807066831147e-06, "loss": 0.7646, "step": 8818 }, { "epoch": 0.71, "grad_norm": 1.4386090029049434, "learning_rate": 2.0800259302608656e-06, "loss": 0.7152, "step": 8819 }, { "epoch": 0.71, "grad_norm": 1.506198902452254, "learning_rate": 2.0789713510019425e-06, "loss": 0.7606, "step": 8820 }, { "epoch": 0.71, "grad_norm": 1.4896343478852094, "learning_rate": 2.077916968977552e-06, "loss": 0.7338, "step": 8821 }, { "epoch": 0.71, "grad_norm": 1.570219011004925, "learning_rate": 2.0768627842588906e-06, "loss": 0.7891, "step": 8822 }, { "epoch": 0.71, "grad_norm": 0.7783872442248423, "learning_rate": 2.0758087969171387e-06, "loss": 1.0466, "step": 8823 }, { "epoch": 0.71, "grad_norm": 1.4220175868201346, "learning_rate": 2.074755007023461e-06, "loss": 0.7343, "step": 8824 }, { "epoch": 0.71, "grad_norm": 1.5060069237683054, "learning_rate": 2.0737014146490165e-06, "loss": 0.8154, "step": 8825 }, { "epoch": 0.71, "grad_norm": 1.7171139785743978, "learning_rate": 2.072648019864944e-06, "loss": 0.6677, "step": 8826 }, { "epoch": 0.71, "grad_norm": 1.612845462917001, "learning_rate": 2.0715948227423704e-06, "loss": 0.8295, "step": 8827 }, { "epoch": 0.71, "grad_norm": 1.5573006069489843, "learning_rate": 2.070541823352411e-06, "loss": 0.6972, "step": 8828 }, { "epoch": 0.71, "grad_norm": 1.570665091206447, "learning_rate": 2.069489021766166e-06, "loss": 0.757, "step": 8829 }, { "epoch": 0.71, "grad_norm": 1.4863189656337197, "learning_rate": 2.068436418054724e-06, "loss": 0.7587, "step": 8830 }, { "epoch": 0.71, "grad_norm": 1.5140775926647114, "learning_rate": 2.067384012289156e-06, "loss": 0.6634, "step": 8831 }, { "epoch": 0.71, "grad_norm": 1.4959774779934785, "learning_rate": 2.0663318045405275e-06, "loss": 0.7427, "step": 8832 }, { "epoch": 0.71, "grad_norm": 1.5253184690705592, "learning_rate": 2.065279794879883e-06, "loss": 0.7144, "step": 8833 }, { "epoch": 0.71, "grad_norm": 1.4819994765483997, "learning_rate": 2.064227983378256e-06, "loss": 0.6665, "step": 8834 }, { "epoch": 0.71, "grad_norm": 1.5579914802594237, "learning_rate": 2.063176370106671e-06, "loss": 0.7168, "step": 8835 }, { "epoch": 0.71, "grad_norm": 1.4919534847957625, "learning_rate": 2.0621249551361323e-06, "loss": 0.7087, "step": 8836 }, { "epoch": 0.71, "grad_norm": 1.586289143791413, "learning_rate": 2.061073738537635e-06, "loss": 0.7794, "step": 8837 }, { "epoch": 0.71, "grad_norm": 0.7662943076416616, "learning_rate": 2.060022720382159e-06, "loss": 1.0896, "step": 8838 }, { "epoch": 0.71, "grad_norm": 0.7693620394400195, "learning_rate": 2.0589719007406713e-06, "loss": 1.0762, "step": 8839 }, { "epoch": 0.71, "grad_norm": 1.4271201023284485, "learning_rate": 2.0579212796841257e-06, "loss": 0.8374, "step": 8840 }, { "epoch": 0.71, "grad_norm": 1.4755801952957277, "learning_rate": 2.0568708572834615e-06, "loss": 0.6983, "step": 8841 }, { "epoch": 0.71, "grad_norm": 1.59301061039807, "learning_rate": 2.055820633609609e-06, "loss": 0.7141, "step": 8842 }, { "epoch": 0.71, "grad_norm": 1.601535374751086, "learning_rate": 2.0547706087334783e-06, "loss": 0.7167, "step": 8843 }, { "epoch": 0.71, "grad_norm": 1.5421393118413262, "learning_rate": 2.053720782725972e-06, "loss": 0.6892, "step": 8844 }, { "epoch": 0.71, "grad_norm": 1.4365636874418852, "learning_rate": 2.052671155657973e-06, "loss": 0.7148, "step": 8845 }, { "epoch": 0.71, "grad_norm": 0.7904456624867255, "learning_rate": 2.0516217276003593e-06, "loss": 1.1089, "step": 8846 }, { "epoch": 0.71, "grad_norm": 0.7746910421408888, "learning_rate": 2.0505724986239883e-06, "loss": 1.062, "step": 8847 }, { "epoch": 0.71, "grad_norm": 1.6070046650781, "learning_rate": 2.0495234687997046e-06, "loss": 0.8876, "step": 8848 }, { "epoch": 0.71, "grad_norm": 1.5002521556641006, "learning_rate": 2.048474638198347e-06, "loss": 0.7912, "step": 8849 }, { "epoch": 0.71, "grad_norm": 1.517723011222634, "learning_rate": 2.047426006890728e-06, "loss": 0.8468, "step": 8850 }, { "epoch": 0.71, "grad_norm": 1.6622507708299004, "learning_rate": 2.0463775749476556e-06, "loss": 0.7554, "step": 8851 }, { "epoch": 0.71, "grad_norm": 1.5045864973317238, "learning_rate": 2.0453293424399244e-06, "loss": 0.8032, "step": 8852 }, { "epoch": 0.71, "grad_norm": 3.2374226391839644, "learning_rate": 2.0442813094383128e-06, "loss": 0.7974, "step": 8853 }, { "epoch": 0.71, "grad_norm": 1.527037827276388, "learning_rate": 2.0432334760135856e-06, "loss": 0.7864, "step": 8854 }, { "epoch": 0.71, "grad_norm": 1.5266541324577658, "learning_rate": 2.042185842236494e-06, "loss": 0.7092, "step": 8855 }, { "epoch": 0.71, "grad_norm": 1.5402345566330267, "learning_rate": 2.0411384081777785e-06, "loss": 0.7295, "step": 8856 }, { "epoch": 0.71, "grad_norm": 1.5525383409897007, "learning_rate": 2.040091173908164e-06, "loss": 0.8301, "step": 8857 }, { "epoch": 0.71, "grad_norm": 1.6378463418474885, "learning_rate": 2.0390441394983603e-06, "loss": 0.7986, "step": 8858 }, { "epoch": 0.71, "grad_norm": 1.7462768752544333, "learning_rate": 2.037997305019068e-06, "loss": 0.775, "step": 8859 }, { "epoch": 0.71, "grad_norm": 1.5283465740048878, "learning_rate": 2.0369506705409713e-06, "loss": 0.6812, "step": 8860 }, { "epoch": 0.71, "grad_norm": 1.5724285233135942, "learning_rate": 2.0359042361347405e-06, "loss": 0.792, "step": 8861 }, { "epoch": 0.71, "grad_norm": 1.5633318386692796, "learning_rate": 2.0348580018710334e-06, "loss": 0.705, "step": 8862 }, { "epoch": 0.71, "grad_norm": 1.593781633489615, "learning_rate": 2.0338119678204944e-06, "loss": 0.7606, "step": 8863 }, { "epoch": 0.71, "grad_norm": 1.5880504952330674, "learning_rate": 2.0327661340537536e-06, "loss": 0.6773, "step": 8864 }, { "epoch": 0.71, "grad_norm": 1.510687947896017, "learning_rate": 2.031720500641427e-06, "loss": 0.8054, "step": 8865 }, { "epoch": 0.71, "grad_norm": 1.5845485585064583, "learning_rate": 2.0306750676541214e-06, "loss": 0.7387, "step": 8866 }, { "epoch": 0.71, "grad_norm": 1.4660733547426146, "learning_rate": 2.029629835162425e-06, "loss": 0.8358, "step": 8867 }, { "epoch": 0.71, "grad_norm": 1.3975576714120441, "learning_rate": 2.028584803236914e-06, "loss": 0.7317, "step": 8868 }, { "epoch": 0.71, "grad_norm": 1.3873540021136497, "learning_rate": 2.02753997194815e-06, "loss": 0.75, "step": 8869 }, { "epoch": 0.71, "grad_norm": 1.5099521876068092, "learning_rate": 2.0264953413666856e-06, "loss": 0.8006, "step": 8870 }, { "epoch": 0.71, "grad_norm": 0.805258029689985, "learning_rate": 2.0254509115630557e-06, "loss": 1.0701, "step": 8871 }, { "epoch": 0.71, "grad_norm": 1.6730796369129737, "learning_rate": 2.0244066826077812e-06, "loss": 0.7481, "step": 8872 }, { "epoch": 0.71, "grad_norm": 1.5199666665533178, "learning_rate": 2.023362654571372e-06, "loss": 0.7293, "step": 8873 }, { "epoch": 0.71, "grad_norm": 1.6212072271032454, "learning_rate": 2.0223188275243226e-06, "loss": 0.8059, "step": 8874 }, { "epoch": 0.71, "grad_norm": 1.5240758455245407, "learning_rate": 2.0212752015371135e-06, "loss": 0.7005, "step": 8875 }, { "epoch": 0.71, "grad_norm": 1.4699063215170414, "learning_rate": 2.0202317766802155e-06, "loss": 0.6882, "step": 8876 }, { "epoch": 0.71, "grad_norm": 1.492279589777096, "learning_rate": 2.019188553024082e-06, "loss": 0.7206, "step": 8877 }, { "epoch": 0.71, "grad_norm": 1.5037508846834915, "learning_rate": 2.018145530639153e-06, "loss": 0.7413, "step": 8878 }, { "epoch": 0.71, "grad_norm": 1.6654690371423002, "learning_rate": 2.0171027095958543e-06, "loss": 0.8282, "step": 8879 }, { "epoch": 0.71, "grad_norm": 1.5842183915705528, "learning_rate": 2.0160600899646033e-06, "loss": 0.7599, "step": 8880 }, { "epoch": 0.71, "grad_norm": 1.7063844188305413, "learning_rate": 2.0150176718157986e-06, "loss": 0.8134, "step": 8881 }, { "epoch": 0.71, "grad_norm": 1.536750239168504, "learning_rate": 2.013975455219826e-06, "loss": 0.8133, "step": 8882 }, { "epoch": 0.71, "grad_norm": 1.6799842872055237, "learning_rate": 2.0129334402470583e-06, "loss": 0.8351, "step": 8883 }, { "epoch": 0.71, "grad_norm": 1.5693362490517175, "learning_rate": 2.0118916269678555e-06, "loss": 0.6895, "step": 8884 }, { "epoch": 0.71, "grad_norm": 0.772309244428441, "learning_rate": 2.010850015452563e-06, "loss": 1.0446, "step": 8885 }, { "epoch": 0.71, "grad_norm": 1.6482371646272858, "learning_rate": 2.0098086057715106e-06, "loss": 0.8439, "step": 8886 }, { "epoch": 0.71, "grad_norm": 1.5693111788515117, "learning_rate": 2.00876739799502e-06, "loss": 0.6783, "step": 8887 }, { "epoch": 0.71, "grad_norm": 1.4959632302397945, "learning_rate": 2.0077263921933953e-06, "loss": 0.7255, "step": 8888 }, { "epoch": 0.71, "grad_norm": 1.7339190090143608, "learning_rate": 2.0066855884369246e-06, "loss": 0.7086, "step": 8889 }, { "epoch": 0.71, "grad_norm": 1.4801676734125215, "learning_rate": 2.0056449867958893e-06, "loss": 0.7437, "step": 8890 }, { "epoch": 0.71, "grad_norm": 1.407492087770128, "learning_rate": 2.0046045873405518e-06, "loss": 0.7473, "step": 8891 }, { "epoch": 0.71, "grad_norm": 1.4877647544058141, "learning_rate": 2.0035643901411593e-06, "loss": 0.7895, "step": 8892 }, { "epoch": 0.71, "grad_norm": 1.492896532953956, "learning_rate": 2.002524395267954e-06, "loss": 0.7554, "step": 8893 }, { "epoch": 0.71, "grad_norm": 1.358161603157602, "learning_rate": 2.001484602791157e-06, "loss": 0.7088, "step": 8894 }, { "epoch": 0.71, "grad_norm": 1.5313116226419123, "learning_rate": 2.000445012780974e-06, "loss": 0.7776, "step": 8895 }, { "epoch": 0.71, "grad_norm": 1.5851947874179244, "learning_rate": 1.999405625307601e-06, "loss": 0.7872, "step": 8896 }, { "epoch": 0.71, "grad_norm": 1.6484861402420659, "learning_rate": 1.9983664404412233e-06, "loss": 0.8185, "step": 8897 }, { "epoch": 0.71, "grad_norm": 0.7635757846685645, "learning_rate": 1.997327458252007e-06, "loss": 1.0778, "step": 8898 }, { "epoch": 0.71, "grad_norm": 0.7740062022536703, "learning_rate": 1.996288678810105e-06, "loss": 1.044, "step": 8899 }, { "epoch": 0.71, "grad_norm": 0.7653020545671417, "learning_rate": 1.995250102185662e-06, "loss": 1.0544, "step": 8900 }, { "epoch": 0.71, "grad_norm": 1.6138576785730476, "learning_rate": 1.9942117284488026e-06, "loss": 0.6421, "step": 8901 }, { "epoch": 0.71, "grad_norm": 1.5511182672765857, "learning_rate": 1.9931735576696405e-06, "loss": 0.7431, "step": 8902 }, { "epoch": 0.71, "grad_norm": 1.4893591831423465, "learning_rate": 1.9921355899182733e-06, "loss": 0.7616, "step": 8903 }, { "epoch": 0.71, "grad_norm": 1.4397908554453103, "learning_rate": 1.9910978252647913e-06, "loss": 0.7179, "step": 8904 }, { "epoch": 0.71, "grad_norm": 1.5567835452475074, "learning_rate": 1.9900602637792637e-06, "loss": 0.7321, "step": 8905 }, { "epoch": 0.71, "grad_norm": 1.5102810033285345, "learning_rate": 1.9890229055317502e-06, "loss": 0.7922, "step": 8906 }, { "epoch": 0.71, "grad_norm": 1.5857857546491043, "learning_rate": 1.987985750592295e-06, "loss": 0.8027, "step": 8907 }, { "epoch": 0.71, "grad_norm": 1.5722979672033213, "learning_rate": 1.986948799030929e-06, "loss": 0.8017, "step": 8908 }, { "epoch": 0.71, "grad_norm": 1.5126195750399822, "learning_rate": 1.9859120509176706e-06, "loss": 0.7588, "step": 8909 }, { "epoch": 0.71, "grad_norm": 1.4823179313297419, "learning_rate": 1.98487550632252e-06, "loss": 0.7445, "step": 8910 }, { "epoch": 0.71, "grad_norm": 1.62994006264953, "learning_rate": 1.983839165315472e-06, "loss": 0.7433, "step": 8911 }, { "epoch": 0.72, "grad_norm": 1.5946409560099917, "learning_rate": 1.9828030279665006e-06, "loss": 0.7171, "step": 8912 }, { "epoch": 0.72, "grad_norm": 1.4600343048175033, "learning_rate": 1.981767094345566e-06, "loss": 0.7981, "step": 8913 }, { "epoch": 0.72, "grad_norm": 1.509519220667268, "learning_rate": 1.98073136452262e-06, "loss": 0.6959, "step": 8914 }, { "epoch": 0.72, "grad_norm": 0.7723810990198323, "learning_rate": 1.979695838567597e-06, "loss": 1.0517, "step": 8915 }, { "epoch": 0.72, "grad_norm": 1.452591695995745, "learning_rate": 1.978660516550417e-06, "loss": 0.7952, "step": 8916 }, { "epoch": 0.72, "grad_norm": 1.4557906975467643, "learning_rate": 1.9776253985409876e-06, "loss": 0.7206, "step": 8917 }, { "epoch": 0.72, "grad_norm": 1.4575934421030878, "learning_rate": 1.9765904846092027e-06, "loss": 0.7544, "step": 8918 }, { "epoch": 0.72, "grad_norm": 1.4249162511029256, "learning_rate": 1.9755557748249415e-06, "loss": 0.6646, "step": 8919 }, { "epoch": 0.72, "grad_norm": 1.419907991600344, "learning_rate": 1.9745212692580684e-06, "loss": 0.7218, "step": 8920 }, { "epoch": 0.72, "grad_norm": 1.4319441720000685, "learning_rate": 1.973486967978439e-06, "loss": 0.7374, "step": 8921 }, { "epoch": 0.72, "grad_norm": 1.4621642508384134, "learning_rate": 1.9724528710558906e-06, "loss": 0.7877, "step": 8922 }, { "epoch": 0.72, "grad_norm": 1.5676678661571493, "learning_rate": 1.971418978560245e-06, "loss": 0.6781, "step": 8923 }, { "epoch": 0.72, "grad_norm": 1.4861406200085754, "learning_rate": 1.970385290561317e-06, "loss": 0.7484, "step": 8924 }, { "epoch": 0.72, "grad_norm": 1.4798374595798751, "learning_rate": 1.969351807128902e-06, "loss": 0.7226, "step": 8925 }, { "epoch": 0.72, "grad_norm": 1.495154693849385, "learning_rate": 1.968318528332783e-06, "loss": 0.7367, "step": 8926 }, { "epoch": 0.72, "grad_norm": 0.7761458351933237, "learning_rate": 1.9672854542427293e-06, "loss": 1.0641, "step": 8927 }, { "epoch": 0.72, "grad_norm": 1.5013349752546, "learning_rate": 1.9662525849284964e-06, "loss": 0.7504, "step": 8928 }, { "epoch": 0.72, "grad_norm": 0.7578340808187285, "learning_rate": 1.965219920459826e-06, "loss": 1.06, "step": 8929 }, { "epoch": 0.72, "grad_norm": 1.4257269807062176, "learning_rate": 1.9641874609064443e-06, "loss": 0.6893, "step": 8930 }, { "epoch": 0.72, "grad_norm": 1.5310599744510958, "learning_rate": 1.9631552063380687e-06, "loss": 0.7624, "step": 8931 }, { "epoch": 0.72, "grad_norm": 1.4761888465667479, "learning_rate": 1.962123156824398e-06, "loss": 0.79, "step": 8932 }, { "epoch": 0.72, "grad_norm": 1.4611550376924842, "learning_rate": 1.961091312435116e-06, "loss": 0.6809, "step": 8933 }, { "epoch": 0.72, "grad_norm": 1.4492823579832488, "learning_rate": 1.9600596732398994e-06, "loss": 0.6643, "step": 8934 }, { "epoch": 0.72, "grad_norm": 1.58382597444594, "learning_rate": 1.9590282393084054e-06, "loss": 0.7809, "step": 8935 }, { "epoch": 0.72, "grad_norm": 1.4950233814421021, "learning_rate": 1.957997010710278e-06, "loss": 0.7793, "step": 8936 }, { "epoch": 0.72, "grad_norm": 0.7627747854821669, "learning_rate": 1.9569659875151464e-06, "loss": 1.0496, "step": 8937 }, { "epoch": 0.72, "grad_norm": 1.5817550924855095, "learning_rate": 1.9559351697926337e-06, "loss": 0.7309, "step": 8938 }, { "epoch": 0.72, "grad_norm": 0.7718076636242407, "learning_rate": 1.954904557612337e-06, "loss": 1.0762, "step": 8939 }, { "epoch": 0.72, "grad_norm": 1.554068578416361, "learning_rate": 1.953874151043846e-06, "loss": 0.6899, "step": 8940 }, { "epoch": 0.72, "grad_norm": 1.573678790185004, "learning_rate": 1.9528439501567385e-06, "loss": 0.737, "step": 8941 }, { "epoch": 0.72, "grad_norm": 1.4809730583259506, "learning_rate": 1.951813955020576e-06, "loss": 0.754, "step": 8942 }, { "epoch": 0.72, "grad_norm": 0.7749314765476829, "learning_rate": 1.9507841657049056e-06, "loss": 1.065, "step": 8943 }, { "epoch": 0.72, "grad_norm": 1.5287359475853204, "learning_rate": 1.9497545822792584e-06, "loss": 0.7844, "step": 8944 }, { "epoch": 0.72, "grad_norm": 1.4187913876726683, "learning_rate": 1.948725204813159e-06, "loss": 0.7753, "step": 8945 }, { "epoch": 0.72, "grad_norm": 1.543293539117777, "learning_rate": 1.947696033376111e-06, "loss": 0.6894, "step": 8946 }, { "epoch": 0.72, "grad_norm": 1.512426950755788, "learning_rate": 1.946667068037604e-06, "loss": 0.7394, "step": 8947 }, { "epoch": 0.72, "grad_norm": 1.4522339924106178, "learning_rate": 1.9456383088671204e-06, "loss": 0.7185, "step": 8948 }, { "epoch": 0.72, "grad_norm": 1.52756486861164, "learning_rate": 1.9446097559341227e-06, "loss": 0.7921, "step": 8949 }, { "epoch": 0.72, "grad_norm": 1.5183475486028737, "learning_rate": 1.94358140930806e-06, "loss": 0.7196, "step": 8950 }, { "epoch": 0.72, "grad_norm": 1.6080131125802337, "learning_rate": 1.9425532690583704e-06, "loss": 0.7982, "step": 8951 }, { "epoch": 0.72, "grad_norm": 1.5140270210403055, "learning_rate": 1.941525335254475e-06, "loss": 0.727, "step": 8952 }, { "epoch": 0.72, "grad_norm": 1.4487319733159343, "learning_rate": 1.940497607965782e-06, "loss": 0.7792, "step": 8953 }, { "epoch": 0.72, "grad_norm": 1.4280446525578032, "learning_rate": 1.9394700872616856e-06, "loss": 0.6951, "step": 8954 }, { "epoch": 0.72, "grad_norm": 1.4733370358312954, "learning_rate": 1.938442773211569e-06, "loss": 0.6792, "step": 8955 }, { "epoch": 0.72, "grad_norm": 1.5419987252136977, "learning_rate": 1.9374156658847965e-06, "loss": 0.8401, "step": 8956 }, { "epoch": 0.72, "grad_norm": 1.5644664934553532, "learning_rate": 1.9363887653507195e-06, "loss": 0.7855, "step": 8957 }, { "epoch": 0.72, "grad_norm": 1.4955990936705872, "learning_rate": 1.935362071678681e-06, "loss": 0.7573, "step": 8958 }, { "epoch": 0.72, "grad_norm": 1.504117553742307, "learning_rate": 1.9343355849380023e-06, "loss": 0.7702, "step": 8959 }, { "epoch": 0.72, "grad_norm": 1.4710582126617087, "learning_rate": 1.933309305197995e-06, "loss": 0.8685, "step": 8960 }, { "epoch": 0.72, "grad_norm": 1.5121479347556743, "learning_rate": 1.9322832325279563e-06, "loss": 0.7785, "step": 8961 }, { "epoch": 0.72, "grad_norm": 1.4989900149027142, "learning_rate": 1.9312573669971684e-06, "loss": 0.7509, "step": 8962 }, { "epoch": 0.72, "grad_norm": 1.6194034925716667, "learning_rate": 1.9302317086749e-06, "loss": 0.7763, "step": 8963 }, { "epoch": 0.72, "grad_norm": 1.5822155097357817, "learning_rate": 1.9292062576304045e-06, "loss": 0.7667, "step": 8964 }, { "epoch": 0.72, "grad_norm": 3.118265398066812, "learning_rate": 1.9281810139329255e-06, "loss": 0.7717, "step": 8965 }, { "epoch": 0.72, "grad_norm": 1.5206148568119542, "learning_rate": 1.927155977651689e-06, "loss": 0.7362, "step": 8966 }, { "epoch": 0.72, "grad_norm": 1.5974926437714214, "learning_rate": 1.9261311488559077e-06, "loss": 0.7672, "step": 8967 }, { "epoch": 0.72, "grad_norm": 1.551024324083652, "learning_rate": 1.925106527614778e-06, "loss": 0.7547, "step": 8968 }, { "epoch": 0.72, "grad_norm": 1.4566223821588165, "learning_rate": 1.924082113997488e-06, "loss": 0.7671, "step": 8969 }, { "epoch": 0.72, "grad_norm": 1.5376883582952139, "learning_rate": 1.9230579080732074e-06, "loss": 0.7263, "step": 8970 }, { "epoch": 0.72, "grad_norm": 1.4920376947343512, "learning_rate": 1.922033909911093e-06, "loss": 0.7655, "step": 8971 }, { "epoch": 0.72, "grad_norm": 0.7986841507357801, "learning_rate": 1.9210101195802873e-06, "loss": 1.0425, "step": 8972 }, { "epoch": 0.72, "grad_norm": 1.4199608676142805, "learning_rate": 1.919986537149919e-06, "loss": 0.7333, "step": 8973 }, { "epoch": 0.72, "grad_norm": 1.3685757233153055, "learning_rate": 1.9189631626891004e-06, "loss": 0.8182, "step": 8974 }, { "epoch": 0.72, "grad_norm": 0.7530723229379651, "learning_rate": 1.9179399962669358e-06, "loss": 1.0477, "step": 8975 }, { "epoch": 0.72, "grad_norm": 1.4649851605689048, "learning_rate": 1.9169170379525102e-06, "loss": 0.7703, "step": 8976 }, { "epoch": 0.72, "grad_norm": 1.4903483268056776, "learning_rate": 1.915894287814897e-06, "loss": 0.7434, "step": 8977 }, { "epoch": 0.72, "grad_norm": 1.586235612978687, "learning_rate": 1.9148717459231507e-06, "loss": 0.8097, "step": 8978 }, { "epoch": 0.72, "grad_norm": 0.7945707974070908, "learning_rate": 1.9138494123463216e-06, "loss": 1.0409, "step": 8979 }, { "epoch": 0.72, "grad_norm": 1.507509951507935, "learning_rate": 1.9128272871534363e-06, "loss": 0.7766, "step": 8980 }, { "epoch": 0.72, "grad_norm": 1.5208071403911998, "learning_rate": 1.9118053704135103e-06, "loss": 0.8094, "step": 8981 }, { "epoch": 0.72, "grad_norm": 1.4189718360958496, "learning_rate": 1.910783662195551e-06, "loss": 0.7774, "step": 8982 }, { "epoch": 0.72, "grad_norm": 1.6513445695919888, "learning_rate": 1.909762162568541e-06, "loss": 0.6669, "step": 8983 }, { "epoch": 0.72, "grad_norm": 1.418030958494561, "learning_rate": 1.9087408716014562e-06, "loss": 0.7929, "step": 8984 }, { "epoch": 0.72, "grad_norm": 1.3738156031867634, "learning_rate": 1.907719789363254e-06, "loss": 0.713, "step": 8985 }, { "epoch": 0.72, "grad_norm": 1.4948792369290256, "learning_rate": 1.9066989159228844e-06, "loss": 0.7643, "step": 8986 }, { "epoch": 0.72, "grad_norm": 1.3995268724255967, "learning_rate": 1.9056782513492779e-06, "loss": 0.7292, "step": 8987 }, { "epoch": 0.72, "grad_norm": 1.5353213648277757, "learning_rate": 1.9046577957113487e-06, "loss": 0.7029, "step": 8988 }, { "epoch": 0.72, "grad_norm": 1.6208815714457068, "learning_rate": 1.9036375490780056e-06, "loss": 0.7133, "step": 8989 }, { "epoch": 0.72, "grad_norm": 1.6165035905391572, "learning_rate": 1.902617511518135e-06, "loss": 0.7416, "step": 8990 }, { "epoch": 0.72, "grad_norm": 0.8134995635743169, "learning_rate": 1.901597683100611e-06, "loss": 1.0749, "step": 8991 }, { "epoch": 0.72, "grad_norm": 1.4725879355110554, "learning_rate": 1.9005780638942982e-06, "loss": 0.6841, "step": 8992 }, { "epoch": 0.72, "grad_norm": 1.476048028610045, "learning_rate": 1.8995586539680422e-06, "loss": 0.7074, "step": 8993 }, { "epoch": 0.72, "grad_norm": 1.4953803169245197, "learning_rate": 1.8985394533906749e-06, "loss": 0.5939, "step": 8994 }, { "epoch": 0.72, "grad_norm": 1.5059740685398337, "learning_rate": 1.8975204622310157e-06, "loss": 0.7201, "step": 8995 }, { "epoch": 0.72, "grad_norm": 1.5573322497141062, "learning_rate": 1.896501680557869e-06, "loss": 0.78, "step": 8996 }, { "epoch": 0.72, "grad_norm": 1.5391471733953224, "learning_rate": 1.895483108440026e-06, "loss": 0.7493, "step": 8997 }, { "epoch": 0.72, "grad_norm": 0.774758916140538, "learning_rate": 1.89446474594626e-06, "loss": 1.0386, "step": 8998 }, { "epoch": 0.72, "grad_norm": 0.7627233541379113, "learning_rate": 1.8934465931453378e-06, "loss": 1.076, "step": 8999 }, { "epoch": 0.72, "grad_norm": 1.4947070734000882, "learning_rate": 1.8924286501060047e-06, "loss": 0.7342, "step": 9000 }, { "epoch": 0.72, "grad_norm": 1.50427155173262, "learning_rate": 1.8914109168969958e-06, "loss": 0.6584, "step": 9001 }, { "epoch": 0.72, "grad_norm": 1.4625939513214241, "learning_rate": 1.8903933935870277e-06, "loss": 0.6858, "step": 9002 }, { "epoch": 0.72, "grad_norm": 1.9706659948365657, "learning_rate": 1.8893760802448096e-06, "loss": 0.7568, "step": 9003 }, { "epoch": 0.72, "grad_norm": 1.5176789244714313, "learning_rate": 1.888358976939032e-06, "loss": 0.7798, "step": 9004 }, { "epoch": 0.72, "grad_norm": 1.5215454887323965, "learning_rate": 1.8873420837383715e-06, "loss": 0.711, "step": 9005 }, { "epoch": 0.72, "grad_norm": 0.7904088371333343, "learning_rate": 1.8863254007114912e-06, "loss": 1.1006, "step": 9006 }, { "epoch": 0.72, "grad_norm": 1.5006902598782765, "learning_rate": 1.8853089279270393e-06, "loss": 0.7748, "step": 9007 }, { "epoch": 0.72, "grad_norm": 1.6195149948736198, "learning_rate": 1.8842926654536508e-06, "loss": 0.7725, "step": 9008 }, { "epoch": 0.72, "grad_norm": 1.6218657421821607, "learning_rate": 1.8832766133599445e-06, "loss": 0.8206, "step": 9009 }, { "epoch": 0.72, "grad_norm": 1.6206080808014003, "learning_rate": 1.8822607717145291e-06, "loss": 0.758, "step": 9010 }, { "epoch": 0.72, "grad_norm": 0.7568642495002619, "learning_rate": 1.8812451405859966e-06, "loss": 1.0645, "step": 9011 }, { "epoch": 0.72, "grad_norm": 1.6008374004013357, "learning_rate": 1.8802297200429215e-06, "loss": 0.6304, "step": 9012 }, { "epoch": 0.72, "grad_norm": 1.4946217142923581, "learning_rate": 1.8792145101538712e-06, "loss": 0.7501, "step": 9013 }, { "epoch": 0.72, "grad_norm": 1.4814834799976933, "learning_rate": 1.8781995109873929e-06, "loss": 0.7281, "step": 9014 }, { "epoch": 0.72, "grad_norm": 1.5421556514647208, "learning_rate": 1.8771847226120227e-06, "loss": 0.7254, "step": 9015 }, { "epoch": 0.72, "grad_norm": 1.5238179283677238, "learning_rate": 1.8761701450962798e-06, "loss": 0.7533, "step": 9016 }, { "epoch": 0.72, "grad_norm": 1.5116826794758946, "learning_rate": 1.8751557785086727e-06, "loss": 0.7191, "step": 9017 }, { "epoch": 0.72, "grad_norm": 1.4647682015478283, "learning_rate": 1.8741416229176928e-06, "loss": 0.7382, "step": 9018 }, { "epoch": 0.72, "grad_norm": 1.5374191311853032, "learning_rate": 1.8731276783918162e-06, "loss": 0.8601, "step": 9019 }, { "epoch": 0.72, "grad_norm": 1.5162653274580136, "learning_rate": 1.8721139449995107e-06, "loss": 0.7771, "step": 9020 }, { "epoch": 0.72, "grad_norm": 1.4756993788011128, "learning_rate": 1.8711004228092233e-06, "loss": 0.676, "step": 9021 }, { "epoch": 0.72, "grad_norm": 1.5927597444191053, "learning_rate": 1.8700871118893893e-06, "loss": 0.6727, "step": 9022 }, { "epoch": 0.72, "grad_norm": 1.5195555612379161, "learning_rate": 1.8690740123084316e-06, "loss": 0.803, "step": 9023 }, { "epoch": 0.72, "grad_norm": 1.4816138609878466, "learning_rate": 1.8680611241347557e-06, "loss": 0.7798, "step": 9024 }, { "epoch": 0.72, "grad_norm": 1.521441307235049, "learning_rate": 1.8670484474367551e-06, "loss": 0.7175, "step": 9025 }, { "epoch": 0.72, "grad_norm": 1.5643611900422763, "learning_rate": 1.8660359822828066e-06, "loss": 0.8006, "step": 9026 }, { "epoch": 0.72, "grad_norm": 1.4504252200206096, "learning_rate": 1.8650237287412748e-06, "loss": 0.7997, "step": 9027 }, { "epoch": 0.72, "grad_norm": 1.550270185730639, "learning_rate": 1.8640116868805097e-06, "loss": 0.7777, "step": 9028 }, { "epoch": 0.72, "grad_norm": 0.7935406795138835, "learning_rate": 1.8629998567688445e-06, "loss": 1.0718, "step": 9029 }, { "epoch": 0.72, "grad_norm": 1.4916607056112976, "learning_rate": 1.861988238474604e-06, "loss": 0.7689, "step": 9030 }, { "epoch": 0.72, "grad_norm": 1.4622805692691805, "learning_rate": 1.8609768320660932e-06, "loss": 0.7157, "step": 9031 }, { "epoch": 0.72, "grad_norm": 1.8016868482922395, "learning_rate": 1.8599656376116026e-06, "loss": 0.8439, "step": 9032 }, { "epoch": 0.72, "grad_norm": 1.6237913359486311, "learning_rate": 1.8589546551794141e-06, "loss": 0.7429, "step": 9033 }, { "epoch": 0.72, "grad_norm": 1.438161336741648, "learning_rate": 1.8579438848377895e-06, "loss": 0.7833, "step": 9034 }, { "epoch": 0.72, "grad_norm": 1.51947726688505, "learning_rate": 1.8569333266549787e-06, "loss": 0.78, "step": 9035 }, { "epoch": 0.72, "grad_norm": 1.4802229232012183, "learning_rate": 1.8559229806992151e-06, "loss": 0.7869, "step": 9036 }, { "epoch": 0.73, "grad_norm": 1.451879407169695, "learning_rate": 1.8549128470387229e-06, "loss": 0.7266, "step": 9037 }, { "epoch": 0.73, "grad_norm": 1.4298293878553483, "learning_rate": 1.8539029257417068e-06, "loss": 0.7245, "step": 9038 }, { "epoch": 0.73, "grad_norm": 1.4616366727115857, "learning_rate": 1.8528932168763592e-06, "loss": 0.7598, "step": 9039 }, { "epoch": 0.73, "grad_norm": 1.5133308209743124, "learning_rate": 1.851883720510858e-06, "loss": 0.6988, "step": 9040 }, { "epoch": 0.73, "grad_norm": 1.4749691067192736, "learning_rate": 1.8508744367133662e-06, "loss": 0.6551, "step": 9041 }, { "epoch": 0.73, "grad_norm": 1.4783314330355415, "learning_rate": 1.8498653655520337e-06, "loss": 0.7165, "step": 9042 }, { "epoch": 0.73, "grad_norm": 0.7855912995164267, "learning_rate": 1.8488565070949931e-06, "loss": 1.0801, "step": 9043 }, { "epoch": 0.73, "grad_norm": 1.6081824859185057, "learning_rate": 1.8478478614103684e-06, "loss": 0.7813, "step": 9044 }, { "epoch": 0.73, "grad_norm": 1.462659210647125, "learning_rate": 1.8468394285662643e-06, "loss": 0.7541, "step": 9045 }, { "epoch": 0.73, "grad_norm": 1.6442368625281671, "learning_rate": 1.84583120863077e-06, "loss": 0.7253, "step": 9046 }, { "epoch": 0.73, "grad_norm": 1.5951833801022253, "learning_rate": 1.844823201671967e-06, "loss": 0.8434, "step": 9047 }, { "epoch": 0.73, "grad_norm": 1.54182252559575, "learning_rate": 1.8438154077579157e-06, "loss": 0.7709, "step": 9048 }, { "epoch": 0.73, "grad_norm": 0.7750038465905909, "learning_rate": 1.8428078269566652e-06, "loss": 1.0833, "step": 9049 }, { "epoch": 0.73, "grad_norm": 1.5046932926776329, "learning_rate": 1.8418004593362498e-06, "loss": 0.7428, "step": 9050 }, { "epoch": 0.73, "grad_norm": 1.5676843278827293, "learning_rate": 1.8407933049646893e-06, "loss": 0.8071, "step": 9051 }, { "epoch": 0.73, "grad_norm": 1.6406774070341008, "learning_rate": 1.8397863639099884e-06, "loss": 0.7393, "step": 9052 }, { "epoch": 0.73, "grad_norm": 1.3964933855449182, "learning_rate": 1.8387796362401367e-06, "loss": 0.6779, "step": 9053 }, { "epoch": 0.73, "grad_norm": 1.4978047015082183, "learning_rate": 1.8377731220231144e-06, "loss": 0.7074, "step": 9054 }, { "epoch": 0.73, "grad_norm": 1.527750510764953, "learning_rate": 1.8367668213268814e-06, "loss": 0.7457, "step": 9055 }, { "epoch": 0.73, "grad_norm": 1.506893828515584, "learning_rate": 1.8357607342193844e-06, "loss": 0.7193, "step": 9056 }, { "epoch": 0.73, "grad_norm": 1.5994736619599366, "learning_rate": 1.83475486076856e-06, "loss": 0.7697, "step": 9057 }, { "epoch": 0.73, "grad_norm": 1.4188940343381915, "learning_rate": 1.8337492010423252e-06, "loss": 0.7894, "step": 9058 }, { "epoch": 0.73, "grad_norm": 1.4554887404676946, "learning_rate": 1.8327437551085842e-06, "loss": 0.7793, "step": 9059 }, { "epoch": 0.73, "grad_norm": 1.433485965788859, "learning_rate": 1.8317385230352269e-06, "loss": 0.8117, "step": 9060 }, { "epoch": 0.73, "grad_norm": 1.3707311275787393, "learning_rate": 1.8307335048901299e-06, "loss": 0.6985, "step": 9061 }, { "epoch": 0.73, "grad_norm": 1.4603135243118102, "learning_rate": 1.829728700741153e-06, "loss": 0.747, "step": 9062 }, { "epoch": 0.73, "grad_norm": 1.5288737673690223, "learning_rate": 1.8287241106561422e-06, "loss": 0.8003, "step": 9063 }, { "epoch": 0.73, "grad_norm": 1.422483141260241, "learning_rate": 1.8277197347029324e-06, "loss": 0.6788, "step": 9064 }, { "epoch": 0.73, "grad_norm": 1.36357307880521, "learning_rate": 1.8267155729493403e-06, "loss": 0.7426, "step": 9065 }, { "epoch": 0.73, "grad_norm": 1.5689701068379638, "learning_rate": 1.8257116254631685e-06, "loss": 0.7336, "step": 9066 }, { "epoch": 0.73, "grad_norm": 1.6465616098699154, "learning_rate": 1.8247078923122046e-06, "loss": 0.7397, "step": 9067 }, { "epoch": 0.73, "grad_norm": 1.3849591407513553, "learning_rate": 1.8237043735642263e-06, "loss": 0.7808, "step": 9068 }, { "epoch": 0.73, "grad_norm": 1.5558343400428012, "learning_rate": 1.8227010692869918e-06, "loss": 0.8138, "step": 9069 }, { "epoch": 0.73, "grad_norm": 1.4942701165724106, "learning_rate": 1.8216979795482464e-06, "loss": 0.7238, "step": 9070 }, { "epoch": 0.73, "grad_norm": 1.4897431903361553, "learning_rate": 1.8206951044157212e-06, "loss": 0.722, "step": 9071 }, { "epoch": 0.73, "grad_norm": 1.5522273170161136, "learning_rate": 1.8196924439571323e-06, "loss": 0.7658, "step": 9072 }, { "epoch": 0.73, "grad_norm": 1.5029159136555514, "learning_rate": 1.8186899982401802e-06, "loss": 0.7464, "step": 9073 }, { "epoch": 0.73, "grad_norm": 1.4495772517638867, "learning_rate": 1.8176877673325555e-06, "loss": 0.7627, "step": 9074 }, { "epoch": 0.73, "grad_norm": 1.5771838108111327, "learning_rate": 1.8166857513019298e-06, "loss": 0.7993, "step": 9075 }, { "epoch": 0.73, "grad_norm": 1.582110062241659, "learning_rate": 1.8156839502159606e-06, "loss": 0.6791, "step": 9076 }, { "epoch": 0.73, "grad_norm": 1.5470807079626603, "learning_rate": 1.814682364142291e-06, "loss": 0.7233, "step": 9077 }, { "epoch": 0.73, "grad_norm": 1.6514109037357982, "learning_rate": 1.8136809931485544e-06, "loss": 0.7609, "step": 9078 }, { "epoch": 0.73, "grad_norm": 1.5858480028405824, "learning_rate": 1.8126798373023624e-06, "loss": 0.8134, "step": 9079 }, { "epoch": 0.73, "grad_norm": 1.4711547999056573, "learning_rate": 1.811678896671314e-06, "loss": 0.7916, "step": 9080 }, { "epoch": 0.73, "grad_norm": 1.4544737688905205, "learning_rate": 1.8106781713229993e-06, "loss": 0.7019, "step": 9081 }, { "epoch": 0.73, "grad_norm": 1.5628495377846643, "learning_rate": 1.8096776613249872e-06, "loss": 0.746, "step": 9082 }, { "epoch": 0.73, "grad_norm": 0.8108775697229388, "learning_rate": 1.8086773667448359e-06, "loss": 1.0602, "step": 9083 }, { "epoch": 0.73, "grad_norm": 0.7752499931276945, "learning_rate": 1.8076772876500831e-06, "loss": 1.076, "step": 9084 }, { "epoch": 0.73, "grad_norm": 1.4894829332743877, "learning_rate": 1.8066774241082612e-06, "loss": 0.733, "step": 9085 }, { "epoch": 0.73, "grad_norm": 1.433497792699876, "learning_rate": 1.8056777761868815e-06, "loss": 0.7563, "step": 9086 }, { "epoch": 0.73, "grad_norm": 1.4844033200160542, "learning_rate": 1.804678343953441e-06, "loss": 0.7676, "step": 9087 }, { "epoch": 0.73, "grad_norm": 1.516014001918474, "learning_rate": 1.8036791274754266e-06, "loss": 0.681, "step": 9088 }, { "epoch": 0.73, "grad_norm": 0.7933155424586475, "learning_rate": 1.802680126820307e-06, "loss": 1.043, "step": 9089 }, { "epoch": 0.73, "grad_norm": 1.4707896316554034, "learning_rate": 1.8016813420555346e-06, "loss": 0.7876, "step": 9090 }, { "epoch": 0.73, "grad_norm": 1.459385105568495, "learning_rate": 1.8006827732485528e-06, "loss": 0.7124, "step": 9091 }, { "epoch": 0.73, "grad_norm": 1.5613322008575725, "learning_rate": 1.7996844204667858e-06, "loss": 0.7837, "step": 9092 }, { "epoch": 0.73, "grad_norm": 1.4392652782869524, "learning_rate": 1.7986862837776448e-06, "loss": 0.7448, "step": 9093 }, { "epoch": 0.73, "grad_norm": 1.4542084096605008, "learning_rate": 1.7976883632485258e-06, "loss": 0.7714, "step": 9094 }, { "epoch": 0.73, "grad_norm": 1.7476804107469217, "learning_rate": 1.7966906589468114e-06, "loss": 0.7255, "step": 9095 }, { "epoch": 0.73, "grad_norm": 0.7510453886374318, "learning_rate": 1.7956931709398684e-06, "loss": 1.0774, "step": 9096 }, { "epoch": 0.73, "grad_norm": 1.491524293257138, "learning_rate": 1.794695899295048e-06, "loss": 0.7334, "step": 9097 }, { "epoch": 0.73, "grad_norm": 1.5460250409611556, "learning_rate": 1.7936988440796915e-06, "loss": 0.765, "step": 9098 }, { "epoch": 0.73, "grad_norm": 1.627236313825197, "learning_rate": 1.7927020053611204e-06, "loss": 0.7534, "step": 9099 }, { "epoch": 0.73, "grad_norm": 1.4942800176598443, "learning_rate": 1.7917053832066444e-06, "loss": 0.7092, "step": 9100 }, { "epoch": 0.73, "grad_norm": 1.4481504971410948, "learning_rate": 1.790708977683555e-06, "loss": 0.7142, "step": 9101 }, { "epoch": 0.73, "grad_norm": 1.4504175764977358, "learning_rate": 1.789712788859136e-06, "loss": 0.699, "step": 9102 }, { "epoch": 0.73, "grad_norm": 1.5420025872595933, "learning_rate": 1.7887168168006498e-06, "loss": 0.7575, "step": 9103 }, { "epoch": 0.73, "grad_norm": 1.387958860348744, "learning_rate": 1.7877210615753477e-06, "loss": 0.6742, "step": 9104 }, { "epoch": 0.73, "grad_norm": 1.5337391832380043, "learning_rate": 1.7867255232504644e-06, "loss": 0.7344, "step": 9105 }, { "epoch": 0.73, "grad_norm": 1.5405551850846713, "learning_rate": 1.7857302018932215e-06, "loss": 0.803, "step": 9106 }, { "epoch": 0.73, "grad_norm": 0.7234841510109024, "learning_rate": 1.7847350975708233e-06, "loss": 1.0386, "step": 9107 }, { "epoch": 0.73, "grad_norm": 1.502146422828761, "learning_rate": 1.7837402103504653e-06, "loss": 0.7489, "step": 9108 }, { "epoch": 0.73, "grad_norm": 1.5873641373470815, "learning_rate": 1.7827455402993231e-06, "loss": 0.819, "step": 9109 }, { "epoch": 0.73, "grad_norm": 1.5557865596874143, "learning_rate": 1.7817510874845585e-06, "loss": 0.7193, "step": 9110 }, { "epoch": 0.73, "grad_norm": 1.508694521032566, "learning_rate": 1.7807568519733175e-06, "loss": 0.7807, "step": 9111 }, { "epoch": 0.73, "grad_norm": 1.5096515083787445, "learning_rate": 1.7797628338327372e-06, "loss": 0.7704, "step": 9112 }, { "epoch": 0.73, "grad_norm": 1.5344426154506512, "learning_rate": 1.7787690331299334e-06, "loss": 0.8115, "step": 9113 }, { "epoch": 0.73, "grad_norm": 1.6297134356488117, "learning_rate": 1.7777754499320104e-06, "loss": 0.7924, "step": 9114 }, { "epoch": 0.73, "grad_norm": 1.5495189129082467, "learning_rate": 1.7767820843060575e-06, "loss": 0.732, "step": 9115 }, { "epoch": 0.73, "grad_norm": 0.8021373905463902, "learning_rate": 1.7757889363191484e-06, "loss": 1.0688, "step": 9116 }, { "epoch": 0.73, "grad_norm": 1.6021482799543167, "learning_rate": 1.774796006038343e-06, "loss": 0.7465, "step": 9117 }, { "epoch": 0.73, "grad_norm": 1.4968258642243784, "learning_rate": 1.7738032935306842e-06, "loss": 0.784, "step": 9118 }, { "epoch": 0.73, "grad_norm": 1.638683641500895, "learning_rate": 1.772810798863206e-06, "loss": 0.8248, "step": 9119 }, { "epoch": 0.73, "grad_norm": 1.4415034491510006, "learning_rate": 1.7718185221029217e-06, "loss": 0.7458, "step": 9120 }, { "epoch": 0.73, "grad_norm": 1.570349383337047, "learning_rate": 1.770826463316831e-06, "loss": 0.728, "step": 9121 }, { "epoch": 0.73, "grad_norm": 1.495359630982366, "learning_rate": 1.7698346225719232e-06, "loss": 0.8612, "step": 9122 }, { "epoch": 0.73, "grad_norm": 1.5634886432554336, "learning_rate": 1.7688429999351681e-06, "loss": 0.7975, "step": 9123 }, { "epoch": 0.73, "grad_norm": 1.5376690130487365, "learning_rate": 1.767851595473522e-06, "loss": 0.788, "step": 9124 }, { "epoch": 0.73, "grad_norm": 1.4947976694838772, "learning_rate": 1.7668604092539255e-06, "loss": 0.7445, "step": 9125 }, { "epoch": 0.73, "grad_norm": 1.4191060324094653, "learning_rate": 1.7658694413433087e-06, "loss": 0.7131, "step": 9126 }, { "epoch": 0.73, "grad_norm": 1.5547640911181309, "learning_rate": 1.7648786918085837e-06, "loss": 0.7054, "step": 9127 }, { "epoch": 0.73, "grad_norm": 1.4859956210165293, "learning_rate": 1.763888160716644e-06, "loss": 0.7457, "step": 9128 }, { "epoch": 0.73, "grad_norm": 1.468140694488055, "learning_rate": 1.7628978481343772e-06, "loss": 0.6829, "step": 9129 }, { "epoch": 0.73, "grad_norm": 1.6418363705896792, "learning_rate": 1.76190775412865e-06, "loss": 0.7451, "step": 9130 }, { "epoch": 0.73, "grad_norm": 1.7305560046527197, "learning_rate": 1.7609178787663135e-06, "loss": 0.7623, "step": 9131 }, { "epoch": 0.73, "grad_norm": 1.4927189414756228, "learning_rate": 1.75992822211421e-06, "loss": 0.8002, "step": 9132 }, { "epoch": 0.73, "grad_norm": 1.516872586062521, "learning_rate": 1.7589387842391626e-06, "loss": 0.7055, "step": 9133 }, { "epoch": 0.73, "grad_norm": 1.511803948932772, "learning_rate": 1.7579495652079786e-06, "loss": 0.7172, "step": 9134 }, { "epoch": 0.73, "grad_norm": 1.687899821702067, "learning_rate": 1.7569605650874526e-06, "loss": 0.8084, "step": 9135 }, { "epoch": 0.73, "grad_norm": 1.557370313149239, "learning_rate": 1.7559717839443664e-06, "loss": 0.7309, "step": 9136 }, { "epoch": 0.73, "grad_norm": 1.5722703795328898, "learning_rate": 1.7549832218454826e-06, "loss": 0.7164, "step": 9137 }, { "epoch": 0.73, "grad_norm": 1.5441130774087743, "learning_rate": 1.7539948788575524e-06, "loss": 0.6253, "step": 9138 }, { "epoch": 0.73, "grad_norm": 0.7887226525591595, "learning_rate": 1.75300675504731e-06, "loss": 1.0522, "step": 9139 }, { "epoch": 0.73, "grad_norm": 1.617572047805759, "learning_rate": 1.7520188504814767e-06, "loss": 0.7734, "step": 9140 }, { "epoch": 0.73, "grad_norm": 1.5936446499569914, "learning_rate": 1.7510311652267576e-06, "loss": 0.754, "step": 9141 }, { "epoch": 0.73, "grad_norm": 1.4171089263601067, "learning_rate": 1.7500436993498415e-06, "loss": 0.7492, "step": 9142 }, { "epoch": 0.73, "grad_norm": 1.512967279741477, "learning_rate": 1.7490564529174082e-06, "loss": 0.7545, "step": 9143 }, { "epoch": 0.73, "grad_norm": 1.4453487297923802, "learning_rate": 1.7480694259961162e-06, "loss": 0.7244, "step": 9144 }, { "epoch": 0.73, "grad_norm": 1.553368564469449, "learning_rate": 1.7470826186526114e-06, "loss": 0.8251, "step": 9145 }, { "epoch": 0.73, "grad_norm": 1.4786274111991848, "learning_rate": 1.7460960309535286e-06, "loss": 0.6713, "step": 9146 }, { "epoch": 0.73, "grad_norm": 1.6417514608497272, "learning_rate": 1.7451096629654813e-06, "loss": 0.7595, "step": 9147 }, { "epoch": 0.73, "grad_norm": 1.5466262900967662, "learning_rate": 1.7441235147550728e-06, "loss": 0.7004, "step": 9148 }, { "epoch": 0.73, "grad_norm": 1.58390902017322, "learning_rate": 1.74313758638889e-06, "loss": 0.7675, "step": 9149 }, { "epoch": 0.73, "grad_norm": 1.4239810672195217, "learning_rate": 1.7421518779335038e-06, "loss": 0.7529, "step": 9150 }, { "epoch": 0.73, "grad_norm": 1.5490613028879163, "learning_rate": 1.741166389455473e-06, "loss": 0.7726, "step": 9151 }, { "epoch": 0.73, "grad_norm": 1.627729799866342, "learning_rate": 1.7401811210213377e-06, "loss": 0.7605, "step": 9152 }, { "epoch": 0.73, "grad_norm": 1.5165486468148615, "learning_rate": 1.7391960726976281e-06, "loss": 0.8537, "step": 9153 }, { "epoch": 0.73, "grad_norm": 1.5270314493592017, "learning_rate": 1.7382112445508565e-06, "loss": 0.6718, "step": 9154 }, { "epoch": 0.73, "grad_norm": 1.546583348521981, "learning_rate": 1.7372266366475187e-06, "loss": 0.7757, "step": 9155 }, { "epoch": 0.73, "grad_norm": 1.4414717647332593, "learning_rate": 1.7362422490541003e-06, "loss": 0.7204, "step": 9156 }, { "epoch": 0.73, "grad_norm": 1.5340612813870216, "learning_rate": 1.7352580818370685e-06, "loss": 0.7414, "step": 9157 }, { "epoch": 0.73, "grad_norm": 1.5140381274042096, "learning_rate": 1.7342741350628767e-06, "loss": 0.7866, "step": 9158 }, { "epoch": 0.73, "grad_norm": 1.492093327098175, "learning_rate": 1.7332904087979623e-06, "loss": 0.7316, "step": 9159 }, { "epoch": 0.73, "grad_norm": 1.4981172755218095, "learning_rate": 1.7323069031087498e-06, "loss": 0.7695, "step": 9160 }, { "epoch": 0.73, "grad_norm": 0.786308150197478, "learning_rate": 1.7313236180616466e-06, "loss": 1.068, "step": 9161 }, { "epoch": 0.74, "grad_norm": 1.463436914739836, "learning_rate": 1.7303405537230456e-06, "loss": 0.6691, "step": 9162 }, { "epoch": 0.74, "grad_norm": 1.8130093397421394, "learning_rate": 1.729357710159329e-06, "loss": 0.7274, "step": 9163 }, { "epoch": 0.74, "grad_norm": 1.6080508084266667, "learning_rate": 1.7283750874368577e-06, "loss": 0.7824, "step": 9164 }, { "epoch": 0.74, "grad_norm": 2.0468505744872716, "learning_rate": 1.7273926856219824e-06, "loss": 0.7573, "step": 9165 }, { "epoch": 0.74, "grad_norm": 1.5189735981139543, "learning_rate": 1.7264105047810341e-06, "loss": 0.7795, "step": 9166 }, { "epoch": 0.74, "grad_norm": 1.5017136038688443, "learning_rate": 1.725428544980336e-06, "loss": 0.8509, "step": 9167 }, { "epoch": 0.74, "grad_norm": 1.4513155378685, "learning_rate": 1.7244468062861897e-06, "loss": 0.7278, "step": 9168 }, { "epoch": 0.74, "grad_norm": 1.5242345190456783, "learning_rate": 1.7234652887648838e-06, "loss": 0.7661, "step": 9169 }, { "epoch": 0.74, "grad_norm": 1.4897619809328135, "learning_rate": 1.7224839924826959e-06, "loss": 0.7498, "step": 9170 }, { "epoch": 0.74, "grad_norm": 0.7926154124161684, "learning_rate": 1.7215029175058845e-06, "loss": 1.0485, "step": 9171 }, { "epoch": 0.74, "grad_norm": 1.5438926616366768, "learning_rate": 1.7205220639006893e-06, "loss": 0.7366, "step": 9172 }, { "epoch": 0.74, "grad_norm": 1.539296307350383, "learning_rate": 1.7195414317333453e-06, "loss": 0.7905, "step": 9173 }, { "epoch": 0.74, "grad_norm": 1.5138864036710689, "learning_rate": 1.7185610210700654e-06, "loss": 0.6634, "step": 9174 }, { "epoch": 0.74, "grad_norm": 0.7577715114080709, "learning_rate": 1.7175808319770482e-06, "loss": 1.0754, "step": 9175 }, { "epoch": 0.74, "grad_norm": 1.3927023866629802, "learning_rate": 1.7166008645204774e-06, "loss": 0.731, "step": 9176 }, { "epoch": 0.74, "grad_norm": 1.5990516454726418, "learning_rate": 1.7156211187665262e-06, "loss": 0.7743, "step": 9177 }, { "epoch": 0.74, "grad_norm": 1.5457494203697455, "learning_rate": 1.7146415947813472e-06, "loss": 0.7716, "step": 9178 }, { "epoch": 0.74, "grad_norm": 1.7159247920119958, "learning_rate": 1.713662292631078e-06, "loss": 0.7942, "step": 9179 }, { "epoch": 0.74, "grad_norm": 1.5130877503498232, "learning_rate": 1.7126832123818475e-06, "loss": 0.8197, "step": 9180 }, { "epoch": 0.74, "grad_norm": 1.5197755172298342, "learning_rate": 1.7117043540997635e-06, "loss": 0.7664, "step": 9181 }, { "epoch": 0.74, "grad_norm": 1.4396809460013287, "learning_rate": 1.7107257178509202e-06, "loss": 0.7901, "step": 9182 }, { "epoch": 0.74, "grad_norm": 1.5297767596906362, "learning_rate": 1.7097473037013984e-06, "loss": 0.7664, "step": 9183 }, { "epoch": 0.74, "grad_norm": 1.5133265517428123, "learning_rate": 1.7087691117172617e-06, "loss": 0.7531, "step": 9184 }, { "epoch": 0.74, "grad_norm": 0.7759179337322503, "learning_rate": 1.707791141964561e-06, "loss": 1.0263, "step": 9185 }, { "epoch": 0.74, "grad_norm": 1.6756472773531088, "learning_rate": 1.7068133945093285e-06, "loss": 0.8381, "step": 9186 }, { "epoch": 0.74, "grad_norm": 1.4425332829739987, "learning_rate": 1.7058358694175875e-06, "loss": 0.794, "step": 9187 }, { "epoch": 0.74, "grad_norm": 1.5084276279472941, "learning_rate": 1.7048585667553414e-06, "loss": 0.7659, "step": 9188 }, { "epoch": 0.74, "grad_norm": 1.715371952286538, "learning_rate": 1.7038814865885779e-06, "loss": 0.7565, "step": 9189 }, { "epoch": 0.74, "grad_norm": 1.7765993970604774, "learning_rate": 1.7029046289832751e-06, "loss": 0.7416, "step": 9190 }, { "epoch": 0.74, "grad_norm": 0.7593226878867999, "learning_rate": 1.7019279940053906e-06, "loss": 1.1064, "step": 9191 }, { "epoch": 0.74, "grad_norm": 1.5706020226482502, "learning_rate": 1.7009515817208698e-06, "loss": 0.7817, "step": 9192 }, { "epoch": 0.74, "grad_norm": 0.7599623905172376, "learning_rate": 1.6999753921956425e-06, "loss": 1.0828, "step": 9193 }, { "epoch": 0.74, "grad_norm": 0.7744798397643095, "learning_rate": 1.6989994254956222e-06, "loss": 1.0797, "step": 9194 }, { "epoch": 0.74, "grad_norm": 1.4775517024690286, "learning_rate": 1.6980236816867086e-06, "loss": 0.6729, "step": 9195 }, { "epoch": 0.74, "grad_norm": 1.4180842717675088, "learning_rate": 1.6970481608347849e-06, "loss": 0.7074, "step": 9196 }, { "epoch": 0.74, "grad_norm": 1.509814104453549, "learning_rate": 1.696072863005724e-06, "loss": 0.6866, "step": 9197 }, { "epoch": 0.74, "grad_norm": 1.4640852579448582, "learning_rate": 1.6950977882653779e-06, "loss": 0.7856, "step": 9198 }, { "epoch": 0.74, "grad_norm": 1.4543695919843345, "learning_rate": 1.6941229366795864e-06, "loss": 0.7424, "step": 9199 }, { "epoch": 0.74, "grad_norm": 1.650004186760314, "learning_rate": 1.693148308314172e-06, "loss": 0.7829, "step": 9200 }, { "epoch": 0.74, "grad_norm": 1.4993408708487548, "learning_rate": 1.6921739032349472e-06, "loss": 0.6591, "step": 9201 }, { "epoch": 0.74, "grad_norm": 1.4602831439000117, "learning_rate": 1.691199721507704e-06, "loss": 0.7702, "step": 9202 }, { "epoch": 0.74, "grad_norm": 1.3754567167413485, "learning_rate": 1.6902257631982217e-06, "loss": 0.851, "step": 9203 }, { "epoch": 0.74, "grad_norm": 1.6650390833222224, "learning_rate": 1.6892520283722641e-06, "loss": 0.6637, "step": 9204 }, { "epoch": 0.74, "grad_norm": 0.7721627355957261, "learning_rate": 1.6882785170955801e-06, "loss": 1.057, "step": 9205 }, { "epoch": 0.74, "grad_norm": 1.5172718835040933, "learning_rate": 1.6873052294339015e-06, "loss": 0.8031, "step": 9206 }, { "epoch": 0.74, "grad_norm": 1.512150065910371, "learning_rate": 1.6863321654529508e-06, "loss": 0.7586, "step": 9207 }, { "epoch": 0.74, "grad_norm": 1.5274419661353846, "learning_rate": 1.6853593252184292e-06, "loss": 0.7734, "step": 9208 }, { "epoch": 0.74, "grad_norm": 1.5259049226602075, "learning_rate": 1.6843867087960252e-06, "loss": 0.8279, "step": 9209 }, { "epoch": 0.74, "grad_norm": 1.5635729711579451, "learning_rate": 1.6834143162514105e-06, "loss": 0.7288, "step": 9210 }, { "epoch": 0.74, "grad_norm": 1.5879593116349144, "learning_rate": 1.6824421476502467e-06, "loss": 0.7137, "step": 9211 }, { "epoch": 0.74, "grad_norm": 1.5812325433392616, "learning_rate": 1.6814702030581754e-06, "loss": 0.8521, "step": 9212 }, { "epoch": 0.74, "grad_norm": 1.548238748937519, "learning_rate": 1.6804984825408227e-06, "loss": 0.7431, "step": 9213 }, { "epoch": 0.74, "grad_norm": 1.4370822929663603, "learning_rate": 1.6795269861638041e-06, "loss": 0.6607, "step": 9214 }, { "epoch": 0.74, "grad_norm": 1.5458014722506537, "learning_rate": 1.6785557139927183e-06, "loss": 0.6944, "step": 9215 }, { "epoch": 0.74, "grad_norm": 0.7883556059575791, "learning_rate": 1.6775846660931439e-06, "loss": 1.0678, "step": 9216 }, { "epoch": 0.74, "grad_norm": 1.6140693381317457, "learning_rate": 1.6766138425306483e-06, "loss": 0.683, "step": 9217 }, { "epoch": 0.74, "grad_norm": 1.478996386798182, "learning_rate": 1.675643243370787e-06, "loss": 0.7271, "step": 9218 }, { "epoch": 0.74, "grad_norm": 1.5605387969116387, "learning_rate": 1.6746728686790952e-06, "loss": 0.6783, "step": 9219 }, { "epoch": 0.74, "grad_norm": 1.4676724224422681, "learning_rate": 1.6737027185210941e-06, "loss": 0.7995, "step": 9220 }, { "epoch": 0.74, "grad_norm": 1.5439649777761801, "learning_rate": 1.6727327929622928e-06, "loss": 0.7497, "step": 9221 }, { "epoch": 0.74, "grad_norm": 1.7303049436263334, "learning_rate": 1.6717630920681815e-06, "loss": 0.7746, "step": 9222 }, { "epoch": 0.74, "grad_norm": 1.4467402883800913, "learning_rate": 1.6707936159042364e-06, "loss": 0.7097, "step": 9223 }, { "epoch": 0.74, "grad_norm": 1.608174380629258, "learning_rate": 1.669824364535918e-06, "loss": 0.7583, "step": 9224 }, { "epoch": 0.74, "grad_norm": 1.670051317342552, "learning_rate": 1.6688553380286748e-06, "loss": 0.7798, "step": 9225 }, { "epoch": 0.74, "grad_norm": 0.7645509018505278, "learning_rate": 1.6678865364479362e-06, "loss": 1.0619, "step": 9226 }, { "epoch": 0.74, "grad_norm": 1.3970654332887158, "learning_rate": 1.6669179598591183e-06, "loss": 0.7643, "step": 9227 }, { "epoch": 0.74, "grad_norm": 1.5606658443862658, "learning_rate": 1.665949608327621e-06, "loss": 0.8253, "step": 9228 }, { "epoch": 0.74, "grad_norm": 1.5686421252845744, "learning_rate": 1.6649814819188304e-06, "loss": 0.7244, "step": 9229 }, { "epoch": 0.74, "grad_norm": 1.4564306434564396, "learning_rate": 1.664013580698114e-06, "loss": 0.7325, "step": 9230 }, { "epoch": 0.74, "grad_norm": 1.5390458430165195, "learning_rate": 1.6630459047308307e-06, "loss": 0.7653, "step": 9231 }, { "epoch": 0.74, "grad_norm": 1.5035328713271894, "learning_rate": 1.6620784540823182e-06, "loss": 0.739, "step": 9232 }, { "epoch": 0.74, "grad_norm": 1.5373781548041978, "learning_rate": 1.661111228817901e-06, "loss": 0.6863, "step": 9233 }, { "epoch": 0.74, "grad_norm": 0.7522744764427861, "learning_rate": 1.660144229002887e-06, "loss": 1.0323, "step": 9234 }, { "epoch": 0.74, "grad_norm": 1.559436927036574, "learning_rate": 1.6591774547025735e-06, "loss": 0.7992, "step": 9235 }, { "epoch": 0.74, "grad_norm": 1.4507404846430578, "learning_rate": 1.6582109059822371e-06, "loss": 0.77, "step": 9236 }, { "epoch": 0.74, "grad_norm": 1.5287730892018996, "learning_rate": 1.6572445829071421e-06, "loss": 0.7994, "step": 9237 }, { "epoch": 0.74, "grad_norm": 1.6472431580117108, "learning_rate": 1.6562784855425362e-06, "loss": 0.7361, "step": 9238 }, { "epoch": 0.74, "grad_norm": 1.5808352260586596, "learning_rate": 1.6553126139536534e-06, "loss": 0.7732, "step": 9239 }, { "epoch": 0.74, "grad_norm": 1.408503030263893, "learning_rate": 1.6543469682057105e-06, "loss": 0.7817, "step": 9240 }, { "epoch": 0.74, "grad_norm": 1.500042840670344, "learning_rate": 1.6533815483639094e-06, "loss": 0.7767, "step": 9241 }, { "epoch": 0.74, "grad_norm": 0.7399428372035358, "learning_rate": 1.6524163544934396e-06, "loss": 1.0345, "step": 9242 }, { "epoch": 0.74, "grad_norm": 1.5005395534888941, "learning_rate": 1.6514513866594728e-06, "loss": 0.7814, "step": 9243 }, { "epoch": 0.74, "grad_norm": 1.4044075837864731, "learning_rate": 1.6504866449271633e-06, "loss": 0.7559, "step": 9244 }, { "epoch": 0.74, "grad_norm": 1.534321523362935, "learning_rate": 1.649522129361657e-06, "loss": 0.726, "step": 9245 }, { "epoch": 0.74, "grad_norm": 1.4720599872272986, "learning_rate": 1.6485578400280772e-06, "loss": 0.7944, "step": 9246 }, { "epoch": 0.74, "grad_norm": 1.4471798852085327, "learning_rate": 1.6475937769915357e-06, "loss": 0.7075, "step": 9247 }, { "epoch": 0.74, "grad_norm": 1.535462016350755, "learning_rate": 1.646629940317128e-06, "loss": 0.7691, "step": 9248 }, { "epoch": 0.74, "grad_norm": 0.7694202822500037, "learning_rate": 1.6456663300699349e-06, "loss": 1.0643, "step": 9249 }, { "epoch": 0.74, "grad_norm": 1.80988445376309, "learning_rate": 1.6447029463150215e-06, "loss": 0.7939, "step": 9250 }, { "epoch": 0.74, "grad_norm": 1.5110477954265107, "learning_rate": 1.6437397891174357e-06, "loss": 0.6929, "step": 9251 }, { "epoch": 0.74, "grad_norm": 1.618009401875516, "learning_rate": 1.6427768585422155e-06, "loss": 0.8045, "step": 9252 }, { "epoch": 0.74, "grad_norm": 1.6452515639750989, "learning_rate": 1.6418141546543787e-06, "loss": 0.7225, "step": 9253 }, { "epoch": 0.74, "grad_norm": 1.3929055098430094, "learning_rate": 1.640851677518927e-06, "loss": 0.8451, "step": 9254 }, { "epoch": 0.74, "grad_norm": 1.5050459011784143, "learning_rate": 1.6398894272008532e-06, "loss": 0.7188, "step": 9255 }, { "epoch": 0.74, "grad_norm": 1.520348374633757, "learning_rate": 1.6389274037651288e-06, "loss": 0.8369, "step": 9256 }, { "epoch": 0.74, "grad_norm": 0.7451661451877218, "learning_rate": 1.6379656072767114e-06, "loss": 1.0608, "step": 9257 }, { "epoch": 0.74, "grad_norm": 1.6917728561823426, "learning_rate": 1.6370040378005426e-06, "loss": 0.7818, "step": 9258 }, { "epoch": 0.74, "grad_norm": 0.783513602621412, "learning_rate": 1.636042695401554e-06, "loss": 1.0335, "step": 9259 }, { "epoch": 0.74, "grad_norm": 1.5769046328979726, "learning_rate": 1.6350815801446534e-06, "loss": 0.8162, "step": 9260 }, { "epoch": 0.74, "grad_norm": 1.4722783739602536, "learning_rate": 1.6341206920947373e-06, "loss": 0.6947, "step": 9261 }, { "epoch": 0.74, "grad_norm": 1.5283386109159598, "learning_rate": 1.6331600313166896e-06, "loss": 0.847, "step": 9262 }, { "epoch": 0.74, "grad_norm": 1.630699770796367, "learning_rate": 1.6321995978753757e-06, "loss": 0.7641, "step": 9263 }, { "epoch": 0.74, "grad_norm": 0.7600025328926445, "learning_rate": 1.631239391835646e-06, "loss": 1.0527, "step": 9264 }, { "epoch": 0.74, "grad_norm": 1.4807066417147805, "learning_rate": 1.6302794132623346e-06, "loss": 0.7316, "step": 9265 }, { "epoch": 0.74, "grad_norm": 1.512905043349435, "learning_rate": 1.6293196622202635e-06, "loss": 0.686, "step": 9266 }, { "epoch": 0.74, "grad_norm": 1.600044942799814, "learning_rate": 1.6283601387742366e-06, "loss": 0.7995, "step": 9267 }, { "epoch": 0.74, "grad_norm": 1.5083948243265983, "learning_rate": 1.627400842989041e-06, "loss": 0.7673, "step": 9268 }, { "epoch": 0.74, "grad_norm": 1.340293452525885, "learning_rate": 1.6264417749294543e-06, "loss": 0.6478, "step": 9269 }, { "epoch": 0.74, "grad_norm": 0.7741641332582838, "learning_rate": 1.6254829346602324e-06, "loss": 1.0514, "step": 9270 }, { "epoch": 0.74, "grad_norm": 1.6080258312708362, "learning_rate": 1.6245243222461198e-06, "loss": 0.7511, "step": 9271 }, { "epoch": 0.74, "grad_norm": 1.610195052359815, "learning_rate": 1.6235659377518432e-06, "loss": 0.7268, "step": 9272 }, { "epoch": 0.74, "grad_norm": 1.6105781832745332, "learning_rate": 1.6226077812421154e-06, "loss": 0.8147, "step": 9273 }, { "epoch": 0.74, "grad_norm": 1.585715493929405, "learning_rate": 1.6216498527816328e-06, "loss": 0.8014, "step": 9274 }, { "epoch": 0.74, "grad_norm": 1.514998315194074, "learning_rate": 1.6206921524350754e-06, "loss": 0.7876, "step": 9275 }, { "epoch": 0.74, "grad_norm": 1.5167648466853563, "learning_rate": 1.6197346802671133e-06, "loss": 0.7283, "step": 9276 }, { "epoch": 0.74, "grad_norm": 1.5544995250339124, "learning_rate": 1.6187774363423946e-06, "loss": 0.7421, "step": 9277 }, { "epoch": 0.74, "grad_norm": 1.4915929158597128, "learning_rate": 1.6178204207255531e-06, "loss": 0.7504, "step": 9278 }, { "epoch": 0.74, "grad_norm": 1.6543088200777536, "learning_rate": 1.6168636334812126e-06, "loss": 0.6583, "step": 9279 }, { "epoch": 0.74, "grad_norm": 1.5219753328589805, "learning_rate": 1.6159070746739757e-06, "loss": 0.7848, "step": 9280 }, { "epoch": 0.74, "grad_norm": 1.521174967834369, "learning_rate": 1.6149507443684314e-06, "loss": 0.6967, "step": 9281 }, { "epoch": 0.74, "grad_norm": 1.326527784619663, "learning_rate": 1.613994642629153e-06, "loss": 0.683, "step": 9282 }, { "epoch": 0.74, "grad_norm": 1.4723098870235911, "learning_rate": 1.6130387695206989e-06, "loss": 0.7515, "step": 9283 }, { "epoch": 0.74, "grad_norm": 1.6414680552081728, "learning_rate": 1.6120831251076118e-06, "loss": 0.6897, "step": 9284 }, { "epoch": 0.74, "grad_norm": 1.8734205261911465, "learning_rate": 1.611127709454418e-06, "loss": 0.7207, "step": 9285 }, { "epoch": 0.75, "grad_norm": 1.4064049558298468, "learning_rate": 1.6101725226256316e-06, "loss": 0.6783, "step": 9286 }, { "epoch": 0.75, "grad_norm": 1.4609954451409313, "learning_rate": 1.6092175646857477e-06, "loss": 0.7457, "step": 9287 }, { "epoch": 0.75, "grad_norm": 0.7764374254643186, "learning_rate": 1.6082628356992453e-06, "loss": 1.0757, "step": 9288 }, { "epoch": 0.75, "grad_norm": 1.6288135262362202, "learning_rate": 1.607308335730594e-06, "loss": 0.8444, "step": 9289 }, { "epoch": 0.75, "grad_norm": 1.5785551893804821, "learning_rate": 1.6063540648442416e-06, "loss": 0.7623, "step": 9290 }, { "epoch": 0.75, "grad_norm": 1.4384018372810115, "learning_rate": 1.6054000231046229e-06, "loss": 0.7919, "step": 9291 }, { "epoch": 0.75, "grad_norm": 1.501478010359784, "learning_rate": 1.604446210576157e-06, "loss": 0.7168, "step": 9292 }, { "epoch": 0.75, "grad_norm": 1.503561579706657, "learning_rate": 1.603492627323247e-06, "loss": 0.7536, "step": 9293 }, { "epoch": 0.75, "grad_norm": 0.7831922004193947, "learning_rate": 1.6025392734102818e-06, "loss": 1.0634, "step": 9294 }, { "epoch": 0.75, "grad_norm": 1.6359060872652826, "learning_rate": 1.6015861489016316e-06, "loss": 0.8094, "step": 9295 }, { "epoch": 0.75, "grad_norm": 1.5614625869077492, "learning_rate": 1.6006332538616576e-06, "loss": 0.727, "step": 9296 }, { "epoch": 0.75, "grad_norm": 1.4612492630861058, "learning_rate": 1.599680588354699e-06, "loss": 0.7989, "step": 9297 }, { "epoch": 0.75, "grad_norm": 1.5093069878704097, "learning_rate": 1.5987281524450827e-06, "loss": 0.6717, "step": 9298 }, { "epoch": 0.75, "grad_norm": 1.491069171032876, "learning_rate": 1.597775946197117e-06, "loss": 0.7329, "step": 9299 }, { "epoch": 0.75, "grad_norm": 1.5503829951855062, "learning_rate": 1.5968239696751008e-06, "loss": 0.7361, "step": 9300 }, { "epoch": 0.75, "grad_norm": 1.6043661239408993, "learning_rate": 1.595872222943312e-06, "loss": 0.8271, "step": 9301 }, { "epoch": 0.75, "grad_norm": 0.7646790815430762, "learning_rate": 1.5949207060660138e-06, "loss": 1.0575, "step": 9302 }, { "epoch": 0.75, "grad_norm": 1.4624025351710768, "learning_rate": 1.5939694191074584e-06, "loss": 0.7349, "step": 9303 }, { "epoch": 0.75, "grad_norm": 1.4498767021985481, "learning_rate": 1.593018362131874e-06, "loss": 0.7973, "step": 9304 }, { "epoch": 0.75, "grad_norm": 0.7568908379803088, "learning_rate": 1.5920675352034792e-06, "loss": 1.0259, "step": 9305 }, { "epoch": 0.75, "grad_norm": 1.5348741051838808, "learning_rate": 1.5911169383864788e-06, "loss": 0.7379, "step": 9306 }, { "epoch": 0.75, "grad_norm": 0.77274836459209, "learning_rate": 1.5901665717450582e-06, "loss": 1.0556, "step": 9307 }, { "epoch": 0.75, "grad_norm": 1.5825214226733677, "learning_rate": 1.589216435343387e-06, "loss": 0.8059, "step": 9308 }, { "epoch": 0.75, "grad_norm": 1.5619764992705174, "learning_rate": 1.5882665292456196e-06, "loss": 0.7698, "step": 9309 }, { "epoch": 0.75, "grad_norm": 1.5812557786816608, "learning_rate": 1.5873168535158995e-06, "loss": 0.8173, "step": 9310 }, { "epoch": 0.75, "grad_norm": 1.4858207470537388, "learning_rate": 1.586367408218349e-06, "loss": 0.7379, "step": 9311 }, { "epoch": 0.75, "grad_norm": 1.573705484876075, "learning_rate": 1.5854181934170747e-06, "loss": 0.8165, "step": 9312 }, { "epoch": 0.75, "grad_norm": 1.9865636395053377, "learning_rate": 1.5844692091761742e-06, "loss": 0.6449, "step": 9313 }, { "epoch": 0.75, "grad_norm": 1.6289190219703689, "learning_rate": 1.5835204555597227e-06, "loss": 0.7739, "step": 9314 }, { "epoch": 0.75, "grad_norm": 1.4566499135503725, "learning_rate": 1.5825719326317817e-06, "loss": 0.7341, "step": 9315 }, { "epoch": 0.75, "grad_norm": 0.7393616460863047, "learning_rate": 1.581623640456399e-06, "loss": 1.0345, "step": 9316 }, { "epoch": 0.75, "grad_norm": 0.7367761828711421, "learning_rate": 1.5806755790976042e-06, "loss": 1.1046, "step": 9317 }, { "epoch": 0.75, "grad_norm": 1.6437811345784097, "learning_rate": 1.5797277486194136e-06, "loss": 0.8238, "step": 9318 }, { "epoch": 0.75, "grad_norm": 0.7398185199898041, "learning_rate": 1.578780149085824e-06, "loss": 1.0275, "step": 9319 }, { "epoch": 0.75, "grad_norm": 1.524114929252351, "learning_rate": 1.577832780560824e-06, "loss": 0.8089, "step": 9320 }, { "epoch": 0.75, "grad_norm": 0.732820995424231, "learning_rate": 1.5768856431083796e-06, "loss": 1.0244, "step": 9321 }, { "epoch": 0.75, "grad_norm": 1.520648622203859, "learning_rate": 1.575938736792444e-06, "loss": 0.7459, "step": 9322 }, { "epoch": 0.75, "grad_norm": 1.6720845091489884, "learning_rate": 1.5749920616769526e-06, "loss": 0.7432, "step": 9323 }, { "epoch": 0.75, "grad_norm": 1.5752076498848167, "learning_rate": 1.5740456178258312e-06, "loss": 0.7534, "step": 9324 }, { "epoch": 0.75, "grad_norm": 0.7656547323835673, "learning_rate": 1.573099405302983e-06, "loss": 1.0793, "step": 9325 }, { "epoch": 0.75, "grad_norm": 1.4598723493327956, "learning_rate": 1.5721534241722996e-06, "loss": 0.7454, "step": 9326 }, { "epoch": 0.75, "grad_norm": 1.52080128296719, "learning_rate": 1.5712076744976551e-06, "loss": 0.774, "step": 9327 }, { "epoch": 0.75, "grad_norm": 1.5184292019755365, "learning_rate": 1.5702621563429088e-06, "loss": 0.7763, "step": 9328 }, { "epoch": 0.75, "grad_norm": 0.7480769700078229, "learning_rate": 1.5693168697719024e-06, "loss": 1.045, "step": 9329 }, { "epoch": 0.75, "grad_norm": 1.514693176337305, "learning_rate": 1.5683718148484673e-06, "loss": 0.7569, "step": 9330 }, { "epoch": 0.75, "grad_norm": 1.4611740412755145, "learning_rate": 1.5674269916364144e-06, "loss": 0.7537, "step": 9331 }, { "epoch": 0.75, "grad_norm": 1.4797044361959082, "learning_rate": 1.5664824001995398e-06, "loss": 0.8024, "step": 9332 }, { "epoch": 0.75, "grad_norm": 1.4908193089495525, "learning_rate": 1.5655380406016236e-06, "loss": 0.686, "step": 9333 }, { "epoch": 0.75, "grad_norm": 1.497739064921044, "learning_rate": 1.5645939129064336e-06, "loss": 0.7413, "step": 9334 }, { "epoch": 0.75, "grad_norm": 1.6781102045070344, "learning_rate": 1.5636500171777181e-06, "loss": 0.7157, "step": 9335 }, { "epoch": 0.75, "grad_norm": 1.4867146672118166, "learning_rate": 1.5627063534792114e-06, "loss": 0.7213, "step": 9336 }, { "epoch": 0.75, "grad_norm": 1.4921822080551297, "learning_rate": 1.561762921874631e-06, "loss": 0.7336, "step": 9337 }, { "epoch": 0.75, "grad_norm": 1.4695350273912682, "learning_rate": 1.5608197224276806e-06, "loss": 0.6651, "step": 9338 }, { "epoch": 0.75, "grad_norm": 1.5780790419708863, "learning_rate": 1.5598767552020465e-06, "loss": 0.7528, "step": 9339 }, { "epoch": 0.75, "grad_norm": 1.6381150503757502, "learning_rate": 1.558934020261399e-06, "loss": 0.7542, "step": 9340 }, { "epoch": 0.75, "grad_norm": 1.4306859683528428, "learning_rate": 1.5579915176693961e-06, "loss": 0.601, "step": 9341 }, { "epoch": 0.75, "grad_norm": 0.765686831728065, "learning_rate": 1.5570492474896764e-06, "loss": 1.0522, "step": 9342 }, { "epoch": 0.75, "grad_norm": 1.5087266556349734, "learning_rate": 1.5561072097858632e-06, "loss": 0.6761, "step": 9343 }, { "epoch": 0.75, "grad_norm": 1.5140856290499989, "learning_rate": 1.555165404621567e-06, "loss": 0.7989, "step": 9344 }, { "epoch": 0.75, "grad_norm": 1.549546754785703, "learning_rate": 1.5542238320603802e-06, "loss": 0.7371, "step": 9345 }, { "epoch": 0.75, "grad_norm": 1.587478897101142, "learning_rate": 1.5532824921658779e-06, "loss": 0.7753, "step": 9346 }, { "epoch": 0.75, "grad_norm": 1.4780175105451603, "learning_rate": 1.5523413850016268e-06, "loss": 0.6871, "step": 9347 }, { "epoch": 0.75, "grad_norm": 0.7900091158926302, "learning_rate": 1.5514005106311668e-06, "loss": 1.0435, "step": 9348 }, { "epoch": 0.75, "grad_norm": 0.7518779682499173, "learning_rate": 1.55045986911803e-06, "loss": 1.0957, "step": 9349 }, { "epoch": 0.75, "grad_norm": 1.542555639093399, "learning_rate": 1.549519460525729e-06, "loss": 0.7253, "step": 9350 }, { "epoch": 0.75, "grad_norm": 1.5429150129512195, "learning_rate": 1.548579284917766e-06, "loss": 0.7297, "step": 9351 }, { "epoch": 0.75, "grad_norm": 1.4813810834592238, "learning_rate": 1.547639342357622e-06, "loss": 0.8038, "step": 9352 }, { "epoch": 0.75, "grad_norm": 1.4109893814578336, "learning_rate": 1.5466996329087618e-06, "loss": 0.7827, "step": 9353 }, { "epoch": 0.75, "grad_norm": 1.4879734012714863, "learning_rate": 1.5457601566346403e-06, "loss": 0.7153, "step": 9354 }, { "epoch": 0.75, "grad_norm": 1.6387247773932332, "learning_rate": 1.544820913598692e-06, "loss": 0.8315, "step": 9355 }, { "epoch": 0.75, "grad_norm": 1.588921834241734, "learning_rate": 1.5438819038643366e-06, "loss": 0.733, "step": 9356 }, { "epoch": 0.75, "grad_norm": 1.8525921625690882, "learning_rate": 1.5429431274949757e-06, "loss": 0.7845, "step": 9357 }, { "epoch": 0.75, "grad_norm": 1.5034040917603801, "learning_rate": 1.5420045845540022e-06, "loss": 0.7521, "step": 9358 }, { "epoch": 0.75, "grad_norm": 1.4760828134560084, "learning_rate": 1.5410662751047855e-06, "loss": 0.7029, "step": 9359 }, { "epoch": 0.75, "grad_norm": 1.5592814931972074, "learning_rate": 1.5401281992106838e-06, "loss": 0.7583, "step": 9360 }, { "epoch": 0.75, "grad_norm": 1.4745409887184293, "learning_rate": 1.5391903569350375e-06, "loss": 0.7255, "step": 9361 }, { "epoch": 0.75, "grad_norm": 1.4962860509232834, "learning_rate": 1.5382527483411718e-06, "loss": 0.7852, "step": 9362 }, { "epoch": 0.75, "grad_norm": 1.5551120498666697, "learning_rate": 1.5373153734923945e-06, "loss": 0.8046, "step": 9363 }, { "epoch": 0.75, "grad_norm": 1.614838133850024, "learning_rate": 1.5363782324520033e-06, "loss": 0.676, "step": 9364 }, { "epoch": 0.75, "grad_norm": 1.5341121556399981, "learning_rate": 1.5354413252832735e-06, "loss": 0.753, "step": 9365 }, { "epoch": 0.75, "grad_norm": 1.4231076287033806, "learning_rate": 1.5345046520494678e-06, "loss": 0.7464, "step": 9366 }, { "epoch": 0.75, "grad_norm": 1.4557759749343377, "learning_rate": 1.5335682128138302e-06, "loss": 0.7055, "step": 9367 }, { "epoch": 0.75, "grad_norm": 1.530119325209531, "learning_rate": 1.5326320076395955e-06, "loss": 0.7054, "step": 9368 }, { "epoch": 0.75, "grad_norm": 1.5163051727627412, "learning_rate": 1.5316960365899757e-06, "loss": 0.7615, "step": 9369 }, { "epoch": 0.75, "grad_norm": 1.547794600921958, "learning_rate": 1.5307602997281706e-06, "loss": 0.7973, "step": 9370 }, { "epoch": 0.75, "grad_norm": 1.7056470821871852, "learning_rate": 1.5298247971173636e-06, "loss": 0.7582, "step": 9371 }, { "epoch": 0.75, "grad_norm": 1.5559951881422502, "learning_rate": 1.5288895288207205e-06, "loss": 0.7856, "step": 9372 }, { "epoch": 0.75, "grad_norm": 1.5254452157091636, "learning_rate": 1.5279544949013935e-06, "loss": 0.8384, "step": 9373 }, { "epoch": 0.75, "grad_norm": 1.635836028364804, "learning_rate": 1.5270196954225175e-06, "loss": 0.7397, "step": 9374 }, { "epoch": 0.75, "grad_norm": 1.5843857805712795, "learning_rate": 1.526085130447214e-06, "loss": 0.7013, "step": 9375 }, { "epoch": 0.75, "grad_norm": 1.5067935640392722, "learning_rate": 1.5251508000385862e-06, "loss": 0.7062, "step": 9376 }, { "epoch": 0.75, "grad_norm": 1.4183903956735697, "learning_rate": 1.5242167042597206e-06, "loss": 0.7839, "step": 9377 }, { "epoch": 0.75, "grad_norm": 1.5252103139283095, "learning_rate": 1.523282843173693e-06, "loss": 0.7214, "step": 9378 }, { "epoch": 0.75, "grad_norm": 1.535968052418803, "learning_rate": 1.5223492168435572e-06, "loss": 0.779, "step": 9379 }, { "epoch": 0.75, "grad_norm": 1.5535147632856845, "learning_rate": 1.5214158253323546e-06, "loss": 0.8615, "step": 9380 }, { "epoch": 0.75, "grad_norm": 1.587171770504689, "learning_rate": 1.5204826687031099e-06, "loss": 0.7623, "step": 9381 }, { "epoch": 0.75, "grad_norm": 1.5441700413327935, "learning_rate": 1.5195497470188314e-06, "loss": 0.7738, "step": 9382 }, { "epoch": 0.75, "grad_norm": 1.596611069874904, "learning_rate": 1.5186170603425132e-06, "loss": 0.778, "step": 9383 }, { "epoch": 0.75, "grad_norm": 1.4557260709693693, "learning_rate": 1.5176846087371293e-06, "loss": 0.7563, "step": 9384 }, { "epoch": 0.75, "grad_norm": 1.414139599520713, "learning_rate": 1.5167523922656458e-06, "loss": 0.6932, "step": 9385 }, { "epoch": 0.75, "grad_norm": 1.4637195469447526, "learning_rate": 1.5158204109910051e-06, "loss": 0.7164, "step": 9386 }, { "epoch": 0.75, "grad_norm": 1.4897474283832115, "learning_rate": 1.5148886649761363e-06, "loss": 0.7773, "step": 9387 }, { "epoch": 0.75, "grad_norm": 1.7052931892247636, "learning_rate": 1.513957154283955e-06, "loss": 0.8368, "step": 9388 }, { "epoch": 0.75, "grad_norm": 1.4796652064570948, "learning_rate": 1.5130258789773583e-06, "loss": 0.6982, "step": 9389 }, { "epoch": 0.75, "grad_norm": 1.4716998710833489, "learning_rate": 1.5120948391192274e-06, "loss": 0.7213, "step": 9390 }, { "epoch": 0.75, "grad_norm": 0.7495318536391611, "learning_rate": 1.5111640347724293e-06, "loss": 1.053, "step": 9391 }, { "epoch": 0.75, "grad_norm": 1.5074496611903578, "learning_rate": 1.5102334659998124e-06, "loss": 0.8159, "step": 9392 }, { "epoch": 0.75, "grad_norm": 1.4605941232842066, "learning_rate": 1.509303132864212e-06, "loss": 0.7737, "step": 9393 }, { "epoch": 0.75, "grad_norm": 1.741380277165443, "learning_rate": 1.5083730354284449e-06, "loss": 0.7388, "step": 9394 }, { "epoch": 0.75, "grad_norm": 1.5408172244127545, "learning_rate": 1.5074431737553158e-06, "loss": 0.6265, "step": 9395 }, { "epoch": 0.75, "grad_norm": 1.6035364143856408, "learning_rate": 1.5065135479076098e-06, "loss": 0.7978, "step": 9396 }, { "epoch": 0.75, "grad_norm": 1.5354664714527295, "learning_rate": 1.5055841579480974e-06, "loss": 0.7002, "step": 9397 }, { "epoch": 0.75, "grad_norm": 1.639513959373965, "learning_rate": 1.5046550039395314e-06, "loss": 0.7557, "step": 9398 }, { "epoch": 0.75, "grad_norm": 1.4766544727050783, "learning_rate": 1.5037260859446535e-06, "loss": 0.7943, "step": 9399 }, { "epoch": 0.75, "grad_norm": 1.4615084195926922, "learning_rate": 1.5027974040261855e-06, "loss": 0.7291, "step": 9400 }, { "epoch": 0.75, "grad_norm": 1.553546369932395, "learning_rate": 1.5018689582468316e-06, "loss": 0.7565, "step": 9401 }, { "epoch": 0.75, "grad_norm": 1.499828502084393, "learning_rate": 1.5009407486692868e-06, "loss": 0.6992, "step": 9402 }, { "epoch": 0.75, "grad_norm": 1.511486323622818, "learning_rate": 1.5000127753562232e-06, "loss": 0.693, "step": 9403 }, { "epoch": 0.75, "grad_norm": 1.8327187105057239, "learning_rate": 1.4990850383703005e-06, "loss": 0.7871, "step": 9404 }, { "epoch": 0.75, "grad_norm": 1.563588647705604, "learning_rate": 1.498157537774161e-06, "loss": 0.7392, "step": 9405 }, { "epoch": 0.75, "grad_norm": 1.5321043294229746, "learning_rate": 1.4972302736304323e-06, "loss": 0.8145, "step": 9406 }, { "epoch": 0.75, "grad_norm": 1.5099653549498413, "learning_rate": 1.4963032460017247e-06, "loss": 0.7655, "step": 9407 }, { "epoch": 0.75, "grad_norm": 1.585409776736036, "learning_rate": 1.4953764549506323e-06, "loss": 0.7871, "step": 9408 }, { "epoch": 0.75, "grad_norm": 1.4959861665166552, "learning_rate": 1.4944499005397372e-06, "loss": 0.6751, "step": 9409 }, { "epoch": 0.75, "grad_norm": 1.4988184324233669, "learning_rate": 1.4935235828316002e-06, "loss": 0.7012, "step": 9410 }, { "epoch": 0.76, "grad_norm": 1.6490677551664028, "learning_rate": 1.4925975018887678e-06, "loss": 0.6928, "step": 9411 }, { "epoch": 0.76, "grad_norm": 1.6513745556280321, "learning_rate": 1.491671657773774e-06, "loss": 0.7112, "step": 9412 }, { "epoch": 0.76, "grad_norm": 1.465946736566473, "learning_rate": 1.4907460505491316e-06, "loss": 0.7992, "step": 9413 }, { "epoch": 0.76, "grad_norm": 1.635304546980997, "learning_rate": 1.4898206802773408e-06, "loss": 0.663, "step": 9414 }, { "epoch": 0.76, "grad_norm": 1.5284650025475603, "learning_rate": 1.4888955470208837e-06, "loss": 0.7224, "step": 9415 }, { "epoch": 0.76, "grad_norm": 1.6085022532294218, "learning_rate": 1.4879706508422286e-06, "loss": 0.843, "step": 9416 }, { "epoch": 0.76, "grad_norm": 1.4709493952870147, "learning_rate": 1.4870459918038256e-06, "loss": 0.7269, "step": 9417 }, { "epoch": 0.76, "grad_norm": 1.4996012349057388, "learning_rate": 1.486121569968108e-06, "loss": 0.7167, "step": 9418 }, { "epoch": 0.76, "grad_norm": 1.4375291186571901, "learning_rate": 1.4851973853974987e-06, "loss": 0.7409, "step": 9419 }, { "epoch": 0.76, "grad_norm": 1.570236143258724, "learning_rate": 1.4842734381543994e-06, "loss": 0.7677, "step": 9420 }, { "epoch": 0.76, "grad_norm": 1.5411273441208346, "learning_rate": 1.4833497283011967e-06, "loss": 0.7622, "step": 9421 }, { "epoch": 0.76, "grad_norm": 1.5319867606181867, "learning_rate": 1.4824262559002595e-06, "loss": 0.8084, "step": 9422 }, { "epoch": 0.76, "grad_norm": 1.5018977059360492, "learning_rate": 1.481503021013947e-06, "loss": 0.7919, "step": 9423 }, { "epoch": 0.76, "grad_norm": 1.4779716643237604, "learning_rate": 1.4805800237045958e-06, "loss": 0.6948, "step": 9424 }, { "epoch": 0.76, "grad_norm": 1.660853837845909, "learning_rate": 1.4796572640345297e-06, "loss": 0.8058, "step": 9425 }, { "epoch": 0.76, "grad_norm": 1.3929754804578618, "learning_rate": 1.4787347420660541e-06, "loss": 0.7202, "step": 9426 }, { "epoch": 0.76, "grad_norm": 0.7661811455217608, "learning_rate": 1.4778124578614611e-06, "loss": 1.0513, "step": 9427 }, { "epoch": 0.76, "grad_norm": 1.5315764418060995, "learning_rate": 1.476890411483023e-06, "loss": 0.7681, "step": 9428 }, { "epoch": 0.76, "grad_norm": 1.5337348892529243, "learning_rate": 1.475968602993002e-06, "loss": 0.7747, "step": 9429 }, { "epoch": 0.76, "grad_norm": 1.5830300581355987, "learning_rate": 1.4750470324536393e-06, "loss": 0.765, "step": 9430 }, { "epoch": 0.76, "grad_norm": 1.3825090451799005, "learning_rate": 1.4741256999271607e-06, "loss": 0.696, "step": 9431 }, { "epoch": 0.76, "grad_norm": 1.592585856088709, "learning_rate": 1.4732046054757765e-06, "loss": 0.6759, "step": 9432 }, { "epoch": 0.76, "grad_norm": 1.4493961293843134, "learning_rate": 1.4722837491616832e-06, "loss": 0.7383, "step": 9433 }, { "epoch": 0.76, "grad_norm": 1.5901068753300758, "learning_rate": 1.4713631310470571e-06, "loss": 0.8079, "step": 9434 }, { "epoch": 0.76, "grad_norm": 1.3863167220821857, "learning_rate": 1.4704427511940607e-06, "loss": 0.7185, "step": 9435 }, { "epoch": 0.76, "grad_norm": 1.558513537218464, "learning_rate": 1.4695226096648423e-06, "loss": 0.7708, "step": 9436 }, { "epoch": 0.76, "grad_norm": 1.5225431115866956, "learning_rate": 1.4686027065215297e-06, "loss": 0.7984, "step": 9437 }, { "epoch": 0.76, "grad_norm": 1.6689847567494265, "learning_rate": 1.4676830418262372e-06, "loss": 0.6857, "step": 9438 }, { "epoch": 0.76, "grad_norm": 1.5242494034987817, "learning_rate": 1.466763615641061e-06, "loss": 0.7716, "step": 9439 }, { "epoch": 0.76, "grad_norm": 1.5837501691709563, "learning_rate": 1.4658444280280864e-06, "loss": 0.7949, "step": 9440 }, { "epoch": 0.76, "grad_norm": 1.7707210931387947, "learning_rate": 1.4649254790493773e-06, "loss": 0.7751, "step": 9441 }, { "epoch": 0.76, "grad_norm": 1.9072790018558785, "learning_rate": 1.4640067687669818e-06, "loss": 0.7733, "step": 9442 }, { "epoch": 0.76, "grad_norm": 1.443134721193111, "learning_rate": 1.4630882972429367e-06, "loss": 0.8092, "step": 9443 }, { "epoch": 0.76, "grad_norm": 1.5113089138249947, "learning_rate": 1.4621700645392567e-06, "loss": 0.763, "step": 9444 }, { "epoch": 0.76, "grad_norm": 1.4595872979348983, "learning_rate": 1.4612520707179429e-06, "loss": 0.7152, "step": 9445 }, { "epoch": 0.76, "grad_norm": 1.655646916001771, "learning_rate": 1.4603343158409823e-06, "loss": 0.7994, "step": 9446 }, { "epoch": 0.76, "grad_norm": 1.5108828402666774, "learning_rate": 1.4594167999703423e-06, "loss": 0.7094, "step": 9447 }, { "epoch": 0.76, "grad_norm": 1.6977508112673279, "learning_rate": 1.4584995231679778e-06, "loss": 0.7123, "step": 9448 }, { "epoch": 0.76, "grad_norm": 1.5989252474850302, "learning_rate": 1.457582485495821e-06, "loss": 0.7355, "step": 9449 }, { "epoch": 0.76, "grad_norm": 1.7098801775461785, "learning_rate": 1.4566656870157958e-06, "loss": 0.7254, "step": 9450 }, { "epoch": 0.76, "grad_norm": 1.5973704030101157, "learning_rate": 1.4557491277898062e-06, "loss": 0.7493, "step": 9451 }, { "epoch": 0.76, "grad_norm": 1.543448755295573, "learning_rate": 1.454832807879738e-06, "loss": 0.7598, "step": 9452 }, { "epoch": 0.76, "grad_norm": 1.6422934287795874, "learning_rate": 1.4539167273474669e-06, "loss": 0.7659, "step": 9453 }, { "epoch": 0.76, "grad_norm": 1.5071000225209674, "learning_rate": 1.4530008862548472e-06, "loss": 0.6921, "step": 9454 }, { "epoch": 0.76, "grad_norm": 1.5775984536071304, "learning_rate": 1.4520852846637179e-06, "loss": 0.7189, "step": 9455 }, { "epoch": 0.76, "grad_norm": 1.5071798338001885, "learning_rate": 1.4511699226359016e-06, "loss": 0.7134, "step": 9456 }, { "epoch": 0.76, "grad_norm": 1.708951963378448, "learning_rate": 1.450254800233209e-06, "loss": 0.7781, "step": 9457 }, { "epoch": 0.76, "grad_norm": 0.7531615045021086, "learning_rate": 1.4493399175174288e-06, "loss": 1.0614, "step": 9458 }, { "epoch": 0.76, "grad_norm": 1.375108876568127, "learning_rate": 1.4484252745503363e-06, "loss": 0.7687, "step": 9459 }, { "epoch": 0.76, "grad_norm": 1.5477001431370736, "learning_rate": 1.4475108713936908e-06, "loss": 0.7451, "step": 9460 }, { "epoch": 0.76, "grad_norm": 1.4644409094495916, "learning_rate": 1.4465967081092346e-06, "loss": 0.7551, "step": 9461 }, { "epoch": 0.76, "grad_norm": 1.5629186052833572, "learning_rate": 1.4456827847586925e-06, "loss": 0.7355, "step": 9462 }, { "epoch": 0.76, "grad_norm": 1.4175560672156606, "learning_rate": 1.4447691014037774e-06, "loss": 0.6657, "step": 9463 }, { "epoch": 0.76, "grad_norm": 1.5206269895611775, "learning_rate": 1.4438556581061819e-06, "loss": 0.6894, "step": 9464 }, { "epoch": 0.76, "grad_norm": 1.501199603728742, "learning_rate": 1.4429424549275845e-06, "loss": 0.7311, "step": 9465 }, { "epoch": 0.76, "grad_norm": 1.4182629238077669, "learning_rate": 1.4420294919296446e-06, "loss": 0.7309, "step": 9466 }, { "epoch": 0.76, "grad_norm": 0.7592070394808534, "learning_rate": 1.4411167691740109e-06, "loss": 1.0692, "step": 9467 }, { "epoch": 0.76, "grad_norm": 1.5600202183335858, "learning_rate": 1.4402042867223104e-06, "loss": 0.7094, "step": 9468 }, { "epoch": 0.76, "grad_norm": 1.604150752474942, "learning_rate": 1.4392920446361563e-06, "loss": 0.7081, "step": 9469 }, { "epoch": 0.76, "grad_norm": 1.5015229479113, "learning_rate": 1.438380042977146e-06, "loss": 0.7616, "step": 9470 }, { "epoch": 0.76, "grad_norm": 1.48948228498623, "learning_rate": 1.4374682818068586e-06, "loss": 0.7925, "step": 9471 }, { "epoch": 0.76, "grad_norm": 1.5206480336609618, "learning_rate": 1.4365567611868598e-06, "loss": 0.8165, "step": 9472 }, { "epoch": 0.76, "grad_norm": 1.6591649324911522, "learning_rate": 1.4356454811786947e-06, "loss": 0.7714, "step": 9473 }, { "epoch": 0.76, "grad_norm": 0.7581917855484593, "learning_rate": 1.434734441843899e-06, "loss": 1.0318, "step": 9474 }, { "epoch": 0.76, "grad_norm": 1.4845694157894624, "learning_rate": 1.4338236432439862e-06, "loss": 0.7892, "step": 9475 }, { "epoch": 0.76, "grad_norm": 1.5593585834162063, "learning_rate": 1.4329130854404537e-06, "loss": 0.712, "step": 9476 }, { "epoch": 0.76, "grad_norm": 1.552971160104241, "learning_rate": 1.4320027684947878e-06, "loss": 0.783, "step": 9477 }, { "epoch": 0.76, "grad_norm": 1.5167327666028858, "learning_rate": 1.4310926924684542e-06, "loss": 0.7783, "step": 9478 }, { "epoch": 0.76, "grad_norm": 1.539114403488609, "learning_rate": 1.4301828574229026e-06, "loss": 0.7145, "step": 9479 }, { "epoch": 0.76, "grad_norm": 0.7850698692337855, "learning_rate": 1.4292732634195677e-06, "loss": 1.0832, "step": 9480 }, { "epoch": 0.76, "grad_norm": 1.5709632672979459, "learning_rate": 1.4283639105198666e-06, "loss": 0.6939, "step": 9481 }, { "epoch": 0.76, "grad_norm": 1.6573330335098013, "learning_rate": 1.427454798785201e-06, "loss": 0.7391, "step": 9482 }, { "epoch": 0.76, "grad_norm": 1.5566806261300232, "learning_rate": 1.4265459282769556e-06, "loss": 0.8388, "step": 9483 }, { "epoch": 0.76, "grad_norm": 1.4614264777372057, "learning_rate": 1.4256372990565016e-06, "loss": 0.6384, "step": 9484 }, { "epoch": 0.76, "grad_norm": 1.5199846475940837, "learning_rate": 1.4247289111851902e-06, "loss": 0.7961, "step": 9485 }, { "epoch": 0.76, "grad_norm": 0.7602728200499783, "learning_rate": 1.423820764724357e-06, "loss": 1.0664, "step": 9486 }, { "epoch": 0.76, "grad_norm": 1.3921099026872232, "learning_rate": 1.4229128597353243e-06, "loss": 0.7604, "step": 9487 }, { "epoch": 0.76, "grad_norm": 1.4878071251702487, "learning_rate": 1.4220051962793952e-06, "loss": 0.6812, "step": 9488 }, { "epoch": 0.76, "grad_norm": 1.5388958073244645, "learning_rate": 1.4210977744178562e-06, "loss": 0.7503, "step": 9489 }, { "epoch": 0.76, "grad_norm": 1.5015844076042044, "learning_rate": 1.4201905942119782e-06, "loss": 0.785, "step": 9490 }, { "epoch": 0.76, "grad_norm": 1.6119871619428312, "learning_rate": 1.4192836557230182e-06, "loss": 0.7523, "step": 9491 }, { "epoch": 0.76, "grad_norm": 1.703285087879522, "learning_rate": 1.4183769590122138e-06, "loss": 0.7554, "step": 9492 }, { "epoch": 0.76, "grad_norm": 1.5255974166207587, "learning_rate": 1.4174705041407872e-06, "loss": 0.7204, "step": 9493 }, { "epoch": 0.76, "grad_norm": 1.4910981011314883, "learning_rate": 1.4165642911699435e-06, "loss": 0.7629, "step": 9494 }, { "epoch": 0.76, "grad_norm": 1.5308285798863384, "learning_rate": 1.4156583201608732e-06, "loss": 0.8358, "step": 9495 }, { "epoch": 0.76, "grad_norm": 0.7769276522258671, "learning_rate": 1.4147525911747495e-06, "loss": 1.1205, "step": 9496 }, { "epoch": 0.76, "grad_norm": 1.6700819833988516, "learning_rate": 1.413847104272727e-06, "loss": 0.7884, "step": 9497 }, { "epoch": 0.76, "grad_norm": 1.4822811086769319, "learning_rate": 1.41294185951595e-06, "loss": 0.7822, "step": 9498 }, { "epoch": 0.76, "grad_norm": 1.4965040632647248, "learning_rate": 1.4120368569655408e-06, "loss": 0.7253, "step": 9499 }, { "epoch": 0.76, "grad_norm": 1.569057144636915, "learning_rate": 1.411132096682606e-06, "loss": 0.8213, "step": 9500 }, { "epoch": 0.76, "grad_norm": 1.564190279255054, "learning_rate": 1.41022757872824e-06, "loss": 0.7883, "step": 9501 }, { "epoch": 0.76, "grad_norm": 1.5931674786745857, "learning_rate": 1.4093233031635163e-06, "loss": 0.756, "step": 9502 }, { "epoch": 0.76, "grad_norm": 1.5266124900402602, "learning_rate": 1.4084192700494942e-06, "loss": 0.7322, "step": 9503 }, { "epoch": 0.76, "grad_norm": 1.3645980167352327, "learning_rate": 1.4075154794472152e-06, "loss": 0.7398, "step": 9504 }, { "epoch": 0.76, "grad_norm": 1.4992948730232998, "learning_rate": 1.4066119314177056e-06, "loss": 0.8278, "step": 9505 }, { "epoch": 0.76, "grad_norm": 1.5524571298765837, "learning_rate": 1.4057086260219755e-06, "loss": 0.7415, "step": 9506 }, { "epoch": 0.76, "grad_norm": 0.7629897494316663, "learning_rate": 1.4048055633210162e-06, "loss": 1.0728, "step": 9507 }, { "epoch": 0.76, "grad_norm": 1.4712892987489814, "learning_rate": 1.4039027433758073e-06, "loss": 0.6625, "step": 9508 }, { "epoch": 0.76, "grad_norm": 1.5533469940805962, "learning_rate": 1.4030001662473086e-06, "loss": 0.7277, "step": 9509 }, { "epoch": 0.76, "grad_norm": 1.510039260252321, "learning_rate": 1.4020978319964622e-06, "loss": 0.8, "step": 9510 }, { "epoch": 0.76, "grad_norm": 1.7584247272877243, "learning_rate": 1.4011957406841985e-06, "loss": 0.7294, "step": 9511 }, { "epoch": 0.76, "grad_norm": 1.4803243765609533, "learning_rate": 1.4002938923714282e-06, "loss": 0.7919, "step": 9512 }, { "epoch": 0.76, "grad_norm": 1.4396424972932387, "learning_rate": 1.3993922871190445e-06, "loss": 0.6419, "step": 9513 }, { "epoch": 0.76, "grad_norm": 1.53575935679358, "learning_rate": 1.3984909249879275e-06, "loss": 0.7236, "step": 9514 }, { "epoch": 0.76, "grad_norm": 1.520809155464972, "learning_rate": 1.3975898060389386e-06, "loss": 0.7359, "step": 9515 }, { "epoch": 0.76, "grad_norm": 1.3608965450704584, "learning_rate": 1.3966889303329233e-06, "loss": 0.8068, "step": 9516 }, { "epoch": 0.76, "grad_norm": 1.4941496079261412, "learning_rate": 1.3957882979307097e-06, "loss": 0.7796, "step": 9517 }, { "epoch": 0.76, "grad_norm": 0.7711979180341535, "learning_rate": 1.3948879088931128e-06, "loss": 1.0452, "step": 9518 }, { "epoch": 0.76, "grad_norm": 1.501333929489414, "learning_rate": 1.3939877632809279e-06, "loss": 0.729, "step": 9519 }, { "epoch": 0.76, "grad_norm": 1.586746473000299, "learning_rate": 1.3930878611549354e-06, "loss": 0.7339, "step": 9520 }, { "epoch": 0.76, "grad_norm": 1.5145725388613107, "learning_rate": 1.392188202575896e-06, "loss": 0.7705, "step": 9521 }, { "epoch": 0.76, "grad_norm": 1.4955659026563959, "learning_rate": 1.391288787604561e-06, "loss": 0.6796, "step": 9522 }, { "epoch": 0.76, "grad_norm": 0.78207191330754, "learning_rate": 1.3903896163016584e-06, "loss": 1.0762, "step": 9523 }, { "epoch": 0.76, "grad_norm": 1.60961261475027, "learning_rate": 1.389490688727903e-06, "loss": 0.7435, "step": 9524 }, { "epoch": 0.76, "grad_norm": 1.5909283086238848, "learning_rate": 1.3885920049439921e-06, "loss": 0.7914, "step": 9525 }, { "epoch": 0.76, "grad_norm": 1.5998790276966766, "learning_rate": 1.387693565010607e-06, "loss": 0.7114, "step": 9526 }, { "epoch": 0.76, "grad_norm": 1.4636930614697792, "learning_rate": 1.3867953689884118e-06, "loss": 0.7782, "step": 9527 }, { "epoch": 0.76, "grad_norm": 1.6098758174786292, "learning_rate": 1.3858974169380556e-06, "loss": 0.8076, "step": 9528 }, { "epoch": 0.76, "grad_norm": 1.48981612578326, "learning_rate": 1.3849997089201705e-06, "loss": 0.6724, "step": 9529 }, { "epoch": 0.76, "grad_norm": 0.7763186557175425, "learning_rate": 1.3841022449953718e-06, "loss": 1.0677, "step": 9530 }, { "epoch": 0.76, "grad_norm": 0.7769815046735407, "learning_rate": 1.3832050252242552e-06, "loss": 1.0539, "step": 9531 }, { "epoch": 0.76, "grad_norm": 1.5549386648455308, "learning_rate": 1.382308049667408e-06, "loss": 0.8523, "step": 9532 }, { "epoch": 0.76, "grad_norm": 1.5394146199977325, "learning_rate": 1.3814113183853928e-06, "loss": 0.7825, "step": 9533 }, { "epoch": 0.76, "grad_norm": 1.4799590811626766, "learning_rate": 1.380514831438759e-06, "loss": 0.7043, "step": 9534 }, { "epoch": 0.77, "grad_norm": 0.757302828762641, "learning_rate": 1.3796185888880414e-06, "loss": 1.0553, "step": 9535 }, { "epoch": 0.77, "grad_norm": 1.6250891623904644, "learning_rate": 1.3787225907937552e-06, "loss": 0.7852, "step": 9536 }, { "epoch": 0.77, "grad_norm": 1.487694707527705, "learning_rate": 1.3778268372164021e-06, "loss": 0.7799, "step": 9537 }, { "epoch": 0.77, "grad_norm": 1.4034398294572177, "learning_rate": 1.3769313282164597e-06, "loss": 0.7011, "step": 9538 }, { "epoch": 0.77, "grad_norm": 1.5559268462818403, "learning_rate": 1.3760360638544012e-06, "loss": 0.7465, "step": 9539 }, { "epoch": 0.77, "grad_norm": 1.5438247917286623, "learning_rate": 1.3751410441906737e-06, "loss": 0.7612, "step": 9540 }, { "epoch": 0.77, "grad_norm": 1.5310085903196955, "learning_rate": 1.37424626928571e-06, "loss": 0.7319, "step": 9541 }, { "epoch": 0.77, "grad_norm": 1.558354899647479, "learning_rate": 1.3733517391999313e-06, "loss": 0.7829, "step": 9542 }, { "epoch": 0.77, "grad_norm": 1.394080410267665, "learning_rate": 1.3724574539937352e-06, "loss": 0.744, "step": 9543 }, { "epoch": 0.77, "grad_norm": 0.7482395514406452, "learning_rate": 1.3715634137275052e-06, "loss": 1.0562, "step": 9544 }, { "epoch": 0.77, "grad_norm": 1.521987195592734, "learning_rate": 1.3706696184616126e-06, "loss": 0.7421, "step": 9545 }, { "epoch": 0.77, "grad_norm": 1.4225237564272504, "learning_rate": 1.369776068256406e-06, "loss": 0.7415, "step": 9546 }, { "epoch": 0.77, "grad_norm": 1.5887143749428199, "learning_rate": 1.3688827631722202e-06, "loss": 0.7465, "step": 9547 }, { "epoch": 0.77, "grad_norm": 1.537577728954312, "learning_rate": 1.3679897032693729e-06, "loss": 0.8252, "step": 9548 }, { "epoch": 0.77, "grad_norm": 1.4387642407979229, "learning_rate": 1.3670968886081664e-06, "loss": 0.8484, "step": 9549 }, { "epoch": 0.77, "grad_norm": 1.4712697426459656, "learning_rate": 1.366204319248885e-06, "loss": 0.7363, "step": 9550 }, { "epoch": 0.77, "grad_norm": 1.4929074427098512, "learning_rate": 1.3653119952517957e-06, "loss": 0.6612, "step": 9551 }, { "epoch": 0.77, "grad_norm": 1.5131274738282197, "learning_rate": 1.3644199166771531e-06, "loss": 0.7756, "step": 9552 }, { "epoch": 0.77, "grad_norm": 1.6440583778847409, "learning_rate": 1.363528083585191e-06, "loss": 0.7472, "step": 9553 }, { "epoch": 0.77, "grad_norm": 1.8990310794231473, "learning_rate": 1.3626364960361282e-06, "loss": 0.8166, "step": 9554 }, { "epoch": 0.77, "grad_norm": 1.530607098292728, "learning_rate": 1.3617451540901649e-06, "loss": 0.7544, "step": 9555 }, { "epoch": 0.77, "grad_norm": 1.5622471249158123, "learning_rate": 1.3608540578074897e-06, "loss": 0.745, "step": 9556 }, { "epoch": 0.77, "grad_norm": 1.5430420914705185, "learning_rate": 1.35996320724827e-06, "loss": 0.7289, "step": 9557 }, { "epoch": 0.77, "grad_norm": 1.4744979400074272, "learning_rate": 1.3590726024726575e-06, "loss": 0.6664, "step": 9558 }, { "epoch": 0.77, "grad_norm": 1.5299897805053908, "learning_rate": 1.3581822435407889e-06, "loss": 0.7714, "step": 9559 }, { "epoch": 0.77, "grad_norm": 1.5455249002153644, "learning_rate": 1.3572921305127823e-06, "loss": 0.7753, "step": 9560 }, { "epoch": 0.77, "grad_norm": 1.51768656150467, "learning_rate": 1.3564022634487395e-06, "loss": 0.7367, "step": 9561 }, { "epoch": 0.77, "grad_norm": 0.75018625671323, "learning_rate": 1.355512642408749e-06, "loss": 1.0487, "step": 9562 }, { "epoch": 0.77, "grad_norm": 1.5077494706405188, "learning_rate": 1.3546232674528782e-06, "loss": 0.7196, "step": 9563 }, { "epoch": 0.77, "grad_norm": 1.4253210284114108, "learning_rate": 1.35373413864118e-06, "loss": 0.8035, "step": 9564 }, { "epoch": 0.77, "grad_norm": 1.5095250189981502, "learning_rate": 1.352845256033689e-06, "loss": 0.7402, "step": 9565 }, { "epoch": 0.77, "grad_norm": 1.5414672968538685, "learning_rate": 1.3519566196904278e-06, "loss": 0.7586, "step": 9566 }, { "epoch": 0.77, "grad_norm": 1.5327785486704055, "learning_rate": 1.3510682296713972e-06, "loss": 0.7062, "step": 9567 }, { "epoch": 0.77, "grad_norm": 1.5192215335545376, "learning_rate": 1.3501800860365838e-06, "loss": 0.8137, "step": 9568 }, { "epoch": 0.77, "grad_norm": 1.6771151256886099, "learning_rate": 1.3492921888459566e-06, "loss": 0.8288, "step": 9569 }, { "epoch": 0.77, "grad_norm": 1.496314295514803, "learning_rate": 1.3484045381594684e-06, "loss": 0.7965, "step": 9570 }, { "epoch": 0.77, "grad_norm": 1.5255046285968645, "learning_rate": 1.3475171340370557e-06, "loss": 0.691, "step": 9571 }, { "epoch": 0.77, "grad_norm": 0.7796495459882584, "learning_rate": 1.346629976538637e-06, "loss": 1.0578, "step": 9572 }, { "epoch": 0.77, "grad_norm": 1.6926105582105158, "learning_rate": 1.3457430657241172e-06, "loss": 0.7987, "step": 9573 }, { "epoch": 0.77, "grad_norm": 1.5147398155239917, "learning_rate": 1.3448564016533821e-06, "loss": 0.687, "step": 9574 }, { "epoch": 0.77, "grad_norm": 1.4791053719310312, "learning_rate": 1.3439699843862986e-06, "loss": 0.7945, "step": 9575 }, { "epoch": 0.77, "grad_norm": 1.513324172175307, "learning_rate": 1.3430838139827235e-06, "loss": 0.7273, "step": 9576 }, { "epoch": 0.77, "grad_norm": 1.5623090074919714, "learning_rate": 1.342197890502492e-06, "loss": 0.7418, "step": 9577 }, { "epoch": 0.77, "grad_norm": 1.5601462746227026, "learning_rate": 1.3413122140054219e-06, "loss": 0.774, "step": 9578 }, { "epoch": 0.77, "grad_norm": 1.5420704990873941, "learning_rate": 1.3404267845513165e-06, "loss": 0.8706, "step": 9579 }, { "epoch": 0.77, "grad_norm": 1.5664559497710435, "learning_rate": 1.3395416021999641e-06, "loss": 0.7519, "step": 9580 }, { "epoch": 0.77, "grad_norm": 1.5870495164748304, "learning_rate": 1.3386566670111339e-06, "loss": 0.7746, "step": 9581 }, { "epoch": 0.77, "grad_norm": 0.7753911334820025, "learning_rate": 1.3377719790445753e-06, "loss": 1.0569, "step": 9582 }, { "epoch": 0.77, "grad_norm": 1.5529654079986897, "learning_rate": 1.3368875383600277e-06, "loss": 0.6815, "step": 9583 }, { "epoch": 0.77, "grad_norm": 0.7443475942759988, "learning_rate": 1.3360033450172106e-06, "loss": 1.0489, "step": 9584 }, { "epoch": 0.77, "grad_norm": 1.5967794829266049, "learning_rate": 1.3351193990758237e-06, "loss": 0.7362, "step": 9585 }, { "epoch": 0.77, "grad_norm": 1.7795442481231696, "learning_rate": 1.3342357005955569e-06, "loss": 0.644, "step": 9586 }, { "epoch": 0.77, "grad_norm": 1.4593669772466922, "learning_rate": 1.3333522496360778e-06, "loss": 0.7274, "step": 9587 }, { "epoch": 0.77, "grad_norm": 1.4896678987708647, "learning_rate": 1.3324690462570395e-06, "loss": 0.7804, "step": 9588 }, { "epoch": 0.77, "grad_norm": 0.75187794423945, "learning_rate": 1.3315860905180755e-06, "loss": 1.0823, "step": 9589 }, { "epoch": 0.77, "grad_norm": 1.5217665948823567, "learning_rate": 1.330703382478809e-06, "loss": 0.7905, "step": 9590 }, { "epoch": 0.77, "grad_norm": 0.7527459970275915, "learning_rate": 1.32982092219884e-06, "loss": 1.0716, "step": 9591 }, { "epoch": 0.77, "grad_norm": 1.5160869901241196, "learning_rate": 1.328938709737755e-06, "loss": 0.6719, "step": 9592 }, { "epoch": 0.77, "grad_norm": 0.7461165581322784, "learning_rate": 1.3280567451551224e-06, "loss": 1.0517, "step": 9593 }, { "epoch": 0.77, "grad_norm": 1.5721973386120813, "learning_rate": 1.3271750285104951e-06, "loss": 0.8219, "step": 9594 }, { "epoch": 0.77, "grad_norm": 1.55347573282422, "learning_rate": 1.326293559863408e-06, "loss": 0.7745, "step": 9595 }, { "epoch": 0.77, "grad_norm": 0.747970967438058, "learning_rate": 1.3254123392733793e-06, "loss": 1.0202, "step": 9596 }, { "epoch": 0.77, "grad_norm": 1.5757800026573971, "learning_rate": 1.3245313667999128e-06, "loss": 0.7996, "step": 9597 }, { "epoch": 0.77, "grad_norm": 1.5454402984962872, "learning_rate": 1.323650642502493e-06, "loss": 0.7675, "step": 9598 }, { "epoch": 0.77, "grad_norm": 1.5216830322153205, "learning_rate": 1.3227701664405868e-06, "loss": 0.7983, "step": 9599 }, { "epoch": 0.77, "grad_norm": 1.514376703242377, "learning_rate": 1.3218899386736488e-06, "loss": 0.7458, "step": 9600 }, { "epoch": 0.77, "grad_norm": 1.4694038662070055, "learning_rate": 1.321009959261113e-06, "loss": 0.749, "step": 9601 }, { "epoch": 0.77, "grad_norm": 1.500593865052254, "learning_rate": 1.3201302282623973e-06, "loss": 0.7217, "step": 9602 }, { "epoch": 0.77, "grad_norm": 1.5297825820085655, "learning_rate": 1.3192507457369025e-06, "loss": 0.7406, "step": 9603 }, { "epoch": 0.77, "grad_norm": 1.516061720849401, "learning_rate": 1.3183715117440143e-06, "loss": 0.7774, "step": 9604 }, { "epoch": 0.77, "grad_norm": 1.4324854677250756, "learning_rate": 1.3174925263431005e-06, "loss": 0.7988, "step": 9605 }, { "epoch": 0.77, "grad_norm": 1.477445752999541, "learning_rate": 1.31661378959351e-06, "loss": 0.7148, "step": 9606 }, { "epoch": 0.77, "grad_norm": 1.4170814254456279, "learning_rate": 1.3157353015545804e-06, "loss": 0.6904, "step": 9607 }, { "epoch": 0.77, "grad_norm": 1.4226212130693878, "learning_rate": 1.3148570622856282e-06, "loss": 0.7634, "step": 9608 }, { "epoch": 0.77, "grad_norm": 1.579406575996543, "learning_rate": 1.3139790718459522e-06, "loss": 0.712, "step": 9609 }, { "epoch": 0.77, "grad_norm": 1.529053448061561, "learning_rate": 1.3131013302948392e-06, "loss": 0.7804, "step": 9610 }, { "epoch": 0.77, "grad_norm": 0.7718516249378474, "learning_rate": 1.3122238376915546e-06, "loss": 1.0767, "step": 9611 }, { "epoch": 0.77, "grad_norm": 1.5170924862015003, "learning_rate": 1.3113465940953495e-06, "loss": 0.7659, "step": 9612 }, { "epoch": 0.77, "grad_norm": 1.45476092157081, "learning_rate": 1.310469599565457e-06, "loss": 0.7575, "step": 9613 }, { "epoch": 0.77, "grad_norm": 1.5173991993657474, "learning_rate": 1.3095928541610936e-06, "loss": 0.7719, "step": 9614 }, { "epoch": 0.77, "grad_norm": 1.541216629205336, "learning_rate": 1.3087163579414598e-06, "loss": 0.751, "step": 9615 }, { "epoch": 0.77, "grad_norm": 1.5258925149485894, "learning_rate": 1.3078401109657362e-06, "loss": 0.7766, "step": 9616 }, { "epoch": 0.77, "grad_norm": 1.566276447850946, "learning_rate": 1.3069641132930928e-06, "loss": 0.7484, "step": 9617 }, { "epoch": 0.77, "grad_norm": 1.5933224882959292, "learning_rate": 1.3060883649826766e-06, "loss": 0.7155, "step": 9618 }, { "epoch": 0.77, "grad_norm": 1.5523264636691527, "learning_rate": 1.3052128660936193e-06, "loss": 0.8087, "step": 9619 }, { "epoch": 0.77, "grad_norm": 1.489490514051949, "learning_rate": 1.3043376166850396e-06, "loss": 0.7421, "step": 9620 }, { "epoch": 0.77, "grad_norm": 1.4436663030879915, "learning_rate": 1.303462616816034e-06, "loss": 0.751, "step": 9621 }, { "epoch": 0.77, "grad_norm": 1.5212745186773196, "learning_rate": 1.302587866545686e-06, "loss": 0.8891, "step": 9622 }, { "epoch": 0.77, "grad_norm": 1.564278697001055, "learning_rate": 1.3017133659330583e-06, "loss": 0.8016, "step": 9623 }, { "epoch": 0.77, "grad_norm": 0.7740681373624709, "learning_rate": 1.300839115037202e-06, "loss": 1.0582, "step": 9624 }, { "epoch": 0.77, "grad_norm": 0.7636508265825822, "learning_rate": 1.2999651139171487e-06, "loss": 1.0318, "step": 9625 }, { "epoch": 0.77, "grad_norm": 1.6038596690907123, "learning_rate": 1.299091362631909e-06, "loss": 0.7499, "step": 9626 }, { "epoch": 0.77, "grad_norm": 1.5762151830768114, "learning_rate": 1.2982178612404839e-06, "loss": 0.7313, "step": 9627 }, { "epoch": 0.77, "grad_norm": 1.5122743103062353, "learning_rate": 1.2973446098018543e-06, "loss": 0.7747, "step": 9628 }, { "epoch": 0.77, "grad_norm": 1.6337470410589414, "learning_rate": 1.2964716083749829e-06, "loss": 0.7749, "step": 9629 }, { "epoch": 0.77, "grad_norm": 0.762287517112837, "learning_rate": 1.2955988570188155e-06, "loss": 1.0562, "step": 9630 }, { "epoch": 0.77, "grad_norm": 0.7366152460884505, "learning_rate": 1.2947263557922857e-06, "loss": 1.0431, "step": 9631 }, { "epoch": 0.77, "grad_norm": 0.7462172535733403, "learning_rate": 1.2938541047543046e-06, "loss": 1.0501, "step": 9632 }, { "epoch": 0.77, "grad_norm": 1.5912063365960425, "learning_rate": 1.2929821039637674e-06, "loss": 0.7682, "step": 9633 }, { "epoch": 0.77, "grad_norm": 1.4230842832492974, "learning_rate": 1.292110353479557e-06, "loss": 0.7904, "step": 9634 }, { "epoch": 0.77, "grad_norm": 0.7534972158542466, "learning_rate": 1.291238853360534e-06, "loss": 1.036, "step": 9635 }, { "epoch": 0.77, "grad_norm": 1.4914004209373755, "learning_rate": 1.2903676036655444e-06, "loss": 0.7498, "step": 9636 }, { "epoch": 0.77, "grad_norm": 1.459490155752423, "learning_rate": 1.2894966044534164e-06, "loss": 0.7023, "step": 9637 }, { "epoch": 0.77, "grad_norm": 1.5166228744595034, "learning_rate": 1.2886258557829622e-06, "loss": 0.6801, "step": 9638 }, { "epoch": 0.77, "grad_norm": 1.5130676308254578, "learning_rate": 1.2877553577129776e-06, "loss": 0.7627, "step": 9639 }, { "epoch": 0.77, "grad_norm": 1.4993624395259457, "learning_rate": 1.2868851103022378e-06, "loss": 0.7387, "step": 9640 }, { "epoch": 0.77, "grad_norm": 1.5671504758326247, "learning_rate": 1.2860151136095073e-06, "loss": 0.7967, "step": 9641 }, { "epoch": 0.77, "grad_norm": 1.4982710549100524, "learning_rate": 1.2851453676935289e-06, "loss": 0.7773, "step": 9642 }, { "epoch": 0.77, "grad_norm": 1.5099480439377788, "learning_rate": 1.2842758726130283e-06, "loss": 0.7138, "step": 9643 }, { "epoch": 0.77, "grad_norm": 1.6504818704952249, "learning_rate": 1.2834066284267189e-06, "loss": 0.8492, "step": 9644 }, { "epoch": 0.77, "grad_norm": 1.4625121203269378, "learning_rate": 1.2825376351932921e-06, "loss": 0.7139, "step": 9645 }, { "epoch": 0.77, "grad_norm": 0.7790379870518475, "learning_rate": 1.281668892971425e-06, "loss": 1.0983, "step": 9646 }, { "epoch": 0.77, "grad_norm": 1.5761273671731904, "learning_rate": 1.2808004018197767e-06, "loss": 0.7098, "step": 9647 }, { "epoch": 0.77, "grad_norm": 1.486822069423648, "learning_rate": 1.2799321617969895e-06, "loss": 0.7429, "step": 9648 }, { "epoch": 0.77, "grad_norm": 1.5072308357759612, "learning_rate": 1.2790641729616899e-06, "loss": 0.8714, "step": 9649 }, { "epoch": 0.77, "grad_norm": 1.4892434754555675, "learning_rate": 1.2781964353724836e-06, "loss": 0.8282, "step": 9650 }, { "epoch": 0.77, "grad_norm": 1.6029313886177872, "learning_rate": 1.277328949087966e-06, "loss": 0.7627, "step": 9651 }, { "epoch": 0.77, "grad_norm": 0.7691575046233411, "learning_rate": 1.27646171416671e-06, "loss": 1.0607, "step": 9652 }, { "epoch": 0.77, "grad_norm": 1.4771049568696826, "learning_rate": 1.275594730667274e-06, "loss": 0.7175, "step": 9653 }, { "epoch": 0.77, "grad_norm": 1.6271113954122174, "learning_rate": 1.2747279986481964e-06, "loss": 0.7353, "step": 9654 }, { "epoch": 0.77, "grad_norm": 1.6580376215400123, "learning_rate": 1.2738615181680043e-06, "loss": 0.6362, "step": 9655 }, { "epoch": 0.77, "grad_norm": 1.5651713463623247, "learning_rate": 1.272995289285202e-06, "loss": 0.7288, "step": 9656 }, { "epoch": 0.77, "grad_norm": 1.4743068896551788, "learning_rate": 1.2721293120582813e-06, "loss": 0.7786, "step": 9657 }, { "epoch": 0.77, "grad_norm": 1.4996875300319146, "learning_rate": 1.2712635865457129e-06, "loss": 0.751, "step": 9658 }, { "epoch": 0.77, "grad_norm": 1.6617894287928787, "learning_rate": 1.2703981128059534e-06, "loss": 0.8388, "step": 9659 }, { "epoch": 0.78, "grad_norm": 1.469818000939209, "learning_rate": 1.269532890897441e-06, "loss": 0.8071, "step": 9660 }, { "epoch": 0.78, "grad_norm": 1.5492949266297271, "learning_rate": 1.2686679208785984e-06, "loss": 0.695, "step": 9661 }, { "epoch": 0.78, "grad_norm": 1.5085355760952492, "learning_rate": 1.2678032028078307e-06, "loss": 0.7825, "step": 9662 }, { "epoch": 0.78, "grad_norm": 1.5490115079274849, "learning_rate": 1.2669387367435243e-06, "loss": 0.7017, "step": 9663 }, { "epoch": 0.78, "grad_norm": 1.6291960832211916, "learning_rate": 1.2660745227440496e-06, "loss": 0.6994, "step": 9664 }, { "epoch": 0.78, "grad_norm": 1.4615056493435754, "learning_rate": 1.2652105608677628e-06, "loss": 0.77, "step": 9665 }, { "epoch": 0.78, "grad_norm": 0.7709182794011195, "learning_rate": 1.264346851172999e-06, "loss": 1.084, "step": 9666 }, { "epoch": 0.78, "grad_norm": 1.5278382425772092, "learning_rate": 1.2634833937180756e-06, "loss": 0.7725, "step": 9667 }, { "epoch": 0.78, "grad_norm": 1.4560430178546355, "learning_rate": 1.2626201885612999e-06, "loss": 0.8331, "step": 9668 }, { "epoch": 0.78, "grad_norm": 1.4407502640807077, "learning_rate": 1.2617572357609565e-06, "loss": 0.6644, "step": 9669 }, { "epoch": 0.78, "grad_norm": 1.4560900145264788, "learning_rate": 1.260894535375311e-06, "loss": 0.8201, "step": 9670 }, { "epoch": 0.78, "grad_norm": 1.5638699710005548, "learning_rate": 1.260032087462615e-06, "loss": 0.7308, "step": 9671 }, { "epoch": 0.78, "grad_norm": 1.4466391721880791, "learning_rate": 1.2591698920811057e-06, "loss": 0.7436, "step": 9672 }, { "epoch": 0.78, "grad_norm": 1.5340369883760723, "learning_rate": 1.2583079492889994e-06, "loss": 0.7218, "step": 9673 }, { "epoch": 0.78, "grad_norm": 0.7354865172154489, "learning_rate": 1.257446259144494e-06, "loss": 1.0333, "step": 9674 }, { "epoch": 0.78, "grad_norm": 1.5244074894012882, "learning_rate": 1.2565848217057774e-06, "loss": 0.704, "step": 9675 }, { "epoch": 0.78, "grad_norm": 1.5500980992418643, "learning_rate": 1.2557236370310132e-06, "loss": 0.7786, "step": 9676 }, { "epoch": 0.78, "grad_norm": 1.478064283396387, "learning_rate": 1.2548627051783512e-06, "loss": 0.752, "step": 9677 }, { "epoch": 0.78, "grad_norm": 1.5619859010516988, "learning_rate": 1.254002026205921e-06, "loss": 0.7787, "step": 9678 }, { "epoch": 0.78, "grad_norm": 1.4750369695521748, "learning_rate": 1.2531416001718416e-06, "loss": 0.7865, "step": 9679 }, { "epoch": 0.78, "grad_norm": 1.5518907212603452, "learning_rate": 1.2522814271342093e-06, "loss": 0.7942, "step": 9680 }, { "epoch": 0.78, "grad_norm": 1.4802713306744018, "learning_rate": 1.2514215071511043e-06, "loss": 0.7519, "step": 9681 }, { "epoch": 0.78, "grad_norm": 1.4485021848995474, "learning_rate": 1.2505618402805909e-06, "loss": 0.7727, "step": 9682 }, { "epoch": 0.78, "grad_norm": 1.575285542803116, "learning_rate": 1.2497024265807156e-06, "loss": 0.8276, "step": 9683 }, { "epoch": 0.78, "grad_norm": 1.490654521252698, "learning_rate": 1.2488432661095068e-06, "loss": 0.7882, "step": 9684 }, { "epoch": 0.78, "grad_norm": 1.6594179052639808, "learning_rate": 1.2479843589249796e-06, "loss": 0.6682, "step": 9685 }, { "epoch": 0.78, "grad_norm": 1.5341369473561017, "learning_rate": 1.2471257050851277e-06, "loss": 0.7654, "step": 9686 }, { "epoch": 0.78, "grad_norm": 1.5276446153195493, "learning_rate": 1.24626730464793e-06, "loss": 0.832, "step": 9687 }, { "epoch": 0.78, "grad_norm": 1.4841018381231088, "learning_rate": 1.2454091576713457e-06, "loss": 0.6512, "step": 9688 }, { "epoch": 0.78, "grad_norm": 1.5292958847370455, "learning_rate": 1.2445512642133218e-06, "loss": 0.7916, "step": 9689 }, { "epoch": 0.78, "grad_norm": 1.5008857938022964, "learning_rate": 1.2436936243317837e-06, "loss": 0.7207, "step": 9690 }, { "epoch": 0.78, "grad_norm": 1.5411956990251847, "learning_rate": 1.242836238084642e-06, "loss": 0.7255, "step": 9691 }, { "epoch": 0.78, "grad_norm": 1.4402039180309176, "learning_rate": 1.2419791055297887e-06, "loss": 0.8019, "step": 9692 }, { "epoch": 0.78, "grad_norm": 1.4532299315808908, "learning_rate": 1.2411222267250988e-06, "loss": 0.718, "step": 9693 }, { "epoch": 0.78, "grad_norm": 1.499403360795155, "learning_rate": 1.240265601728432e-06, "loss": 0.7704, "step": 9694 }, { "epoch": 0.78, "grad_norm": 1.660830598427563, "learning_rate": 1.2394092305976274e-06, "loss": 0.7415, "step": 9695 }, { "epoch": 0.78, "grad_norm": 1.6593789824022072, "learning_rate": 1.238553113390512e-06, "loss": 0.8008, "step": 9696 }, { "epoch": 0.78, "grad_norm": 1.675224295136462, "learning_rate": 1.2376972501648915e-06, "loss": 0.8661, "step": 9697 }, { "epoch": 0.78, "grad_norm": 1.5102014925657228, "learning_rate": 1.2368416409785539e-06, "loss": 0.8411, "step": 9698 }, { "epoch": 0.78, "grad_norm": 1.5451480362890662, "learning_rate": 1.2359862858892751e-06, "loss": 0.8315, "step": 9699 }, { "epoch": 0.78, "grad_norm": 0.7486421292393971, "learning_rate": 1.2351311849548097e-06, "loss": 1.0624, "step": 9700 }, { "epoch": 0.78, "grad_norm": 1.6105020103634604, "learning_rate": 1.2342763382328954e-06, "loss": 0.7851, "step": 9701 }, { "epoch": 0.78, "grad_norm": 1.4177984611314585, "learning_rate": 1.2334217457812536e-06, "loss": 0.7019, "step": 9702 }, { "epoch": 0.78, "grad_norm": 1.460238474700881, "learning_rate": 1.2325674076575884e-06, "loss": 0.6801, "step": 9703 }, { "epoch": 0.78, "grad_norm": 1.6881844776259283, "learning_rate": 1.2317133239195866e-06, "loss": 0.8541, "step": 9704 }, { "epoch": 0.78, "grad_norm": 0.7519646444334398, "learning_rate": 1.2308594946249163e-06, "loss": 1.0408, "step": 9705 }, { "epoch": 0.78, "grad_norm": 1.8080911051206523, "learning_rate": 1.230005919831233e-06, "loss": 0.7189, "step": 9706 }, { "epoch": 0.78, "grad_norm": 1.4681397254596704, "learning_rate": 1.2291525995961707e-06, "loss": 0.759, "step": 9707 }, { "epoch": 0.78, "grad_norm": 1.431259141991019, "learning_rate": 1.2282995339773456e-06, "loss": 0.7548, "step": 9708 }, { "epoch": 0.78, "grad_norm": 1.5239988384287828, "learning_rate": 1.2274467230323622e-06, "loss": 0.7821, "step": 9709 }, { "epoch": 0.78, "grad_norm": 1.514760972254218, "learning_rate": 1.226594166818803e-06, "loss": 0.7801, "step": 9710 }, { "epoch": 0.78, "grad_norm": 1.5245903306172277, "learning_rate": 1.2257418653942332e-06, "loss": 0.7472, "step": 9711 }, { "epoch": 0.78, "grad_norm": 1.5176787337899422, "learning_rate": 1.2248898188162023e-06, "loss": 0.7721, "step": 9712 }, { "epoch": 0.78, "grad_norm": 1.4211822420437397, "learning_rate": 1.2240380271422459e-06, "loss": 0.7768, "step": 9713 }, { "epoch": 0.78, "grad_norm": 1.69400932776177, "learning_rate": 1.2231864904298746e-06, "loss": 0.8345, "step": 9714 }, { "epoch": 0.78, "grad_norm": 1.441308202311701, "learning_rate": 1.222335208736586e-06, "loss": 0.758, "step": 9715 }, { "epoch": 0.78, "grad_norm": 1.619096947655207, "learning_rate": 1.2214841821198641e-06, "loss": 0.816, "step": 9716 }, { "epoch": 0.78, "grad_norm": 1.4032898335832322, "learning_rate": 1.2206334106371702e-06, "loss": 0.7639, "step": 9717 }, { "epoch": 0.78, "grad_norm": 1.5026527174104731, "learning_rate": 1.219782894345949e-06, "loss": 0.6903, "step": 9718 }, { "epoch": 0.78, "grad_norm": 1.6200100515205298, "learning_rate": 1.2189326333036323e-06, "loss": 0.7141, "step": 9719 }, { "epoch": 0.78, "grad_norm": 1.5216180766316332, "learning_rate": 1.2180826275676294e-06, "loss": 0.8207, "step": 9720 }, { "epoch": 0.78, "grad_norm": 1.4958479245592358, "learning_rate": 1.2172328771953363e-06, "loss": 0.6859, "step": 9721 }, { "epoch": 0.78, "grad_norm": 1.576534211431381, "learning_rate": 1.2163833822441274e-06, "loss": 0.7945, "step": 9722 }, { "epoch": 0.78, "grad_norm": 1.4967091110773985, "learning_rate": 1.2155341427713658e-06, "loss": 0.8371, "step": 9723 }, { "epoch": 0.78, "grad_norm": 1.3786310552474215, "learning_rate": 1.2146851588343922e-06, "loss": 0.7431, "step": 9724 }, { "epoch": 0.78, "grad_norm": 0.7666622668832891, "learning_rate": 1.2138364304905326e-06, "loss": 1.0507, "step": 9725 }, { "epoch": 0.78, "grad_norm": 1.5736941820800832, "learning_rate": 1.212987957797095e-06, "loss": 0.7474, "step": 9726 }, { "epoch": 0.78, "grad_norm": 1.524665198252292, "learning_rate": 1.21213974081137e-06, "loss": 0.796, "step": 9727 }, { "epoch": 0.78, "grad_norm": 1.431808437693155, "learning_rate": 1.2112917795906309e-06, "loss": 0.7194, "step": 9728 }, { "epoch": 0.78, "grad_norm": 1.501053846411825, "learning_rate": 1.2104440741921326e-06, "loss": 0.7948, "step": 9729 }, { "epoch": 0.78, "grad_norm": 1.46751800037382, "learning_rate": 1.2095966246731179e-06, "loss": 0.7159, "step": 9730 }, { "epoch": 0.78, "grad_norm": 1.488667175871821, "learning_rate": 1.2087494310908056e-06, "loss": 0.7573, "step": 9731 }, { "epoch": 0.78, "grad_norm": 1.587441875911726, "learning_rate": 1.2079024935023998e-06, "loss": 0.7805, "step": 9732 }, { "epoch": 0.78, "grad_norm": 0.7458707907196326, "learning_rate": 1.2070558119650904e-06, "loss": 1.0573, "step": 9733 }, { "epoch": 0.78, "grad_norm": 0.7739026457481497, "learning_rate": 1.2062093865360458e-06, "loss": 1.0866, "step": 9734 }, { "epoch": 0.78, "grad_norm": 1.5595732286686566, "learning_rate": 1.2053632172724179e-06, "loss": 0.7203, "step": 9735 }, { "epoch": 0.78, "grad_norm": 1.5407824636612668, "learning_rate": 1.2045173042313429e-06, "loss": 0.7588, "step": 9736 }, { "epoch": 0.78, "grad_norm": 1.500108812381179, "learning_rate": 1.2036716474699383e-06, "loss": 0.7067, "step": 9737 }, { "epoch": 0.78, "grad_norm": 1.5310776947466698, "learning_rate": 1.202826247045305e-06, "loss": 0.7994, "step": 9738 }, { "epoch": 0.78, "grad_norm": 1.4854905065045652, "learning_rate": 1.2019811030145245e-06, "loss": 0.6918, "step": 9739 }, { "epoch": 0.78, "grad_norm": 1.4509280298817377, "learning_rate": 1.2011362154346668e-06, "loss": 0.7545, "step": 9740 }, { "epoch": 0.78, "grad_norm": 1.9018023254750045, "learning_rate": 1.2002915843627778e-06, "loss": 0.7703, "step": 9741 }, { "epoch": 0.78, "grad_norm": 1.482307323255835, "learning_rate": 1.1994472098558884e-06, "loss": 0.7509, "step": 9742 }, { "epoch": 0.78, "grad_norm": 1.3549629655139874, "learning_rate": 1.198603091971015e-06, "loss": 0.6778, "step": 9743 }, { "epoch": 0.78, "grad_norm": 1.5192417194307386, "learning_rate": 1.1977592307651536e-06, "loss": 0.7921, "step": 9744 }, { "epoch": 0.78, "grad_norm": 1.5798550685357082, "learning_rate": 1.196915626295283e-06, "loss": 0.7758, "step": 9745 }, { "epoch": 0.78, "grad_norm": 1.4392321869405804, "learning_rate": 1.196072278618366e-06, "loss": 0.7229, "step": 9746 }, { "epoch": 0.78, "grad_norm": 0.7736261874450823, "learning_rate": 1.195229187791347e-06, "loss": 1.0764, "step": 9747 }, { "epoch": 0.78, "grad_norm": 1.5750348576908395, "learning_rate": 1.1943863538711532e-06, "loss": 0.7897, "step": 9748 }, { "epoch": 0.78, "grad_norm": 1.702834795877273, "learning_rate": 1.193543776914693e-06, "loss": 0.729, "step": 9749 }, { "epoch": 0.78, "grad_norm": 1.6320803074220118, "learning_rate": 1.1927014569788624e-06, "loss": 0.7463, "step": 9750 }, { "epoch": 0.78, "grad_norm": 0.7885133474306262, "learning_rate": 1.1918593941205358e-06, "loss": 1.0433, "step": 9751 }, { "epoch": 0.78, "grad_norm": 1.4500075413555893, "learning_rate": 1.1910175883965708e-06, "loss": 0.7061, "step": 9752 }, { "epoch": 0.78, "grad_norm": 1.378346423215973, "learning_rate": 1.1901760398638062e-06, "loss": 0.7557, "step": 9753 }, { "epoch": 0.78, "grad_norm": 1.5610193765492888, "learning_rate": 1.189334748579069e-06, "loss": 0.7652, "step": 9754 }, { "epoch": 0.78, "grad_norm": 1.4604862292763041, "learning_rate": 1.1884937145991627e-06, "loss": 0.7901, "step": 9755 }, { "epoch": 0.78, "grad_norm": 1.5771799074676869, "learning_rate": 1.1876529379808749e-06, "loss": 0.6872, "step": 9756 }, { "epoch": 0.78, "grad_norm": 1.6100823743236223, "learning_rate": 1.1868124187809815e-06, "loss": 0.7079, "step": 9757 }, { "epoch": 0.78, "grad_norm": 0.7797667891495405, "learning_rate": 1.185972157056231e-06, "loss": 1.066, "step": 9758 }, { "epoch": 0.78, "grad_norm": 1.5192810219513084, "learning_rate": 1.1851321528633608e-06, "loss": 0.7288, "step": 9759 }, { "epoch": 0.78, "grad_norm": 1.6745646065927855, "learning_rate": 1.1842924062590922e-06, "loss": 0.834, "step": 9760 }, { "epoch": 0.78, "grad_norm": 1.4885198010444902, "learning_rate": 1.1834529173001253e-06, "loss": 0.7685, "step": 9761 }, { "epoch": 0.78, "grad_norm": 1.5709666958246562, "learning_rate": 1.1826136860431443e-06, "loss": 0.7525, "step": 9762 }, { "epoch": 0.78, "grad_norm": 1.7136826220765258, "learning_rate": 1.1817747125448148e-06, "loss": 0.7255, "step": 9763 }, { "epoch": 0.78, "grad_norm": 1.6006466238735344, "learning_rate": 1.1809359968617894e-06, "loss": 0.7523, "step": 9764 }, { "epoch": 0.78, "grad_norm": 1.5182935819262782, "learning_rate": 1.180097539050698e-06, "loss": 0.7102, "step": 9765 }, { "epoch": 0.78, "grad_norm": 1.597057624470285, "learning_rate": 1.1792593391681545e-06, "loss": 0.8017, "step": 9766 }, { "epoch": 0.78, "grad_norm": 1.5438742813379012, "learning_rate": 1.1784213972707581e-06, "loss": 0.7846, "step": 9767 }, { "epoch": 0.78, "grad_norm": 0.7502312227741539, "learning_rate": 1.1775837134150875e-06, "loss": 1.04, "step": 9768 }, { "epoch": 0.78, "grad_norm": 1.5188904557511478, "learning_rate": 1.1767462876577052e-06, "loss": 0.8075, "step": 9769 }, { "epoch": 0.78, "grad_norm": 1.5919566738337583, "learning_rate": 1.175909120055156e-06, "loss": 0.7172, "step": 9770 }, { "epoch": 0.78, "grad_norm": 1.6225264656067466, "learning_rate": 1.1750722106639673e-06, "loss": 0.6708, "step": 9771 }, { "epoch": 0.78, "grad_norm": 1.5994974694142772, "learning_rate": 1.1742355595406491e-06, "loss": 0.7112, "step": 9772 }, { "epoch": 0.78, "grad_norm": 0.7317714789861076, "learning_rate": 1.1733991667416928e-06, "loss": 1.0801, "step": 9773 }, { "epoch": 0.78, "grad_norm": 2.2912399117457163, "learning_rate": 1.1725630323235758e-06, "loss": 0.7271, "step": 9774 }, { "epoch": 0.78, "grad_norm": 1.5055808834308115, "learning_rate": 1.171727156342755e-06, "loss": 0.6948, "step": 9775 }, { "epoch": 0.78, "grad_norm": 1.4431548789768909, "learning_rate": 1.1708915388556707e-06, "loss": 0.7237, "step": 9776 }, { "epoch": 0.78, "grad_norm": 0.7375877301613611, "learning_rate": 1.1700561799187442e-06, "loss": 1.0695, "step": 9777 }, { "epoch": 0.78, "grad_norm": 1.4304383426286993, "learning_rate": 1.1692210795883835e-06, "loss": 0.7183, "step": 9778 }, { "epoch": 0.78, "grad_norm": 0.7586317777390962, "learning_rate": 1.1683862379209747e-06, "loss": 1.0412, "step": 9779 }, { "epoch": 0.78, "grad_norm": 1.5933403368830739, "learning_rate": 1.1675516549728887e-06, "loss": 0.761, "step": 9780 }, { "epoch": 0.78, "grad_norm": 1.5128870841844475, "learning_rate": 1.1667173308004787e-06, "loss": 0.7559, "step": 9781 }, { "epoch": 0.78, "grad_norm": 0.7516243188097932, "learning_rate": 1.1658832654600798e-06, "loss": 1.0568, "step": 9782 }, { "epoch": 0.78, "grad_norm": 1.5345618632440419, "learning_rate": 1.1650494590080085e-06, "loss": 0.7004, "step": 9783 }, { "epoch": 0.78, "grad_norm": 1.616817473468652, "learning_rate": 1.164215911500568e-06, "loss": 0.7813, "step": 9784 }, { "epoch": 0.79, "grad_norm": 1.6082228071908322, "learning_rate": 1.1633826229940408e-06, "loss": 0.8008, "step": 9785 }, { "epoch": 0.79, "grad_norm": 1.5918320137678663, "learning_rate": 1.1625495935446918e-06, "loss": 0.8516, "step": 9786 }, { "epoch": 0.79, "grad_norm": 1.5903029091880874, "learning_rate": 1.1617168232087671e-06, "loss": 0.7149, "step": 9787 }, { "epoch": 0.79, "grad_norm": 0.7626058045017418, "learning_rate": 1.1608843120425012e-06, "loss": 1.0636, "step": 9788 }, { "epoch": 0.79, "grad_norm": 1.4956867578873663, "learning_rate": 1.1600520601021048e-06, "loss": 0.7311, "step": 9789 }, { "epoch": 0.79, "grad_norm": 1.7030162017280477, "learning_rate": 1.1592200674437742e-06, "loss": 0.7908, "step": 9790 }, { "epoch": 0.79, "grad_norm": 1.6608069936930203, "learning_rate": 1.1583883341236874e-06, "loss": 0.8217, "step": 9791 }, { "epoch": 0.79, "grad_norm": 1.5405210971815246, "learning_rate": 1.1575568601980043e-06, "loss": 0.7321, "step": 9792 }, { "epoch": 0.79, "grad_norm": 0.7309868822689507, "learning_rate": 1.1567256457228681e-06, "loss": 1.0667, "step": 9793 }, { "epoch": 0.79, "grad_norm": 1.4965879483148223, "learning_rate": 1.1558946907544034e-06, "loss": 0.7403, "step": 9794 }, { "epoch": 0.79, "grad_norm": 1.532478530999538, "learning_rate": 1.1550639953487202e-06, "loss": 0.792, "step": 9795 }, { "epoch": 0.79, "grad_norm": 1.5320896055897109, "learning_rate": 1.1542335595619087e-06, "loss": 0.7388, "step": 9796 }, { "epoch": 0.79, "grad_norm": 1.549388449290148, "learning_rate": 1.1534033834500391e-06, "loss": 0.7209, "step": 9797 }, { "epoch": 0.79, "grad_norm": 1.4480360838824076, "learning_rate": 1.1525734670691702e-06, "loss": 0.7587, "step": 9798 }, { "epoch": 0.79, "grad_norm": 1.7074189503556427, "learning_rate": 1.1517438104753386e-06, "loss": 0.6619, "step": 9799 }, { "epoch": 0.79, "grad_norm": 0.7323665782056272, "learning_rate": 1.1509144137245638e-06, "loss": 1.0239, "step": 9800 }, { "epoch": 0.79, "grad_norm": 1.6005964044125225, "learning_rate": 1.1500852768728515e-06, "loss": 0.7681, "step": 9801 }, { "epoch": 0.79, "grad_norm": 1.4438866930342518, "learning_rate": 1.1492563999761829e-06, "loss": 0.6852, "step": 9802 }, { "epoch": 0.79, "grad_norm": 0.7645065682392673, "learning_rate": 1.1484277830905277e-06, "loss": 1.0719, "step": 9803 }, { "epoch": 0.79, "grad_norm": 1.6408394869742449, "learning_rate": 1.1475994262718348e-06, "loss": 0.7957, "step": 9804 }, { "epoch": 0.79, "grad_norm": 1.4873825765565303, "learning_rate": 1.1467713295760386e-06, "loss": 0.7247, "step": 9805 }, { "epoch": 0.79, "grad_norm": 1.4947620302894096, "learning_rate": 1.1459434930590535e-06, "loss": 0.7089, "step": 9806 }, { "epoch": 0.79, "grad_norm": 1.4840100849454885, "learning_rate": 1.1451159167767745e-06, "loss": 0.7988, "step": 9807 }, { "epoch": 0.79, "grad_norm": 1.617265620475383, "learning_rate": 1.1442886007850856e-06, "loss": 0.7269, "step": 9808 }, { "epoch": 0.79, "grad_norm": 1.6467646391438429, "learning_rate": 1.1434615451398467e-06, "loss": 0.7153, "step": 9809 }, { "epoch": 0.79, "grad_norm": 1.6223480131670076, "learning_rate": 1.142634749896903e-06, "loss": 0.8032, "step": 9810 }, { "epoch": 0.79, "grad_norm": 1.4391058466220195, "learning_rate": 1.1418082151120797e-06, "loss": 0.6606, "step": 9811 }, { "epoch": 0.79, "grad_norm": 1.4610249275160936, "learning_rate": 1.1409819408411898e-06, "loss": 0.7111, "step": 9812 }, { "epoch": 0.79, "grad_norm": 1.5822543038533203, "learning_rate": 1.140155927140023e-06, "loss": 0.716, "step": 9813 }, { "epoch": 0.79, "grad_norm": 1.8677320064654974, "learning_rate": 1.1393301740643542e-06, "loss": 0.7118, "step": 9814 }, { "epoch": 0.79, "grad_norm": 0.7635182615014467, "learning_rate": 1.1385046816699403e-06, "loss": 1.0714, "step": 9815 }, { "epoch": 0.79, "grad_norm": 1.6602100218534417, "learning_rate": 1.13767945001252e-06, "loss": 0.7402, "step": 9816 }, { "epoch": 0.79, "grad_norm": 1.6010542368701006, "learning_rate": 1.1368544791478132e-06, "loss": 0.6883, "step": 9817 }, { "epoch": 0.79, "grad_norm": 1.7143439630482251, "learning_rate": 1.136029769131527e-06, "loss": 0.7529, "step": 9818 }, { "epoch": 0.79, "grad_norm": 1.4168672945516831, "learning_rate": 1.1352053200193468e-06, "loss": 0.7043, "step": 9819 }, { "epoch": 0.79, "grad_norm": 1.5616740727782192, "learning_rate": 1.1343811318669407e-06, "loss": 0.767, "step": 9820 }, { "epoch": 0.79, "grad_norm": 1.4904628913504443, "learning_rate": 1.1335572047299582e-06, "loss": 0.774, "step": 9821 }, { "epoch": 0.79, "grad_norm": 1.6182970041525258, "learning_rate": 1.1327335386640354e-06, "loss": 0.7773, "step": 9822 }, { "epoch": 0.79, "grad_norm": 1.4917124380420883, "learning_rate": 1.1319101337247878e-06, "loss": 0.6736, "step": 9823 }, { "epoch": 0.79, "grad_norm": 1.8502454932864223, "learning_rate": 1.1310869899678122e-06, "loss": 0.7758, "step": 9824 }, { "epoch": 0.79, "grad_norm": 0.7349262897712906, "learning_rate": 1.1302641074486909e-06, "loss": 1.0767, "step": 9825 }, { "epoch": 0.79, "grad_norm": 1.6599322203199398, "learning_rate": 1.1294414862229847e-06, "loss": 0.795, "step": 9826 }, { "epoch": 0.79, "grad_norm": 0.7399811464589727, "learning_rate": 1.1286191263462404e-06, "loss": 1.0499, "step": 9827 }, { "epoch": 0.79, "grad_norm": 0.7564933700779674, "learning_rate": 1.1277970278739836e-06, "loss": 1.0614, "step": 9828 }, { "epoch": 0.79, "grad_norm": 1.4949199448217396, "learning_rate": 1.1269751908617277e-06, "loss": 0.7903, "step": 9829 }, { "epoch": 0.79, "grad_norm": 1.431780762907728, "learning_rate": 1.1261536153649627e-06, "loss": 0.7934, "step": 9830 }, { "epoch": 0.79, "grad_norm": 0.7638722004201676, "learning_rate": 1.125332301439162e-06, "loss": 1.1097, "step": 9831 }, { "epoch": 0.79, "grad_norm": 1.6883358873670051, "learning_rate": 1.1245112491397859e-06, "loss": 0.7276, "step": 9832 }, { "epoch": 0.79, "grad_norm": 1.489026947929863, "learning_rate": 1.1236904585222725e-06, "loss": 0.7488, "step": 9833 }, { "epoch": 0.79, "grad_norm": 1.7476693707869921, "learning_rate": 1.1228699296420425e-06, "loss": 0.7694, "step": 9834 }, { "epoch": 0.79, "grad_norm": 1.5889766105652579, "learning_rate": 1.1220496625545008e-06, "loss": 0.8407, "step": 9835 }, { "epoch": 0.79, "grad_norm": 1.591050339513009, "learning_rate": 1.1212296573150332e-06, "loss": 0.8001, "step": 9836 }, { "epoch": 0.79, "grad_norm": 1.5920593751824126, "learning_rate": 1.1204099139790087e-06, "loss": 0.795, "step": 9837 }, { "epoch": 0.79, "grad_norm": 1.4923120742993579, "learning_rate": 1.119590432601776e-06, "loss": 0.7192, "step": 9838 }, { "epoch": 0.79, "grad_norm": 1.5122267280194919, "learning_rate": 1.1187712132386723e-06, "loss": 0.7775, "step": 9839 }, { "epoch": 0.79, "grad_norm": 1.515099090075609, "learning_rate": 1.1179522559450112e-06, "loss": 0.7554, "step": 9840 }, { "epoch": 0.79, "grad_norm": 0.7530193726246929, "learning_rate": 1.1171335607760891e-06, "loss": 1.0131, "step": 9841 }, { "epoch": 0.79, "grad_norm": 1.5490059137472882, "learning_rate": 1.1163151277871892e-06, "loss": 0.8482, "step": 9842 }, { "epoch": 0.79, "grad_norm": 0.794090024004102, "learning_rate": 1.1154969570335722e-06, "loss": 1.067, "step": 9843 }, { "epoch": 0.79, "grad_norm": 1.6045849239379641, "learning_rate": 1.1146790485704834e-06, "loss": 0.7995, "step": 9844 }, { "epoch": 0.79, "grad_norm": 1.4335295952056695, "learning_rate": 1.1138614024531497e-06, "loss": 0.6258, "step": 9845 }, { "epoch": 0.79, "grad_norm": 1.6372444766629204, "learning_rate": 1.1130440187367802e-06, "loss": 0.7943, "step": 9846 }, { "epoch": 0.79, "grad_norm": 1.7043743210794442, "learning_rate": 1.1122268974765665e-06, "loss": 0.7695, "step": 9847 }, { "epoch": 0.79, "grad_norm": 1.5356883791207006, "learning_rate": 1.111410038727681e-06, "loss": 0.7207, "step": 9848 }, { "epoch": 0.79, "grad_norm": 0.7675642227592444, "learning_rate": 1.1105934425452831e-06, "loss": 1.0709, "step": 9849 }, { "epoch": 0.79, "grad_norm": 1.4592159392136095, "learning_rate": 1.1097771089845095e-06, "loss": 0.7638, "step": 9850 }, { "epoch": 0.79, "grad_norm": 1.6077172196751315, "learning_rate": 1.1089610381004812e-06, "loss": 0.7851, "step": 9851 }, { "epoch": 0.79, "grad_norm": 0.7714856337939658, "learning_rate": 1.1081452299482999e-06, "loss": 1.0556, "step": 9852 }, { "epoch": 0.79, "grad_norm": 1.4196314992032064, "learning_rate": 1.1073296845830529e-06, "loss": 0.745, "step": 9853 }, { "epoch": 0.79, "grad_norm": 1.7022937576726624, "learning_rate": 1.1065144020598067e-06, "loss": 0.7393, "step": 9854 }, { "epoch": 0.79, "grad_norm": 1.5648123311305904, "learning_rate": 1.1056993824336099e-06, "loss": 0.7645, "step": 9855 }, { "epoch": 0.79, "grad_norm": 0.7178802731671781, "learning_rate": 1.104884625759497e-06, "loss": 1.0227, "step": 9856 }, { "epoch": 0.79, "grad_norm": 1.4432176880726246, "learning_rate": 1.1040701320924808e-06, "loss": 0.6929, "step": 9857 }, { "epoch": 0.79, "grad_norm": 1.4596193038806062, "learning_rate": 1.1032559014875578e-06, "loss": 0.7077, "step": 9858 }, { "epoch": 0.79, "grad_norm": 0.7841268715039684, "learning_rate": 1.1024419339997066e-06, "loss": 1.0883, "step": 9859 }, { "epoch": 0.79, "grad_norm": 1.3433293539264881, "learning_rate": 1.1016282296838887e-06, "loss": 0.6399, "step": 9860 }, { "epoch": 0.79, "grad_norm": 1.6163528774108946, "learning_rate": 1.1008147885950472e-06, "loss": 0.7454, "step": 9861 }, { "epoch": 0.79, "grad_norm": 1.5280697928741949, "learning_rate": 1.100001610788105e-06, "loss": 0.7201, "step": 9862 }, { "epoch": 0.79, "grad_norm": 1.6400171479130976, "learning_rate": 1.0991886963179737e-06, "loss": 0.7782, "step": 9863 }, { "epoch": 0.79, "grad_norm": 1.4828515712647912, "learning_rate": 1.0983760452395415e-06, "loss": 0.7095, "step": 9864 }, { "epoch": 0.79, "grad_norm": 1.5098946575176468, "learning_rate": 1.0975636576076787e-06, "loss": 0.7271, "step": 9865 }, { "epoch": 0.79, "grad_norm": 1.4188087783502072, "learning_rate": 1.0967515334772428e-06, "loss": 0.69, "step": 9866 }, { "epoch": 0.79, "grad_norm": 1.501738066377932, "learning_rate": 1.0959396729030685e-06, "loss": 0.7318, "step": 9867 }, { "epoch": 0.79, "grad_norm": 1.823683812317142, "learning_rate": 1.0951280759399751e-06, "loss": 0.7516, "step": 9868 }, { "epoch": 0.79, "grad_norm": 1.492994854797437, "learning_rate": 1.0943167426427625e-06, "loss": 0.7668, "step": 9869 }, { "epoch": 0.79, "grad_norm": 0.7822132803101283, "learning_rate": 1.0935056730662146e-06, "loss": 1.0728, "step": 9870 }, { "epoch": 0.79, "grad_norm": 1.7165255238823054, "learning_rate": 1.0926948672650962e-06, "loss": 0.8092, "step": 9871 }, { "epoch": 0.79, "grad_norm": 1.5971143421669, "learning_rate": 1.0918843252941537e-06, "loss": 0.729, "step": 9872 }, { "epoch": 0.79, "grad_norm": 1.5924784196165214, "learning_rate": 1.0910740472081194e-06, "loss": 0.7421, "step": 9873 }, { "epoch": 0.79, "grad_norm": 1.5008518126994441, "learning_rate": 1.0902640330617036e-06, "loss": 0.7698, "step": 9874 }, { "epoch": 0.79, "grad_norm": 0.7538002423896824, "learning_rate": 1.0894542829095993e-06, "loss": 1.0663, "step": 9875 }, { "epoch": 0.79, "grad_norm": 0.7539716316448627, "learning_rate": 1.088644796806485e-06, "loss": 1.108, "step": 9876 }, { "epoch": 0.79, "grad_norm": 1.489375556491458, "learning_rate": 1.087835574807018e-06, "loss": 0.854, "step": 9877 }, { "epoch": 0.79, "grad_norm": 2.108206439487225, "learning_rate": 1.0870266169658383e-06, "loss": 0.6777, "step": 9878 }, { "epoch": 0.79, "grad_norm": 1.488719596058092, "learning_rate": 1.0862179233375697e-06, "loss": 0.7429, "step": 9879 }, { "epoch": 0.79, "grad_norm": 1.641656566589072, "learning_rate": 1.0854094939768156e-06, "loss": 0.8047, "step": 9880 }, { "epoch": 0.79, "grad_norm": 1.5517601527455305, "learning_rate": 1.084601328938164e-06, "loss": 0.8748, "step": 9881 }, { "epoch": 0.79, "grad_norm": 1.3909563526135977, "learning_rate": 1.0837934282761825e-06, "loss": 0.7519, "step": 9882 }, { "epoch": 0.79, "grad_norm": 0.7322682018575278, "learning_rate": 1.082985792045425e-06, "loss": 1.0797, "step": 9883 }, { "epoch": 0.79, "grad_norm": 1.4786547306148394, "learning_rate": 1.0821784203004238e-06, "loss": 0.76, "step": 9884 }, { "epoch": 0.79, "grad_norm": 1.516290420633004, "learning_rate": 1.0813713130956937e-06, "loss": 0.6693, "step": 9885 }, { "epoch": 0.79, "grad_norm": 1.5157411693594403, "learning_rate": 1.080564470485732e-06, "loss": 0.8082, "step": 9886 }, { "epoch": 0.79, "grad_norm": 1.93919555750198, "learning_rate": 1.0797578925250213e-06, "loss": 0.8731, "step": 9887 }, { "epoch": 0.79, "grad_norm": 1.5212995531353057, "learning_rate": 1.0789515792680217e-06, "loss": 0.7754, "step": 9888 }, { "epoch": 0.79, "grad_norm": 0.7705291944309108, "learning_rate": 1.0781455307691767e-06, "loss": 1.0471, "step": 9889 }, { "epoch": 0.79, "grad_norm": 1.4811053407716568, "learning_rate": 1.0773397470829145e-06, "loss": 0.7033, "step": 9890 }, { "epoch": 0.79, "grad_norm": 0.7494576273191048, "learning_rate": 1.0765342282636416e-06, "loss": 1.0621, "step": 9891 }, { "epoch": 0.79, "grad_norm": 1.6598604730943318, "learning_rate": 1.0757289743657495e-06, "loss": 0.739, "step": 9892 }, { "epoch": 0.79, "grad_norm": 1.4520879018561774, "learning_rate": 1.0749239854436089e-06, "loss": 0.7564, "step": 9893 }, { "epoch": 0.79, "grad_norm": 1.4444311135300092, "learning_rate": 1.0741192615515772e-06, "loss": 0.6963, "step": 9894 }, { "epoch": 0.79, "grad_norm": 1.5249853746005788, "learning_rate": 1.0733148027439904e-06, "loss": 0.7243, "step": 9895 }, { "epoch": 0.79, "grad_norm": 1.4478012910489448, "learning_rate": 1.0725106090751652e-06, "loss": 0.7965, "step": 9896 }, { "epoch": 0.79, "grad_norm": 0.7504689331268654, "learning_rate": 1.0717066805994064e-06, "loss": 1.0379, "step": 9897 }, { "epoch": 0.79, "grad_norm": 1.4836974404816603, "learning_rate": 1.0709030173709945e-06, "loss": 0.698, "step": 9898 }, { "epoch": 0.79, "grad_norm": 1.489300103372873, "learning_rate": 1.0700996194441944e-06, "loss": 0.8164, "step": 9899 }, { "epoch": 0.79, "grad_norm": 0.7572653679761618, "learning_rate": 1.0692964868732552e-06, "loss": 1.0682, "step": 9900 }, { "epoch": 0.79, "grad_norm": 0.7328233689880631, "learning_rate": 1.0684936197124058e-06, "loss": 1.0614, "step": 9901 }, { "epoch": 0.79, "grad_norm": 1.6065354597497288, "learning_rate": 1.067691018015858e-06, "loss": 0.7501, "step": 9902 }, { "epoch": 0.79, "grad_norm": 0.7603855919833004, "learning_rate": 1.0668886818378022e-06, "loss": 1.1045, "step": 9903 }, { "epoch": 0.79, "grad_norm": 1.5174711666140337, "learning_rate": 1.066086611232417e-06, "loss": 0.7458, "step": 9904 }, { "epoch": 0.79, "grad_norm": 1.7325618690199718, "learning_rate": 1.0652848062538595e-06, "loss": 0.664, "step": 9905 }, { "epoch": 0.79, "grad_norm": 1.4455362834855927, "learning_rate": 1.0644832669562676e-06, "loss": 0.7701, "step": 9906 }, { "epoch": 0.79, "grad_norm": 1.512443150042702, "learning_rate": 1.0636819933937664e-06, "loss": 0.7214, "step": 9907 }, { "epoch": 0.79, "grad_norm": 1.4559207495817643, "learning_rate": 1.062880985620458e-06, "loss": 0.6966, "step": 9908 }, { "epoch": 0.8, "grad_norm": 1.3672590107096925, "learning_rate": 1.0620802436904275e-06, "loss": 0.7051, "step": 9909 }, { "epoch": 0.8, "grad_norm": 1.8436841145149356, "learning_rate": 1.061279767657743e-06, "loss": 0.7165, "step": 9910 }, { "epoch": 0.8, "grad_norm": 1.5502076044513515, "learning_rate": 1.060479557576456e-06, "loss": 0.6701, "step": 9911 }, { "epoch": 0.8, "grad_norm": 0.7768165685610678, "learning_rate": 1.0596796135005976e-06, "loss": 1.0755, "step": 9912 }, { "epoch": 0.8, "grad_norm": 1.5104580199299305, "learning_rate": 1.0588799354841817e-06, "loss": 0.7043, "step": 9913 }, { "epoch": 0.8, "grad_norm": 0.7394320967651677, "learning_rate": 1.0580805235812042e-06, "loss": 1.0645, "step": 9914 }, { "epoch": 0.8, "grad_norm": 1.7330772966012828, "learning_rate": 1.0572813778456442e-06, "loss": 0.762, "step": 9915 }, { "epoch": 0.8, "grad_norm": 0.7810648256856987, "learning_rate": 1.0564824983314592e-06, "loss": 1.0533, "step": 9916 }, { "epoch": 0.8, "grad_norm": 1.4845291771217348, "learning_rate": 1.0556838850925949e-06, "loss": 0.7646, "step": 9917 }, { "epoch": 0.8, "grad_norm": 1.594312470393566, "learning_rate": 1.0548855381829736e-06, "loss": 0.6465, "step": 9918 }, { "epoch": 0.8, "grad_norm": 0.7325057911174004, "learning_rate": 1.0540874576565025e-06, "loss": 0.9925, "step": 9919 }, { "epoch": 0.8, "grad_norm": 1.6342221553449776, "learning_rate": 1.053289643567067e-06, "loss": 0.7191, "step": 9920 }, { "epoch": 0.8, "grad_norm": 1.5023774684861384, "learning_rate": 1.0524920959685414e-06, "loss": 0.7377, "step": 9921 }, { "epoch": 0.8, "grad_norm": 1.5492267150351635, "learning_rate": 1.0516948149147755e-06, "loss": 0.7442, "step": 9922 }, { "epoch": 0.8, "grad_norm": 1.4662324958907165, "learning_rate": 1.0508978004596043e-06, "loss": 0.7311, "step": 9923 }, { "epoch": 0.8, "grad_norm": 1.462629929321534, "learning_rate": 1.0501010526568439e-06, "loss": 0.7265, "step": 9924 }, { "epoch": 0.8, "grad_norm": 1.4578153095836521, "learning_rate": 1.0493045715602924e-06, "loss": 0.7703, "step": 9925 }, { "epoch": 0.8, "grad_norm": 0.7530096834663219, "learning_rate": 1.0485083572237297e-06, "loss": 1.0742, "step": 9926 }, { "epoch": 0.8, "grad_norm": 0.763321345379345, "learning_rate": 1.0477124097009172e-06, "loss": 1.0566, "step": 9927 }, { "epoch": 0.8, "grad_norm": 1.6643748837059584, "learning_rate": 1.0469167290456016e-06, "loss": 0.7817, "step": 9928 }, { "epoch": 0.8, "grad_norm": 1.554954979163268, "learning_rate": 1.046121315311508e-06, "loss": 0.7213, "step": 9929 }, { "epoch": 0.8, "grad_norm": 0.7364978070971894, "learning_rate": 1.0453261685523424e-06, "loss": 1.0515, "step": 9930 }, { "epoch": 0.8, "grad_norm": 1.5222219450088101, "learning_rate": 1.0445312888217985e-06, "loss": 0.7886, "step": 9931 }, { "epoch": 0.8, "grad_norm": 0.7749117756961922, "learning_rate": 1.0437366761735468e-06, "loss": 1.0875, "step": 9932 }, { "epoch": 0.8, "grad_norm": 1.5506141153928967, "learning_rate": 1.0429423306612414e-06, "loss": 0.7225, "step": 9933 }, { "epoch": 0.8, "grad_norm": 1.478725509426905, "learning_rate": 1.0421482523385174e-06, "loss": 0.8007, "step": 9934 }, { "epoch": 0.8, "grad_norm": 1.571929313678732, "learning_rate": 1.0413544412589944e-06, "loss": 0.6788, "step": 9935 }, { "epoch": 0.8, "grad_norm": 1.5211343329506113, "learning_rate": 1.040560897476271e-06, "loss": 0.7765, "step": 9936 }, { "epoch": 0.8, "grad_norm": 1.496615796086339, "learning_rate": 1.0397676210439283e-06, "loss": 0.8217, "step": 9937 }, { "epoch": 0.8, "grad_norm": 1.5729375399195828, "learning_rate": 1.038974612015533e-06, "loss": 0.816, "step": 9938 }, { "epoch": 0.8, "grad_norm": 1.5276773121424616, "learning_rate": 1.0381818704446296e-06, "loss": 0.7977, "step": 9939 }, { "epoch": 0.8, "grad_norm": 1.4034788353468814, "learning_rate": 1.0373893963847436e-06, "loss": 0.6919, "step": 9940 }, { "epoch": 0.8, "grad_norm": 1.9235943425954065, "learning_rate": 1.0365971898893884e-06, "loss": 0.739, "step": 9941 }, { "epoch": 0.8, "grad_norm": 1.4906536628240266, "learning_rate": 1.0358052510120537e-06, "loss": 0.7906, "step": 9942 }, { "epoch": 0.8, "grad_norm": 1.4349694638584727, "learning_rate": 1.0350135798062132e-06, "loss": 0.8024, "step": 9943 }, { "epoch": 0.8, "grad_norm": 1.4245457518317535, "learning_rate": 1.0342221763253207e-06, "loss": 0.7182, "step": 9944 }, { "epoch": 0.8, "grad_norm": 1.4696083637500126, "learning_rate": 1.0334310406228164e-06, "loss": 0.666, "step": 9945 }, { "epoch": 0.8, "grad_norm": 1.606755677518499, "learning_rate": 1.03264017275212e-06, "loss": 0.7646, "step": 9946 }, { "epoch": 0.8, "grad_norm": 1.605240811464785, "learning_rate": 1.0318495727666284e-06, "loss": 0.8858, "step": 9947 }, { "epoch": 0.8, "grad_norm": 0.7765754664569209, "learning_rate": 1.0310592407197285e-06, "loss": 1.0343, "step": 9948 }, { "epoch": 0.8, "grad_norm": 1.5509434376885962, "learning_rate": 1.0302691766647844e-06, "loss": 0.8107, "step": 9949 }, { "epoch": 0.8, "grad_norm": 1.468541262536462, "learning_rate": 1.029479380655143e-06, "loss": 0.6667, "step": 9950 }, { "epoch": 0.8, "grad_norm": 0.7754694891155596, "learning_rate": 1.0286898527441308e-06, "loss": 1.0359, "step": 9951 }, { "epoch": 0.8, "grad_norm": 1.5940928941990904, "learning_rate": 1.0279005929850626e-06, "loss": 0.7508, "step": 9952 }, { "epoch": 0.8, "grad_norm": 0.7483077481241737, "learning_rate": 1.0271116014312293e-06, "loss": 1.03, "step": 9953 }, { "epoch": 0.8, "grad_norm": 1.634862412598997, "learning_rate": 1.0263228781359037e-06, "loss": 0.7613, "step": 9954 }, { "epoch": 0.8, "grad_norm": 1.4245357797207696, "learning_rate": 1.025534423152345e-06, "loss": 0.7681, "step": 9955 }, { "epoch": 0.8, "grad_norm": 1.463897032270917, "learning_rate": 1.0247462365337901e-06, "loss": 0.7232, "step": 9956 }, { "epoch": 0.8, "grad_norm": 1.506398343828152, "learning_rate": 1.0239583183334596e-06, "loss": 0.7364, "step": 9957 }, { "epoch": 0.8, "grad_norm": 1.4700580875902514, "learning_rate": 1.023170668604555e-06, "loss": 0.7521, "step": 9958 }, { "epoch": 0.8, "grad_norm": 1.597444307123041, "learning_rate": 1.0223832874002603e-06, "loss": 0.6752, "step": 9959 }, { "epoch": 0.8, "grad_norm": 1.6162452984148508, "learning_rate": 1.021596174773742e-06, "loss": 0.7665, "step": 9960 }, { "epoch": 0.8, "grad_norm": 1.8112371082739316, "learning_rate": 1.0208093307781452e-06, "loss": 0.882, "step": 9961 }, { "epoch": 0.8, "grad_norm": 1.5619348114369491, "learning_rate": 1.0200227554666025e-06, "loss": 0.7306, "step": 9962 }, { "epoch": 0.8, "grad_norm": 0.741890794862913, "learning_rate": 1.0192364488922247e-06, "loss": 1.0673, "step": 9963 }, { "epoch": 0.8, "grad_norm": 1.5822581792262704, "learning_rate": 1.0184504111081029e-06, "loss": 0.8323, "step": 9964 }, { "epoch": 0.8, "grad_norm": 1.5146102341317738, "learning_rate": 1.0176646421673153e-06, "loss": 0.7782, "step": 9965 }, { "epoch": 0.8, "grad_norm": 1.6817894948429497, "learning_rate": 1.0168791421229169e-06, "loss": 0.7638, "step": 9966 }, { "epoch": 0.8, "grad_norm": 0.7783819079503992, "learning_rate": 1.0160939110279467e-06, "loss": 1.0393, "step": 9967 }, { "epoch": 0.8, "grad_norm": 1.5512464353912596, "learning_rate": 1.0153089489354256e-06, "loss": 0.6956, "step": 9968 }, { "epoch": 0.8, "grad_norm": 1.4379954249807125, "learning_rate": 1.014524255898356e-06, "loss": 0.7222, "step": 9969 }, { "epoch": 0.8, "grad_norm": 0.7593726451459591, "learning_rate": 1.0137398319697217e-06, "loss": 1.0745, "step": 9970 }, { "epoch": 0.8, "grad_norm": 1.424292545125485, "learning_rate": 1.0129556772024874e-06, "loss": 0.8022, "step": 9971 }, { "epoch": 0.8, "grad_norm": 1.769915689353312, "learning_rate": 1.0121717916496043e-06, "loss": 0.8187, "step": 9972 }, { "epoch": 0.8, "grad_norm": 1.535414714145598, "learning_rate": 1.011388175364001e-06, "loss": 0.7735, "step": 9973 }, { "epoch": 0.8, "grad_norm": 1.489133702307449, "learning_rate": 1.010604828398587e-06, "loss": 0.7515, "step": 9974 }, { "epoch": 0.8, "grad_norm": 1.5283309079683616, "learning_rate": 1.0098217508062587e-06, "loss": 0.7379, "step": 9975 }, { "epoch": 0.8, "grad_norm": 1.6369629642831451, "learning_rate": 1.0090389426398894e-06, "loss": 0.7574, "step": 9976 }, { "epoch": 0.8, "grad_norm": 1.4667582311086997, "learning_rate": 1.0082564039523368e-06, "loss": 0.7276, "step": 9977 }, { "epoch": 0.8, "grad_norm": 1.5152087624267274, "learning_rate": 1.0074741347964395e-06, "loss": 0.6962, "step": 9978 }, { "epoch": 0.8, "grad_norm": 1.540058188352714, "learning_rate": 1.006692135225018e-06, "loss": 0.74, "step": 9979 }, { "epoch": 0.8, "grad_norm": 0.7627628662568969, "learning_rate": 1.0059104052908753e-06, "loss": 1.0515, "step": 9980 }, { "epoch": 0.8, "grad_norm": 1.518079240879994, "learning_rate": 1.0051289450467933e-06, "loss": 0.7465, "step": 9981 }, { "epoch": 0.8, "grad_norm": 1.5273125539097983, "learning_rate": 1.004347754545541e-06, "loss": 0.7638, "step": 9982 }, { "epoch": 0.8, "grad_norm": 1.7461493065854514, "learning_rate": 1.0035668338398652e-06, "loss": 0.8066, "step": 9983 }, { "epoch": 0.8, "grad_norm": 0.7470959665143866, "learning_rate": 1.0027861829824953e-06, "loss": 1.0676, "step": 9984 }, { "epoch": 0.8, "grad_norm": 1.5596749935107677, "learning_rate": 1.002005802026141e-06, "loss": 0.7336, "step": 9985 }, { "epoch": 0.8, "grad_norm": 1.4822243966070272, "learning_rate": 1.001225691023498e-06, "loss": 0.7746, "step": 9986 }, { "epoch": 0.8, "grad_norm": 1.498488129895625, "learning_rate": 1.0004458500272402e-06, "loss": 0.6897, "step": 9987 }, { "epoch": 0.8, "grad_norm": 1.5780239383879824, "learning_rate": 9.99666279090023e-07, "loss": 0.7652, "step": 9988 }, { "epoch": 0.8, "grad_norm": 1.5596402968125291, "learning_rate": 9.988869782644872e-07, "loss": 0.7736, "step": 9989 }, { "epoch": 0.8, "grad_norm": 1.4048992986528201, "learning_rate": 9.98107947603253e-07, "loss": 0.7363, "step": 9990 }, { "epoch": 0.8, "grad_norm": 1.5054891743923948, "learning_rate": 9.973291871589198e-07, "loss": 0.7946, "step": 9991 }, { "epoch": 0.8, "grad_norm": 1.5413199951514691, "learning_rate": 9.96550696984071e-07, "loss": 0.806, "step": 9992 }, { "epoch": 0.8, "grad_norm": 1.5137538617524684, "learning_rate": 9.957724771312754e-07, "loss": 0.7591, "step": 9993 }, { "epoch": 0.8, "grad_norm": 1.4602703945897617, "learning_rate": 9.949945276530782e-07, "loss": 0.684, "step": 9994 }, { "epoch": 0.8, "grad_norm": 1.4244060529743618, "learning_rate": 9.942168486020065e-07, "loss": 0.68, "step": 9995 }, { "epoch": 0.8, "grad_norm": 1.6239557235796354, "learning_rate": 9.934394400305752e-07, "loss": 0.6931, "step": 9996 }, { "epoch": 0.8, "grad_norm": 1.549635646807986, "learning_rate": 9.92662301991274e-07, "loss": 0.7256, "step": 9997 }, { "epoch": 0.8, "grad_norm": 1.5839919813283077, "learning_rate": 9.918854345365758e-07, "loss": 0.8486, "step": 9998 }, { "epoch": 0.8, "grad_norm": 1.557121871986747, "learning_rate": 9.911088377189405e-07, "loss": 0.7209, "step": 9999 }, { "epoch": 0.8, "grad_norm": 0.73840477818183, "learning_rate": 9.903325115908025e-07, "loss": 1.0525, "step": 10000 }, { "epoch": 0.8, "grad_norm": 1.6300699265410092, "learning_rate": 9.895564562045822e-07, "loss": 0.77, "step": 10001 }, { "epoch": 0.8, "grad_norm": 0.7703477922921379, "learning_rate": 9.887806716126808e-07, "loss": 1.0451, "step": 10002 }, { "epoch": 0.8, "grad_norm": 1.5493199621946296, "learning_rate": 9.880051578674798e-07, "loss": 0.7918, "step": 10003 }, { "epoch": 0.8, "grad_norm": 1.5314515802110467, "learning_rate": 9.872299150213455e-07, "loss": 0.7689, "step": 10004 }, { "epoch": 0.8, "grad_norm": 0.7578326334830287, "learning_rate": 9.864549431266212e-07, "loss": 1.0935, "step": 10005 }, { "epoch": 0.8, "grad_norm": 1.5065597472216277, "learning_rate": 9.856802422356383e-07, "loss": 0.6631, "step": 10006 }, { "epoch": 0.8, "grad_norm": 1.6057063613628346, "learning_rate": 9.849058124007044e-07, "loss": 0.7923, "step": 10007 }, { "epoch": 0.8, "grad_norm": 1.547253788079528, "learning_rate": 9.841316536741114e-07, "loss": 0.8108, "step": 10008 }, { "epoch": 0.8, "grad_norm": 0.7596427705728889, "learning_rate": 9.8335776610813e-07, "loss": 1.0764, "step": 10009 }, { "epoch": 0.8, "grad_norm": 1.4690039205500238, "learning_rate": 9.825841497550186e-07, "loss": 0.6792, "step": 10010 }, { "epoch": 0.8, "grad_norm": 1.512809560152084, "learning_rate": 9.818108046670123e-07, "loss": 0.7191, "step": 10011 }, { "epoch": 0.8, "grad_norm": 1.576048747684372, "learning_rate": 9.810377308963282e-07, "loss": 0.7979, "step": 10012 }, { "epoch": 0.8, "grad_norm": 0.7526058482891663, "learning_rate": 9.802649284951666e-07, "loss": 1.0565, "step": 10013 }, { "epoch": 0.8, "grad_norm": 1.5888677116655572, "learning_rate": 9.794923975157083e-07, "loss": 0.7316, "step": 10014 }, { "epoch": 0.8, "grad_norm": 0.7516282159204318, "learning_rate": 9.787201380101157e-07, "loss": 1.0672, "step": 10015 }, { "epoch": 0.8, "grad_norm": 1.580243063663469, "learning_rate": 9.77948150030536e-07, "loss": 0.7838, "step": 10016 }, { "epoch": 0.8, "grad_norm": 1.5166439123361695, "learning_rate": 9.77176433629094e-07, "loss": 0.8469, "step": 10017 }, { "epoch": 0.8, "grad_norm": 1.4448914120153866, "learning_rate": 9.76404988857898e-07, "loss": 0.7666, "step": 10018 }, { "epoch": 0.8, "grad_norm": 1.5936146083393048, "learning_rate": 9.75633815769036e-07, "loss": 0.7674, "step": 10019 }, { "epoch": 0.8, "grad_norm": 1.5069030230510183, "learning_rate": 9.748629144145827e-07, "loss": 0.8125, "step": 10020 }, { "epoch": 0.8, "grad_norm": 1.6428484641500387, "learning_rate": 9.740922848465894e-07, "loss": 0.8094, "step": 10021 }, { "epoch": 0.8, "grad_norm": 1.5243114974051433, "learning_rate": 9.733219271170914e-07, "loss": 0.6925, "step": 10022 }, { "epoch": 0.8, "grad_norm": 1.573857585439232, "learning_rate": 9.725518412781037e-07, "loss": 0.8026, "step": 10023 }, { "epoch": 0.8, "grad_norm": 1.5717195668191197, "learning_rate": 9.717820273816248e-07, "loss": 0.8039, "step": 10024 }, { "epoch": 0.8, "grad_norm": 1.572345594584929, "learning_rate": 9.71012485479635e-07, "loss": 0.775, "step": 10025 }, { "epoch": 0.8, "grad_norm": 1.4336179069881017, "learning_rate": 9.702432156240937e-07, "loss": 0.7668, "step": 10026 }, { "epoch": 0.8, "grad_norm": 1.5750471469440321, "learning_rate": 9.694742178669464e-07, "loss": 0.7262, "step": 10027 }, { "epoch": 0.8, "grad_norm": 1.5533922540323182, "learning_rate": 9.687054922601157e-07, "loss": 0.7376, "step": 10028 }, { "epoch": 0.8, "grad_norm": 1.5524968021445842, "learning_rate": 9.679370388555077e-07, "loss": 0.7418, "step": 10029 }, { "epoch": 0.8, "grad_norm": 2.206263898031004, "learning_rate": 9.671688577050114e-07, "loss": 0.7759, "step": 10030 }, { "epoch": 0.8, "grad_norm": 1.5711550428226237, "learning_rate": 9.66400948860496e-07, "loss": 0.7667, "step": 10031 }, { "epoch": 0.8, "grad_norm": 1.629589260593589, "learning_rate": 9.656333123738116e-07, "loss": 0.7673, "step": 10032 }, { "epoch": 0.8, "grad_norm": 1.453753722035826, "learning_rate": 9.648659482967898e-07, "loss": 0.7269, "step": 10033 }, { "epoch": 0.81, "grad_norm": 1.5407494070002221, "learning_rate": 9.640988566812475e-07, "loss": 0.7693, "step": 10034 }, { "epoch": 0.81, "grad_norm": 1.8335445615548538, "learning_rate": 9.633320375789807e-07, "loss": 0.8667, "step": 10035 }, { "epoch": 0.81, "grad_norm": 1.6074542537459433, "learning_rate": 9.62565491041762e-07, "loss": 0.7859, "step": 10036 }, { "epoch": 0.81, "grad_norm": 1.5770597358720424, "learning_rate": 9.617992171213547e-07, "loss": 0.7553, "step": 10037 }, { "epoch": 0.81, "grad_norm": 1.4048124219177545, "learning_rate": 9.610332158694985e-07, "loss": 0.7348, "step": 10038 }, { "epoch": 0.81, "grad_norm": 1.469687655713396, "learning_rate": 9.602674873379137e-07, "loss": 0.7288, "step": 10039 }, { "epoch": 0.81, "grad_norm": 1.6229339728432595, "learning_rate": 9.59502031578307e-07, "loss": 0.7722, "step": 10040 }, { "epoch": 0.81, "grad_norm": 1.6541281315253025, "learning_rate": 9.587368486423621e-07, "loss": 0.7134, "step": 10041 }, { "epoch": 0.81, "grad_norm": 1.5334861149150596, "learning_rate": 9.57971938581746e-07, "loss": 0.7993, "step": 10042 }, { "epoch": 0.81, "grad_norm": 1.5350322184301337, "learning_rate": 9.572073014481065e-07, "loss": 0.7891, "step": 10043 }, { "epoch": 0.81, "grad_norm": 1.5199366027302554, "learning_rate": 9.56442937293075e-07, "loss": 0.7766, "step": 10044 }, { "epoch": 0.81, "grad_norm": 0.7301134784174522, "learning_rate": 9.55678846168263e-07, "loss": 1.0572, "step": 10045 }, { "epoch": 0.81, "grad_norm": 1.592663081354399, "learning_rate": 9.549150281252633e-07, "loss": 0.8344, "step": 10046 }, { "epoch": 0.81, "grad_norm": 1.5822813077879134, "learning_rate": 9.541514832156501e-07, "loss": 0.8018, "step": 10047 }, { "epoch": 0.81, "grad_norm": 1.5946840898853993, "learning_rate": 9.533882114909804e-07, "loss": 0.7841, "step": 10048 }, { "epoch": 0.81, "grad_norm": 1.6320966740116614, "learning_rate": 9.526252130027919e-07, "loss": 0.6431, "step": 10049 }, { "epoch": 0.81, "grad_norm": 1.551913307401585, "learning_rate": 9.518624878026028e-07, "loss": 0.7963, "step": 10050 }, { "epoch": 0.81, "grad_norm": 0.7544402790623255, "learning_rate": 9.511000359419159e-07, "loss": 1.1027, "step": 10051 }, { "epoch": 0.81, "grad_norm": 0.7548761842406448, "learning_rate": 9.503378574722133e-07, "loss": 1.0891, "step": 10052 }, { "epoch": 0.81, "grad_norm": 2.291632442848924, "learning_rate": 9.495759524449572e-07, "loss": 0.7389, "step": 10053 }, { "epoch": 0.81, "grad_norm": 1.5290425083375367, "learning_rate": 9.488143209115958e-07, "loss": 0.7342, "step": 10054 }, { "epoch": 0.81, "grad_norm": 1.46451791484958, "learning_rate": 9.480529629235552e-07, "loss": 0.68, "step": 10055 }, { "epoch": 0.81, "grad_norm": 1.5398079748499862, "learning_rate": 9.472918785322444e-07, "loss": 0.8076, "step": 10056 }, { "epoch": 0.81, "grad_norm": 1.5953283620873504, "learning_rate": 9.465310677890522e-07, "loss": 0.8309, "step": 10057 }, { "epoch": 0.81, "grad_norm": 1.5279790852075992, "learning_rate": 9.457705307453519e-07, "loss": 0.766, "step": 10058 }, { "epoch": 0.81, "grad_norm": 1.460450072714933, "learning_rate": 9.450102674524952e-07, "loss": 0.7248, "step": 10059 }, { "epoch": 0.81, "grad_norm": 1.5441545125752085, "learning_rate": 9.442502779618168e-07, "loss": 0.778, "step": 10060 }, { "epoch": 0.81, "grad_norm": 1.5403706828580137, "learning_rate": 9.434905623246343e-07, "loss": 0.7208, "step": 10061 }, { "epoch": 0.81, "grad_norm": 1.4957351803120378, "learning_rate": 9.427311205922457e-07, "loss": 0.7523, "step": 10062 }, { "epoch": 0.81, "grad_norm": 1.6216676518096198, "learning_rate": 9.419719528159271e-07, "loss": 0.81, "step": 10063 }, { "epoch": 0.81, "grad_norm": 1.5219800096421445, "learning_rate": 9.412130590469438e-07, "loss": 0.7479, "step": 10064 }, { "epoch": 0.81, "grad_norm": 0.7416098807213682, "learning_rate": 9.40454439336535e-07, "loss": 1.0673, "step": 10065 }, { "epoch": 0.81, "grad_norm": 1.5714751296155174, "learning_rate": 9.396960937359251e-07, "loss": 0.8146, "step": 10066 }, { "epoch": 0.81, "grad_norm": 0.7805042803665215, "learning_rate": 9.389380222963195e-07, "loss": 1.0822, "step": 10067 }, { "epoch": 0.81, "grad_norm": 0.7504705881755415, "learning_rate": 9.381802250689054e-07, "loss": 1.0877, "step": 10068 }, { "epoch": 0.81, "grad_norm": 1.5078948739238665, "learning_rate": 9.374227021048499e-07, "loss": 0.7925, "step": 10069 }, { "epoch": 0.81, "grad_norm": 1.682923275942174, "learning_rate": 9.366654534553021e-07, "loss": 0.7504, "step": 10070 }, { "epoch": 0.81, "grad_norm": 1.4479906332238635, "learning_rate": 9.359084791713952e-07, "loss": 0.7503, "step": 10071 }, { "epoch": 0.81, "grad_norm": 0.7525012432358235, "learning_rate": 9.351517793042408e-07, "loss": 1.0901, "step": 10072 }, { "epoch": 0.81, "grad_norm": 0.73834634060162, "learning_rate": 9.343953539049322e-07, "loss": 1.0758, "step": 10073 }, { "epoch": 0.81, "grad_norm": 1.699823893955865, "learning_rate": 9.336392030245473e-07, "loss": 0.7704, "step": 10074 }, { "epoch": 0.81, "grad_norm": 1.6103913498015554, "learning_rate": 9.328833267141413e-07, "loss": 0.7511, "step": 10075 }, { "epoch": 0.81, "grad_norm": 1.5853866310890927, "learning_rate": 9.321277250247535e-07, "loss": 0.8052, "step": 10076 }, { "epoch": 0.81, "grad_norm": 1.4922542371440413, "learning_rate": 9.313723980074018e-07, "loss": 0.6834, "step": 10077 }, { "epoch": 0.81, "grad_norm": 1.593302284972048, "learning_rate": 9.306173457130907e-07, "loss": 0.7428, "step": 10078 }, { "epoch": 0.81, "grad_norm": 0.7539440806364981, "learning_rate": 9.298625681928031e-07, "loss": 1.0396, "step": 10079 }, { "epoch": 0.81, "grad_norm": 1.6084444942568388, "learning_rate": 9.291080654974994e-07, "loss": 0.7461, "step": 10080 }, { "epoch": 0.81, "grad_norm": 1.5076269613389197, "learning_rate": 9.283538376781287e-07, "loss": 0.761, "step": 10081 }, { "epoch": 0.81, "grad_norm": 1.4999538985043177, "learning_rate": 9.275998847856172e-07, "loss": 0.7429, "step": 10082 }, { "epoch": 0.81, "grad_norm": 1.5260305013084035, "learning_rate": 9.268462068708733e-07, "loss": 0.7419, "step": 10083 }, { "epoch": 0.81, "grad_norm": 1.6304756674798115, "learning_rate": 9.260928039847866e-07, "loss": 0.8069, "step": 10084 }, { "epoch": 0.81, "grad_norm": 1.5049000628513265, "learning_rate": 9.253396761782308e-07, "loss": 0.7887, "step": 10085 }, { "epoch": 0.81, "grad_norm": 1.4947621776173312, "learning_rate": 9.245868235020566e-07, "loss": 0.7417, "step": 10086 }, { "epoch": 0.81, "grad_norm": 1.4639369683191377, "learning_rate": 9.238342460070981e-07, "loss": 0.7481, "step": 10087 }, { "epoch": 0.81, "grad_norm": 1.6452959363297952, "learning_rate": 9.230819437441734e-07, "loss": 0.8551, "step": 10088 }, { "epoch": 0.81, "grad_norm": 1.5502412084504489, "learning_rate": 9.223299167640787e-07, "loss": 0.8019, "step": 10089 }, { "epoch": 0.81, "grad_norm": 1.4851281093890076, "learning_rate": 9.215781651175915e-07, "loss": 0.7098, "step": 10090 }, { "epoch": 0.81, "grad_norm": 1.5374814833216, "learning_rate": 9.208266888554729e-07, "loss": 0.7741, "step": 10091 }, { "epoch": 0.81, "grad_norm": 0.7667298922962859, "learning_rate": 9.200754880284635e-07, "loss": 1.037, "step": 10092 }, { "epoch": 0.81, "grad_norm": 0.7556848812728668, "learning_rate": 9.193245626872871e-07, "loss": 1.068, "step": 10093 }, { "epoch": 0.81, "grad_norm": 1.5187962965390294, "learning_rate": 9.185739128826454e-07, "loss": 0.707, "step": 10094 }, { "epoch": 0.81, "grad_norm": 1.4856766099985772, "learning_rate": 9.178235386652273e-07, "loss": 0.7335, "step": 10095 }, { "epoch": 0.81, "grad_norm": 1.5159176186665357, "learning_rate": 9.170734400856979e-07, "loss": 0.7727, "step": 10096 }, { "epoch": 0.81, "grad_norm": 0.7337127933395794, "learning_rate": 9.163236171947054e-07, "loss": 1.0473, "step": 10097 }, { "epoch": 0.81, "grad_norm": 1.533979402870509, "learning_rate": 9.155740700428811e-07, "loss": 0.8186, "step": 10098 }, { "epoch": 0.81, "grad_norm": 1.6984259719417314, "learning_rate": 9.148247986808351e-07, "loss": 0.7964, "step": 10099 }, { "epoch": 0.81, "grad_norm": 1.4770933395935582, "learning_rate": 9.140758031591601e-07, "loss": 0.7375, "step": 10100 }, { "epoch": 0.81, "grad_norm": 1.4928466786751593, "learning_rate": 9.133270835284303e-07, "loss": 0.7234, "step": 10101 }, { "epoch": 0.81, "grad_norm": 1.6410857605634024, "learning_rate": 9.125786398392e-07, "loss": 0.7761, "step": 10102 }, { "epoch": 0.81, "grad_norm": 1.5635427244039393, "learning_rate": 9.118304721420068e-07, "loss": 0.7941, "step": 10103 }, { "epoch": 0.81, "grad_norm": 1.5304782788452176, "learning_rate": 9.110825804873668e-07, "loss": 0.7546, "step": 10104 }, { "epoch": 0.81, "grad_norm": 1.5821809740114878, "learning_rate": 9.10334964925782e-07, "loss": 0.7638, "step": 10105 }, { "epoch": 0.81, "grad_norm": 1.4865433069373912, "learning_rate": 9.095876255077318e-07, "loss": 0.7078, "step": 10106 }, { "epoch": 0.81, "grad_norm": 1.4723806850996077, "learning_rate": 9.088405622836788e-07, "loss": 0.7581, "step": 10107 }, { "epoch": 0.81, "grad_norm": 1.5385109388010025, "learning_rate": 9.080937753040647e-07, "loss": 0.6749, "step": 10108 }, { "epoch": 0.81, "grad_norm": 1.441215162981642, "learning_rate": 9.073472646193171e-07, "loss": 0.7107, "step": 10109 }, { "epoch": 0.81, "grad_norm": 0.7555390709094947, "learning_rate": 9.066010302798401e-07, "loss": 1.0552, "step": 10110 }, { "epoch": 0.81, "grad_norm": 1.3280024778236452, "learning_rate": 9.05855072336022e-07, "loss": 0.7225, "step": 10111 }, { "epoch": 0.81, "grad_norm": 1.5359475863593215, "learning_rate": 9.051093908382313e-07, "loss": 0.7226, "step": 10112 }, { "epoch": 0.81, "grad_norm": 1.4726214283860126, "learning_rate": 9.04363985836818e-07, "loss": 0.7456, "step": 10113 }, { "epoch": 0.81, "grad_norm": 1.52020436345313, "learning_rate": 9.036188573821119e-07, "loss": 0.7843, "step": 10114 }, { "epoch": 0.81, "grad_norm": 1.501936477475615, "learning_rate": 9.028740055244294e-07, "loss": 0.7431, "step": 10115 }, { "epoch": 0.81, "grad_norm": 1.3989649304626168, "learning_rate": 9.021294303140621e-07, "loss": 0.7133, "step": 10116 }, { "epoch": 0.81, "grad_norm": 1.5871713969902226, "learning_rate": 9.013851318012867e-07, "loss": 0.8137, "step": 10117 }, { "epoch": 0.81, "grad_norm": 1.6833821359491188, "learning_rate": 9.006411100363577e-07, "loss": 0.6829, "step": 10118 }, { "epoch": 0.81, "grad_norm": 1.533150142420492, "learning_rate": 8.998973650695158e-07, "loss": 0.7653, "step": 10119 }, { "epoch": 0.81, "grad_norm": 1.5485299609942147, "learning_rate": 8.991538969509789e-07, "loss": 0.857, "step": 10120 }, { "epoch": 0.81, "grad_norm": 1.5879626897936094, "learning_rate": 8.984107057309476e-07, "loss": 0.6973, "step": 10121 }, { "epoch": 0.81, "grad_norm": 1.5457025980431855, "learning_rate": 8.976677914596049e-07, "loss": 0.7389, "step": 10122 }, { "epoch": 0.81, "grad_norm": 1.4821339979436228, "learning_rate": 8.969251541871149e-07, "loss": 0.7781, "step": 10123 }, { "epoch": 0.81, "grad_norm": 0.7705381661315329, "learning_rate": 8.961827939636198e-07, "loss": 1.0655, "step": 10124 }, { "epoch": 0.81, "grad_norm": 1.4783518080521583, "learning_rate": 8.954407108392449e-07, "loss": 0.725, "step": 10125 }, { "epoch": 0.81, "grad_norm": 1.6874063092655454, "learning_rate": 8.946989048641003e-07, "loss": 0.6991, "step": 10126 }, { "epoch": 0.81, "grad_norm": 1.5464894536714908, "learning_rate": 8.939573760882725e-07, "loss": 0.8277, "step": 10127 }, { "epoch": 0.81, "grad_norm": 0.7802660889935711, "learning_rate": 8.932161245618309e-07, "loss": 1.0707, "step": 10128 }, { "epoch": 0.81, "grad_norm": 1.5134453042046034, "learning_rate": 8.924751503348283e-07, "loss": 0.7457, "step": 10129 }, { "epoch": 0.81, "grad_norm": 1.560490867114649, "learning_rate": 8.917344534572958e-07, "loss": 0.6965, "step": 10130 }, { "epoch": 0.81, "grad_norm": 1.5764814063442085, "learning_rate": 8.909940339792461e-07, "loss": 0.7546, "step": 10131 }, { "epoch": 0.81, "grad_norm": 1.515347282147829, "learning_rate": 8.902538919506764e-07, "loss": 0.7616, "step": 10132 }, { "epoch": 0.81, "grad_norm": 0.7531315407678318, "learning_rate": 8.895140274215614e-07, "loss": 1.0218, "step": 10133 }, { "epoch": 0.81, "grad_norm": 1.5008331805921915, "learning_rate": 8.887744404418585e-07, "loss": 0.6935, "step": 10134 }, { "epoch": 0.81, "grad_norm": 0.765675471255049, "learning_rate": 8.88035131061506e-07, "loss": 1.0759, "step": 10135 }, { "epoch": 0.81, "grad_norm": 0.7438077656652952, "learning_rate": 8.872960993304242e-07, "loss": 1.0404, "step": 10136 }, { "epoch": 0.81, "grad_norm": 1.4635293951977566, "learning_rate": 8.865573452985143e-07, "loss": 0.7372, "step": 10137 }, { "epoch": 0.81, "grad_norm": 1.4755754594313066, "learning_rate": 8.858188690156567e-07, "loss": 0.8275, "step": 10138 }, { "epoch": 0.81, "grad_norm": 1.5126892016295028, "learning_rate": 8.850806705317183e-07, "loss": 0.7799, "step": 10139 }, { "epoch": 0.81, "grad_norm": 1.417690208879637, "learning_rate": 8.843427498965423e-07, "loss": 0.7238, "step": 10140 }, { "epoch": 0.81, "grad_norm": 1.5570456402305617, "learning_rate": 8.836051071599544e-07, "loss": 0.824, "step": 10141 }, { "epoch": 0.81, "grad_norm": 1.5839204884786524, "learning_rate": 8.828677423717613e-07, "loss": 0.829, "step": 10142 }, { "epoch": 0.81, "grad_norm": 1.5575821924331552, "learning_rate": 8.821306555817543e-07, "loss": 0.7346, "step": 10143 }, { "epoch": 0.81, "grad_norm": 1.501007863847518, "learning_rate": 8.813938468397015e-07, "loss": 0.6818, "step": 10144 }, { "epoch": 0.81, "grad_norm": 1.5194149106942936, "learning_rate": 8.806573161953536e-07, "loss": 0.7458, "step": 10145 }, { "epoch": 0.81, "grad_norm": 1.5738341834858505, "learning_rate": 8.799210636984428e-07, "loss": 0.7722, "step": 10146 }, { "epoch": 0.81, "grad_norm": 1.442636467051838, "learning_rate": 8.791850893986836e-07, "loss": 0.7724, "step": 10147 }, { "epoch": 0.81, "grad_norm": 1.5618563346852357, "learning_rate": 8.784493933457699e-07, "loss": 0.7366, "step": 10148 }, { "epoch": 0.81, "grad_norm": 1.528717899225521, "learning_rate": 8.77713975589376e-07, "loss": 0.7785, "step": 10149 }, { "epoch": 0.81, "grad_norm": 1.5393243543183721, "learning_rate": 8.76978836179162e-07, "loss": 0.7188, "step": 10150 }, { "epoch": 0.81, "grad_norm": 0.7426861015636986, "learning_rate": 8.76243975164765e-07, "loss": 1.0519, "step": 10151 }, { "epoch": 0.81, "grad_norm": 0.7501658562681219, "learning_rate": 8.755093925958031e-07, "loss": 1.0636, "step": 10152 }, { "epoch": 0.81, "grad_norm": 1.6020802432548888, "learning_rate": 8.747750885218792e-07, "loss": 0.7777, "step": 10153 }, { "epoch": 0.81, "grad_norm": 1.5555029530125557, "learning_rate": 8.740410629925744e-07, "loss": 0.748, "step": 10154 }, { "epoch": 0.81, "grad_norm": 1.3860848196581452, "learning_rate": 8.733073160574517e-07, "loss": 0.7676, "step": 10155 }, { "epoch": 0.81, "grad_norm": 1.7968976988873908, "learning_rate": 8.725738477660556e-07, "loss": 0.7055, "step": 10156 }, { "epoch": 0.81, "grad_norm": 1.4347857152493084, "learning_rate": 8.718406581679107e-07, "loss": 0.7229, "step": 10157 }, { "epoch": 0.81, "grad_norm": 1.5358969048263418, "learning_rate": 8.71107747312524e-07, "loss": 0.7009, "step": 10158 }, { "epoch": 0.82, "grad_norm": 1.547901260272034, "learning_rate": 8.703751152493828e-07, "loss": 0.7693, "step": 10159 }, { "epoch": 0.82, "grad_norm": 0.7496180059543212, "learning_rate": 8.696427620279579e-07, "loss": 1.0605, "step": 10160 }, { "epoch": 0.82, "grad_norm": 1.5953470538975045, "learning_rate": 8.689106876976983e-07, "loss": 0.8224, "step": 10161 }, { "epoch": 0.82, "grad_norm": 0.7774204025243533, "learning_rate": 8.681788923080337e-07, "loss": 1.0582, "step": 10162 }, { "epoch": 0.82, "grad_norm": 1.4354758058779573, "learning_rate": 8.674473759083801e-07, "loss": 0.7371, "step": 10163 }, { "epoch": 0.82, "grad_norm": 1.52074137938149, "learning_rate": 8.667161385481288e-07, "loss": 0.7244, "step": 10164 }, { "epoch": 0.82, "grad_norm": 1.5488173634886915, "learning_rate": 8.659851802766555e-07, "loss": 0.6389, "step": 10165 }, { "epoch": 0.82, "grad_norm": 1.513652808658308, "learning_rate": 8.652545011433144e-07, "loss": 0.7513, "step": 10166 }, { "epoch": 0.82, "grad_norm": 1.741752828838893, "learning_rate": 8.64524101197447e-07, "loss": 0.7322, "step": 10167 }, { "epoch": 0.82, "grad_norm": 1.4414207737678317, "learning_rate": 8.637939804883672e-07, "loss": 0.7198, "step": 10168 }, { "epoch": 0.82, "grad_norm": 1.459053725213051, "learning_rate": 8.630641390653743e-07, "loss": 0.763, "step": 10169 }, { "epoch": 0.82, "grad_norm": 0.76739420214067, "learning_rate": 8.623345769777514e-07, "loss": 1.0642, "step": 10170 }, { "epoch": 0.82, "grad_norm": 1.5401220092356134, "learning_rate": 8.616052942747599e-07, "loss": 0.7616, "step": 10171 }, { "epoch": 0.82, "grad_norm": 1.4591732511524664, "learning_rate": 8.6087629100564e-07, "loss": 0.735, "step": 10172 }, { "epoch": 0.82, "grad_norm": 1.5578403503390603, "learning_rate": 8.601475672196197e-07, "loss": 0.7916, "step": 10173 }, { "epoch": 0.82, "grad_norm": 0.7445508591833385, "learning_rate": 8.594191229659016e-07, "loss": 1.0414, "step": 10174 }, { "epoch": 0.82, "grad_norm": 1.4300110117459108, "learning_rate": 8.58690958293672e-07, "loss": 0.7375, "step": 10175 }, { "epoch": 0.82, "grad_norm": 1.4912285846996498, "learning_rate": 8.579630732520977e-07, "loss": 0.8451, "step": 10176 }, { "epoch": 0.82, "grad_norm": 1.4969331914012287, "learning_rate": 8.572354678903289e-07, "loss": 0.8301, "step": 10177 }, { "epoch": 0.82, "grad_norm": 1.5342405687355272, "learning_rate": 8.56508142257495e-07, "loss": 0.7128, "step": 10178 }, { "epoch": 0.82, "grad_norm": 1.6278346228126688, "learning_rate": 8.557810964027053e-07, "loss": 0.7043, "step": 10179 }, { "epoch": 0.82, "grad_norm": 1.4520925146516004, "learning_rate": 8.550543303750524e-07, "loss": 0.7107, "step": 10180 }, { "epoch": 0.82, "grad_norm": 1.8977213897993432, "learning_rate": 8.543278442236097e-07, "loss": 0.7459, "step": 10181 }, { "epoch": 0.82, "grad_norm": 1.4442924210143684, "learning_rate": 8.536016379974299e-07, "loss": 0.6952, "step": 10182 }, { "epoch": 0.82, "grad_norm": 1.4524355796860815, "learning_rate": 8.528757117455477e-07, "loss": 0.6414, "step": 10183 }, { "epoch": 0.82, "grad_norm": 0.7514701978352157, "learning_rate": 8.521500655169823e-07, "loss": 1.0712, "step": 10184 }, { "epoch": 0.82, "grad_norm": 1.6348993875766722, "learning_rate": 8.514246993607284e-07, "loss": 0.8038, "step": 10185 }, { "epoch": 0.82, "grad_norm": 1.5118774201926732, "learning_rate": 8.506996133257639e-07, "loss": 0.7185, "step": 10186 }, { "epoch": 0.82, "grad_norm": 1.4951837789466995, "learning_rate": 8.499748074610508e-07, "loss": 0.7413, "step": 10187 }, { "epoch": 0.82, "grad_norm": 1.5850954121658314, "learning_rate": 8.492502818155285e-07, "loss": 0.7322, "step": 10188 }, { "epoch": 0.82, "grad_norm": 1.451877513298632, "learning_rate": 8.485260364381187e-07, "loss": 0.7564, "step": 10189 }, { "epoch": 0.82, "grad_norm": 0.7565086499529938, "learning_rate": 8.478020713777235e-07, "loss": 1.0777, "step": 10190 }, { "epoch": 0.82, "grad_norm": 1.704899018521599, "learning_rate": 8.470783866832266e-07, "loss": 0.737, "step": 10191 }, { "epoch": 0.82, "grad_norm": 1.542126325351262, "learning_rate": 8.463549824034939e-07, "loss": 0.7124, "step": 10192 }, { "epoch": 0.82, "grad_norm": 1.5974615369250018, "learning_rate": 8.456318585873691e-07, "loss": 0.7208, "step": 10193 }, { "epoch": 0.82, "grad_norm": 1.5187077342593653, "learning_rate": 8.44909015283682e-07, "loss": 0.6571, "step": 10194 }, { "epoch": 0.82, "grad_norm": 1.5736099974968196, "learning_rate": 8.441864525412396e-07, "loss": 0.8368, "step": 10195 }, { "epoch": 0.82, "grad_norm": 1.63263632469122, "learning_rate": 8.434641704088292e-07, "loss": 0.7494, "step": 10196 }, { "epoch": 0.82, "grad_norm": 1.4477721140924287, "learning_rate": 8.427421689352239e-07, "loss": 0.6645, "step": 10197 }, { "epoch": 0.82, "grad_norm": 1.666066352949737, "learning_rate": 8.420204481691734e-07, "loss": 0.7191, "step": 10198 }, { "epoch": 0.82, "grad_norm": 1.4825500970841934, "learning_rate": 8.412990081594102e-07, "loss": 0.7463, "step": 10199 }, { "epoch": 0.82, "grad_norm": 1.614495739773159, "learning_rate": 8.405778489546474e-07, "loss": 0.7442, "step": 10200 }, { "epoch": 0.82, "grad_norm": 1.4918497704445945, "learning_rate": 8.398569706035791e-07, "loss": 0.724, "step": 10201 }, { "epoch": 0.82, "grad_norm": 1.4795152591543022, "learning_rate": 8.391363731548813e-07, "loss": 0.783, "step": 10202 }, { "epoch": 0.82, "grad_norm": 1.6500498020394825, "learning_rate": 8.384160566572086e-07, "loss": 0.7941, "step": 10203 }, { "epoch": 0.82, "grad_norm": 1.4742088121139285, "learning_rate": 8.376960211592011e-07, "loss": 0.6915, "step": 10204 }, { "epoch": 0.82, "grad_norm": 1.4991766951381171, "learning_rate": 8.369762667094755e-07, "loss": 0.6646, "step": 10205 }, { "epoch": 0.82, "grad_norm": 1.436987827435293, "learning_rate": 8.362567933566318e-07, "loss": 0.777, "step": 10206 }, { "epoch": 0.82, "grad_norm": 1.4368734587817575, "learning_rate": 8.355376011492494e-07, "loss": 0.7457, "step": 10207 }, { "epoch": 0.82, "grad_norm": 1.640520049255847, "learning_rate": 8.348186901358923e-07, "loss": 0.6711, "step": 10208 }, { "epoch": 0.82, "grad_norm": 0.7525985723075654, "learning_rate": 8.341000603651012e-07, "loss": 1.0713, "step": 10209 }, { "epoch": 0.82, "grad_norm": 1.5059165286419782, "learning_rate": 8.333817118853982e-07, "loss": 0.7328, "step": 10210 }, { "epoch": 0.82, "grad_norm": 1.5650353981886584, "learning_rate": 8.326636447452929e-07, "loss": 0.765, "step": 10211 }, { "epoch": 0.82, "grad_norm": 0.7585011484426548, "learning_rate": 8.319458589932655e-07, "loss": 1.0514, "step": 10212 }, { "epoch": 0.82, "grad_norm": 1.6445875837214472, "learning_rate": 8.312283546777838e-07, "loss": 0.7847, "step": 10213 }, { "epoch": 0.82, "grad_norm": 1.3982329666333004, "learning_rate": 8.30511131847297e-07, "loss": 0.6997, "step": 10214 }, { "epoch": 0.82, "grad_norm": 1.6035742326762072, "learning_rate": 8.297941905502327e-07, "loss": 0.7825, "step": 10215 }, { "epoch": 0.82, "grad_norm": 1.5599981503941596, "learning_rate": 8.290775308350008e-07, "loss": 0.7617, "step": 10216 }, { "epoch": 0.82, "grad_norm": 1.5520083592957572, "learning_rate": 8.283611527499896e-07, "loss": 0.7391, "step": 10217 }, { "epoch": 0.82, "grad_norm": 1.5544708827922447, "learning_rate": 8.276450563435739e-07, "loss": 0.7258, "step": 10218 }, { "epoch": 0.82, "grad_norm": 1.4727551778047538, "learning_rate": 8.26929241664105e-07, "loss": 0.7793, "step": 10219 }, { "epoch": 0.82, "grad_norm": 1.4974960701712126, "learning_rate": 8.262137087599142e-07, "loss": 0.7592, "step": 10220 }, { "epoch": 0.82, "grad_norm": 1.4026477198583949, "learning_rate": 8.254984576793196e-07, "loss": 0.6974, "step": 10221 }, { "epoch": 0.82, "grad_norm": 1.4068663818250469, "learning_rate": 8.247834884706141e-07, "loss": 0.7791, "step": 10222 }, { "epoch": 0.82, "grad_norm": 1.4759468815370436, "learning_rate": 8.240688011820752e-07, "loss": 0.8847, "step": 10223 }, { "epoch": 0.82, "grad_norm": 1.511019834245238, "learning_rate": 8.233543958619594e-07, "loss": 0.811, "step": 10224 }, { "epoch": 0.82, "grad_norm": 1.3995781304111894, "learning_rate": 8.226402725585053e-07, "loss": 0.7457, "step": 10225 }, { "epoch": 0.82, "grad_norm": 0.7736057707822106, "learning_rate": 8.219264313199322e-07, "loss": 1.0558, "step": 10226 }, { "epoch": 0.82, "grad_norm": 0.7814040401984077, "learning_rate": 8.212128721944385e-07, "loss": 1.0732, "step": 10227 }, { "epoch": 0.82, "grad_norm": 1.5101283017678795, "learning_rate": 8.204995952302087e-07, "loss": 0.7714, "step": 10228 }, { "epoch": 0.82, "grad_norm": 1.4683763197373363, "learning_rate": 8.197866004754029e-07, "loss": 0.7713, "step": 10229 }, { "epoch": 0.82, "grad_norm": 1.5056232147940996, "learning_rate": 8.190738879781634e-07, "loss": 0.6931, "step": 10230 }, { "epoch": 0.82, "grad_norm": 1.5147970630088945, "learning_rate": 8.183614577866166e-07, "loss": 0.8008, "step": 10231 }, { "epoch": 0.82, "grad_norm": 1.5289036058634384, "learning_rate": 8.176493099488664e-07, "loss": 0.778, "step": 10232 }, { "epoch": 0.82, "grad_norm": 1.6294036582417435, "learning_rate": 8.169374445129979e-07, "loss": 0.7987, "step": 10233 }, { "epoch": 0.82, "grad_norm": 1.5520114729667593, "learning_rate": 8.162258615270779e-07, "loss": 0.7522, "step": 10234 }, { "epoch": 0.82, "grad_norm": 1.5595654595584059, "learning_rate": 8.155145610391552e-07, "loss": 0.7779, "step": 10235 }, { "epoch": 0.82, "grad_norm": 1.5477778735512766, "learning_rate": 8.148035430972573e-07, "loss": 0.7606, "step": 10236 }, { "epoch": 0.82, "grad_norm": 1.5270388064118565, "learning_rate": 8.140928077493937e-07, "loss": 0.758, "step": 10237 }, { "epoch": 0.82, "grad_norm": 1.64085322706877, "learning_rate": 8.13382355043556e-07, "loss": 0.8114, "step": 10238 }, { "epoch": 0.82, "grad_norm": 1.6146529992285388, "learning_rate": 8.126721850277147e-07, "loss": 0.8136, "step": 10239 }, { "epoch": 0.82, "grad_norm": 1.4926189446044338, "learning_rate": 8.119622977498226e-07, "loss": 0.775, "step": 10240 }, { "epoch": 0.82, "grad_norm": 0.754038067042479, "learning_rate": 8.112526932578118e-07, "loss": 1.0748, "step": 10241 }, { "epoch": 0.82, "grad_norm": 1.5001416245813683, "learning_rate": 8.105433715995981e-07, "loss": 0.7364, "step": 10242 }, { "epoch": 0.82, "grad_norm": 1.46228679948755, "learning_rate": 8.098343328230762e-07, "loss": 0.7734, "step": 10243 }, { "epoch": 0.82, "grad_norm": 1.5484875276888, "learning_rate": 8.091255769761213e-07, "loss": 0.8016, "step": 10244 }, { "epoch": 0.82, "grad_norm": 1.442836367190372, "learning_rate": 8.084171041065903e-07, "loss": 0.7235, "step": 10245 }, { "epoch": 0.82, "grad_norm": 1.5071222364428718, "learning_rate": 8.077089142623212e-07, "loss": 0.7857, "step": 10246 }, { "epoch": 0.82, "grad_norm": 1.5942964847271635, "learning_rate": 8.070010074911322e-07, "loss": 0.8741, "step": 10247 }, { "epoch": 0.82, "grad_norm": 1.633149424258743, "learning_rate": 8.062933838408221e-07, "loss": 0.7937, "step": 10248 }, { "epoch": 0.82, "grad_norm": 0.776484005388792, "learning_rate": 8.055860433591734e-07, "loss": 1.0945, "step": 10249 }, { "epoch": 0.82, "grad_norm": 0.7546064764548901, "learning_rate": 8.04878986093946e-07, "loss": 1.062, "step": 10250 }, { "epoch": 0.82, "grad_norm": 1.4943057687077619, "learning_rate": 8.041722120928814e-07, "loss": 0.7331, "step": 10251 }, { "epoch": 0.82, "grad_norm": 1.4591992800753493, "learning_rate": 8.034657214037044e-07, "loss": 0.7466, "step": 10252 }, { "epoch": 0.82, "grad_norm": 1.4736758923055433, "learning_rate": 8.027595140741179e-07, "loss": 0.6754, "step": 10253 }, { "epoch": 0.82, "grad_norm": 0.7771733837920801, "learning_rate": 8.02053590151805e-07, "loss": 1.0858, "step": 10254 }, { "epoch": 0.82, "grad_norm": 1.536919960923253, "learning_rate": 8.013479496844356e-07, "loss": 0.7553, "step": 10255 }, { "epoch": 0.82, "grad_norm": 1.536941881898944, "learning_rate": 8.00642592719652e-07, "loss": 0.7422, "step": 10256 }, { "epoch": 0.82, "grad_norm": 1.4362632627505598, "learning_rate": 7.999375193050828e-07, "loss": 0.786, "step": 10257 }, { "epoch": 0.82, "grad_norm": 1.6465829958170026, "learning_rate": 7.992327294883356e-07, "loss": 0.7031, "step": 10258 }, { "epoch": 0.82, "grad_norm": 1.5027636303804737, "learning_rate": 7.985282233170011e-07, "loss": 0.7004, "step": 10259 }, { "epoch": 0.82, "grad_norm": 1.5610655343788522, "learning_rate": 7.978240008386484e-07, "loss": 0.7726, "step": 10260 }, { "epoch": 0.82, "grad_norm": 1.551819591667791, "learning_rate": 7.971200621008268e-07, "loss": 0.7187, "step": 10261 }, { "epoch": 0.82, "grad_norm": 1.5493630133110987, "learning_rate": 7.964164071510699e-07, "loss": 0.7193, "step": 10262 }, { "epoch": 0.82, "grad_norm": 1.5785564375583097, "learning_rate": 7.957130360368898e-07, "loss": 0.8462, "step": 10263 }, { "epoch": 0.82, "grad_norm": 1.4630168601358768, "learning_rate": 7.950099488057788e-07, "loss": 0.7374, "step": 10264 }, { "epoch": 0.82, "grad_norm": 1.6070547531547, "learning_rate": 7.943071455052104e-07, "loss": 0.775, "step": 10265 }, { "epoch": 0.82, "grad_norm": 1.5071354970366702, "learning_rate": 7.936046261826413e-07, "loss": 0.7089, "step": 10266 }, { "epoch": 0.82, "grad_norm": 1.584734252628964, "learning_rate": 7.929023908855066e-07, "loss": 0.7355, "step": 10267 }, { "epoch": 0.82, "grad_norm": 1.5980278197505224, "learning_rate": 7.922004396612226e-07, "loss": 0.741, "step": 10268 }, { "epoch": 0.82, "grad_norm": 1.5348900988624177, "learning_rate": 7.914987725571866e-07, "loss": 0.7261, "step": 10269 }, { "epoch": 0.82, "grad_norm": 0.7323165694928259, "learning_rate": 7.907973896207765e-07, "loss": 1.071, "step": 10270 }, { "epoch": 0.82, "grad_norm": 1.6220279908319377, "learning_rate": 7.900962908993509e-07, "loss": 0.7909, "step": 10271 }, { "epoch": 0.82, "grad_norm": 0.7649054450814116, "learning_rate": 7.893954764402512e-07, "loss": 1.0643, "step": 10272 }, { "epoch": 0.82, "grad_norm": 1.5417060639215268, "learning_rate": 7.886949462907967e-07, "loss": 0.7242, "step": 10273 }, { "epoch": 0.82, "grad_norm": 1.612312457868986, "learning_rate": 7.879947004982896e-07, "loss": 0.7931, "step": 10274 }, { "epoch": 0.82, "grad_norm": 1.597137712567434, "learning_rate": 7.872947391100106e-07, "loss": 0.7609, "step": 10275 }, { "epoch": 0.82, "grad_norm": 1.6347883306074404, "learning_rate": 7.865950621732244e-07, "loss": 0.7638, "step": 10276 }, { "epoch": 0.82, "grad_norm": 0.7470409383064237, "learning_rate": 7.858956697351744e-07, "loss": 1.0279, "step": 10277 }, { "epoch": 0.82, "grad_norm": 1.5180657775056046, "learning_rate": 7.851965618430852e-07, "loss": 0.6709, "step": 10278 }, { "epoch": 0.82, "grad_norm": 1.4825635691066743, "learning_rate": 7.844977385441615e-07, "loss": 0.718, "step": 10279 }, { "epoch": 0.82, "grad_norm": 1.496532067294344, "learning_rate": 7.837991998855899e-07, "loss": 0.7568, "step": 10280 }, { "epoch": 0.82, "grad_norm": 1.457051423912158, "learning_rate": 7.831009459145372e-07, "loss": 0.7054, "step": 10281 }, { "epoch": 0.82, "grad_norm": 1.4589054575110274, "learning_rate": 7.824029766781499e-07, "loss": 0.7396, "step": 10282 }, { "epoch": 0.83, "grad_norm": 0.7698748056826352, "learning_rate": 7.817052922235591e-07, "loss": 1.0756, "step": 10283 }, { "epoch": 0.83, "grad_norm": 1.5313419975978542, "learning_rate": 7.810078925978731e-07, "loss": 0.7039, "step": 10284 }, { "epoch": 0.83, "grad_norm": 1.6188830003514372, "learning_rate": 7.803107778481794e-07, "loss": 0.7551, "step": 10285 }, { "epoch": 0.83, "grad_norm": 1.419430971444412, "learning_rate": 7.796139480215525e-07, "loss": 0.8577, "step": 10286 }, { "epoch": 0.83, "grad_norm": 1.46528383426777, "learning_rate": 7.789174031650426e-07, "loss": 0.7363, "step": 10287 }, { "epoch": 0.83, "grad_norm": 1.6267693388492481, "learning_rate": 7.782211433256815e-07, "loss": 0.7214, "step": 10288 }, { "epoch": 0.83, "grad_norm": 1.4683304813193683, "learning_rate": 7.775251685504826e-07, "loss": 0.732, "step": 10289 }, { "epoch": 0.83, "grad_norm": 1.4328687078390012, "learning_rate": 7.768294788864395e-07, "loss": 0.7141, "step": 10290 }, { "epoch": 0.83, "grad_norm": 1.6604123596625227, "learning_rate": 7.761340743805268e-07, "loss": 0.7556, "step": 10291 }, { "epoch": 0.83, "grad_norm": 1.5296655377143857, "learning_rate": 7.754389550796987e-07, "loss": 0.7147, "step": 10292 }, { "epoch": 0.83, "grad_norm": 1.483671714936275, "learning_rate": 7.747441210308937e-07, "loss": 0.7295, "step": 10293 }, { "epoch": 0.83, "grad_norm": 1.54895338398349, "learning_rate": 7.740495722810271e-07, "loss": 0.7751, "step": 10294 }, { "epoch": 0.83, "grad_norm": 1.5208849836493767, "learning_rate": 7.733553088769952e-07, "loss": 0.7564, "step": 10295 }, { "epoch": 0.83, "grad_norm": 0.7669142752324981, "learning_rate": 7.726613308656788e-07, "loss": 1.0532, "step": 10296 }, { "epoch": 0.83, "grad_norm": 1.5715339787896745, "learning_rate": 7.719676382939362e-07, "loss": 0.8116, "step": 10297 }, { "epoch": 0.83, "grad_norm": 1.4740648374303387, "learning_rate": 7.712742312086064e-07, "loss": 0.721, "step": 10298 }, { "epoch": 0.83, "grad_norm": 1.4880516738219, "learning_rate": 7.705811096565102e-07, "loss": 0.7388, "step": 10299 }, { "epoch": 0.83, "grad_norm": 1.501940404441746, "learning_rate": 7.698882736844487e-07, "loss": 0.8, "step": 10300 }, { "epoch": 0.83, "grad_norm": 1.621992149723792, "learning_rate": 7.691957233392034e-07, "loss": 0.7408, "step": 10301 }, { "epoch": 0.83, "grad_norm": 1.5358297130641876, "learning_rate": 7.685034586675361e-07, "loss": 0.7193, "step": 10302 }, { "epoch": 0.83, "grad_norm": 0.7498570102289274, "learning_rate": 7.678114797161928e-07, "loss": 1.0231, "step": 10303 }, { "epoch": 0.83, "grad_norm": 1.4608801048723459, "learning_rate": 7.671197865318952e-07, "loss": 0.7639, "step": 10304 }, { "epoch": 0.83, "grad_norm": 1.5848901877746584, "learning_rate": 7.664283791613492e-07, "loss": 0.7075, "step": 10305 }, { "epoch": 0.83, "grad_norm": 1.6534338065588428, "learning_rate": 7.657372576512384e-07, "loss": 0.7785, "step": 10306 }, { "epoch": 0.83, "grad_norm": 1.5534680216176417, "learning_rate": 7.650464220482312e-07, "loss": 0.7592, "step": 10307 }, { "epoch": 0.83, "grad_norm": 0.7922126301473036, "learning_rate": 7.64355872398973e-07, "loss": 1.0475, "step": 10308 }, { "epoch": 0.83, "grad_norm": 1.5542972862980537, "learning_rate": 7.63665608750091e-07, "loss": 0.8239, "step": 10309 }, { "epoch": 0.83, "grad_norm": 0.7573813760714273, "learning_rate": 7.62975631148195e-07, "loss": 1.015, "step": 10310 }, { "epoch": 0.83, "grad_norm": 0.7847927430900885, "learning_rate": 7.622859396398735e-07, "loss": 1.0643, "step": 10311 }, { "epoch": 0.83, "grad_norm": 1.4369227988905473, "learning_rate": 7.615965342716952e-07, "loss": 0.7523, "step": 10312 }, { "epoch": 0.83, "grad_norm": 1.5937328612498105, "learning_rate": 7.609074150902102e-07, "loss": 0.7028, "step": 10313 }, { "epoch": 0.83, "grad_norm": 0.7656282234931853, "learning_rate": 7.6021858214195e-07, "loss": 1.0525, "step": 10314 }, { "epoch": 0.83, "grad_norm": 1.5527211629838769, "learning_rate": 7.595300354734264e-07, "loss": 0.7223, "step": 10315 }, { "epoch": 0.83, "grad_norm": 1.4934760246590477, "learning_rate": 7.588417751311295e-07, "loss": 0.7697, "step": 10316 }, { "epoch": 0.83, "grad_norm": 1.6212137554661288, "learning_rate": 7.581538011615352e-07, "loss": 0.7798, "step": 10317 }, { "epoch": 0.83, "grad_norm": 1.5285218912753724, "learning_rate": 7.574661136110961e-07, "loss": 0.7483, "step": 10318 }, { "epoch": 0.83, "grad_norm": 0.7923824465177316, "learning_rate": 7.567787125262449e-07, "loss": 1.0432, "step": 10319 }, { "epoch": 0.83, "grad_norm": 1.4920213332260217, "learning_rate": 7.56091597953399e-07, "loss": 0.7863, "step": 10320 }, { "epoch": 0.83, "grad_norm": 1.4835500345882757, "learning_rate": 7.554047699389522e-07, "loss": 0.8009, "step": 10321 }, { "epoch": 0.83, "grad_norm": 1.5025873877671012, "learning_rate": 7.547182285292815e-07, "loss": 0.6965, "step": 10322 }, { "epoch": 0.83, "grad_norm": 1.595733219146192, "learning_rate": 7.540319737707436e-07, "loss": 0.7401, "step": 10323 }, { "epoch": 0.83, "grad_norm": 1.5105110648519158, "learning_rate": 7.533460057096753e-07, "loss": 0.7967, "step": 10324 }, { "epoch": 0.83, "grad_norm": 0.766914754321143, "learning_rate": 7.526603243923958e-07, "loss": 1.0592, "step": 10325 }, { "epoch": 0.83, "grad_norm": 1.6215265805439474, "learning_rate": 7.519749298652018e-07, "loss": 0.7621, "step": 10326 }, { "epoch": 0.83, "grad_norm": 1.5330426107122235, "learning_rate": 7.512898221743759e-07, "loss": 0.7667, "step": 10327 }, { "epoch": 0.83, "grad_norm": 1.5690573473778342, "learning_rate": 7.506050013661758e-07, "loss": 0.8169, "step": 10328 }, { "epoch": 0.83, "grad_norm": 1.6036763239530325, "learning_rate": 7.499204674868421e-07, "loss": 0.7498, "step": 10329 }, { "epoch": 0.83, "grad_norm": 1.5928210510040004, "learning_rate": 7.492362205825981e-07, "loss": 0.7915, "step": 10330 }, { "epoch": 0.83, "grad_norm": 1.7204956188703666, "learning_rate": 7.485522606996443e-07, "loss": 0.7759, "step": 10331 }, { "epoch": 0.83, "grad_norm": 1.450371505809567, "learning_rate": 7.478685878841629e-07, "loss": 0.7724, "step": 10332 }, { "epoch": 0.83, "grad_norm": 1.4309713569626306, "learning_rate": 7.471852021823184e-07, "loss": 0.7251, "step": 10333 }, { "epoch": 0.83, "grad_norm": 1.4269180575021303, "learning_rate": 7.465021036402531e-07, "loss": 0.745, "step": 10334 }, { "epoch": 0.83, "grad_norm": 1.6290975203159619, "learning_rate": 7.458192923040919e-07, "loss": 0.7194, "step": 10335 }, { "epoch": 0.83, "grad_norm": 1.4166478068792738, "learning_rate": 7.451367682199389e-07, "loss": 0.771, "step": 10336 }, { "epoch": 0.83, "grad_norm": 0.7440258357671208, "learning_rate": 7.444545314338819e-07, "loss": 1.0581, "step": 10337 }, { "epoch": 0.83, "grad_norm": 1.537765593305599, "learning_rate": 7.437725819919861e-07, "loss": 0.7029, "step": 10338 }, { "epoch": 0.83, "grad_norm": 1.4606459179133346, "learning_rate": 7.430909199402974e-07, "loss": 0.711, "step": 10339 }, { "epoch": 0.83, "grad_norm": 1.444786337492407, "learning_rate": 7.424095453248431e-07, "loss": 0.7545, "step": 10340 }, { "epoch": 0.83, "grad_norm": 0.7287186007689946, "learning_rate": 7.417284581916329e-07, "loss": 1.0749, "step": 10341 }, { "epoch": 0.83, "grad_norm": 1.5226347725380003, "learning_rate": 7.410476585866538e-07, "loss": 0.7262, "step": 10342 }, { "epoch": 0.83, "grad_norm": 0.7414041986488041, "learning_rate": 7.403671465558765e-07, "loss": 1.0471, "step": 10343 }, { "epoch": 0.83, "grad_norm": 0.7410818256361182, "learning_rate": 7.396869221452491e-07, "loss": 1.0698, "step": 10344 }, { "epoch": 0.83, "grad_norm": 1.5374568775795046, "learning_rate": 7.390069854007026e-07, "loss": 0.7523, "step": 10345 }, { "epoch": 0.83, "grad_norm": 1.5235908515650571, "learning_rate": 7.383273363681476e-07, "loss": 0.7621, "step": 10346 }, { "epoch": 0.83, "grad_norm": 0.767277451350047, "learning_rate": 7.376479750934745e-07, "loss": 1.0411, "step": 10347 }, { "epoch": 0.83, "grad_norm": 1.5172849884533024, "learning_rate": 7.369689016225578e-07, "loss": 0.8048, "step": 10348 }, { "epoch": 0.83, "grad_norm": 1.5217305451830077, "learning_rate": 7.362901160012492e-07, "loss": 0.7124, "step": 10349 }, { "epoch": 0.83, "grad_norm": 1.671789482090962, "learning_rate": 7.356116182753803e-07, "loss": 0.7907, "step": 10350 }, { "epoch": 0.83, "grad_norm": 1.6081906839393203, "learning_rate": 7.349334084907672e-07, "loss": 0.7447, "step": 10351 }, { "epoch": 0.83, "grad_norm": 0.756013551609216, "learning_rate": 7.342554866932028e-07, "loss": 1.0433, "step": 10352 }, { "epoch": 0.83, "grad_norm": 1.5682380616308471, "learning_rate": 7.335778529284615e-07, "loss": 0.7626, "step": 10353 }, { "epoch": 0.83, "grad_norm": 1.4565073083883056, "learning_rate": 7.329005072423001e-07, "loss": 0.7094, "step": 10354 }, { "epoch": 0.83, "grad_norm": 1.543418417449908, "learning_rate": 7.322234496804536e-07, "loss": 0.7607, "step": 10355 }, { "epoch": 0.83, "grad_norm": 1.4818711511818212, "learning_rate": 7.315466802886401e-07, "loss": 0.7397, "step": 10356 }, { "epoch": 0.83, "grad_norm": 1.55167643444967, "learning_rate": 7.308701991125527e-07, "loss": 0.7767, "step": 10357 }, { "epoch": 0.83, "grad_norm": 1.8701745948581088, "learning_rate": 7.301940061978724e-07, "loss": 0.7821, "step": 10358 }, { "epoch": 0.83, "grad_norm": 1.4877157008475492, "learning_rate": 7.295181015902569e-07, "loss": 0.7717, "step": 10359 }, { "epoch": 0.83, "grad_norm": 1.492867331982862, "learning_rate": 7.288424853353426e-07, "loss": 0.7301, "step": 10360 }, { "epoch": 0.83, "grad_norm": 1.5286979661644513, "learning_rate": 7.281671574787513e-07, "loss": 0.7952, "step": 10361 }, { "epoch": 0.83, "grad_norm": 1.4723094658909126, "learning_rate": 7.274921180660821e-07, "loss": 0.8476, "step": 10362 }, { "epoch": 0.83, "grad_norm": 1.4939130870980386, "learning_rate": 7.268173671429147e-07, "loss": 0.7871, "step": 10363 }, { "epoch": 0.83, "grad_norm": 1.556158302633824, "learning_rate": 7.261429047548085e-07, "loss": 0.7343, "step": 10364 }, { "epoch": 0.83, "grad_norm": 1.3939570313579885, "learning_rate": 7.254687309473074e-07, "loss": 0.7868, "step": 10365 }, { "epoch": 0.83, "grad_norm": 1.6452253118852742, "learning_rate": 7.247948457659315e-07, "loss": 0.7938, "step": 10366 }, { "epoch": 0.83, "grad_norm": 1.6410398683002072, "learning_rate": 7.241212492561839e-07, "loss": 0.7998, "step": 10367 }, { "epoch": 0.83, "grad_norm": 1.5047545742220603, "learning_rate": 7.23447941463547e-07, "loss": 0.8085, "step": 10368 }, { "epoch": 0.83, "grad_norm": 1.3598967667450848, "learning_rate": 7.22774922433484e-07, "loss": 0.7128, "step": 10369 }, { "epoch": 0.83, "grad_norm": 1.4368467622978538, "learning_rate": 7.221021922114374e-07, "loss": 0.7689, "step": 10370 }, { "epoch": 0.83, "grad_norm": 0.7321282115736699, "learning_rate": 7.214297508428336e-07, "loss": 1.0398, "step": 10371 }, { "epoch": 0.83, "grad_norm": 1.5961704318913943, "learning_rate": 7.207575983730774e-07, "loss": 0.7505, "step": 10372 }, { "epoch": 0.83, "grad_norm": 1.5874872168562826, "learning_rate": 7.200857348475526e-07, "loss": 0.8058, "step": 10373 }, { "epoch": 0.83, "grad_norm": 1.5624671074458882, "learning_rate": 7.194141603116244e-07, "loss": 0.792, "step": 10374 }, { "epoch": 0.83, "grad_norm": 1.4817360058821694, "learning_rate": 7.187428748106418e-07, "loss": 0.7506, "step": 10375 }, { "epoch": 0.83, "grad_norm": 1.5225122412042733, "learning_rate": 7.180718783899298e-07, "loss": 0.7696, "step": 10376 }, { "epoch": 0.83, "grad_norm": 1.4556160859518892, "learning_rate": 7.174011710947959e-07, "loss": 0.77, "step": 10377 }, { "epoch": 0.83, "grad_norm": 1.4754991747118285, "learning_rate": 7.167307529705275e-07, "loss": 0.737, "step": 10378 }, { "epoch": 0.83, "grad_norm": 1.6087676355742386, "learning_rate": 7.16060624062393e-07, "loss": 0.7624, "step": 10379 }, { "epoch": 0.83, "grad_norm": 1.4733683845101624, "learning_rate": 7.153907844156411e-07, "loss": 0.7222, "step": 10380 }, { "epoch": 0.83, "grad_norm": 1.5751049190670687, "learning_rate": 7.147212340754994e-07, "loss": 0.784, "step": 10381 }, { "epoch": 0.83, "grad_norm": 1.5822974695301222, "learning_rate": 7.140519730871804e-07, "loss": 0.803, "step": 10382 }, { "epoch": 0.83, "grad_norm": 1.5736565683593153, "learning_rate": 7.13383001495872e-07, "loss": 0.7541, "step": 10383 }, { "epoch": 0.83, "grad_norm": 1.4972999396497033, "learning_rate": 7.127143193467445e-07, "loss": 0.6669, "step": 10384 }, { "epoch": 0.83, "grad_norm": 1.5076944113034005, "learning_rate": 7.120459266849511e-07, "loss": 0.713, "step": 10385 }, { "epoch": 0.83, "grad_norm": 1.4111353403347884, "learning_rate": 7.113778235556212e-07, "loss": 0.7454, "step": 10386 }, { "epoch": 0.83, "grad_norm": 1.4975962764849649, "learning_rate": 7.107100100038672e-07, "loss": 0.7375, "step": 10387 }, { "epoch": 0.83, "grad_norm": 1.431892913448652, "learning_rate": 7.100424860747817e-07, "loss": 0.7112, "step": 10388 }, { "epoch": 0.83, "grad_norm": 1.552786234205501, "learning_rate": 7.093752518134367e-07, "loss": 0.8015, "step": 10389 }, { "epoch": 0.83, "grad_norm": 1.4809457687093304, "learning_rate": 7.087083072648865e-07, "loss": 0.7583, "step": 10390 }, { "epoch": 0.83, "grad_norm": 1.422177878433341, "learning_rate": 7.080416524741623e-07, "loss": 0.7519, "step": 10391 }, { "epoch": 0.83, "grad_norm": 1.352242991668909, "learning_rate": 7.07375287486281e-07, "loss": 0.6241, "step": 10392 }, { "epoch": 0.83, "grad_norm": 1.609716033823568, "learning_rate": 7.067092123462361e-07, "loss": 0.7603, "step": 10393 }, { "epoch": 0.83, "grad_norm": 1.4493573413328664, "learning_rate": 7.060434270990013e-07, "loss": 0.7849, "step": 10394 }, { "epoch": 0.83, "grad_norm": 1.5505538315015905, "learning_rate": 7.053779317895343e-07, "loss": 0.6872, "step": 10395 }, { "epoch": 0.83, "grad_norm": 1.4844584330618307, "learning_rate": 7.047127264627696e-07, "loss": 0.7813, "step": 10396 }, { "epoch": 0.83, "grad_norm": 1.6785934385288757, "learning_rate": 7.040478111636229e-07, "loss": 0.7712, "step": 10397 }, { "epoch": 0.83, "grad_norm": 1.551043294529431, "learning_rate": 7.033831859369905e-07, "loss": 0.7561, "step": 10398 }, { "epoch": 0.83, "grad_norm": 1.6275683778412977, "learning_rate": 7.027188508277516e-07, "loss": 0.7181, "step": 10399 }, { "epoch": 0.83, "grad_norm": 1.6460406975846045, "learning_rate": 7.02054805880763e-07, "loss": 0.7061, "step": 10400 }, { "epoch": 0.83, "grad_norm": 1.643622283812845, "learning_rate": 7.013910511408595e-07, "loss": 0.7441, "step": 10401 }, { "epoch": 0.83, "grad_norm": 2.006361350256462, "learning_rate": 7.007275866528623e-07, "loss": 0.7239, "step": 10402 }, { "epoch": 0.83, "grad_norm": 1.5755313017684354, "learning_rate": 7.000644124615702e-07, "loss": 0.7964, "step": 10403 }, { "epoch": 0.83, "grad_norm": 0.7612019366299966, "learning_rate": 6.994015286117606e-07, "loss": 1.0402, "step": 10404 }, { "epoch": 0.83, "grad_norm": 1.6293684743276031, "learning_rate": 6.987389351481933e-07, "loss": 0.8086, "step": 10405 }, { "epoch": 0.83, "grad_norm": 1.40190700398306, "learning_rate": 6.980766321156091e-07, "loss": 0.7017, "step": 10406 }, { "epoch": 0.83, "grad_norm": 0.7745999216855157, "learning_rate": 6.974146195587278e-07, "loss": 1.0444, "step": 10407 }, { "epoch": 0.84, "grad_norm": 1.558690876148342, "learning_rate": 6.967528975222487e-07, "loss": 0.7021, "step": 10408 }, { "epoch": 0.84, "grad_norm": 1.484925340531523, "learning_rate": 6.96091466050855e-07, "loss": 0.7164, "step": 10409 }, { "epoch": 0.84, "grad_norm": 1.626538985161877, "learning_rate": 6.95430325189207e-07, "loss": 0.7675, "step": 10410 }, { "epoch": 0.84, "grad_norm": 0.7345292324539173, "learning_rate": 6.947694749819467e-07, "loss": 1.0465, "step": 10411 }, { "epoch": 0.84, "grad_norm": 1.4969143328667123, "learning_rate": 6.941089154736958e-07, "loss": 0.7919, "step": 10412 }, { "epoch": 0.84, "grad_norm": 1.4362062591228537, "learning_rate": 6.934486467090568e-07, "loss": 0.6731, "step": 10413 }, { "epoch": 0.84, "grad_norm": 1.4731033817943517, "learning_rate": 6.927886687326129e-07, "loss": 0.8045, "step": 10414 }, { "epoch": 0.84, "grad_norm": 1.5893601955211494, "learning_rate": 6.921289815889259e-07, "loss": 0.7433, "step": 10415 }, { "epoch": 0.84, "grad_norm": 1.4235624541398297, "learning_rate": 6.914695853225417e-07, "loss": 0.6569, "step": 10416 }, { "epoch": 0.84, "grad_norm": 1.5348053873130933, "learning_rate": 6.90810479977983e-07, "loss": 0.7742, "step": 10417 }, { "epoch": 0.84, "grad_norm": 0.743148788674857, "learning_rate": 6.901516655997536e-07, "loss": 1.0821, "step": 10418 }, { "epoch": 0.84, "grad_norm": 1.5451662833929896, "learning_rate": 6.894931422323398e-07, "loss": 0.7192, "step": 10419 }, { "epoch": 0.84, "grad_norm": 1.4718227314500927, "learning_rate": 6.888349099202051e-07, "loss": 0.7832, "step": 10420 }, { "epoch": 0.84, "grad_norm": 0.746766551768053, "learning_rate": 6.881769687077955e-07, "loss": 1.0748, "step": 10421 }, { "epoch": 0.84, "grad_norm": 1.547023583495951, "learning_rate": 6.875193186395368e-07, "loss": 0.7808, "step": 10422 }, { "epoch": 0.84, "grad_norm": 1.799574900743195, "learning_rate": 6.868619597598347e-07, "loss": 0.7452, "step": 10423 }, { "epoch": 0.84, "grad_norm": 1.4666801513776357, "learning_rate": 6.86204892113076e-07, "loss": 0.8121, "step": 10424 }, { "epoch": 0.84, "grad_norm": 1.4096099335048984, "learning_rate": 6.855481157436256e-07, "loss": 0.6789, "step": 10425 }, { "epoch": 0.84, "grad_norm": 0.7531968088940726, "learning_rate": 6.84891630695833e-07, "loss": 1.0379, "step": 10426 }, { "epoch": 0.84, "grad_norm": 1.5299006193082145, "learning_rate": 6.842354370140247e-07, "loss": 0.7406, "step": 10427 }, { "epoch": 0.84, "grad_norm": 1.4416928193368106, "learning_rate": 6.835795347425073e-07, "loss": 0.7035, "step": 10428 }, { "epoch": 0.84, "grad_norm": 1.4436125806461273, "learning_rate": 6.829239239255708e-07, "loss": 0.7448, "step": 10429 }, { "epoch": 0.84, "grad_norm": 1.5249002123904924, "learning_rate": 6.822686046074828e-07, "loss": 0.805, "step": 10430 }, { "epoch": 0.84, "grad_norm": 1.5790945422586637, "learning_rate": 6.816135768324916e-07, "loss": 0.7873, "step": 10431 }, { "epoch": 0.84, "grad_norm": 1.4538199438046915, "learning_rate": 6.809588406448264e-07, "loss": 0.7548, "step": 10432 }, { "epoch": 0.84, "grad_norm": 1.4990017582592883, "learning_rate": 6.803043960886957e-07, "loss": 0.8747, "step": 10433 }, { "epoch": 0.84, "grad_norm": 1.474141685193094, "learning_rate": 6.796502432082902e-07, "loss": 0.799, "step": 10434 }, { "epoch": 0.84, "grad_norm": 1.4861493096194343, "learning_rate": 6.789963820477785e-07, "loss": 0.7811, "step": 10435 }, { "epoch": 0.84, "grad_norm": 1.4687373557534227, "learning_rate": 6.783428126513125e-07, "loss": 0.6686, "step": 10436 }, { "epoch": 0.84, "grad_norm": 0.7486296172418683, "learning_rate": 6.776895350630219e-07, "loss": 1.0375, "step": 10437 }, { "epoch": 0.84, "grad_norm": 0.7469736403092688, "learning_rate": 6.770365493270176e-07, "loss": 1.0219, "step": 10438 }, { "epoch": 0.84, "grad_norm": 0.7608148781235129, "learning_rate": 6.763838554873892e-07, "loss": 1.0241, "step": 10439 }, { "epoch": 0.84, "grad_norm": 1.4099427752002593, "learning_rate": 6.757314535882104e-07, "loss": 0.773, "step": 10440 }, { "epoch": 0.84, "grad_norm": 1.4737713668865997, "learning_rate": 6.75079343673532e-07, "loss": 0.7092, "step": 10441 }, { "epoch": 0.84, "grad_norm": 1.4966978458705134, "learning_rate": 6.74427525787385e-07, "loss": 0.7206, "step": 10442 }, { "epoch": 0.84, "grad_norm": 1.559992155865875, "learning_rate": 6.737759999737836e-07, "loss": 0.7553, "step": 10443 }, { "epoch": 0.84, "grad_norm": 1.531756538675424, "learning_rate": 6.731247662767199e-07, "loss": 0.7447, "step": 10444 }, { "epoch": 0.84, "grad_norm": 1.888783732287368, "learning_rate": 6.724738247401652e-07, "loss": 0.7465, "step": 10445 }, { "epoch": 0.84, "grad_norm": 1.4584868350945932, "learning_rate": 6.718231754080723e-07, "loss": 0.7999, "step": 10446 }, { "epoch": 0.84, "grad_norm": 0.7354782209157104, "learning_rate": 6.711728183243766e-07, "loss": 1.0423, "step": 10447 }, { "epoch": 0.84, "grad_norm": 1.453862820522831, "learning_rate": 6.7052275353299e-07, "loss": 0.7663, "step": 10448 }, { "epoch": 0.84, "grad_norm": 1.4542831935814535, "learning_rate": 6.698729810778065e-07, "loss": 0.7195, "step": 10449 }, { "epoch": 0.84, "grad_norm": 1.4129335768813776, "learning_rate": 6.69223501002702e-07, "loss": 0.7789, "step": 10450 }, { "epoch": 0.84, "grad_norm": 1.789275292532673, "learning_rate": 6.685743133515293e-07, "loss": 0.8127, "step": 10451 }, { "epoch": 0.84, "grad_norm": 1.4125882547420212, "learning_rate": 6.679254181681228e-07, "loss": 0.6727, "step": 10452 }, { "epoch": 0.84, "grad_norm": 1.7032565382561737, "learning_rate": 6.672768154962983e-07, "loss": 0.7173, "step": 10453 }, { "epoch": 0.84, "grad_norm": 1.4668332389472905, "learning_rate": 6.66628505379851e-07, "loss": 0.7233, "step": 10454 }, { "epoch": 0.84, "grad_norm": 1.53065052875446, "learning_rate": 6.659804878625559e-07, "loss": 0.7841, "step": 10455 }, { "epoch": 0.84, "grad_norm": 0.7849040164090195, "learning_rate": 6.653327629881689e-07, "loss": 1.0516, "step": 10456 }, { "epoch": 0.84, "grad_norm": 1.4882027396296327, "learning_rate": 6.646853308004253e-07, "loss": 0.6437, "step": 10457 }, { "epoch": 0.84, "grad_norm": 1.6882672720891754, "learning_rate": 6.64038191343041e-07, "loss": 0.8217, "step": 10458 }, { "epoch": 0.84, "grad_norm": 1.4216467836302558, "learning_rate": 6.633913446597124e-07, "loss": 0.7152, "step": 10459 }, { "epoch": 0.84, "grad_norm": 1.4392467760459209, "learning_rate": 6.62744790794117e-07, "loss": 0.7012, "step": 10460 }, { "epoch": 0.84, "grad_norm": 1.5119470045627927, "learning_rate": 6.620985297899113e-07, "loss": 0.6963, "step": 10461 }, { "epoch": 0.84, "grad_norm": 1.4712938680726466, "learning_rate": 6.614525616907319e-07, "loss": 0.7257, "step": 10462 }, { "epoch": 0.84, "grad_norm": 1.6267643994987406, "learning_rate": 6.608068865401957e-07, "loss": 0.7624, "step": 10463 }, { "epoch": 0.84, "grad_norm": 1.4800174923931173, "learning_rate": 6.60161504381901e-07, "loss": 0.7676, "step": 10464 }, { "epoch": 0.84, "grad_norm": 1.4798795178230548, "learning_rate": 6.595164152594258e-07, "loss": 0.7476, "step": 10465 }, { "epoch": 0.84, "grad_norm": 1.6827016802595964, "learning_rate": 6.588716192163269e-07, "loss": 0.7759, "step": 10466 }, { "epoch": 0.84, "grad_norm": 1.3798405874164192, "learning_rate": 6.582271162961428e-07, "loss": 0.65, "step": 10467 }, { "epoch": 0.84, "grad_norm": 1.8684535040393007, "learning_rate": 6.575829065423922e-07, "loss": 0.6797, "step": 10468 }, { "epoch": 0.84, "grad_norm": 0.7827010100263714, "learning_rate": 6.569389899985723e-07, "loss": 1.0717, "step": 10469 }, { "epoch": 0.84, "grad_norm": 1.400417183628508, "learning_rate": 6.562953667081634e-07, "loss": 0.704, "step": 10470 }, { "epoch": 0.84, "grad_norm": 1.798061893404342, "learning_rate": 6.556520367146246e-07, "loss": 0.6846, "step": 10471 }, { "epoch": 0.84, "grad_norm": 1.5111573280707653, "learning_rate": 6.550090000613935e-07, "loss": 0.759, "step": 10472 }, { "epoch": 0.84, "grad_norm": 0.7389934228175498, "learning_rate": 6.543662567918895e-07, "loss": 1.0652, "step": 10473 }, { "epoch": 0.84, "grad_norm": 1.489247324426463, "learning_rate": 6.537238069495133e-07, "loss": 0.7711, "step": 10474 }, { "epoch": 0.84, "grad_norm": 1.4523888456299305, "learning_rate": 6.530816505776444e-07, "loss": 0.8114, "step": 10475 }, { "epoch": 0.84, "grad_norm": 0.7402449252069958, "learning_rate": 6.524397877196426e-07, "loss": 1.0583, "step": 10476 }, { "epoch": 0.84, "grad_norm": 1.7606411275246279, "learning_rate": 6.51798218418847e-07, "loss": 0.7941, "step": 10477 }, { "epoch": 0.84, "grad_norm": 1.5600404326924076, "learning_rate": 6.511569427185788e-07, "loss": 0.7618, "step": 10478 }, { "epoch": 0.84, "grad_norm": 0.7803944325378279, "learning_rate": 6.505159606621381e-07, "loss": 1.03, "step": 10479 }, { "epoch": 0.84, "grad_norm": 0.7600361654022643, "learning_rate": 6.498752722928042e-07, "loss": 1.0262, "step": 10480 }, { "epoch": 0.84, "grad_norm": 1.5294669689718934, "learning_rate": 6.492348776538398e-07, "loss": 0.7593, "step": 10481 }, { "epoch": 0.84, "grad_norm": 1.536786675000522, "learning_rate": 6.48594776788486e-07, "loss": 0.7547, "step": 10482 }, { "epoch": 0.84, "grad_norm": 0.7613802910804228, "learning_rate": 6.479549697399612e-07, "loss": 1.0398, "step": 10483 }, { "epoch": 0.84, "grad_norm": 1.4587158909304605, "learning_rate": 6.473154565514695e-07, "loss": 0.7985, "step": 10484 }, { "epoch": 0.84, "grad_norm": 1.436131729704655, "learning_rate": 6.466762372661911e-07, "loss": 0.7075, "step": 10485 }, { "epoch": 0.84, "grad_norm": 1.3842884281038133, "learning_rate": 6.460373119272867e-07, "loss": 0.6712, "step": 10486 }, { "epoch": 0.84, "grad_norm": 1.6293583953349016, "learning_rate": 6.453986805779006e-07, "loss": 0.7617, "step": 10487 }, { "epoch": 0.84, "grad_norm": 1.532988824443648, "learning_rate": 6.447603432611533e-07, "loss": 0.7726, "step": 10488 }, { "epoch": 0.84, "grad_norm": 1.4569271563263309, "learning_rate": 6.441223000201457e-07, "loss": 0.7952, "step": 10489 }, { "epoch": 0.84, "grad_norm": 1.519212098672324, "learning_rate": 6.434845508979598e-07, "loss": 0.779, "step": 10490 }, { "epoch": 0.84, "grad_norm": 1.4604464914824096, "learning_rate": 6.428470959376593e-07, "loss": 0.7345, "step": 10491 }, { "epoch": 0.84, "grad_norm": 3.557633962388726, "learning_rate": 6.422099351822864e-07, "loss": 1.0641, "step": 10492 }, { "epoch": 0.84, "grad_norm": 1.5456850324523328, "learning_rate": 6.415730686748628e-07, "loss": 0.6857, "step": 10493 }, { "epoch": 0.84, "grad_norm": 1.5852065714181305, "learning_rate": 6.409364964583919e-07, "loss": 0.7969, "step": 10494 }, { "epoch": 0.84, "grad_norm": 1.5970580993416825, "learning_rate": 6.403002185758572e-07, "loss": 0.8334, "step": 10495 }, { "epoch": 0.84, "grad_norm": 1.4631418824599978, "learning_rate": 6.396642350702204e-07, "loss": 0.6722, "step": 10496 }, { "epoch": 0.84, "grad_norm": 0.7403562797043832, "learning_rate": 6.390285459844236e-07, "loss": 1.0476, "step": 10497 }, { "epoch": 0.84, "grad_norm": 1.4839784079230407, "learning_rate": 6.383931513613928e-07, "loss": 0.7027, "step": 10498 }, { "epoch": 0.84, "grad_norm": 1.5650671328739711, "learning_rate": 6.377580512440301e-07, "loss": 0.7683, "step": 10499 }, { "epoch": 0.84, "grad_norm": 1.5469041121509528, "learning_rate": 6.37123245675218e-07, "loss": 0.6991, "step": 10500 }, { "epoch": 0.84, "grad_norm": 1.5479319778099485, "learning_rate": 6.364887346978211e-07, "loss": 0.7404, "step": 10501 }, { "epoch": 0.84, "grad_norm": 1.566916579044323, "learning_rate": 6.358545183546827e-07, "loss": 0.6948, "step": 10502 }, { "epoch": 0.84, "grad_norm": 1.5878718975369863, "learning_rate": 6.35220596688626e-07, "loss": 0.7527, "step": 10503 }, { "epoch": 0.84, "grad_norm": 1.5189381160540856, "learning_rate": 6.345869697424544e-07, "loss": 0.6923, "step": 10504 }, { "epoch": 0.84, "grad_norm": 1.4767972658213235, "learning_rate": 6.339536375589539e-07, "loss": 0.8199, "step": 10505 }, { "epoch": 0.84, "grad_norm": 1.4415498090505827, "learning_rate": 6.333206001808878e-07, "loss": 0.7508, "step": 10506 }, { "epoch": 0.84, "grad_norm": 1.534528153153127, "learning_rate": 6.326878576509982e-07, "loss": 0.8246, "step": 10507 }, { "epoch": 0.84, "grad_norm": 1.5364062504817957, "learning_rate": 6.320554100120119e-07, "loss": 0.7566, "step": 10508 }, { "epoch": 0.84, "grad_norm": 1.461410407609989, "learning_rate": 6.314232573066326e-07, "loss": 0.7617, "step": 10509 }, { "epoch": 0.84, "grad_norm": 1.7569940145523752, "learning_rate": 6.307913995775439e-07, "loss": 0.8022, "step": 10510 }, { "epoch": 0.84, "grad_norm": 0.7246711628430602, "learning_rate": 6.301598368674106e-07, "loss": 1.0583, "step": 10511 }, { "epoch": 0.84, "grad_norm": 1.5319012186580074, "learning_rate": 6.295285692188779e-07, "loss": 0.7682, "step": 10512 }, { "epoch": 0.84, "grad_norm": 1.5217823381412638, "learning_rate": 6.288975966745697e-07, "loss": 0.7861, "step": 10513 }, { "epoch": 0.84, "grad_norm": 0.7571552220228096, "learning_rate": 6.282669192770896e-07, "loss": 1.0637, "step": 10514 }, { "epoch": 0.84, "grad_norm": 1.546997368330712, "learning_rate": 6.276365370690246e-07, "loss": 0.7407, "step": 10515 }, { "epoch": 0.84, "grad_norm": 1.518796557351805, "learning_rate": 6.27006450092939e-07, "loss": 0.8571, "step": 10516 }, { "epoch": 0.84, "grad_norm": 1.4646669307594735, "learning_rate": 6.263766583913766e-07, "loss": 0.7764, "step": 10517 }, { "epoch": 0.84, "grad_norm": 1.4832945058331344, "learning_rate": 6.257471620068634e-07, "loss": 0.7563, "step": 10518 }, { "epoch": 0.84, "grad_norm": 1.4436715239143716, "learning_rate": 6.251179609819047e-07, "loss": 0.8176, "step": 10519 }, { "epoch": 0.84, "grad_norm": 1.6196485537825582, "learning_rate": 6.24489055358985e-07, "loss": 0.8144, "step": 10520 }, { "epoch": 0.84, "grad_norm": 0.7433239592448581, "learning_rate": 6.238604451805691e-07, "loss": 1.0759, "step": 10521 }, { "epoch": 0.84, "grad_norm": 1.5846108999451023, "learning_rate": 6.232321304891032e-07, "loss": 0.8317, "step": 10522 }, { "epoch": 0.84, "grad_norm": 1.5950412344049762, "learning_rate": 6.226041113270115e-07, "loss": 0.7044, "step": 10523 }, { "epoch": 0.84, "grad_norm": 1.5757830643935897, "learning_rate": 6.219763877366986e-07, "loss": 0.7374, "step": 10524 }, { "epoch": 0.84, "grad_norm": 1.4973662899147278, "learning_rate": 6.213489597605526e-07, "loss": 0.7549, "step": 10525 }, { "epoch": 0.84, "grad_norm": 1.7715024760722176, "learning_rate": 6.207218274409366e-07, "loss": 0.7436, "step": 10526 }, { "epoch": 0.84, "grad_norm": 1.560094627528198, "learning_rate": 6.200949908201959e-07, "loss": 0.7365, "step": 10527 }, { "epoch": 0.84, "grad_norm": 0.7509627516517307, "learning_rate": 6.194684499406578e-07, "loss": 1.0429, "step": 10528 }, { "epoch": 0.84, "grad_norm": 1.579229615721901, "learning_rate": 6.188422048446263e-07, "loss": 0.8039, "step": 10529 }, { "epoch": 0.84, "grad_norm": 1.4175984073126289, "learning_rate": 6.182162555743876e-07, "loss": 0.703, "step": 10530 }, { "epoch": 0.84, "grad_norm": 1.5432174668425107, "learning_rate": 6.175906021722055e-07, "loss": 0.7652, "step": 10531 }, { "epoch": 0.84, "grad_norm": 1.466592482822433, "learning_rate": 6.169652446803292e-07, "loss": 0.7603, "step": 10532 }, { "epoch": 0.85, "grad_norm": 1.4882773340321385, "learning_rate": 6.16340183140981e-07, "loss": 0.6865, "step": 10533 }, { "epoch": 0.85, "grad_norm": 0.7546393740179701, "learning_rate": 6.157154175963665e-07, "loss": 1.072, "step": 10534 }, { "epoch": 0.85, "grad_norm": 0.7588800931127216, "learning_rate": 6.15090948088673e-07, "loss": 1.0725, "step": 10535 }, { "epoch": 0.85, "grad_norm": 0.7590148260440126, "learning_rate": 6.144667746600652e-07, "loss": 1.0482, "step": 10536 }, { "epoch": 0.85, "grad_norm": 1.6369816884381203, "learning_rate": 6.138428973526894e-07, "loss": 0.8314, "step": 10537 }, { "epoch": 0.85, "grad_norm": 1.4528960419440884, "learning_rate": 6.132193162086697e-07, "loss": 0.6958, "step": 10538 }, { "epoch": 0.85, "grad_norm": 1.510794793469759, "learning_rate": 6.125960312701135e-07, "loss": 0.8457, "step": 10539 }, { "epoch": 0.85, "grad_norm": 0.7655761919027867, "learning_rate": 6.119730425791059e-07, "loss": 1.0858, "step": 10540 }, { "epoch": 0.85, "grad_norm": 1.7416002711462988, "learning_rate": 6.113503501777113e-07, "loss": 0.7998, "step": 10541 }, { "epoch": 0.85, "grad_norm": 1.5648510335105579, "learning_rate": 6.107279541079769e-07, "loss": 0.7753, "step": 10542 }, { "epoch": 0.85, "grad_norm": 0.7467911789172526, "learning_rate": 6.101058544119282e-07, "loss": 1.057, "step": 10543 }, { "epoch": 0.85, "grad_norm": 0.7465331206212319, "learning_rate": 6.094840511315703e-07, "loss": 1.0873, "step": 10544 }, { "epoch": 0.85, "grad_norm": 1.5918462737205172, "learning_rate": 6.088625443088885e-07, "loss": 0.842, "step": 10545 }, { "epoch": 0.85, "grad_norm": 1.453487384422699, "learning_rate": 6.082413339858489e-07, "loss": 0.7267, "step": 10546 }, { "epoch": 0.85, "grad_norm": 1.8232227349125614, "learning_rate": 6.076204202043968e-07, "loss": 0.6874, "step": 10547 }, { "epoch": 0.85, "grad_norm": 1.49449028243409, "learning_rate": 6.069998030064561e-07, "loss": 0.7403, "step": 10548 }, { "epoch": 0.85, "grad_norm": 1.5052313433227271, "learning_rate": 6.063794824339359e-07, "loss": 0.7989, "step": 10549 }, { "epoch": 0.85, "grad_norm": 1.7350464854517975, "learning_rate": 6.057594585287191e-07, "loss": 0.7927, "step": 10550 }, { "epoch": 0.85, "grad_norm": 1.7028366267284434, "learning_rate": 6.051397313326707e-07, "loss": 0.7049, "step": 10551 }, { "epoch": 0.85, "grad_norm": 1.5287994030572938, "learning_rate": 6.045203008876383e-07, "loss": 0.7653, "step": 10552 }, { "epoch": 0.85, "grad_norm": 0.76898414335785, "learning_rate": 6.039011672354456e-07, "loss": 1.0963, "step": 10553 }, { "epoch": 0.85, "grad_norm": 0.7478283684865068, "learning_rate": 6.032823304178986e-07, "loss": 1.0555, "step": 10554 }, { "epoch": 0.85, "grad_norm": 1.6455016538588079, "learning_rate": 6.026637904767824e-07, "loss": 0.6986, "step": 10555 }, { "epoch": 0.85, "grad_norm": 1.5621064560096705, "learning_rate": 6.020455474538622e-07, "loss": 0.7768, "step": 10556 }, { "epoch": 0.85, "grad_norm": 1.4870109306205488, "learning_rate": 6.014276013908832e-07, "loss": 0.7511, "step": 10557 }, { "epoch": 0.85, "grad_norm": 1.4465316600824325, "learning_rate": 6.008099523295696e-07, "loss": 0.8117, "step": 10558 }, { "epoch": 0.85, "grad_norm": 1.560575544240619, "learning_rate": 6.001926003116282e-07, "loss": 0.7536, "step": 10559 }, { "epoch": 0.85, "grad_norm": 1.4566470195293502, "learning_rate": 5.995755453787433e-07, "loss": 0.6899, "step": 10560 }, { "epoch": 0.85, "grad_norm": 1.4862953275351058, "learning_rate": 5.9895878757258e-07, "loss": 0.7832, "step": 10561 }, { "epoch": 0.85, "grad_norm": 1.5247669532911683, "learning_rate": 5.983423269347816e-07, "loss": 0.6803, "step": 10562 }, { "epoch": 0.85, "grad_norm": 1.6797097384646174, "learning_rate": 5.977261635069753e-07, "loss": 0.6405, "step": 10563 }, { "epoch": 0.85, "grad_norm": 0.7546939017864326, "learning_rate": 5.971102973307646e-07, "loss": 1.0781, "step": 10564 }, { "epoch": 0.85, "grad_norm": 1.594278704695481, "learning_rate": 5.964947284477346e-07, "loss": 0.7218, "step": 10565 }, { "epoch": 0.85, "grad_norm": 1.560490576755668, "learning_rate": 5.958794568994503e-07, "loss": 0.7716, "step": 10566 }, { "epoch": 0.85, "grad_norm": 1.4795719876748201, "learning_rate": 5.95264482727455e-07, "loss": 0.7818, "step": 10567 }, { "epoch": 0.85, "grad_norm": 1.4794055424884966, "learning_rate": 5.946498059732731e-07, "loss": 0.7354, "step": 10568 }, { "epoch": 0.85, "grad_norm": 1.565433059041811, "learning_rate": 5.940354266784109e-07, "loss": 0.762, "step": 10569 }, { "epoch": 0.85, "grad_norm": 1.5304103532499334, "learning_rate": 5.934213448843512e-07, "loss": 0.7328, "step": 10570 }, { "epoch": 0.85, "grad_norm": 1.5413959449152705, "learning_rate": 5.92807560632559e-07, "loss": 0.8255, "step": 10571 }, { "epoch": 0.85, "grad_norm": 1.5585873185341812, "learning_rate": 5.921940739644766e-07, "loss": 0.7549, "step": 10572 }, { "epoch": 0.85, "grad_norm": 0.7538045039970029, "learning_rate": 5.915808849215304e-07, "loss": 1.03, "step": 10573 }, { "epoch": 0.85, "grad_norm": 1.4711792948008318, "learning_rate": 5.909679935451235e-07, "loss": 0.8114, "step": 10574 }, { "epoch": 0.85, "grad_norm": 1.6215686485155403, "learning_rate": 5.903553998766387e-07, "loss": 0.7932, "step": 10575 }, { "epoch": 0.85, "grad_norm": 1.3989742993734906, "learning_rate": 5.897431039574414e-07, "loss": 0.655, "step": 10576 }, { "epoch": 0.85, "grad_norm": 0.7540439000225984, "learning_rate": 5.891311058288751e-07, "loss": 1.0383, "step": 10577 }, { "epoch": 0.85, "grad_norm": 0.8160916585887958, "learning_rate": 5.885194055322618e-07, "loss": 1.0623, "step": 10578 }, { "epoch": 0.85, "grad_norm": 1.4122437461049024, "learning_rate": 5.879080031089047e-07, "loss": 0.7229, "step": 10579 }, { "epoch": 0.85, "grad_norm": 1.560511260000694, "learning_rate": 5.872968986000893e-07, "loss": 0.7371, "step": 10580 }, { "epoch": 0.85, "grad_norm": 1.4425415410589826, "learning_rate": 5.866860920470773e-07, "loss": 0.7791, "step": 10581 }, { "epoch": 0.85, "grad_norm": 1.5056419993815482, "learning_rate": 5.860755834911108e-07, "loss": 0.7832, "step": 10582 }, { "epoch": 0.85, "grad_norm": 1.4566053990461625, "learning_rate": 5.854653729734156e-07, "loss": 0.7244, "step": 10583 }, { "epoch": 0.85, "grad_norm": 1.4881113783124142, "learning_rate": 5.848554605351925e-07, "loss": 0.7012, "step": 10584 }, { "epoch": 0.85, "grad_norm": 1.6786858323430598, "learning_rate": 5.842458462176231e-07, "loss": 0.7227, "step": 10585 }, { "epoch": 0.85, "grad_norm": 1.553182825504059, "learning_rate": 5.83636530061873e-07, "loss": 0.7909, "step": 10586 }, { "epoch": 0.85, "grad_norm": 1.6468691723998505, "learning_rate": 5.830275121090828e-07, "loss": 0.8033, "step": 10587 }, { "epoch": 0.85, "grad_norm": 1.602047408575517, "learning_rate": 5.824187924003749e-07, "loss": 0.8198, "step": 10588 }, { "epoch": 0.85, "grad_norm": 1.4679298566231678, "learning_rate": 5.818103709768519e-07, "loss": 0.7484, "step": 10589 }, { "epoch": 0.85, "grad_norm": 1.4254932942182705, "learning_rate": 5.812022478795954e-07, "loss": 0.7359, "step": 10590 }, { "epoch": 0.85, "grad_norm": 1.6418651659946617, "learning_rate": 5.805944231496669e-07, "loss": 0.7693, "step": 10591 }, { "epoch": 0.85, "grad_norm": 1.517608652066018, "learning_rate": 5.799868968281075e-07, "loss": 0.7509, "step": 10592 }, { "epoch": 0.85, "grad_norm": 1.43812808123009, "learning_rate": 5.793796689559411e-07, "loss": 0.707, "step": 10593 }, { "epoch": 0.85, "grad_norm": 1.7133115875627756, "learning_rate": 5.787727395741682e-07, "loss": 0.7097, "step": 10594 }, { "epoch": 0.85, "grad_norm": 1.5570657839993853, "learning_rate": 5.781661087237689e-07, "loss": 0.7463, "step": 10595 }, { "epoch": 0.85, "grad_norm": 1.679370956468701, "learning_rate": 5.775597764457047e-07, "loss": 0.8101, "step": 10596 }, { "epoch": 0.85, "grad_norm": 1.5270219386733994, "learning_rate": 5.769537427809174e-07, "loss": 0.69, "step": 10597 }, { "epoch": 0.85, "grad_norm": 1.4543781993299443, "learning_rate": 5.763480077703276e-07, "loss": 0.7765, "step": 10598 }, { "epoch": 0.85, "grad_norm": 1.524973318580101, "learning_rate": 5.757425714548354e-07, "loss": 0.8068, "step": 10599 }, { "epoch": 0.85, "grad_norm": 1.51206405713927, "learning_rate": 5.751374338753218e-07, "loss": 0.6935, "step": 10600 }, { "epoch": 0.85, "grad_norm": 1.5687466764301068, "learning_rate": 5.745325950726466e-07, "loss": 0.7385, "step": 10601 }, { "epoch": 0.85, "grad_norm": 1.9809507542362046, "learning_rate": 5.739280550876497e-07, "loss": 0.8308, "step": 10602 }, { "epoch": 0.85, "grad_norm": 0.7515309932223118, "learning_rate": 5.733238139611508e-07, "loss": 1.0716, "step": 10603 }, { "epoch": 0.85, "grad_norm": 1.5373506235425038, "learning_rate": 5.727198717339511e-07, "loss": 0.7572, "step": 10604 }, { "epoch": 0.85, "grad_norm": 4.901289004258174, "learning_rate": 5.72116228446829e-07, "loss": 0.7689, "step": 10605 }, { "epoch": 0.85, "grad_norm": 1.555798067149641, "learning_rate": 5.715128841405432e-07, "loss": 0.8148, "step": 10606 }, { "epoch": 0.85, "grad_norm": 1.6860447046847298, "learning_rate": 5.709098388558348e-07, "loss": 0.7504, "step": 10607 }, { "epoch": 0.85, "grad_norm": 1.4323710878262288, "learning_rate": 5.70307092633422e-07, "loss": 0.6863, "step": 10608 }, { "epoch": 0.85, "grad_norm": 1.53317670527803, "learning_rate": 5.697046455140031e-07, "loss": 0.7997, "step": 10609 }, { "epoch": 0.85, "grad_norm": 0.7608030377371746, "learning_rate": 5.69102497538257e-07, "loss": 1.0451, "step": 10610 }, { "epoch": 0.85, "grad_norm": 1.6228869480140762, "learning_rate": 5.685006487468426e-07, "loss": 0.7675, "step": 10611 }, { "epoch": 0.85, "grad_norm": 1.5163830932052216, "learning_rate": 5.678990991803973e-07, "loss": 0.7108, "step": 10612 }, { "epoch": 0.85, "grad_norm": 1.4974097507295567, "learning_rate": 5.672978488795383e-07, "loss": 0.7499, "step": 10613 }, { "epoch": 0.85, "grad_norm": 1.5732594439684298, "learning_rate": 5.666968978848659e-07, "loss": 0.7551, "step": 10614 }, { "epoch": 0.85, "grad_norm": 1.4230785523989802, "learning_rate": 5.660962462369562e-07, "loss": 0.796, "step": 10615 }, { "epoch": 0.85, "grad_norm": 1.5072422378450723, "learning_rate": 5.654958939763655e-07, "loss": 0.7186, "step": 10616 }, { "epoch": 0.85, "grad_norm": 1.4838842175019953, "learning_rate": 5.648958411436334e-07, "loss": 0.798, "step": 10617 }, { "epoch": 0.85, "grad_norm": 0.7589315873358868, "learning_rate": 5.642960877792752e-07, "loss": 1.0258, "step": 10618 }, { "epoch": 0.85, "grad_norm": 1.8951134175728026, "learning_rate": 5.636966339237882e-07, "loss": 0.7227, "step": 10619 }, { "epoch": 0.85, "grad_norm": 1.5077349614705646, "learning_rate": 5.630974796176481e-07, "loss": 0.8248, "step": 10620 }, { "epoch": 0.85, "grad_norm": 1.478941503368043, "learning_rate": 5.624986249013131e-07, "loss": 0.7602, "step": 10621 }, { "epoch": 0.85, "grad_norm": 1.49673180997114, "learning_rate": 5.619000698152171e-07, "loss": 0.7704, "step": 10622 }, { "epoch": 0.85, "grad_norm": 0.7298171328792727, "learning_rate": 5.613018143997762e-07, "loss": 1.082, "step": 10623 }, { "epoch": 0.85, "grad_norm": 1.6805563470773388, "learning_rate": 5.607038586953873e-07, "loss": 0.7124, "step": 10624 }, { "epoch": 0.85, "grad_norm": 0.7777007368980453, "learning_rate": 5.601062027424243e-07, "loss": 1.0249, "step": 10625 }, { "epoch": 0.85, "grad_norm": 1.5706155518184461, "learning_rate": 5.595088465812426e-07, "loss": 0.7515, "step": 10626 }, { "epoch": 0.85, "grad_norm": 1.5097634878890067, "learning_rate": 5.589117902521779e-07, "loss": 0.6507, "step": 10627 }, { "epoch": 0.85, "grad_norm": 1.4633041537008902, "learning_rate": 5.583150337955445e-07, "loss": 0.7434, "step": 10628 }, { "epoch": 0.85, "grad_norm": 1.404378663824516, "learning_rate": 5.577185772516369e-07, "loss": 0.6991, "step": 10629 }, { "epoch": 0.85, "grad_norm": 1.4176453633116144, "learning_rate": 5.571224206607274e-07, "loss": 0.6669, "step": 10630 }, { "epoch": 0.85, "grad_norm": 1.4784067185545693, "learning_rate": 5.565265640630724e-07, "loss": 0.678, "step": 10631 }, { "epoch": 0.85, "grad_norm": 1.5577111898788731, "learning_rate": 5.559310074989044e-07, "loss": 0.7399, "step": 10632 }, { "epoch": 0.85, "grad_norm": 0.7459198882130866, "learning_rate": 5.553357510084368e-07, "loss": 1.0284, "step": 10633 }, { "epoch": 0.85, "grad_norm": 1.4857550489252411, "learning_rate": 5.547407946318628e-07, "loss": 0.6937, "step": 10634 }, { "epoch": 0.85, "grad_norm": 0.7361966300294183, "learning_rate": 5.541461384093549e-07, "loss": 1.0421, "step": 10635 }, { "epoch": 0.85, "grad_norm": 1.6208521134451581, "learning_rate": 5.535517823810654e-07, "loss": 0.6652, "step": 10636 }, { "epoch": 0.85, "grad_norm": 0.762287939537756, "learning_rate": 5.529577265871266e-07, "loss": 1.0584, "step": 10637 }, { "epoch": 0.85, "grad_norm": 1.5966156835091907, "learning_rate": 5.523639710676515e-07, "loss": 0.7307, "step": 10638 }, { "epoch": 0.85, "grad_norm": 1.8513473404729726, "learning_rate": 5.517705158627313e-07, "loss": 0.8013, "step": 10639 }, { "epoch": 0.85, "grad_norm": 1.3546384272239413, "learning_rate": 5.511773610124366e-07, "loss": 0.6617, "step": 10640 }, { "epoch": 0.85, "grad_norm": 1.5471172118421526, "learning_rate": 5.5058450655682e-07, "loss": 0.7299, "step": 10641 }, { "epoch": 0.85, "grad_norm": 1.554386273443815, "learning_rate": 5.499919525359121e-07, "loss": 0.6868, "step": 10642 }, { "epoch": 0.85, "grad_norm": 1.4492036587014332, "learning_rate": 5.493996989897227e-07, "loss": 0.7384, "step": 10643 }, { "epoch": 0.85, "grad_norm": 1.5925727195266548, "learning_rate": 5.488077459582425e-07, "loss": 0.78, "step": 10644 }, { "epoch": 0.85, "grad_norm": 1.5962278724681638, "learning_rate": 5.482160934814418e-07, "loss": 0.7888, "step": 10645 }, { "epoch": 0.85, "grad_norm": 1.501442019924188, "learning_rate": 5.476247415992702e-07, "loss": 0.7524, "step": 10646 }, { "epoch": 0.85, "grad_norm": 1.6827083466978405, "learning_rate": 5.47033690351656e-07, "loss": 0.7151, "step": 10647 }, { "epoch": 0.85, "grad_norm": 1.6102823960522417, "learning_rate": 5.464429397785099e-07, "loss": 0.7018, "step": 10648 }, { "epoch": 0.85, "grad_norm": 1.490299365644473, "learning_rate": 5.458524899197204e-07, "loss": 0.686, "step": 10649 }, { "epoch": 0.85, "grad_norm": 1.4864402783449124, "learning_rate": 5.452623408151553e-07, "loss": 0.853, "step": 10650 }, { "epoch": 0.85, "grad_norm": 0.7611929007203355, "learning_rate": 5.446724925046637e-07, "loss": 1.033, "step": 10651 }, { "epoch": 0.85, "grad_norm": 0.7541218496393737, "learning_rate": 5.440829450280732e-07, "loss": 1.0902, "step": 10652 }, { "epoch": 0.85, "grad_norm": 1.6199859173166222, "learning_rate": 5.434936984251916e-07, "loss": 0.7417, "step": 10653 }, { "epoch": 0.85, "grad_norm": 1.4269645401209525, "learning_rate": 5.429047527358056e-07, "loss": 0.7247, "step": 10654 }, { "epoch": 0.85, "grad_norm": 1.6074244734168412, "learning_rate": 5.423161079996824e-07, "loss": 0.6565, "step": 10655 }, { "epoch": 0.85, "grad_norm": 1.5036907991710482, "learning_rate": 5.417277642565694e-07, "loss": 0.7488, "step": 10656 }, { "epoch": 0.86, "grad_norm": 1.4611061702109365, "learning_rate": 5.411397215461905e-07, "loss": 0.879, "step": 10657 }, { "epoch": 0.86, "grad_norm": 1.496054497251319, "learning_rate": 5.405519799082548e-07, "loss": 0.7658, "step": 10658 }, { "epoch": 0.86, "grad_norm": 1.489377056496907, "learning_rate": 5.399645393824465e-07, "loss": 0.7393, "step": 10659 }, { "epoch": 0.86, "grad_norm": 1.5232933804575872, "learning_rate": 5.393774000084307e-07, "loss": 0.6974, "step": 10660 }, { "epoch": 0.86, "grad_norm": 0.7608169839726203, "learning_rate": 5.387905618258521e-07, "loss": 1.0407, "step": 10661 }, { "epoch": 0.86, "grad_norm": 1.4993444599169157, "learning_rate": 5.382040248743364e-07, "loss": 0.745, "step": 10662 }, { "epoch": 0.86, "grad_norm": 1.6753462121695666, "learning_rate": 5.37617789193488e-07, "loss": 0.7328, "step": 10663 }, { "epoch": 0.86, "grad_norm": 1.570468865217088, "learning_rate": 5.370318548228886e-07, "loss": 0.8535, "step": 10664 }, { "epoch": 0.86, "grad_norm": 1.6027263463616042, "learning_rate": 5.364462218021066e-07, "loss": 0.8198, "step": 10665 }, { "epoch": 0.86, "grad_norm": 1.5809352373172658, "learning_rate": 5.358608901706802e-07, "loss": 0.7555, "step": 10666 }, { "epoch": 0.86, "grad_norm": 1.5646452410986769, "learning_rate": 5.352758599681341e-07, "loss": 0.7864, "step": 10667 }, { "epoch": 0.86, "grad_norm": 1.5513899139727896, "learning_rate": 5.346911312339719e-07, "loss": 0.7528, "step": 10668 }, { "epoch": 0.86, "grad_norm": 1.6679122564609645, "learning_rate": 5.341067040076752e-07, "loss": 0.736, "step": 10669 }, { "epoch": 0.86, "grad_norm": 1.6261253776910327, "learning_rate": 5.335225783287051e-07, "loss": 0.7118, "step": 10670 }, { "epoch": 0.86, "grad_norm": 1.6282146964290545, "learning_rate": 5.329387542365033e-07, "loss": 0.7479, "step": 10671 }, { "epoch": 0.86, "grad_norm": 1.4673581294017208, "learning_rate": 5.323552317704922e-07, "loss": 0.7149, "step": 10672 }, { "epoch": 0.86, "grad_norm": 1.5941734477844849, "learning_rate": 5.317720109700719e-07, "loss": 0.7499, "step": 10673 }, { "epoch": 0.86, "grad_norm": 1.4419007244734823, "learning_rate": 5.311890918746216e-07, "loss": 0.7151, "step": 10674 }, { "epoch": 0.86, "grad_norm": 0.7627666863689653, "learning_rate": 5.306064745235035e-07, "loss": 1.0591, "step": 10675 }, { "epoch": 0.86, "grad_norm": 1.6717186124925845, "learning_rate": 5.300241589560556e-07, "loss": 0.7445, "step": 10676 }, { "epoch": 0.86, "grad_norm": 0.7739806745865697, "learning_rate": 5.294421452115983e-07, "loss": 1.035, "step": 10677 }, { "epoch": 0.86, "grad_norm": 1.5334945218157354, "learning_rate": 5.288604333294295e-07, "loss": 0.7564, "step": 10678 }, { "epoch": 0.86, "grad_norm": 1.4603335532675281, "learning_rate": 5.282790233488283e-07, "loss": 0.7062, "step": 10679 }, { "epoch": 0.86, "grad_norm": 1.5477972123553387, "learning_rate": 5.276979153090528e-07, "loss": 0.7939, "step": 10680 }, { "epoch": 0.86, "grad_norm": 0.7677028954458824, "learning_rate": 5.271171092493393e-07, "loss": 1.1006, "step": 10681 }, { "epoch": 0.86, "grad_norm": 1.4964386917454109, "learning_rate": 5.265366052089077e-07, "loss": 0.6849, "step": 10682 }, { "epoch": 0.86, "grad_norm": 0.7622269651061627, "learning_rate": 5.259564032269538e-07, "loss": 1.079, "step": 10683 }, { "epoch": 0.86, "grad_norm": 1.5876986363839836, "learning_rate": 5.253765033426528e-07, "loss": 0.7443, "step": 10684 }, { "epoch": 0.86, "grad_norm": 1.5082872924646236, "learning_rate": 5.24796905595163e-07, "loss": 0.7194, "step": 10685 }, { "epoch": 0.86, "grad_norm": 1.5832711948298928, "learning_rate": 5.242176100236195e-07, "loss": 0.7424, "step": 10686 }, { "epoch": 0.86, "grad_norm": 1.6574993504247448, "learning_rate": 5.236386166671376e-07, "loss": 0.7026, "step": 10687 }, { "epoch": 0.86, "grad_norm": 0.8040597391468379, "learning_rate": 5.230599255648116e-07, "loss": 1.05, "step": 10688 }, { "epoch": 0.86, "grad_norm": 1.5334024951417946, "learning_rate": 5.224815367557173e-07, "loss": 0.7088, "step": 10689 }, { "epoch": 0.86, "grad_norm": 1.4733085546152755, "learning_rate": 5.219034502789078e-07, "loss": 0.7965, "step": 10690 }, { "epoch": 0.86, "grad_norm": 1.5127337881537215, "learning_rate": 5.213256661734162e-07, "loss": 0.7084, "step": 10691 }, { "epoch": 0.86, "grad_norm": 1.5528205234778825, "learning_rate": 5.207481844782575e-07, "loss": 0.8372, "step": 10692 }, { "epoch": 0.86, "grad_norm": 0.7483865997239115, "learning_rate": 5.201710052324238e-07, "loss": 1.0572, "step": 10693 }, { "epoch": 0.86, "grad_norm": 0.7582654611289715, "learning_rate": 5.195941284748879e-07, "loss": 1.0846, "step": 10694 }, { "epoch": 0.86, "grad_norm": 0.7349772349306587, "learning_rate": 5.190175542446002e-07, "loss": 1.0276, "step": 10695 }, { "epoch": 0.86, "grad_norm": 0.7400493499720503, "learning_rate": 5.184412825804947e-07, "loss": 1.0207, "step": 10696 }, { "epoch": 0.86, "grad_norm": 1.4404320840150981, "learning_rate": 5.178653135214811e-07, "loss": 0.6797, "step": 10697 }, { "epoch": 0.86, "grad_norm": 0.7545429608427342, "learning_rate": 5.172896471064514e-07, "loss": 1.0821, "step": 10698 }, { "epoch": 0.86, "grad_norm": 1.6037273647412593, "learning_rate": 5.167142833742744e-07, "loss": 0.7371, "step": 10699 }, { "epoch": 0.86, "grad_norm": 1.6070630630046192, "learning_rate": 5.161392223638012e-07, "loss": 0.7721, "step": 10700 }, { "epoch": 0.86, "grad_norm": 1.5528749814279657, "learning_rate": 5.155644641138602e-07, "loss": 0.6195, "step": 10701 }, { "epoch": 0.86, "grad_norm": 1.53077067565577, "learning_rate": 5.149900086632597e-07, "loss": 0.7224, "step": 10702 }, { "epoch": 0.86, "grad_norm": 1.5688498916884153, "learning_rate": 5.144158560507912e-07, "loss": 0.7769, "step": 10703 }, { "epoch": 0.86, "grad_norm": 1.50992264124819, "learning_rate": 5.138420063152205e-07, "loss": 0.7747, "step": 10704 }, { "epoch": 0.86, "grad_norm": 0.7522032466641091, "learning_rate": 5.132684594952946e-07, "loss": 1.0448, "step": 10705 }, { "epoch": 0.86, "grad_norm": 1.4384324068280459, "learning_rate": 5.126952156297433e-07, "loss": 0.7015, "step": 10706 }, { "epoch": 0.86, "grad_norm": 1.463693899665976, "learning_rate": 5.121222747572712e-07, "loss": 0.7026, "step": 10707 }, { "epoch": 0.86, "grad_norm": 1.467132058479348, "learning_rate": 5.115496369165651e-07, "loss": 0.7636, "step": 10708 }, { "epoch": 0.86, "grad_norm": 1.5576195332032134, "learning_rate": 5.109773021462921e-07, "loss": 0.79, "step": 10709 }, { "epoch": 0.86, "grad_norm": 1.5140269972669043, "learning_rate": 5.10405270485096e-07, "loss": 0.6938, "step": 10710 }, { "epoch": 0.86, "grad_norm": 1.49063356643001, "learning_rate": 5.098335419716022e-07, "loss": 0.722, "step": 10711 }, { "epoch": 0.86, "grad_norm": 0.7815024520261052, "learning_rate": 5.092621166444139e-07, "loss": 1.0772, "step": 10712 }, { "epoch": 0.86, "grad_norm": 0.7424635467214203, "learning_rate": 5.08690994542117e-07, "loss": 1.0491, "step": 10713 }, { "epoch": 0.86, "grad_norm": 1.6162737516251804, "learning_rate": 5.081201757032744e-07, "loss": 0.743, "step": 10714 }, { "epoch": 0.86, "grad_norm": 1.501818171953974, "learning_rate": 5.075496601664276e-07, "loss": 0.7671, "step": 10715 }, { "epoch": 0.86, "grad_norm": 1.5465526333440849, "learning_rate": 5.069794479701013e-07, "loss": 0.7221, "step": 10716 }, { "epoch": 0.86, "grad_norm": 1.507311805627084, "learning_rate": 5.064095391527968e-07, "loss": 0.6899, "step": 10717 }, { "epoch": 0.86, "grad_norm": 1.5830778815080264, "learning_rate": 5.058399337529957e-07, "loss": 0.8064, "step": 10718 }, { "epoch": 0.86, "grad_norm": 1.5309835635536504, "learning_rate": 5.052706318091572e-07, "loss": 0.7175, "step": 10719 }, { "epoch": 0.86, "grad_norm": 1.6451625038781048, "learning_rate": 5.047016333597248e-07, "loss": 0.7597, "step": 10720 }, { "epoch": 0.86, "grad_norm": 0.7650018947353433, "learning_rate": 5.04132938443117e-07, "loss": 1.046, "step": 10721 }, { "epoch": 0.86, "grad_norm": 1.654864128033921, "learning_rate": 5.03564547097734e-07, "loss": 0.8197, "step": 10722 }, { "epoch": 0.86, "grad_norm": 1.5502362618439645, "learning_rate": 5.029964593619541e-07, "loss": 0.7866, "step": 10723 }, { "epoch": 0.86, "grad_norm": 0.753674645585248, "learning_rate": 5.024286752741364e-07, "loss": 1.0435, "step": 10724 }, { "epoch": 0.86, "grad_norm": 1.5817776832524295, "learning_rate": 5.018611948726182e-07, "loss": 0.7713, "step": 10725 }, { "epoch": 0.86, "grad_norm": 1.5760535695866238, "learning_rate": 5.012940181957182e-07, "loss": 0.761, "step": 10726 }, { "epoch": 0.86, "grad_norm": 1.5417930896154035, "learning_rate": 5.00727145281733e-07, "loss": 0.7898, "step": 10727 }, { "epoch": 0.86, "grad_norm": 1.6038448645944507, "learning_rate": 5.001605761689399e-07, "loss": 0.7431, "step": 10728 }, { "epoch": 0.86, "grad_norm": 1.6170973341904578, "learning_rate": 4.995943108955926e-07, "loss": 0.8111, "step": 10729 }, { "epoch": 0.86, "grad_norm": 1.734378877963947, "learning_rate": 4.990283494999293e-07, "loss": 0.6849, "step": 10730 }, { "epoch": 0.86, "grad_norm": 1.4127169500503385, "learning_rate": 4.984626920201641e-07, "loss": 0.7272, "step": 10731 }, { "epoch": 0.86, "grad_norm": 0.7293267566120726, "learning_rate": 4.978973384944913e-07, "loss": 1.0434, "step": 10732 }, { "epoch": 0.86, "grad_norm": 1.5911336987141511, "learning_rate": 4.973322889610849e-07, "loss": 0.7567, "step": 10733 }, { "epoch": 0.86, "grad_norm": 1.578865247044147, "learning_rate": 4.967675434580982e-07, "loss": 0.7785, "step": 10734 }, { "epoch": 0.86, "grad_norm": 1.519050588420745, "learning_rate": 4.96203102023664e-07, "loss": 0.7441, "step": 10735 }, { "epoch": 0.86, "grad_norm": 1.4829550624644494, "learning_rate": 4.956389646958943e-07, "loss": 0.7438, "step": 10736 }, { "epoch": 0.86, "grad_norm": 1.4374848474141362, "learning_rate": 4.950751315128821e-07, "loss": 0.6736, "step": 10737 }, { "epoch": 0.86, "grad_norm": 1.5602851056143778, "learning_rate": 4.945116025126984e-07, "loss": 0.7864, "step": 10738 }, { "epoch": 0.86, "grad_norm": 1.5512055640208706, "learning_rate": 4.939483777333931e-07, "loss": 0.7395, "step": 10739 }, { "epoch": 0.86, "grad_norm": 1.6742643399237433, "learning_rate": 4.933854572129975e-07, "loss": 0.7614, "step": 10740 }, { "epoch": 0.86, "grad_norm": 1.605704445786202, "learning_rate": 4.928228409895214e-07, "loss": 0.7742, "step": 10741 }, { "epoch": 0.86, "grad_norm": 1.5005978452659055, "learning_rate": 4.922605291009525e-07, "loss": 0.7678, "step": 10742 }, { "epoch": 0.86, "grad_norm": 1.5275244881984402, "learning_rate": 4.91698521585261e-07, "loss": 0.7169, "step": 10743 }, { "epoch": 0.86, "grad_norm": 1.602446129312948, "learning_rate": 4.911368184803939e-07, "loss": 0.7834, "step": 10744 }, { "epoch": 0.86, "grad_norm": 1.4295698713918725, "learning_rate": 4.90575419824279e-07, "loss": 0.7101, "step": 10745 }, { "epoch": 0.86, "grad_norm": 1.4767143973314751, "learning_rate": 4.900143256548223e-07, "loss": 0.7396, "step": 10746 }, { "epoch": 0.86, "grad_norm": 1.6422440857329006, "learning_rate": 4.894535360099117e-07, "loss": 0.8586, "step": 10747 }, { "epoch": 0.86, "grad_norm": 0.7583225780138904, "learning_rate": 4.888930509274125e-07, "loss": 1.0765, "step": 10748 }, { "epoch": 0.86, "grad_norm": 1.5774375851330238, "learning_rate": 4.883328704451689e-07, "loss": 0.6701, "step": 10749 }, { "epoch": 0.86, "grad_norm": 1.4796662597927759, "learning_rate": 4.877729946010073e-07, "loss": 0.7659, "step": 10750 }, { "epoch": 0.86, "grad_norm": 1.5681507967343238, "learning_rate": 4.872134234327308e-07, "loss": 0.8154, "step": 10751 }, { "epoch": 0.86, "grad_norm": 1.478846645597066, "learning_rate": 4.866541569781235e-07, "loss": 0.7795, "step": 10752 }, { "epoch": 0.86, "grad_norm": 0.7452529042061491, "learning_rate": 4.860951952749477e-07, "loss": 1.075, "step": 10753 }, { "epoch": 0.86, "grad_norm": 1.535543027183284, "learning_rate": 4.855365383609457e-07, "loss": 0.8176, "step": 10754 }, { "epoch": 0.86, "grad_norm": 1.4926332519109893, "learning_rate": 4.849781862738401e-07, "loss": 0.7389, "step": 10755 }, { "epoch": 0.86, "grad_norm": 1.587253276206484, "learning_rate": 4.844201390513297e-07, "loss": 0.7457, "step": 10756 }, { "epoch": 0.86, "grad_norm": 1.4699854656303906, "learning_rate": 4.83862396731099e-07, "loss": 0.6814, "step": 10757 }, { "epoch": 0.86, "grad_norm": 1.5288191605767416, "learning_rate": 4.833049593508055e-07, "loss": 0.6933, "step": 10758 }, { "epoch": 0.86, "grad_norm": 1.6214061477740267, "learning_rate": 4.827478269480895e-07, "loss": 0.7883, "step": 10759 }, { "epoch": 0.86, "grad_norm": 1.5931764849037424, "learning_rate": 4.821909995605684e-07, "loss": 0.7166, "step": 10760 }, { "epoch": 0.86, "grad_norm": 0.7592697801337348, "learning_rate": 4.816344772258425e-07, "loss": 1.0793, "step": 10761 }, { "epoch": 0.86, "grad_norm": 1.5219655014312945, "learning_rate": 4.810782599814884e-07, "loss": 0.7452, "step": 10762 }, { "epoch": 0.86, "grad_norm": 1.5361478950549132, "learning_rate": 4.805223478650628e-07, "loss": 0.7826, "step": 10763 }, { "epoch": 0.86, "grad_norm": 1.6620316855875783, "learning_rate": 4.799667409141035e-07, "loss": 0.7281, "step": 10764 }, { "epoch": 0.86, "grad_norm": 1.4915248735035098, "learning_rate": 4.794114391661253e-07, "loss": 0.8015, "step": 10765 }, { "epoch": 0.86, "grad_norm": 1.3795497675119444, "learning_rate": 4.78856442658624e-07, "loss": 0.7223, "step": 10766 }, { "epoch": 0.86, "grad_norm": 1.5006046817147776, "learning_rate": 4.783017514290739e-07, "loss": 0.7259, "step": 10767 }, { "epoch": 0.86, "grad_norm": 1.6752794783781924, "learning_rate": 4.77747365514929e-07, "loss": 0.7531, "step": 10768 }, { "epoch": 0.86, "grad_norm": 1.4916175605608066, "learning_rate": 4.77193284953622e-07, "loss": 0.7963, "step": 10769 }, { "epoch": 0.86, "grad_norm": 0.7439317600220771, "learning_rate": 4.7663950978256657e-07, "loss": 1.0725, "step": 10770 }, { "epoch": 0.86, "grad_norm": 1.5048080432530966, "learning_rate": 4.760860400391548e-07, "loss": 0.7688, "step": 10771 }, { "epoch": 0.86, "grad_norm": 1.5303043080785739, "learning_rate": 4.755328757607586e-07, "loss": 0.6779, "step": 10772 }, { "epoch": 0.86, "grad_norm": 0.7614386974255439, "learning_rate": 4.7498001698472793e-07, "loss": 1.0662, "step": 10773 }, { "epoch": 0.86, "grad_norm": 1.3839866251104858, "learning_rate": 4.7442746374839363e-07, "loss": 0.7342, "step": 10774 }, { "epoch": 0.86, "grad_norm": 1.4184984169412276, "learning_rate": 4.7387521608906585e-07, "loss": 0.796, "step": 10775 }, { "epoch": 0.86, "grad_norm": 1.4507836053134406, "learning_rate": 4.73323274044033e-07, "loss": 0.7284, "step": 10776 }, { "epoch": 0.86, "grad_norm": 1.425163291562211, "learning_rate": 4.727716376505637e-07, "loss": 0.7033, "step": 10777 }, { "epoch": 0.86, "grad_norm": 1.558501208118554, "learning_rate": 4.722203069459053e-07, "loss": 0.8186, "step": 10778 }, { "epoch": 0.86, "grad_norm": 0.7717244550680494, "learning_rate": 4.7166928196728524e-07, "loss": 1.0596, "step": 10779 }, { "epoch": 0.86, "grad_norm": 1.4471640301629336, "learning_rate": 4.7111856275190937e-07, "loss": 0.6609, "step": 10780 }, { "epoch": 0.86, "grad_norm": 1.49447412732685, "learning_rate": 4.705681493369646e-07, "loss": 0.71, "step": 10781 }, { "epoch": 0.87, "grad_norm": 0.756293533275371, "learning_rate": 4.700180417596156e-07, "loss": 1.0765, "step": 10782 }, { "epoch": 0.87, "grad_norm": 1.582992954933307, "learning_rate": 4.6946824005700606e-07, "loss": 0.6741, "step": 10783 }, { "epoch": 0.87, "grad_norm": 0.7463881622146418, "learning_rate": 4.6891874426626125e-07, "loss": 1.0418, "step": 10784 }, { "epoch": 0.87, "grad_norm": 1.5246374399681542, "learning_rate": 4.683695544244843e-07, "loss": 0.7455, "step": 10785 }, { "epoch": 0.87, "grad_norm": 1.6077003125687528, "learning_rate": 4.678206705687566e-07, "loss": 0.7668, "step": 10786 }, { "epoch": 0.87, "grad_norm": 1.5461386255268992, "learning_rate": 4.672720927361413e-07, "loss": 0.7456, "step": 10787 }, { "epoch": 0.87, "grad_norm": 2.6126972276297873, "learning_rate": 4.667238209636782e-07, "loss": 0.7584, "step": 10788 }, { "epoch": 0.87, "grad_norm": 1.6057232003618762, "learning_rate": 4.6617585528838937e-07, "loss": 0.7825, "step": 10789 }, { "epoch": 0.87, "grad_norm": 1.5496909320708245, "learning_rate": 4.6562819574727304e-07, "loss": 0.716, "step": 10790 }, { "epoch": 0.87, "grad_norm": 1.58685017039491, "learning_rate": 4.650808423773101e-07, "loss": 0.7661, "step": 10791 }, { "epoch": 0.87, "grad_norm": 1.5882866282604633, "learning_rate": 4.645337952154583e-07, "loss": 0.7195, "step": 10792 }, { "epoch": 0.87, "grad_norm": 1.502770437280089, "learning_rate": 4.6398705429865574e-07, "loss": 0.7107, "step": 10793 }, { "epoch": 0.87, "grad_norm": 1.4941326371463215, "learning_rate": 4.634406196638186e-07, "loss": 0.754, "step": 10794 }, { "epoch": 0.87, "grad_norm": 1.509496447284733, "learning_rate": 4.62894491347845e-07, "loss": 0.6891, "step": 10795 }, { "epoch": 0.87, "grad_norm": 1.5645790937728679, "learning_rate": 4.623486693876106e-07, "loss": 0.7143, "step": 10796 }, { "epoch": 0.87, "grad_norm": 1.4794202390874187, "learning_rate": 4.6180315381996963e-07, "loss": 0.726, "step": 10797 }, { "epoch": 0.87, "grad_norm": 1.589974092047516, "learning_rate": 4.6125794468175723e-07, "loss": 0.7225, "step": 10798 }, { "epoch": 0.87, "grad_norm": 1.454638255372068, "learning_rate": 4.607130420097866e-07, "loss": 0.7733, "step": 10799 }, { "epoch": 0.87, "grad_norm": 1.5838284963981806, "learning_rate": 4.601684458408506e-07, "loss": 0.7507, "step": 10800 }, { "epoch": 0.87, "grad_norm": 1.4978858351776485, "learning_rate": 4.5962415621172205e-07, "loss": 0.7576, "step": 10801 }, { "epoch": 0.87, "grad_norm": 1.5015797210270119, "learning_rate": 4.590801731591532e-07, "loss": 0.8012, "step": 10802 }, { "epoch": 0.87, "grad_norm": 1.544530860167229, "learning_rate": 4.5853649671987464e-07, "loss": 0.781, "step": 10803 }, { "epoch": 0.87, "grad_norm": 1.5519572991136248, "learning_rate": 4.579931269305954e-07, "loss": 0.7494, "step": 10804 }, { "epoch": 0.87, "grad_norm": 1.504341838994756, "learning_rate": 4.574500638280072e-07, "loss": 0.7815, "step": 10805 }, { "epoch": 0.87, "grad_norm": 1.5032595960959758, "learning_rate": 4.5690730744877733e-07, "loss": 0.7027, "step": 10806 }, { "epoch": 0.87, "grad_norm": 1.4196085629133373, "learning_rate": 4.563648578295543e-07, "loss": 0.7103, "step": 10807 }, { "epoch": 0.87, "grad_norm": 1.362049089544699, "learning_rate": 4.5582271500696607e-07, "loss": 0.694, "step": 10808 }, { "epoch": 0.87, "grad_norm": 1.535717556051172, "learning_rate": 4.552808790176194e-07, "loss": 0.8219, "step": 10809 }, { "epoch": 0.87, "grad_norm": 1.4118023088878167, "learning_rate": 4.5473934989810064e-07, "loss": 0.7463, "step": 10810 }, { "epoch": 0.87, "grad_norm": 0.7594779698868047, "learning_rate": 4.5419812768497274e-07, "loss": 1.0131, "step": 10811 }, { "epoch": 0.87, "grad_norm": 1.5970778011653948, "learning_rate": 4.5365721241478256e-07, "loss": 0.7528, "step": 10812 }, { "epoch": 0.87, "grad_norm": 0.748541265263908, "learning_rate": 4.531166041240531e-07, "loss": 1.0409, "step": 10813 }, { "epoch": 0.87, "grad_norm": 0.7352203158506543, "learning_rate": 4.525763028492869e-07, "loss": 1.0644, "step": 10814 }, { "epoch": 0.87, "grad_norm": 1.6181497760065366, "learning_rate": 4.5203630862696803e-07, "loss": 0.7538, "step": 10815 }, { "epoch": 0.87, "grad_norm": 1.401876461513214, "learning_rate": 4.514966214935573e-07, "loss": 0.7475, "step": 10816 }, { "epoch": 0.87, "grad_norm": 1.522176580198074, "learning_rate": 4.5095724148549515e-07, "loss": 0.7864, "step": 10817 }, { "epoch": 0.87, "grad_norm": 1.436349681437484, "learning_rate": 4.504181686392012e-07, "loss": 0.7563, "step": 10818 }, { "epoch": 0.87, "grad_norm": 1.4925702902415443, "learning_rate": 4.498794029910769e-07, "loss": 0.7118, "step": 10819 }, { "epoch": 0.87, "grad_norm": 1.612995103467293, "learning_rate": 4.4934094457749934e-07, "loss": 0.7689, "step": 10820 }, { "epoch": 0.87, "grad_norm": 0.7477687298888301, "learning_rate": 4.4880279343482713e-07, "loss": 1.0679, "step": 10821 }, { "epoch": 0.87, "grad_norm": 1.5697601672896522, "learning_rate": 4.482649495993974e-07, "loss": 0.6962, "step": 10822 }, { "epoch": 0.87, "grad_norm": 1.5751908979681308, "learning_rate": 4.4772741310752653e-07, "loss": 0.7833, "step": 10823 }, { "epoch": 0.87, "grad_norm": 1.4855339785603408, "learning_rate": 4.4719018399550893e-07, "loss": 0.6843, "step": 10824 }, { "epoch": 0.87, "grad_norm": 1.431638528527591, "learning_rate": 4.4665326229962167e-07, "loss": 0.7307, "step": 10825 }, { "epoch": 0.87, "grad_norm": 1.6236283987898097, "learning_rate": 4.46116648056118e-07, "loss": 0.7217, "step": 10826 }, { "epoch": 0.87, "grad_norm": 1.5773995367136033, "learning_rate": 4.455803413012316e-07, "loss": 0.7706, "step": 10827 }, { "epoch": 0.87, "grad_norm": 1.591105818424367, "learning_rate": 4.4504434207117363e-07, "loss": 0.824, "step": 10828 }, { "epoch": 0.87, "grad_norm": 0.7419227931692505, "learning_rate": 4.445086504021384e-07, "loss": 1.093, "step": 10829 }, { "epoch": 0.87, "grad_norm": 1.6440755997786751, "learning_rate": 4.439732663302954e-07, "loss": 0.7256, "step": 10830 }, { "epoch": 0.87, "grad_norm": 1.7665875502893416, "learning_rate": 4.434381898917961e-07, "loss": 0.7668, "step": 10831 }, { "epoch": 0.87, "grad_norm": 0.7618737919593795, "learning_rate": 4.4290342112276895e-07, "loss": 1.0827, "step": 10832 }, { "epoch": 0.87, "grad_norm": 0.7819819426092904, "learning_rate": 4.423689600593234e-07, "loss": 1.0409, "step": 10833 }, { "epoch": 0.87, "grad_norm": 1.4977293824654383, "learning_rate": 4.418348067375472e-07, "loss": 0.8294, "step": 10834 }, { "epoch": 0.87, "grad_norm": 0.7449262311134747, "learning_rate": 4.4130096119350707e-07, "loss": 1.0324, "step": 10835 }, { "epoch": 0.87, "grad_norm": 1.5676177864437064, "learning_rate": 4.4076742346325086e-07, "loss": 0.7231, "step": 10836 }, { "epoch": 0.87, "grad_norm": 1.4885152709754226, "learning_rate": 4.4023419358280307e-07, "loss": 0.7561, "step": 10837 }, { "epoch": 0.87, "grad_norm": 1.585636121606808, "learning_rate": 4.397012715881688e-07, "loss": 0.9106, "step": 10838 }, { "epoch": 0.87, "grad_norm": 1.7250152348376198, "learning_rate": 4.3916865751533313e-07, "loss": 0.7634, "step": 10839 }, { "epoch": 0.87, "grad_norm": 1.5883764889785326, "learning_rate": 4.38636351400259e-07, "loss": 0.7862, "step": 10840 }, { "epoch": 0.87, "grad_norm": 1.5437528587024798, "learning_rate": 4.3810435327888814e-07, "loss": 0.7308, "step": 10841 }, { "epoch": 0.87, "grad_norm": 1.5543574001658695, "learning_rate": 4.37572663187143e-07, "loss": 0.7234, "step": 10842 }, { "epoch": 0.87, "grad_norm": 1.5635287132782059, "learning_rate": 4.3704128116092423e-07, "loss": 0.7016, "step": 10843 }, { "epoch": 0.87, "grad_norm": 1.4471386716701087, "learning_rate": 4.365102072361116e-07, "loss": 0.6631, "step": 10844 }, { "epoch": 0.87, "grad_norm": 1.4305168448675005, "learning_rate": 4.359794414485646e-07, "loss": 0.7912, "step": 10845 }, { "epoch": 0.87, "grad_norm": 1.6476042030217537, "learning_rate": 4.354489838341225e-07, "loss": 0.8229, "step": 10846 }, { "epoch": 0.87, "grad_norm": 1.6195577112356045, "learning_rate": 4.3491883442860263e-07, "loss": 0.8073, "step": 10847 }, { "epoch": 0.87, "grad_norm": 0.7432980531406017, "learning_rate": 4.343889932678008e-07, "loss": 1.0165, "step": 10848 }, { "epoch": 0.87, "grad_norm": 1.615904267883659, "learning_rate": 4.338594603874946e-07, "loss": 0.6997, "step": 10849 }, { "epoch": 0.87, "grad_norm": 0.7759843240832383, "learning_rate": 4.3333023582343925e-07, "loss": 1.0559, "step": 10850 }, { "epoch": 0.87, "grad_norm": 1.5663035775603884, "learning_rate": 4.328013196113684e-07, "loss": 0.8689, "step": 10851 }, { "epoch": 0.87, "grad_norm": 1.4449564823603271, "learning_rate": 4.322727117869951e-07, "loss": 0.6754, "step": 10852 }, { "epoch": 0.87, "grad_norm": 1.6791153537287589, "learning_rate": 4.317444123860143e-07, "loss": 0.8228, "step": 10853 }, { "epoch": 0.87, "grad_norm": 1.521176159129267, "learning_rate": 4.3121642144409726e-07, "loss": 0.7174, "step": 10854 }, { "epoch": 0.87, "grad_norm": 1.619480829500294, "learning_rate": 4.306887389968928e-07, "loss": 0.7431, "step": 10855 }, { "epoch": 0.87, "grad_norm": 0.7226325045998638, "learning_rate": 4.3016136508003404e-07, "loss": 0.9968, "step": 10856 }, { "epoch": 0.87, "grad_norm": 1.5059336600920292, "learning_rate": 4.296342997291292e-07, "loss": 0.7245, "step": 10857 }, { "epoch": 0.87, "grad_norm": 1.4866489154432223, "learning_rate": 4.2910754297976755e-07, "loss": 0.7428, "step": 10858 }, { "epoch": 0.87, "grad_norm": 0.750979320381967, "learning_rate": 4.285810948675156e-07, "loss": 1.054, "step": 10859 }, { "epoch": 0.87, "grad_norm": 1.5955691763209754, "learning_rate": 4.280549554279223e-07, "loss": 0.8723, "step": 10860 }, { "epoch": 0.87, "grad_norm": 1.5728529398262352, "learning_rate": 4.27529124696513e-07, "loss": 0.7514, "step": 10861 }, { "epoch": 0.87, "grad_norm": 1.473266208648008, "learning_rate": 4.270036027087915e-07, "loss": 0.6887, "step": 10862 }, { "epoch": 0.87, "grad_norm": 1.4987134534024584, "learning_rate": 4.2647838950024445e-07, "loss": 0.8458, "step": 10863 }, { "epoch": 0.87, "grad_norm": 1.6637440350760873, "learning_rate": 4.259534851063346e-07, "loss": 0.7062, "step": 10864 }, { "epoch": 0.87, "grad_norm": 1.6371718619071305, "learning_rate": 4.2542888956250475e-07, "loss": 0.7437, "step": 10865 }, { "epoch": 0.87, "grad_norm": 1.505095516169909, "learning_rate": 4.2490460290417645e-07, "loss": 0.7983, "step": 10866 }, { "epoch": 0.87, "grad_norm": 1.4874924709777637, "learning_rate": 4.243806251667509e-07, "loss": 0.7013, "step": 10867 }, { "epoch": 0.87, "grad_norm": 1.414192099784101, "learning_rate": 4.2385695638560874e-07, "loss": 0.6876, "step": 10868 }, { "epoch": 0.87, "grad_norm": 1.5367233329125694, "learning_rate": 4.2333359659610715e-07, "loss": 0.8357, "step": 10869 }, { "epoch": 0.87, "grad_norm": 1.5259162089539835, "learning_rate": 4.228105458335879e-07, "loss": 0.7668, "step": 10870 }, { "epoch": 0.87, "grad_norm": 0.7509049937396648, "learning_rate": 4.222878041333672e-07, "loss": 1.0728, "step": 10871 }, { "epoch": 0.87, "grad_norm": 1.4850529327171975, "learning_rate": 4.2176537153074014e-07, "loss": 0.7803, "step": 10872 }, { "epoch": 0.87, "grad_norm": 1.5081266063100325, "learning_rate": 4.212432480609846e-07, "loss": 0.729, "step": 10873 }, { "epoch": 0.87, "grad_norm": 1.800014874269248, "learning_rate": 4.207214337593557e-07, "loss": 0.7534, "step": 10874 }, { "epoch": 0.87, "grad_norm": 1.5796778189469272, "learning_rate": 4.2019992866108637e-07, "loss": 0.6964, "step": 10875 }, { "epoch": 0.87, "grad_norm": 1.4199688347483326, "learning_rate": 4.1967873280139017e-07, "loss": 0.7706, "step": 10876 }, { "epoch": 0.87, "grad_norm": 1.4414603757546947, "learning_rate": 4.191578462154594e-07, "loss": 0.6974, "step": 10877 }, { "epoch": 0.87, "grad_norm": 1.531750112399289, "learning_rate": 4.186372689384655e-07, "loss": 0.832, "step": 10878 }, { "epoch": 0.87, "grad_norm": 1.6257890063176823, "learning_rate": 4.181170010055585e-07, "loss": 0.7619, "step": 10879 }, { "epoch": 0.87, "grad_norm": 1.5189046642418236, "learning_rate": 4.1759704245186936e-07, "loss": 0.6893, "step": 10880 }, { "epoch": 0.87, "grad_norm": 1.5925069284966133, "learning_rate": 4.170773933125061e-07, "loss": 0.8033, "step": 10881 }, { "epoch": 0.87, "grad_norm": 1.5804564878128753, "learning_rate": 4.165580536225561e-07, "loss": 0.7841, "step": 10882 }, { "epoch": 0.87, "grad_norm": 1.5997023754792323, "learning_rate": 4.1603902341708804e-07, "loss": 0.7178, "step": 10883 }, { "epoch": 0.87, "grad_norm": 1.4802467174291711, "learning_rate": 4.1552030273114665e-07, "loss": 0.7551, "step": 10884 }, { "epoch": 0.87, "grad_norm": 1.7621645153175403, "learning_rate": 4.150018915997578e-07, "loss": 0.8266, "step": 10885 }, { "epoch": 0.87, "grad_norm": 1.4672942145649917, "learning_rate": 4.1448379005792517e-07, "loss": 0.671, "step": 10886 }, { "epoch": 0.87, "grad_norm": 1.4908575742250816, "learning_rate": 4.1396599814063244e-07, "loss": 0.6795, "step": 10887 }, { "epoch": 0.87, "grad_norm": 0.7591012438197998, "learning_rate": 4.1344851588284216e-07, "loss": 1.1026, "step": 10888 }, { "epoch": 0.87, "grad_norm": 1.4849777900195205, "learning_rate": 4.129313433194948e-07, "loss": 0.6946, "step": 10889 }, { "epoch": 0.87, "grad_norm": 1.5730539321805446, "learning_rate": 4.124144804855135e-07, "loss": 0.7879, "step": 10890 }, { "epoch": 0.87, "grad_norm": 1.6567694862738451, "learning_rate": 4.118979274157964e-07, "loss": 0.7686, "step": 10891 }, { "epoch": 0.87, "grad_norm": 1.5752157648285017, "learning_rate": 4.113816841452223e-07, "loss": 0.7535, "step": 10892 }, { "epoch": 0.87, "grad_norm": 1.8718092463510594, "learning_rate": 4.1086575070864885e-07, "loss": 0.7495, "step": 10893 }, { "epoch": 0.87, "grad_norm": 1.4919671070474896, "learning_rate": 4.1035012714091436e-07, "loss": 0.7357, "step": 10894 }, { "epoch": 0.87, "grad_norm": 1.3918360649688855, "learning_rate": 4.098348134768343e-07, "loss": 0.6677, "step": 10895 }, { "epoch": 0.87, "grad_norm": 0.7337175479942323, "learning_rate": 4.0931980975120246e-07, "loss": 1.0458, "step": 10896 }, { "epoch": 0.87, "grad_norm": 1.5823529764943791, "learning_rate": 4.0880511599879545e-07, "loss": 0.8521, "step": 10897 }, { "epoch": 0.87, "grad_norm": 1.53303764219004, "learning_rate": 4.0829073225436613e-07, "loss": 0.7706, "step": 10898 }, { "epoch": 0.87, "grad_norm": 1.4716144815591148, "learning_rate": 4.077766585526444e-07, "loss": 0.6956, "step": 10899 }, { "epoch": 0.87, "grad_norm": 1.5956499949241874, "learning_rate": 4.072628949283447e-07, "loss": 0.7485, "step": 10900 }, { "epoch": 0.87, "grad_norm": 1.5238950269877727, "learning_rate": 4.067494414161555e-07, "loss": 0.7841, "step": 10901 }, { "epoch": 0.87, "grad_norm": 1.4876642918472216, "learning_rate": 4.0623629805074784e-07, "loss": 0.6775, "step": 10902 }, { "epoch": 0.87, "grad_norm": 1.4149890774524612, "learning_rate": 4.057234648667685e-07, "loss": 0.6657, "step": 10903 }, { "epoch": 0.87, "grad_norm": 1.452399398702419, "learning_rate": 4.05210941898847e-07, "loss": 0.7044, "step": 10904 }, { "epoch": 0.87, "grad_norm": 1.3609363650430728, "learning_rate": 4.046987291815896e-07, "loss": 0.7823, "step": 10905 }, { "epoch": 0.88, "grad_norm": 1.4273855151067214, "learning_rate": 4.0418682674958074e-07, "loss": 0.7584, "step": 10906 }, { "epoch": 0.88, "grad_norm": 1.528420336150421, "learning_rate": 4.036752346373868e-07, "loss": 0.7856, "step": 10907 }, { "epoch": 0.88, "grad_norm": 1.4910433458487766, "learning_rate": 4.0316395287955166e-07, "loss": 0.7972, "step": 10908 }, { "epoch": 0.88, "grad_norm": 1.6430989164408867, "learning_rate": 4.0265298151059785e-07, "loss": 0.7386, "step": 10909 }, { "epoch": 0.88, "grad_norm": 0.7353311932513732, "learning_rate": 4.0214232056502653e-07, "loss": 1.0638, "step": 10910 }, { "epoch": 0.88, "grad_norm": 1.4727645104989497, "learning_rate": 4.016319700773197e-07, "loss": 0.7766, "step": 10911 }, { "epoch": 0.88, "grad_norm": 1.440976912736856, "learning_rate": 4.0112193008193746e-07, "loss": 0.7371, "step": 10912 }, { "epoch": 0.88, "grad_norm": 0.7377174199134139, "learning_rate": 4.006122006133173e-07, "loss": 1.0355, "step": 10913 }, { "epoch": 0.88, "grad_norm": 1.410285765460856, "learning_rate": 4.001027817058789e-07, "loss": 0.77, "step": 10914 }, { "epoch": 0.88, "grad_norm": 1.5743927414276078, "learning_rate": 3.995936733940198e-07, "loss": 0.7924, "step": 10915 }, { "epoch": 0.88, "grad_norm": 1.4891792665549142, "learning_rate": 3.9908487571211463e-07, "loss": 0.742, "step": 10916 }, { "epoch": 0.88, "grad_norm": 1.5335841003067048, "learning_rate": 3.985763886945188e-07, "loss": 0.7066, "step": 10917 }, { "epoch": 0.88, "grad_norm": 1.5586759305478077, "learning_rate": 3.9806821237556805e-07, "loss": 0.6873, "step": 10918 }, { "epoch": 0.88, "grad_norm": 1.561460467590906, "learning_rate": 3.975603467895739e-07, "loss": 0.7813, "step": 10919 }, { "epoch": 0.88, "grad_norm": 0.7658741678068239, "learning_rate": 3.9705279197083003e-07, "loss": 1.0692, "step": 10920 }, { "epoch": 0.88, "grad_norm": 1.498333711543909, "learning_rate": 3.9654554795360624e-07, "loss": 0.7132, "step": 10921 }, { "epoch": 0.88, "grad_norm": 1.4639315271740825, "learning_rate": 3.96038614772154e-07, "loss": 0.7003, "step": 10922 }, { "epoch": 0.88, "grad_norm": 1.6184995695559106, "learning_rate": 3.95531992460701e-07, "loss": 0.8107, "step": 10923 }, { "epoch": 0.88, "grad_norm": 1.6549879459964296, "learning_rate": 3.9502568105345753e-07, "loss": 0.8111, "step": 10924 }, { "epoch": 0.88, "grad_norm": 1.5087869999785404, "learning_rate": 3.9451968058460967e-07, "loss": 0.7518, "step": 10925 }, { "epoch": 0.88, "grad_norm": 1.4931849029850557, "learning_rate": 3.940139910883245e-07, "loss": 0.8085, "step": 10926 }, { "epoch": 0.88, "grad_norm": 1.439156383846267, "learning_rate": 3.9350861259874586e-07, "loss": 0.7111, "step": 10927 }, { "epoch": 0.88, "grad_norm": 1.5367679471867846, "learning_rate": 3.930035451499997e-07, "loss": 0.8175, "step": 10928 }, { "epoch": 0.88, "grad_norm": 1.4523201130013537, "learning_rate": 3.9249878877618886e-07, "loss": 0.6872, "step": 10929 }, { "epoch": 0.88, "grad_norm": 1.4068640721745465, "learning_rate": 3.9199434351139544e-07, "loss": 0.6547, "step": 10930 }, { "epoch": 0.88, "grad_norm": 1.5219191792677558, "learning_rate": 3.914902093896811e-07, "loss": 0.6802, "step": 10931 }, { "epoch": 0.88, "grad_norm": 1.5915826367163501, "learning_rate": 3.909863864450852e-07, "loss": 0.7386, "step": 10932 }, { "epoch": 0.88, "grad_norm": 0.7537331106421039, "learning_rate": 3.9048287471162847e-07, "loss": 1.0912, "step": 10933 }, { "epoch": 0.88, "grad_norm": 1.7019205329750964, "learning_rate": 3.8997967422330693e-07, "loss": 0.763, "step": 10934 }, { "epoch": 0.88, "grad_norm": 1.5220772044557014, "learning_rate": 3.8947678501410014e-07, "loss": 0.8192, "step": 10935 }, { "epoch": 0.88, "grad_norm": 1.5628237485440022, "learning_rate": 3.889742071179636e-07, "loss": 0.7386, "step": 10936 }, { "epoch": 0.88, "grad_norm": 0.7524469453717851, "learning_rate": 3.884719405688314e-07, "loss": 1.082, "step": 10937 }, { "epoch": 0.88, "grad_norm": 1.4110842848276386, "learning_rate": 3.8796998540061916e-07, "loss": 0.8143, "step": 10938 }, { "epoch": 0.88, "grad_norm": 1.485213912445654, "learning_rate": 3.8746834164722024e-07, "loss": 0.7806, "step": 10939 }, { "epoch": 0.88, "grad_norm": 1.879718417042932, "learning_rate": 3.8696700934250485e-07, "loss": 0.7611, "step": 10940 }, { "epoch": 0.88, "grad_norm": 0.7606604894187893, "learning_rate": 3.8646598852032593e-07, "loss": 1.0472, "step": 10941 }, { "epoch": 0.88, "grad_norm": 1.5618685182129253, "learning_rate": 3.859652792145141e-07, "loss": 0.782, "step": 10942 }, { "epoch": 0.88, "grad_norm": 1.4350105166458642, "learning_rate": 3.8546488145887627e-07, "loss": 0.714, "step": 10943 }, { "epoch": 0.88, "grad_norm": 1.6468784960142941, "learning_rate": 3.84964795287201e-07, "loss": 0.7932, "step": 10944 }, { "epoch": 0.88, "grad_norm": 1.5339274857138525, "learning_rate": 3.844650207332562e-07, "loss": 0.7755, "step": 10945 }, { "epoch": 0.88, "grad_norm": 1.5113205656119701, "learning_rate": 3.8396555783078717e-07, "loss": 0.8319, "step": 10946 }, { "epoch": 0.88, "grad_norm": 1.5860154363324779, "learning_rate": 3.8346640661351795e-07, "loss": 0.6949, "step": 10947 }, { "epoch": 0.88, "grad_norm": 1.696046099297465, "learning_rate": 3.8296756711515446e-07, "loss": 0.7767, "step": 10948 }, { "epoch": 0.88, "grad_norm": 1.5666204426998542, "learning_rate": 3.8246903936937806e-07, "loss": 0.6784, "step": 10949 }, { "epoch": 0.88, "grad_norm": 0.7625153174115986, "learning_rate": 3.819708234098507e-07, "loss": 1.0759, "step": 10950 }, { "epoch": 0.88, "grad_norm": 1.5965289860890333, "learning_rate": 3.8147291927021213e-07, "loss": 0.7501, "step": 10951 }, { "epoch": 0.88, "grad_norm": 1.6654556425391605, "learning_rate": 3.8097532698408436e-07, "loss": 0.7991, "step": 10952 }, { "epoch": 0.88, "grad_norm": 1.650604801938321, "learning_rate": 3.804780465850644e-07, "loss": 0.7445, "step": 10953 }, { "epoch": 0.88, "grad_norm": 1.5602761237847655, "learning_rate": 3.799810781067298e-07, "loss": 0.7677, "step": 10954 }, { "epoch": 0.88, "grad_norm": 1.3925359297347504, "learning_rate": 3.794844215826371e-07, "loss": 0.7513, "step": 10955 }, { "epoch": 0.88, "grad_norm": 1.576378133715264, "learning_rate": 3.789880770463217e-07, "loss": 0.7435, "step": 10956 }, { "epoch": 0.88, "grad_norm": 1.5223247566696465, "learning_rate": 3.784920445312978e-07, "loss": 0.764, "step": 10957 }, { "epoch": 0.88, "grad_norm": 1.4307736332204142, "learning_rate": 3.779963240710577e-07, "loss": 0.6782, "step": 10958 }, { "epoch": 0.88, "grad_norm": 2.334372188484022, "learning_rate": 3.775009156990761e-07, "loss": 0.7624, "step": 10959 }, { "epoch": 0.88, "grad_norm": 1.4987633409644332, "learning_rate": 3.7700581944880246e-07, "loss": 0.7582, "step": 10960 }, { "epoch": 0.88, "grad_norm": 1.6890232753226628, "learning_rate": 3.765110353536661e-07, "loss": 0.8296, "step": 10961 }, { "epoch": 0.88, "grad_norm": 1.5624747119192703, "learning_rate": 3.7601656344707746e-07, "loss": 0.6624, "step": 10962 }, { "epoch": 0.88, "grad_norm": 1.4953135106282294, "learning_rate": 3.755224037624239e-07, "loss": 0.769, "step": 10963 }, { "epoch": 0.88, "grad_norm": 1.4351060883818185, "learning_rate": 3.7502855633307246e-07, "loss": 0.7233, "step": 10964 }, { "epoch": 0.88, "grad_norm": 1.4485235587462515, "learning_rate": 3.745350211923682e-07, "loss": 0.7421, "step": 10965 }, { "epoch": 0.88, "grad_norm": 1.4426987004814353, "learning_rate": 3.7404179837363665e-07, "loss": 0.7554, "step": 10966 }, { "epoch": 0.88, "grad_norm": 1.479405918577684, "learning_rate": 3.735488879101801e-07, "loss": 0.7301, "step": 10967 }, { "epoch": 0.88, "grad_norm": 1.6216574133211057, "learning_rate": 3.730562898352813e-07, "loss": 0.7961, "step": 10968 }, { "epoch": 0.88, "grad_norm": 1.467189544336084, "learning_rate": 3.725640041822026e-07, "loss": 0.7265, "step": 10969 }, { "epoch": 0.88, "grad_norm": 1.4876668454848354, "learning_rate": 3.7207203098418354e-07, "loss": 0.8603, "step": 10970 }, { "epoch": 0.88, "grad_norm": 1.5783907627603446, "learning_rate": 3.715803702744425e-07, "loss": 0.8183, "step": 10971 }, { "epoch": 0.88, "grad_norm": 1.4770757276812514, "learning_rate": 3.71089022086179e-07, "loss": 0.7995, "step": 10972 }, { "epoch": 0.88, "grad_norm": 1.6008629648848236, "learning_rate": 3.7059798645256996e-07, "loss": 0.7436, "step": 10973 }, { "epoch": 0.88, "grad_norm": 1.5665279358286361, "learning_rate": 3.701072634067704e-07, "loss": 0.7398, "step": 10974 }, { "epoch": 0.88, "grad_norm": 0.7503801235726272, "learning_rate": 3.6961685298191496e-07, "loss": 1.0382, "step": 10975 }, { "epoch": 0.88, "grad_norm": 1.5306359479740053, "learning_rate": 3.691267552111183e-07, "loss": 0.7382, "step": 10976 }, { "epoch": 0.88, "grad_norm": 1.5814265836627432, "learning_rate": 3.686369701274717e-07, "loss": 0.7531, "step": 10977 }, { "epoch": 0.88, "grad_norm": 1.6662224173174685, "learning_rate": 3.681474977640465e-07, "loss": 0.7672, "step": 10978 }, { "epoch": 0.88, "grad_norm": 1.6167283556678482, "learning_rate": 3.676583381538945e-07, "loss": 0.7509, "step": 10979 }, { "epoch": 0.88, "grad_norm": 1.5642568650651039, "learning_rate": 3.671694913300439e-07, "loss": 0.696, "step": 10980 }, { "epoch": 0.88, "grad_norm": 1.4840671086771284, "learning_rate": 3.6668095732550203e-07, "loss": 0.7116, "step": 10981 }, { "epoch": 0.88, "grad_norm": 1.7147326429796217, "learning_rate": 3.66192736173257e-07, "loss": 0.7208, "step": 10982 }, { "epoch": 0.88, "grad_norm": 1.379189352853151, "learning_rate": 3.6570482790627526e-07, "loss": 0.7257, "step": 10983 }, { "epoch": 0.88, "grad_norm": 1.4131510046607434, "learning_rate": 3.652172325574999e-07, "loss": 0.747, "step": 10984 }, { "epoch": 0.88, "grad_norm": 1.5013898256092313, "learning_rate": 3.647299501598539e-07, "loss": 0.7528, "step": 10985 }, { "epoch": 0.88, "grad_norm": 1.6270174736009082, "learning_rate": 3.6424298074624333e-07, "loss": 0.7802, "step": 10986 }, { "epoch": 0.88, "grad_norm": 1.502568192627465, "learning_rate": 3.6375632434954564e-07, "loss": 0.7705, "step": 10987 }, { "epoch": 0.88, "grad_norm": 1.707811120973351, "learning_rate": 3.6326998100262134e-07, "loss": 0.8572, "step": 10988 }, { "epoch": 0.88, "grad_norm": 0.7374993424738078, "learning_rate": 3.6278395073831183e-07, "loss": 1.054, "step": 10989 }, { "epoch": 0.88, "grad_norm": 1.5533169856310978, "learning_rate": 3.622982335894332e-07, "loss": 0.698, "step": 10990 }, { "epoch": 0.88, "grad_norm": 0.7595724411641711, "learning_rate": 3.6181282958878295e-07, "loss": 1.0451, "step": 10991 }, { "epoch": 0.88, "grad_norm": 1.444656803839999, "learning_rate": 3.6132773876913495e-07, "loss": 0.787, "step": 10992 }, { "epoch": 0.88, "grad_norm": 1.6168321970799713, "learning_rate": 3.6084296116324637e-07, "loss": 0.7611, "step": 10993 }, { "epoch": 0.88, "grad_norm": 1.5031100369159351, "learning_rate": 3.603584968038487e-07, "loss": 0.7079, "step": 10994 }, { "epoch": 0.88, "grad_norm": 1.3672294757163161, "learning_rate": 3.598743457236542e-07, "loss": 0.6527, "step": 10995 }, { "epoch": 0.88, "grad_norm": 1.6075434588299722, "learning_rate": 3.5939050795535446e-07, "loss": 0.6865, "step": 10996 }, { "epoch": 0.88, "grad_norm": 1.588607134089166, "learning_rate": 3.5890698353161947e-07, "loss": 0.7268, "step": 10997 }, { "epoch": 0.88, "grad_norm": 1.471690486971903, "learning_rate": 3.5842377248509696e-07, "loss": 0.7552, "step": 10998 }, { "epoch": 0.88, "grad_norm": 1.9660411802613984, "learning_rate": 3.5794087484841534e-07, "loss": 0.7971, "step": 10999 }, { "epoch": 0.88, "grad_norm": 1.4468538421342834, "learning_rate": 3.5745829065418013e-07, "loss": 0.6409, "step": 11000 }, { "epoch": 0.88, "grad_norm": 1.6797742275037628, "learning_rate": 3.56976019934977e-07, "loss": 0.7181, "step": 11001 }, { "epoch": 0.88, "grad_norm": 1.469969120821769, "learning_rate": 3.5649406272336926e-07, "loss": 0.7576, "step": 11002 }, { "epoch": 0.88, "grad_norm": 1.5435041712455972, "learning_rate": 3.5601241905190153e-07, "loss": 0.7762, "step": 11003 }, { "epoch": 0.88, "grad_norm": 1.495842426979491, "learning_rate": 3.5553108895309443e-07, "loss": 0.7379, "step": 11004 }, { "epoch": 0.88, "grad_norm": 1.569247196517556, "learning_rate": 3.550500724594469e-07, "loss": 0.8037, "step": 11005 }, { "epoch": 0.88, "grad_norm": 1.4236960188204681, "learning_rate": 3.545693696034413e-07, "loss": 0.6986, "step": 11006 }, { "epoch": 0.88, "grad_norm": 1.5359390912035282, "learning_rate": 3.54088980417534e-07, "loss": 0.7034, "step": 11007 }, { "epoch": 0.88, "grad_norm": 0.7647765252402849, "learning_rate": 3.5360890493416235e-07, "loss": 1.0737, "step": 11008 }, { "epoch": 0.88, "grad_norm": 1.4664208388494542, "learning_rate": 3.53129143185742e-07, "loss": 0.7341, "step": 11009 }, { "epoch": 0.88, "grad_norm": 1.6337413694314546, "learning_rate": 3.5264969520466765e-07, "loss": 0.7932, "step": 11010 }, { "epoch": 0.88, "grad_norm": 1.4586087315406375, "learning_rate": 3.521705610233123e-07, "loss": 0.7153, "step": 11011 }, { "epoch": 0.88, "grad_norm": 0.7353728430896286, "learning_rate": 3.5169174067402833e-07, "loss": 1.0817, "step": 11012 }, { "epoch": 0.88, "grad_norm": 0.7537308377300403, "learning_rate": 3.5121323418914723e-07, "loss": 1.065, "step": 11013 }, { "epoch": 0.88, "grad_norm": 1.4368479478626945, "learning_rate": 3.5073504160097913e-07, "loss": 0.5962, "step": 11014 }, { "epoch": 0.88, "grad_norm": 1.5013169349365483, "learning_rate": 3.502571629418122e-07, "loss": 0.7932, "step": 11015 }, { "epoch": 0.88, "grad_norm": 1.479773614881406, "learning_rate": 3.4977959824391285e-07, "loss": 0.7826, "step": 11016 }, { "epoch": 0.88, "grad_norm": 0.7576735520904445, "learning_rate": 3.4930234753952963e-07, "loss": 1.0703, "step": 11017 }, { "epoch": 0.88, "grad_norm": 1.5335334592636083, "learning_rate": 3.488254108608857e-07, "loss": 0.7709, "step": 11018 }, { "epoch": 0.88, "grad_norm": 1.4557138408128887, "learning_rate": 3.483487882401859e-07, "loss": 0.6993, "step": 11019 }, { "epoch": 0.88, "grad_norm": 1.488197374703026, "learning_rate": 3.478724797096128e-07, "loss": 0.8006, "step": 11020 }, { "epoch": 0.88, "grad_norm": 0.7559731160109061, "learning_rate": 3.473964853013273e-07, "loss": 1.08, "step": 11021 }, { "epoch": 0.88, "grad_norm": 1.6358992732156503, "learning_rate": 3.4692080504746926e-07, "loss": 0.816, "step": 11022 }, { "epoch": 0.88, "grad_norm": 0.7409542379487125, "learning_rate": 3.464454389801597e-07, "loss": 1.0722, "step": 11023 }, { "epoch": 0.88, "grad_norm": 1.609956343512661, "learning_rate": 3.4597038713149455e-07, "loss": 0.7169, "step": 11024 }, { "epoch": 0.88, "grad_norm": 1.6205486501016622, "learning_rate": 3.45495649533551e-07, "loss": 0.731, "step": 11025 }, { "epoch": 0.88, "grad_norm": 1.5250345800515839, "learning_rate": 3.450212262183833e-07, "loss": 0.6936, "step": 11026 }, { "epoch": 0.88, "grad_norm": 1.4978815365001965, "learning_rate": 3.4454711721802757e-07, "loss": 0.748, "step": 11027 }, { "epoch": 0.88, "grad_norm": 1.4348511161941986, "learning_rate": 3.44073322564496e-07, "loss": 0.749, "step": 11028 }, { "epoch": 0.88, "grad_norm": 1.5315407208297407, "learning_rate": 3.4359984228977907e-07, "loss": 0.7341, "step": 11029 }, { "epoch": 0.88, "grad_norm": 1.5563067478184318, "learning_rate": 3.431266764258501e-07, "loss": 0.7309, "step": 11030 }, { "epoch": 0.89, "grad_norm": 1.5835311062703263, "learning_rate": 3.4265382500465574e-07, "loss": 0.7172, "step": 11031 }, { "epoch": 0.89, "grad_norm": 1.493435371298899, "learning_rate": 3.421812880581249e-07, "loss": 0.8348, "step": 11032 }, { "epoch": 0.89, "grad_norm": 1.5460081782634507, "learning_rate": 3.417090656181632e-07, "loss": 0.7702, "step": 11033 }, { "epoch": 0.89, "grad_norm": 1.4394257856035668, "learning_rate": 3.4123715771665786e-07, "loss": 0.7922, "step": 11034 }, { "epoch": 0.89, "grad_norm": 1.5400473262501295, "learning_rate": 3.4076556438547294e-07, "loss": 0.7747, "step": 11035 }, { "epoch": 0.89, "grad_norm": 1.4704212777838803, "learning_rate": 3.402942856564501e-07, "loss": 0.7721, "step": 11036 }, { "epoch": 0.89, "grad_norm": 1.5288603626255577, "learning_rate": 3.398233215614127e-07, "loss": 0.7424, "step": 11037 }, { "epoch": 0.89, "grad_norm": 1.5455326262385387, "learning_rate": 3.3935267213216163e-07, "loss": 0.7528, "step": 11038 }, { "epoch": 0.89, "grad_norm": 1.6149503135211043, "learning_rate": 3.388823374004735e-07, "loss": 0.8393, "step": 11039 }, { "epoch": 0.89, "grad_norm": 1.6007372849225834, "learning_rate": 3.384123173981096e-07, "loss": 0.7704, "step": 11040 }, { "epoch": 0.89, "grad_norm": 1.5543947543635532, "learning_rate": 3.3794261215680525e-07, "loss": 0.7146, "step": 11041 }, { "epoch": 0.89, "grad_norm": 1.5458531108575948, "learning_rate": 3.3747322170827656e-07, "loss": 0.7698, "step": 11042 }, { "epoch": 0.89, "grad_norm": 1.5959077644225101, "learning_rate": 3.370041460842166e-07, "loss": 0.7278, "step": 11043 }, { "epoch": 0.89, "grad_norm": 1.5741018908522728, "learning_rate": 3.3653538531630006e-07, "loss": 0.8237, "step": 11044 }, { "epoch": 0.89, "grad_norm": 1.4708693042316467, "learning_rate": 3.3606693943617764e-07, "loss": 0.7494, "step": 11045 }, { "epoch": 0.89, "grad_norm": 1.5567242926608835, "learning_rate": 3.3559880847547965e-07, "loss": 0.6624, "step": 11046 }, { "epoch": 0.89, "grad_norm": 1.5654550355716215, "learning_rate": 3.3513099246581636e-07, "loss": 0.7637, "step": 11047 }, { "epoch": 0.89, "grad_norm": 1.600159050000962, "learning_rate": 3.346634914387753e-07, "loss": 0.8179, "step": 11048 }, { "epoch": 0.89, "grad_norm": 1.3134972523034971, "learning_rate": 3.3419630542592343e-07, "loss": 0.7866, "step": 11049 }, { "epoch": 0.89, "grad_norm": 1.5256731107356623, "learning_rate": 3.337294344588055e-07, "loss": 0.7443, "step": 11050 }, { "epoch": 0.89, "grad_norm": 1.5369712477818505, "learning_rate": 3.332628785689462e-07, "loss": 0.7656, "step": 11051 }, { "epoch": 0.89, "grad_norm": 1.573666141803481, "learning_rate": 3.327966377878494e-07, "loss": 0.7967, "step": 11052 }, { "epoch": 0.89, "grad_norm": 1.731535928574638, "learning_rate": 3.323307121469954e-07, "loss": 0.8713, "step": 11053 }, { "epoch": 0.89, "grad_norm": 1.63106374085155, "learning_rate": 3.3186510167784456e-07, "loss": 0.6757, "step": 11054 }, { "epoch": 0.89, "grad_norm": 1.5974079207498195, "learning_rate": 3.313998064118373e-07, "loss": 0.7401, "step": 11055 }, { "epoch": 0.89, "grad_norm": 1.6680928299266247, "learning_rate": 3.3093482638038963e-07, "loss": 0.7336, "step": 11056 }, { "epoch": 0.89, "grad_norm": 1.5588846108935976, "learning_rate": 3.3047016161489867e-07, "loss": 0.7476, "step": 11057 }, { "epoch": 0.89, "grad_norm": 1.6818346922757996, "learning_rate": 3.3000581214674086e-07, "loss": 0.7561, "step": 11058 }, { "epoch": 0.89, "grad_norm": 1.526085003276602, "learning_rate": 3.295417780072685e-07, "loss": 0.7641, "step": 11059 }, { "epoch": 0.89, "grad_norm": 1.4415828415768006, "learning_rate": 3.290780592278148e-07, "loss": 0.7694, "step": 11060 }, { "epoch": 0.89, "grad_norm": 1.5435710536209635, "learning_rate": 3.286146558396919e-07, "loss": 0.7361, "step": 11061 }, { "epoch": 0.89, "grad_norm": 1.6377427950484176, "learning_rate": 3.2815156787418925e-07, "loss": 0.811, "step": 11062 }, { "epoch": 0.89, "grad_norm": 1.473475459016356, "learning_rate": 3.276887953625751e-07, "loss": 0.7684, "step": 11063 }, { "epoch": 0.89, "grad_norm": 1.6634186712148324, "learning_rate": 3.2722633833609797e-07, "loss": 0.8335, "step": 11064 }, { "epoch": 0.89, "grad_norm": 1.5733499458681153, "learning_rate": 3.2676419682598325e-07, "loss": 0.7203, "step": 11065 }, { "epoch": 0.89, "grad_norm": 1.5540484411116942, "learning_rate": 3.263023708634355e-07, "loss": 0.6922, "step": 11066 }, { "epoch": 0.89, "grad_norm": 1.5180307903587187, "learning_rate": 3.258408604796387e-07, "loss": 0.7523, "step": 11067 }, { "epoch": 0.89, "grad_norm": 1.5702897044992534, "learning_rate": 3.2537966570575566e-07, "loss": 0.7209, "step": 11068 }, { "epoch": 0.89, "grad_norm": 1.4017798212283716, "learning_rate": 3.2491878657292643e-07, "loss": 0.6724, "step": 11069 }, { "epoch": 0.89, "grad_norm": 1.5216823019588588, "learning_rate": 3.2445822311227003e-07, "loss": 0.8391, "step": 11070 }, { "epoch": 0.89, "grad_norm": 0.7630209103467722, "learning_rate": 3.239979753548872e-07, "loss": 1.0913, "step": 11071 }, { "epoch": 0.89, "grad_norm": 0.7363059981320395, "learning_rate": 3.23538043331853e-07, "loss": 1.0508, "step": 11072 }, { "epoch": 0.89, "grad_norm": 1.4410785088342541, "learning_rate": 3.2307842707422324e-07, "loss": 0.7282, "step": 11073 }, { "epoch": 0.89, "grad_norm": 1.5710786186475005, "learning_rate": 3.2261912661303297e-07, "loss": 0.727, "step": 11074 }, { "epoch": 0.89, "grad_norm": 1.4838948764394324, "learning_rate": 3.2216014197929413e-07, "loss": 0.761, "step": 11075 }, { "epoch": 0.89, "grad_norm": 1.5004303920154511, "learning_rate": 3.217014732039997e-07, "loss": 0.6929, "step": 11076 }, { "epoch": 0.89, "grad_norm": 0.7486107578822753, "learning_rate": 3.2124312031811823e-07, "loss": 1.0482, "step": 11077 }, { "epoch": 0.89, "grad_norm": 1.5033042459130572, "learning_rate": 3.207850833526005e-07, "loss": 0.7734, "step": 11078 }, { "epoch": 0.89, "grad_norm": 0.7652063197074335, "learning_rate": 3.20327362338374e-07, "loss": 1.0665, "step": 11079 }, { "epoch": 0.89, "grad_norm": 1.49440311736683, "learning_rate": 3.1986995730634404e-07, "loss": 0.6626, "step": 11080 }, { "epoch": 0.89, "grad_norm": 1.4766376726770714, "learning_rate": 3.194128682873965e-07, "loss": 0.7188, "step": 11081 }, { "epoch": 0.89, "grad_norm": 1.5837729503858349, "learning_rate": 3.1895609531239545e-07, "loss": 0.7425, "step": 11082 }, { "epoch": 0.89, "grad_norm": 1.5691954533046482, "learning_rate": 3.1849963841218244e-07, "loss": 0.7766, "step": 11083 }, { "epoch": 0.89, "grad_norm": 1.488387640694689, "learning_rate": 3.180434976175784e-07, "loss": 0.7393, "step": 11084 }, { "epoch": 0.89, "grad_norm": 1.45229830219342, "learning_rate": 3.1758767295938356e-07, "loss": 0.7402, "step": 11085 }, { "epoch": 0.89, "grad_norm": 1.502770014179707, "learning_rate": 3.1713216446837613e-07, "loss": 0.7468, "step": 11086 }, { "epoch": 0.89, "grad_norm": 1.607685596364651, "learning_rate": 3.1667697217531324e-07, "loss": 0.8521, "step": 11087 }, { "epoch": 0.89, "grad_norm": 1.5305685294045883, "learning_rate": 3.1622209611093023e-07, "loss": 0.7437, "step": 11088 }, { "epoch": 0.89, "grad_norm": 0.7631086789271833, "learning_rate": 3.157675363059409e-07, "loss": 1.0755, "step": 11089 }, { "epoch": 0.89, "grad_norm": 1.4330018539225748, "learning_rate": 3.1531329279103905e-07, "loss": 0.6925, "step": 11090 }, { "epoch": 0.89, "grad_norm": 1.4965084725542455, "learning_rate": 3.148593655968951e-07, "loss": 0.8227, "step": 11091 }, { "epoch": 0.89, "grad_norm": 0.7495933806925165, "learning_rate": 3.144057547541607e-07, "loss": 1.0675, "step": 11092 }, { "epoch": 0.89, "grad_norm": 0.7535336703476626, "learning_rate": 3.1395246029346405e-07, "loss": 1.0574, "step": 11093 }, { "epoch": 0.89, "grad_norm": 0.7542244074805167, "learning_rate": 3.1349948224541183e-07, "loss": 1.0621, "step": 11094 }, { "epoch": 0.89, "grad_norm": 0.7613576971654857, "learning_rate": 3.1304682064059177e-07, "loss": 1.097, "step": 11095 }, { "epoch": 0.89, "grad_norm": 1.6207985871307777, "learning_rate": 3.1259447550956777e-07, "loss": 0.7086, "step": 11096 }, { "epoch": 0.89, "grad_norm": 1.4210154238674653, "learning_rate": 3.1214244688288263e-07, "loss": 0.7794, "step": 11097 }, { "epoch": 0.89, "grad_norm": 1.459571018567844, "learning_rate": 3.116907347910597e-07, "loss": 0.7719, "step": 11098 }, { "epoch": 0.89, "grad_norm": 1.572077166043891, "learning_rate": 3.112393392645985e-07, "loss": 0.7646, "step": 11099 }, { "epoch": 0.89, "grad_norm": 1.5066274261242514, "learning_rate": 3.1078826033397845e-07, "loss": 0.6904, "step": 11100 }, { "epoch": 0.89, "grad_norm": 0.7226837222371575, "learning_rate": 3.1033749802965694e-07, "loss": 1.0536, "step": 11101 }, { "epoch": 0.89, "grad_norm": 1.5414470772811537, "learning_rate": 3.098870523820718e-07, "loss": 0.7056, "step": 11102 }, { "epoch": 0.89, "grad_norm": 1.5043675942812087, "learning_rate": 3.094369234216371e-07, "loss": 0.7682, "step": 11103 }, { "epoch": 0.89, "grad_norm": 1.4754611139138323, "learning_rate": 3.089871111787468e-07, "loss": 0.788, "step": 11104 }, { "epoch": 0.89, "grad_norm": 1.5153370976574505, "learning_rate": 3.085376156837738e-07, "loss": 0.7751, "step": 11105 }, { "epoch": 0.89, "grad_norm": 1.5970161107419645, "learning_rate": 3.080884369670689e-07, "loss": 0.8021, "step": 11106 }, { "epoch": 0.89, "grad_norm": 1.8556855385147228, "learning_rate": 3.076395750589617e-07, "loss": 0.761, "step": 11107 }, { "epoch": 0.89, "grad_norm": 0.7702770675156423, "learning_rate": 3.071910299897596e-07, "loss": 1.0587, "step": 11108 }, { "epoch": 0.89, "grad_norm": 1.4804488829888518, "learning_rate": 3.0674280178975e-07, "loss": 0.7301, "step": 11109 }, { "epoch": 0.89, "grad_norm": 1.4379106700339863, "learning_rate": 3.062948904891988e-07, "loss": 0.749, "step": 11110 }, { "epoch": 0.89, "grad_norm": 1.4674472648119166, "learning_rate": 3.0584729611834785e-07, "loss": 0.7531, "step": 11111 }, { "epoch": 0.89, "grad_norm": 1.6081638999178967, "learning_rate": 3.054000187074224e-07, "loss": 0.8096, "step": 11112 }, { "epoch": 0.89, "grad_norm": 1.656772717771923, "learning_rate": 3.049530582866228e-07, "loss": 0.7817, "step": 11113 }, { "epoch": 0.89, "grad_norm": 1.4965200978167352, "learning_rate": 3.045064148861282e-07, "loss": 0.6786, "step": 11114 }, { "epoch": 0.89, "grad_norm": 1.4442389330689, "learning_rate": 3.040600885360967e-07, "loss": 0.7003, "step": 11115 }, { "epoch": 0.89, "grad_norm": 1.5909026499593781, "learning_rate": 3.0361407926666644e-07, "loss": 0.7411, "step": 11116 }, { "epoch": 0.89, "grad_norm": 1.6257001686865984, "learning_rate": 3.031683871079527e-07, "loss": 0.6991, "step": 11117 }, { "epoch": 0.89, "grad_norm": 1.5235540981042892, "learning_rate": 3.0272301209004873e-07, "loss": 0.8388, "step": 11118 }, { "epoch": 0.89, "grad_norm": 1.5234892219202067, "learning_rate": 3.022779542430293e-07, "loss": 0.7576, "step": 11119 }, { "epoch": 0.89, "grad_norm": 1.5618575996523274, "learning_rate": 3.018332135969443e-07, "loss": 0.7849, "step": 11120 }, { "epoch": 0.89, "grad_norm": 1.5011259729150346, "learning_rate": 3.0138879018182243e-07, "loss": 0.7075, "step": 11121 }, { "epoch": 0.89, "grad_norm": 1.5634672964473333, "learning_rate": 3.009446840276742e-07, "loss": 0.8107, "step": 11122 }, { "epoch": 0.89, "grad_norm": 1.5315534292063606, "learning_rate": 3.0050089516448553e-07, "loss": 0.7655, "step": 11123 }, { "epoch": 0.89, "grad_norm": 1.5288700280827774, "learning_rate": 3.000574236222231e-07, "loss": 0.7594, "step": 11124 }, { "epoch": 0.89, "grad_norm": 1.9538907399958063, "learning_rate": 2.996142694308296e-07, "loss": 0.7729, "step": 11125 }, { "epoch": 0.89, "grad_norm": 1.4225168814962479, "learning_rate": 2.991714326202294e-07, "loss": 0.6651, "step": 11126 }, { "epoch": 0.89, "grad_norm": 1.518797373285312, "learning_rate": 2.9872891322032307e-07, "loss": 0.7757, "step": 11127 }, { "epoch": 0.89, "grad_norm": 1.4442787857491621, "learning_rate": 2.9828671126098995e-07, "loss": 0.7213, "step": 11128 }, { "epoch": 0.89, "grad_norm": 1.5138077300199504, "learning_rate": 2.9784482677209013e-07, "loss": 0.7866, "step": 11129 }, { "epoch": 0.89, "grad_norm": 0.7488686022148093, "learning_rate": 2.9740325978345976e-07, "loss": 1.0785, "step": 11130 }, { "epoch": 0.89, "grad_norm": 1.52477985990948, "learning_rate": 2.9696201032491434e-07, "loss": 0.6429, "step": 11131 }, { "epoch": 0.89, "grad_norm": 1.529601856509054, "learning_rate": 2.965210784262479e-07, "loss": 0.7249, "step": 11132 }, { "epoch": 0.89, "grad_norm": 1.5025061182541974, "learning_rate": 2.9608046411723325e-07, "loss": 0.7722, "step": 11133 }, { "epoch": 0.89, "grad_norm": 1.6472983172781575, "learning_rate": 2.9564016742762214e-07, "loss": 0.7476, "step": 11134 }, { "epoch": 0.89, "grad_norm": 1.5268752727115371, "learning_rate": 2.952001883871436e-07, "loss": 0.7602, "step": 11135 }, { "epoch": 0.89, "grad_norm": 0.7540060255321654, "learning_rate": 2.9476052702550716e-07, "loss": 1.0449, "step": 11136 }, { "epoch": 0.89, "grad_norm": 1.4449056837729604, "learning_rate": 2.9432118337239857e-07, "loss": 0.7427, "step": 11137 }, { "epoch": 0.89, "grad_norm": 1.5268550208876228, "learning_rate": 2.9388215745748347e-07, "loss": 0.7237, "step": 11138 }, { "epoch": 0.89, "grad_norm": 1.3448534174589557, "learning_rate": 2.934434493104071e-07, "loss": 0.7442, "step": 11139 }, { "epoch": 0.89, "grad_norm": 1.6334527995966484, "learning_rate": 2.9300505896079135e-07, "loss": 0.8298, "step": 11140 }, { "epoch": 0.89, "grad_norm": 1.6124570887749639, "learning_rate": 2.925669864382369e-07, "loss": 0.7608, "step": 11141 }, { "epoch": 0.89, "grad_norm": 1.6007905334160644, "learning_rate": 2.92129231772324e-07, "loss": 0.7522, "step": 11142 }, { "epoch": 0.89, "grad_norm": 1.538164758555258, "learning_rate": 2.916917949926107e-07, "loss": 0.7655, "step": 11143 }, { "epoch": 0.89, "grad_norm": 1.6751777019348677, "learning_rate": 2.912546761286333e-07, "loss": 0.7735, "step": 11144 }, { "epoch": 0.89, "grad_norm": 0.7538311398031248, "learning_rate": 2.9081787520990665e-07, "loss": 1.0829, "step": 11145 }, { "epoch": 0.89, "grad_norm": 1.4697033471218546, "learning_rate": 2.90381392265926e-07, "loss": 0.7049, "step": 11146 }, { "epoch": 0.89, "grad_norm": 1.5704763312678693, "learning_rate": 2.899452273261627e-07, "loss": 0.7396, "step": 11147 }, { "epoch": 0.89, "grad_norm": 1.4429982874019476, "learning_rate": 2.895093804200683e-07, "loss": 0.6887, "step": 11148 }, { "epoch": 0.89, "grad_norm": 1.4562277318305354, "learning_rate": 2.890738515770708e-07, "loss": 0.7517, "step": 11149 }, { "epoch": 0.89, "grad_norm": 0.7608110630876941, "learning_rate": 2.8863864082657955e-07, "loss": 1.0354, "step": 11150 }, { "epoch": 0.89, "grad_norm": 0.7670376629116403, "learning_rate": 2.882037481979805e-07, "loss": 1.1139, "step": 11151 }, { "epoch": 0.89, "grad_norm": 1.6430535815584841, "learning_rate": 2.8776917372063896e-07, "loss": 0.7894, "step": 11152 }, { "epoch": 0.89, "grad_norm": 1.7414853330520232, "learning_rate": 2.8733491742389765e-07, "loss": 0.749, "step": 11153 }, { "epoch": 0.89, "grad_norm": 1.559874876878309, "learning_rate": 2.8690097933707863e-07, "loss": 0.761, "step": 11154 }, { "epoch": 0.89, "grad_norm": 1.5622344693841999, "learning_rate": 2.8646735948948234e-07, "loss": 0.7514, "step": 11155 }, { "epoch": 0.9, "grad_norm": 1.584200461198389, "learning_rate": 2.8603405791038876e-07, "loss": 0.8195, "step": 11156 }, { "epoch": 0.9, "grad_norm": 0.7484765503962355, "learning_rate": 2.856010746290544e-07, "loss": 1.0632, "step": 11157 }, { "epoch": 0.9, "grad_norm": 0.736996704500388, "learning_rate": 2.851684096747159e-07, "loss": 1.0417, "step": 11158 }, { "epoch": 0.9, "grad_norm": 1.5741538709505476, "learning_rate": 2.847360630765866e-07, "loss": 0.6575, "step": 11159 }, { "epoch": 0.9, "grad_norm": 1.4600336146316644, "learning_rate": 2.843040348638615e-07, "loss": 0.7327, "step": 11160 }, { "epoch": 0.9, "grad_norm": 1.5553370524861467, "learning_rate": 2.8387232506571105e-07, "loss": 0.6892, "step": 11161 }, { "epoch": 0.9, "grad_norm": 1.5227563776069415, "learning_rate": 2.834409337112842e-07, "loss": 0.7674, "step": 11162 }, { "epoch": 0.9, "grad_norm": 1.4364587937439275, "learning_rate": 2.8300986082971214e-07, "loss": 0.7126, "step": 11163 }, { "epoch": 0.9, "grad_norm": 1.5044351871457913, "learning_rate": 2.8257910645009935e-07, "loss": 0.7957, "step": 11164 }, { "epoch": 0.9, "grad_norm": 1.6691165460395936, "learning_rate": 2.821486706015325e-07, "loss": 0.7365, "step": 11165 }, { "epoch": 0.9, "grad_norm": 1.4833382921410052, "learning_rate": 2.817185533130751e-07, "loss": 0.746, "step": 11166 }, { "epoch": 0.9, "grad_norm": 0.7440988017640782, "learning_rate": 2.812887546137705e-07, "loss": 1.0457, "step": 11167 }, { "epoch": 0.9, "grad_norm": 1.5031318516521872, "learning_rate": 2.8085927453263883e-07, "loss": 0.728, "step": 11168 }, { "epoch": 0.9, "grad_norm": 1.6655269631777834, "learning_rate": 2.804301130986797e-07, "loss": 0.7151, "step": 11169 }, { "epoch": 0.9, "grad_norm": 1.5812249830621519, "learning_rate": 2.8000127034087165e-07, "loss": 0.819, "step": 11170 }, { "epoch": 0.9, "grad_norm": 0.7457674211927122, "learning_rate": 2.795727462881709e-07, "loss": 1.0107, "step": 11171 }, { "epoch": 0.9, "grad_norm": 1.6202076514880337, "learning_rate": 2.7914454096951206e-07, "loss": 0.7564, "step": 11172 }, { "epoch": 0.9, "grad_norm": 1.5114659472752774, "learning_rate": 2.7871665441380814e-07, "loss": 0.69, "step": 11173 }, { "epoch": 0.9, "grad_norm": 1.6009334540339515, "learning_rate": 2.7828908664995216e-07, "loss": 0.7792, "step": 11174 }, { "epoch": 0.9, "grad_norm": 1.8783175508850718, "learning_rate": 2.7786183770681376e-07, "loss": 0.7629, "step": 11175 }, { "epoch": 0.9, "grad_norm": 1.5527537809739251, "learning_rate": 2.774349076132421e-07, "loss": 0.8582, "step": 11176 }, { "epoch": 0.9, "grad_norm": 1.538307224641372, "learning_rate": 2.770082963980647e-07, "loss": 0.7147, "step": 11177 }, { "epoch": 0.9, "grad_norm": 1.6369990047400909, "learning_rate": 2.7658200409008626e-07, "loss": 0.828, "step": 11178 }, { "epoch": 0.9, "grad_norm": 1.4729337445463648, "learning_rate": 2.7615603071809103e-07, "loss": 0.6735, "step": 11179 }, { "epoch": 0.9, "grad_norm": 0.7659668188755265, "learning_rate": 2.757303763108432e-07, "loss": 1.061, "step": 11180 }, { "epoch": 0.9, "grad_norm": 1.4427532812894908, "learning_rate": 2.753050408970831e-07, "loss": 0.7518, "step": 11181 }, { "epoch": 0.9, "grad_norm": 0.734512275662547, "learning_rate": 2.748800245055305e-07, "loss": 1.0607, "step": 11182 }, { "epoch": 0.9, "grad_norm": 1.5379605408438441, "learning_rate": 2.74455327164882e-07, "loss": 0.6932, "step": 11183 }, { "epoch": 0.9, "grad_norm": 1.497355428426903, "learning_rate": 2.7403094890381674e-07, "loss": 0.7118, "step": 11184 }, { "epoch": 0.9, "grad_norm": 1.4332489589790323, "learning_rate": 2.7360688975098806e-07, "loss": 0.7701, "step": 11185 }, { "epoch": 0.9, "grad_norm": 0.7630941521841698, "learning_rate": 2.7318314973502957e-07, "loss": 1.032, "step": 11186 }, { "epoch": 0.9, "grad_norm": 1.6217914698184144, "learning_rate": 2.7275972888455347e-07, "loss": 0.7633, "step": 11187 }, { "epoch": 0.9, "grad_norm": 0.7442088758093761, "learning_rate": 2.7233662722815024e-07, "loss": 1.0731, "step": 11188 }, { "epoch": 0.9, "grad_norm": 1.8329429703670623, "learning_rate": 2.719138447943881e-07, "loss": 0.7793, "step": 11189 }, { "epoch": 0.9, "grad_norm": 0.757490377558337, "learning_rate": 2.714913816118142e-07, "loss": 1.058, "step": 11190 }, { "epoch": 0.9, "grad_norm": 1.5174357912285095, "learning_rate": 2.7106923770895466e-07, "loss": 0.8166, "step": 11191 }, { "epoch": 0.9, "grad_norm": 1.5485125485370086, "learning_rate": 2.706474131143144e-07, "loss": 0.7787, "step": 11192 }, { "epoch": 0.9, "grad_norm": 1.4765659884508924, "learning_rate": 2.7022590785637406e-07, "loss": 0.7836, "step": 11193 }, { "epoch": 0.9, "grad_norm": 1.6444051691627424, "learning_rate": 2.698047219635963e-07, "loss": 0.803, "step": 11194 }, { "epoch": 0.9, "grad_norm": 1.4481311027530321, "learning_rate": 2.693838554644196e-07, "loss": 0.6756, "step": 11195 }, { "epoch": 0.9, "grad_norm": 1.5586522906935116, "learning_rate": 2.689633083872628e-07, "loss": 0.8328, "step": 11196 }, { "epoch": 0.9, "grad_norm": 1.4078306578321635, "learning_rate": 2.68543080760521e-07, "loss": 0.7105, "step": 11197 }, { "epoch": 0.9, "grad_norm": 1.577800511719139, "learning_rate": 2.6812317261256995e-07, "loss": 0.8326, "step": 11198 }, { "epoch": 0.9, "grad_norm": 1.5548554409192066, "learning_rate": 2.6770358397176233e-07, "loss": 0.8341, "step": 11199 }, { "epoch": 0.9, "grad_norm": 0.7661293905237565, "learning_rate": 2.672843148664289e-07, "loss": 1.0635, "step": 11200 }, { "epoch": 0.9, "grad_norm": 0.7255393645150351, "learning_rate": 2.668653653248815e-07, "loss": 1.0493, "step": 11201 }, { "epoch": 0.9, "grad_norm": 1.4968942774606921, "learning_rate": 2.6644673537540746e-07, "loss": 0.7704, "step": 11202 }, { "epoch": 0.9, "grad_norm": 1.582346429737381, "learning_rate": 2.66028425046273e-07, "loss": 0.79, "step": 11203 }, { "epoch": 0.9, "grad_norm": 1.4851103457608008, "learning_rate": 2.656104343657251e-07, "loss": 0.787, "step": 11204 }, { "epoch": 0.9, "grad_norm": 1.6537778387379753, "learning_rate": 2.6519276336198665e-07, "loss": 0.827, "step": 11205 }, { "epoch": 0.9, "grad_norm": 1.5327496712017237, "learning_rate": 2.6477541206325896e-07, "loss": 0.756, "step": 11206 }, { "epoch": 0.9, "grad_norm": 1.4514045570242222, "learning_rate": 2.6435838049772346e-07, "loss": 0.7441, "step": 11207 }, { "epoch": 0.9, "grad_norm": 1.486791672045777, "learning_rate": 2.6394166869353923e-07, "loss": 0.7589, "step": 11208 }, { "epoch": 0.9, "grad_norm": 1.6014507005606395, "learning_rate": 2.6352527667884264e-07, "loss": 0.7383, "step": 11209 }, { "epoch": 0.9, "grad_norm": 1.7855351634420427, "learning_rate": 2.6310920448174957e-07, "loss": 0.7637, "step": 11210 }, { "epoch": 0.9, "grad_norm": 1.4557309067323967, "learning_rate": 2.6269345213035536e-07, "loss": 0.7544, "step": 11211 }, { "epoch": 0.9, "grad_norm": 1.692361618884787, "learning_rate": 2.622780196527314e-07, "loss": 0.7621, "step": 11212 }, { "epoch": 0.9, "grad_norm": 1.5314135517072656, "learning_rate": 2.6186290707692907e-07, "loss": 0.8127, "step": 11213 }, { "epoch": 0.9, "grad_norm": 1.5264408982331834, "learning_rate": 2.614481144309772e-07, "loss": 0.7276, "step": 11214 }, { "epoch": 0.9, "grad_norm": 1.5382460004775107, "learning_rate": 2.6103364174288435e-07, "loss": 0.6276, "step": 11215 }, { "epoch": 0.9, "grad_norm": 1.5183983600786577, "learning_rate": 2.6061948904063663e-07, "loss": 0.6778, "step": 11216 }, { "epoch": 0.9, "grad_norm": 0.7403674630537943, "learning_rate": 2.602056563521976e-07, "loss": 1.0477, "step": 11217 }, { "epoch": 0.9, "grad_norm": 1.6134141454191993, "learning_rate": 2.597921437055112e-07, "loss": 0.8577, "step": 11218 }, { "epoch": 0.9, "grad_norm": 1.5231508426191889, "learning_rate": 2.5937895112849886e-07, "loss": 0.7553, "step": 11219 }, { "epoch": 0.9, "grad_norm": 1.5263139922591729, "learning_rate": 2.5896607864905944e-07, "loss": 0.7042, "step": 11220 }, { "epoch": 0.9, "grad_norm": 0.7460763140995438, "learning_rate": 2.585535262950717e-07, "loss": 1.047, "step": 11221 }, { "epoch": 0.9, "grad_norm": 1.4540421339177418, "learning_rate": 2.581412940943917e-07, "loss": 0.7006, "step": 11222 }, { "epoch": 0.9, "grad_norm": 1.496683974960104, "learning_rate": 2.577293820748544e-07, "loss": 0.7387, "step": 11223 }, { "epoch": 0.9, "grad_norm": 1.53687619552774, "learning_rate": 2.573177902642726e-07, "loss": 0.744, "step": 11224 }, { "epoch": 0.9, "grad_norm": 0.7506978887917596, "learning_rate": 2.569065186904385e-07, "loss": 1.0293, "step": 11225 }, { "epoch": 0.9, "grad_norm": 1.5451994418085164, "learning_rate": 2.564955673811226e-07, "loss": 0.727, "step": 11226 }, { "epoch": 0.9, "grad_norm": 1.4206439122441608, "learning_rate": 2.560849363640716e-07, "loss": 0.648, "step": 11227 }, { "epoch": 0.9, "grad_norm": 2.3878093579943576, "learning_rate": 2.556746256670145e-07, "loss": 0.7749, "step": 11228 }, { "epoch": 0.9, "grad_norm": 1.4985749226422698, "learning_rate": 2.5526463531765467e-07, "loss": 0.6971, "step": 11229 }, { "epoch": 0.9, "grad_norm": 1.483609937464334, "learning_rate": 2.5485496534367657e-07, "loss": 0.7335, "step": 11230 }, { "epoch": 0.9, "grad_norm": 1.4554354859203376, "learning_rate": 2.544456157727415e-07, "loss": 0.7992, "step": 11231 }, { "epoch": 0.9, "grad_norm": 1.4282278999025104, "learning_rate": 2.5403658663248953e-07, "loss": 0.7034, "step": 11232 }, { "epoch": 0.9, "grad_norm": 1.4945602828862585, "learning_rate": 2.536278779505402e-07, "loss": 0.7517, "step": 11233 }, { "epoch": 0.9, "grad_norm": 1.4317518934981917, "learning_rate": 2.532194897544882e-07, "loss": 0.7324, "step": 11234 }, { "epoch": 0.9, "grad_norm": 1.5545037796736385, "learning_rate": 2.528114220719119e-07, "loss": 0.7616, "step": 11235 }, { "epoch": 0.9, "grad_norm": 1.5307505978044706, "learning_rate": 2.524036749303632e-07, "loss": 0.7475, "step": 11236 }, { "epoch": 0.9, "grad_norm": 1.6197481615382159, "learning_rate": 2.5199624835737345e-07, "loss": 0.7125, "step": 11237 }, { "epoch": 0.9, "grad_norm": 1.5287344884858842, "learning_rate": 2.5158914238045507e-07, "loss": 0.7605, "step": 11238 }, { "epoch": 0.9, "grad_norm": 1.6447468489407668, "learning_rate": 2.511823570270955e-07, "loss": 0.7608, "step": 11239 }, { "epoch": 0.9, "grad_norm": 1.5135873557017792, "learning_rate": 2.5077589232476217e-07, "loss": 0.7674, "step": 11240 }, { "epoch": 0.9, "grad_norm": 1.5472361340390777, "learning_rate": 2.503697483008999e-07, "loss": 0.7194, "step": 11241 }, { "epoch": 0.9, "grad_norm": 1.5351681018963006, "learning_rate": 2.4996392498293334e-07, "loss": 0.7385, "step": 11242 }, { "epoch": 0.9, "grad_norm": 1.5142152721575817, "learning_rate": 2.495584223982644e-07, "loss": 0.778, "step": 11243 }, { "epoch": 0.9, "grad_norm": 1.5751234467732027, "learning_rate": 2.491532405742719e-07, "loss": 0.7701, "step": 11244 }, { "epoch": 0.9, "grad_norm": 1.5424047807534318, "learning_rate": 2.4874837953831723e-07, "loss": 0.8033, "step": 11245 }, { "epoch": 0.9, "grad_norm": 1.6466924936224756, "learning_rate": 2.483438393177362e-07, "loss": 0.7513, "step": 11246 }, { "epoch": 0.9, "grad_norm": 1.4497958496034145, "learning_rate": 2.479396199398448e-07, "loss": 0.7117, "step": 11247 }, { "epoch": 0.9, "grad_norm": 1.609320427328318, "learning_rate": 2.475357214319357e-07, "loss": 0.7709, "step": 11248 }, { "epoch": 0.9, "grad_norm": 1.51061343446991, "learning_rate": 2.4713214382128204e-07, "loss": 0.7699, "step": 11249 }, { "epoch": 0.9, "grad_norm": 1.5620083902440538, "learning_rate": 2.4672888713513476e-07, "loss": 0.756, "step": 11250 }, { "epoch": 0.9, "grad_norm": 0.7337232870886723, "learning_rate": 2.463259514007216e-07, "loss": 1.0703, "step": 11251 }, { "epoch": 0.9, "grad_norm": 1.4620361613376, "learning_rate": 2.459233366452507e-07, "loss": 0.7589, "step": 11252 }, { "epoch": 0.9, "grad_norm": 1.6237781717899087, "learning_rate": 2.455210428959065e-07, "loss": 0.7129, "step": 11253 }, { "epoch": 0.9, "grad_norm": 1.5225875120319627, "learning_rate": 2.451190701798523e-07, "loss": 0.7526, "step": 11254 }, { "epoch": 0.9, "grad_norm": 1.4723323202050016, "learning_rate": 2.447174185242324e-07, "loss": 0.8148, "step": 11255 }, { "epoch": 0.9, "grad_norm": 1.504256939558105, "learning_rate": 2.443160879561657e-07, "loss": 0.743, "step": 11256 }, { "epoch": 0.9, "grad_norm": 1.6387728527545875, "learning_rate": 2.4391507850275166e-07, "loss": 0.7532, "step": 11257 }, { "epoch": 0.9, "grad_norm": 1.5247521225676715, "learning_rate": 2.4351439019106584e-07, "loss": 0.7555, "step": 11258 }, { "epoch": 0.9, "grad_norm": 1.5244893211370647, "learning_rate": 2.4311402304816546e-07, "loss": 0.8505, "step": 11259 }, { "epoch": 0.9, "grad_norm": 0.7558001718713542, "learning_rate": 2.427139771010839e-07, "loss": 1.0383, "step": 11260 }, { "epoch": 0.9, "grad_norm": 1.5075921059561654, "learning_rate": 2.423142523768318e-07, "loss": 0.7504, "step": 11261 }, { "epoch": 0.9, "grad_norm": 1.412525831422138, "learning_rate": 2.4191484890240093e-07, "loss": 0.7564, "step": 11262 }, { "epoch": 0.9, "grad_norm": 0.7437306126323908, "learning_rate": 2.4151576670476016e-07, "loss": 1.0573, "step": 11263 }, { "epoch": 0.9, "grad_norm": 1.5577401122491874, "learning_rate": 2.411170058108558e-07, "loss": 0.7351, "step": 11264 }, { "epoch": 0.9, "grad_norm": 1.4010333689266676, "learning_rate": 2.407185662476119e-07, "loss": 0.6593, "step": 11265 }, { "epoch": 0.9, "grad_norm": 1.4899102286359993, "learning_rate": 2.403204480419341e-07, "loss": 0.7949, "step": 11266 }, { "epoch": 0.9, "grad_norm": 0.7572016066556934, "learning_rate": 2.3992265122070314e-07, "loss": 1.0539, "step": 11267 }, { "epoch": 0.9, "grad_norm": 2.242723894728857, "learning_rate": 2.395251758107786e-07, "loss": 0.8296, "step": 11268 }, { "epoch": 0.9, "grad_norm": 1.6297353910050398, "learning_rate": 2.391280218390002e-07, "loss": 0.7051, "step": 11269 }, { "epoch": 0.9, "grad_norm": 1.6118987226141208, "learning_rate": 2.387311893321842e-07, "loss": 0.6917, "step": 11270 }, { "epoch": 0.9, "grad_norm": 1.4844342512957134, "learning_rate": 2.3833467831712587e-07, "loss": 0.7423, "step": 11271 }, { "epoch": 0.9, "grad_norm": 1.6988900950856047, "learning_rate": 2.3793848882059768e-07, "loss": 0.8332, "step": 11272 }, { "epoch": 0.9, "grad_norm": 1.5450056836410146, "learning_rate": 2.375426208693521e-07, "loss": 0.8017, "step": 11273 }, { "epoch": 0.9, "grad_norm": 0.7406808298954016, "learning_rate": 2.3714707449011886e-07, "loss": 1.0831, "step": 11274 }, { "epoch": 0.9, "grad_norm": 1.5965278225440347, "learning_rate": 2.3675184970960607e-07, "loss": 0.7352, "step": 11275 }, { "epoch": 0.9, "grad_norm": 1.597125082974124, "learning_rate": 2.363569465545007e-07, "loss": 0.6943, "step": 11276 }, { "epoch": 0.9, "grad_norm": 1.4499118102016848, "learning_rate": 2.3596236505146642e-07, "loss": 0.7126, "step": 11277 }, { "epoch": 0.9, "grad_norm": 0.7630486501097397, "learning_rate": 2.3556810522714636e-07, "loss": 1.0552, "step": 11278 }, { "epoch": 0.9, "grad_norm": 1.5250295568857868, "learning_rate": 2.351741671081631e-07, "loss": 0.7611, "step": 11279 }, { "epoch": 0.91, "grad_norm": 1.4796313149393943, "learning_rate": 2.3478055072111538e-07, "loss": 0.7482, "step": 11280 }, { "epoch": 0.91, "grad_norm": 0.741368244625926, "learning_rate": 2.3438725609258138e-07, "loss": 1.0596, "step": 11281 }, { "epoch": 0.91, "grad_norm": 1.4945696793466325, "learning_rate": 2.3399428324911654e-07, "loss": 0.7183, "step": 11282 }, { "epoch": 0.91, "grad_norm": 1.4549555749028342, "learning_rate": 2.336016322172563e-07, "loss": 0.724, "step": 11283 }, { "epoch": 0.91, "grad_norm": 1.6178294908945854, "learning_rate": 2.332093030235133e-07, "loss": 0.7177, "step": 11284 }, { "epoch": 0.91, "grad_norm": 1.5218919229653736, "learning_rate": 2.328172956943775e-07, "loss": 0.7362, "step": 11285 }, { "epoch": 0.91, "grad_norm": 1.465339177273194, "learning_rate": 2.3242561025631882e-07, "loss": 0.7268, "step": 11286 }, { "epoch": 0.91, "grad_norm": 1.5309234880549285, "learning_rate": 2.32034246735785e-07, "loss": 0.8257, "step": 11287 }, { "epoch": 0.91, "grad_norm": 1.455840489393032, "learning_rate": 2.3164320515920101e-07, "loss": 0.7587, "step": 11288 }, { "epoch": 0.91, "grad_norm": 1.6622149837574556, "learning_rate": 2.3125248555297074e-07, "loss": 0.831, "step": 11289 }, { "epoch": 0.91, "grad_norm": 1.5272210281184677, "learning_rate": 2.308620879434781e-07, "loss": 0.7808, "step": 11290 }, { "epoch": 0.91, "grad_norm": 0.7286223045301974, "learning_rate": 2.3047201235708195e-07, "loss": 1.065, "step": 11291 }, { "epoch": 0.91, "grad_norm": 1.8661284347836218, "learning_rate": 2.3008225882012125e-07, "loss": 0.7071, "step": 11292 }, { "epoch": 0.91, "grad_norm": 1.6061117268080947, "learning_rate": 2.296928273589144e-07, "loss": 0.7859, "step": 11293 }, { "epoch": 0.91, "grad_norm": 1.4078803341145592, "learning_rate": 2.2930371799975593e-07, "loss": 0.7951, "step": 11294 }, { "epoch": 0.91, "grad_norm": 2.0831678717499917, "learning_rate": 2.2891493076891924e-07, "loss": 0.6557, "step": 11295 }, { "epoch": 0.91, "grad_norm": 1.5118646676983432, "learning_rate": 2.2852646569265556e-07, "loss": 0.6535, "step": 11296 }, { "epoch": 0.91, "grad_norm": 0.7497925286566581, "learning_rate": 2.2813832279719615e-07, "loss": 1.0748, "step": 11297 }, { "epoch": 0.91, "grad_norm": 1.5103118472819625, "learning_rate": 2.277505021087484e-07, "loss": 0.7435, "step": 11298 }, { "epoch": 0.91, "grad_norm": 0.7518153876725854, "learning_rate": 2.2736300365349905e-07, "loss": 1.0412, "step": 11299 }, { "epoch": 0.91, "grad_norm": 1.5363083465712088, "learning_rate": 2.2697582745761282e-07, "loss": 0.6852, "step": 11300 }, { "epoch": 0.91, "grad_norm": 1.4770625130362167, "learning_rate": 2.2658897354723373e-07, "loss": 0.7149, "step": 11301 }, { "epoch": 0.91, "grad_norm": 1.5868537327168775, "learning_rate": 2.2620244194848096e-07, "loss": 0.756, "step": 11302 }, { "epoch": 0.91, "grad_norm": 0.7789083676839663, "learning_rate": 2.258162326874558e-07, "loss": 1.0448, "step": 11303 }, { "epoch": 0.91, "grad_norm": 1.5333064982832316, "learning_rate": 2.2543034579023572e-07, "loss": 0.8183, "step": 11304 }, { "epoch": 0.91, "grad_norm": 1.3206393528267721, "learning_rate": 2.2504478128287654e-07, "loss": 0.7324, "step": 11305 }, { "epoch": 0.91, "grad_norm": 0.7549097864176597, "learning_rate": 2.2465953919141136e-07, "loss": 1.0371, "step": 11306 }, { "epoch": 0.91, "grad_norm": 1.4460452026287987, "learning_rate": 2.2427461954185493e-07, "loss": 0.7151, "step": 11307 }, { "epoch": 0.91, "grad_norm": 1.446209712062776, "learning_rate": 2.2389002236019642e-07, "loss": 0.6694, "step": 11308 }, { "epoch": 0.91, "grad_norm": 1.8449757154346536, "learning_rate": 2.2350574767240395e-07, "loss": 0.7062, "step": 11309 }, { "epoch": 0.91, "grad_norm": 1.3944072738625088, "learning_rate": 2.231217955044257e-07, "loss": 0.7548, "step": 11310 }, { "epoch": 0.91, "grad_norm": 0.742147293239935, "learning_rate": 2.22738165882187e-07, "loss": 1.0695, "step": 11311 }, { "epoch": 0.91, "grad_norm": 1.4312518944682713, "learning_rate": 2.2235485883159159e-07, "loss": 0.7088, "step": 11312 }, { "epoch": 0.91, "grad_norm": 1.544638094897233, "learning_rate": 2.2197187437851985e-07, "loss": 0.7522, "step": 11313 }, { "epoch": 0.91, "grad_norm": 1.4267707711497692, "learning_rate": 2.2158921254883337e-07, "loss": 0.6941, "step": 11314 }, { "epoch": 0.91, "grad_norm": 1.6184482967929608, "learning_rate": 2.2120687336837033e-07, "loss": 0.7202, "step": 11315 }, { "epoch": 0.91, "grad_norm": 0.7575072789247222, "learning_rate": 2.2082485686294507e-07, "loss": 1.0774, "step": 11316 }, { "epoch": 0.91, "grad_norm": 0.7333684617798224, "learning_rate": 2.204431630583548e-07, "loss": 1.0486, "step": 11317 }, { "epoch": 0.91, "grad_norm": 0.7364018148926658, "learning_rate": 2.200617919803716e-07, "loss": 1.0358, "step": 11318 }, { "epoch": 0.91, "grad_norm": 1.5036891181160223, "learning_rate": 2.1968074365474544e-07, "loss": 0.7823, "step": 11319 }, { "epoch": 0.91, "grad_norm": 1.5531406433195596, "learning_rate": 2.1930001810720692e-07, "loss": 0.6739, "step": 11320 }, { "epoch": 0.91, "grad_norm": 1.8497853453226605, "learning_rate": 2.1891961536346262e-07, "loss": 0.7879, "step": 11321 }, { "epoch": 0.91, "grad_norm": 1.6778807227948678, "learning_rate": 2.185395354491987e-07, "loss": 0.8188, "step": 11322 }, { "epoch": 0.91, "grad_norm": 1.514165802985255, "learning_rate": 2.1815977839007795e-07, "loss": 0.735, "step": 11323 }, { "epoch": 0.91, "grad_norm": 1.5664206309518336, "learning_rate": 2.1778034421174433e-07, "loss": 0.8323, "step": 11324 }, { "epoch": 0.91, "grad_norm": 0.7606371933012575, "learning_rate": 2.1740123293981675e-07, "loss": 1.0451, "step": 11325 }, { "epoch": 0.91, "grad_norm": 1.4856853089915207, "learning_rate": 2.1702244459989308e-07, "loss": 0.7862, "step": 11326 }, { "epoch": 0.91, "grad_norm": 1.5329465777044016, "learning_rate": 2.166439792175523e-07, "loss": 0.795, "step": 11327 }, { "epoch": 0.91, "grad_norm": 1.513790992225619, "learning_rate": 2.1626583681834733e-07, "loss": 0.6951, "step": 11328 }, { "epoch": 0.91, "grad_norm": 1.6075026525574054, "learning_rate": 2.1588801742781163e-07, "loss": 0.7723, "step": 11329 }, { "epoch": 0.91, "grad_norm": 1.7890698689826547, "learning_rate": 2.1551052107145698e-07, "loss": 0.7465, "step": 11330 }, { "epoch": 0.91, "grad_norm": 0.7722807042340272, "learning_rate": 2.1513334777477192e-07, "loss": 1.0566, "step": 11331 }, { "epoch": 0.91, "grad_norm": 1.5260360661567334, "learning_rate": 2.1475649756322436e-07, "loss": 0.7765, "step": 11332 }, { "epoch": 0.91, "grad_norm": 0.7636934219662053, "learning_rate": 2.1437997046226012e-07, "loss": 1.064, "step": 11333 }, { "epoch": 0.91, "grad_norm": 1.594049569361944, "learning_rate": 2.140037664973038e-07, "loss": 0.7953, "step": 11334 }, { "epoch": 0.91, "grad_norm": 1.5307857207867879, "learning_rate": 2.1362788569375682e-07, "loss": 0.809, "step": 11335 }, { "epoch": 0.91, "grad_norm": 1.6095733688955813, "learning_rate": 2.132523280769988e-07, "loss": 0.8303, "step": 11336 }, { "epoch": 0.91, "grad_norm": 1.5315309849958798, "learning_rate": 2.1287709367239008e-07, "loss": 0.7319, "step": 11337 }, { "epoch": 0.91, "grad_norm": 0.7404872477536104, "learning_rate": 2.1250218250526643e-07, "loss": 1.0412, "step": 11338 }, { "epoch": 0.91, "grad_norm": 1.5452586403658264, "learning_rate": 2.1212759460094268e-07, "loss": 0.6767, "step": 11339 }, { "epoch": 0.91, "grad_norm": 1.632780722184232, "learning_rate": 2.1175332998471189e-07, "loss": 0.7774, "step": 11340 }, { "epoch": 0.91, "grad_norm": 0.7470833439375405, "learning_rate": 2.1137938868184493e-07, "loss": 1.0894, "step": 11341 }, { "epoch": 0.91, "grad_norm": 1.4998593138579897, "learning_rate": 2.1100577071759164e-07, "loss": 0.7136, "step": 11342 }, { "epoch": 0.91, "grad_norm": 1.602828556297149, "learning_rate": 2.1063247611717908e-07, "loss": 0.7868, "step": 11343 }, { "epoch": 0.91, "grad_norm": 1.527764059481734, "learning_rate": 2.102595049058137e-07, "loss": 0.639, "step": 11344 }, { "epoch": 0.91, "grad_norm": 1.4891679578059003, "learning_rate": 2.0988685710867874e-07, "loss": 0.7705, "step": 11345 }, { "epoch": 0.91, "grad_norm": 0.7288237016125884, "learning_rate": 2.095145327509368e-07, "loss": 1.0536, "step": 11346 }, { "epoch": 0.91, "grad_norm": 1.4776278588501102, "learning_rate": 2.0914253185772727e-07, "loss": 0.7539, "step": 11347 }, { "epoch": 0.91, "grad_norm": 0.7550206097661549, "learning_rate": 2.0877085445416889e-07, "loss": 1.0514, "step": 11348 }, { "epoch": 0.91, "grad_norm": 1.6751800895574427, "learning_rate": 2.0839950056535884e-07, "loss": 0.7304, "step": 11349 }, { "epoch": 0.91, "grad_norm": 1.4781372518518667, "learning_rate": 2.080284702163704e-07, "loss": 0.73, "step": 11350 }, { "epoch": 0.91, "grad_norm": 1.4821034265889417, "learning_rate": 2.076577634322574e-07, "loss": 0.7404, "step": 11351 }, { "epoch": 0.91, "grad_norm": 1.570714681835568, "learning_rate": 2.072873802380515e-07, "loss": 0.7397, "step": 11352 }, { "epoch": 0.91, "grad_norm": 1.409165223593943, "learning_rate": 2.069173206587599e-07, "loss": 0.7792, "step": 11353 }, { "epoch": 0.91, "grad_norm": 1.4938012946881862, "learning_rate": 2.0654758471937098e-07, "loss": 0.7804, "step": 11354 }, { "epoch": 0.91, "grad_norm": 1.9595966796617497, "learning_rate": 2.0617817244485027e-07, "loss": 0.6948, "step": 11355 }, { "epoch": 0.91, "grad_norm": 1.4792613334112894, "learning_rate": 2.058090838601412e-07, "loss": 0.6942, "step": 11356 }, { "epoch": 0.91, "grad_norm": 1.3962570996578123, "learning_rate": 2.0544031899016437e-07, "loss": 0.7095, "step": 11357 }, { "epoch": 0.91, "grad_norm": 1.5848901259174668, "learning_rate": 2.0507187785982153e-07, "loss": 0.7265, "step": 11358 }, { "epoch": 0.91, "grad_norm": 0.7531521104197739, "learning_rate": 2.0470376049398944e-07, "loss": 1.0795, "step": 11359 }, { "epoch": 0.91, "grad_norm": 1.4841083065028131, "learning_rate": 2.0433596691752432e-07, "loss": 0.7602, "step": 11360 }, { "epoch": 0.91, "grad_norm": 1.4704723805143989, "learning_rate": 2.0396849715526134e-07, "loss": 0.7574, "step": 11361 }, { "epoch": 0.91, "grad_norm": 0.7379352724767565, "learning_rate": 2.0360135123201175e-07, "loss": 1.0383, "step": 11362 }, { "epoch": 0.91, "grad_norm": 1.513674713153626, "learning_rate": 2.0323452917256736e-07, "loss": 0.7532, "step": 11363 }, { "epoch": 0.91, "grad_norm": 1.5558452918709833, "learning_rate": 2.0286803100169507e-07, "loss": 0.7105, "step": 11364 }, { "epoch": 0.91, "grad_norm": 1.515950691166428, "learning_rate": 2.0250185674414336e-07, "loss": 0.6443, "step": 11365 }, { "epoch": 0.91, "grad_norm": 1.5660753671523235, "learning_rate": 2.0213600642463583e-07, "loss": 0.7562, "step": 11366 }, { "epoch": 0.91, "grad_norm": 1.6262792025691646, "learning_rate": 2.0177048006787604e-07, "loss": 0.8486, "step": 11367 }, { "epoch": 0.91, "grad_norm": 1.567495656211438, "learning_rate": 2.014052776985459e-07, "loss": 0.7563, "step": 11368 }, { "epoch": 0.91, "grad_norm": 1.548256727096305, "learning_rate": 2.0104039934130348e-07, "loss": 0.8065, "step": 11369 }, { "epoch": 0.91, "grad_norm": 1.6502457144534302, "learning_rate": 2.0067584502078742e-07, "loss": 0.7915, "step": 11370 }, { "epoch": 0.91, "grad_norm": 1.572075210524325, "learning_rate": 2.0031161476161132e-07, "loss": 0.7781, "step": 11371 }, { "epoch": 0.91, "grad_norm": 1.5542465922257869, "learning_rate": 1.999477085883711e-07, "loss": 0.718, "step": 11372 }, { "epoch": 0.91, "grad_norm": 1.6215007243798356, "learning_rate": 1.9958412652563763e-07, "loss": 0.8079, "step": 11373 }, { "epoch": 0.91, "grad_norm": 1.5850927859204047, "learning_rate": 1.9922086859796074e-07, "loss": 0.781, "step": 11374 }, { "epoch": 0.91, "grad_norm": 0.7578599866840234, "learning_rate": 1.9885793482986858e-07, "loss": 1.0551, "step": 11375 }, { "epoch": 0.91, "grad_norm": 1.554565299097713, "learning_rate": 1.984953252458671e-07, "loss": 0.7055, "step": 11376 }, { "epoch": 0.91, "grad_norm": 1.7057016109504752, "learning_rate": 1.981330398704395e-07, "loss": 0.8442, "step": 11377 }, { "epoch": 0.91, "grad_norm": 0.7687786945677014, "learning_rate": 1.9777107872805012e-07, "loss": 1.0357, "step": 11378 }, { "epoch": 0.91, "grad_norm": 1.6309992009151821, "learning_rate": 1.9740944184313882e-07, "loss": 0.7719, "step": 11379 }, { "epoch": 0.91, "grad_norm": 1.4823164888700506, "learning_rate": 1.9704812924012328e-07, "loss": 0.7197, "step": 11380 }, { "epoch": 0.91, "grad_norm": 1.6254977957477459, "learning_rate": 1.9668714094340012e-07, "loss": 0.7759, "step": 11381 }, { "epoch": 0.91, "grad_norm": 1.6053344074178564, "learning_rate": 1.963264769773454e-07, "loss": 0.7735, "step": 11382 }, { "epoch": 0.91, "grad_norm": 1.544267985245219, "learning_rate": 1.9596613736631133e-07, "loss": 0.7226, "step": 11383 }, { "epoch": 0.91, "grad_norm": 1.5744098460798852, "learning_rate": 1.9560612213462837e-07, "loss": 0.7621, "step": 11384 }, { "epoch": 0.91, "grad_norm": 1.6187999976972132, "learning_rate": 1.9524643130660658e-07, "loss": 0.7536, "step": 11385 }, { "epoch": 0.91, "grad_norm": 1.6159556627650153, "learning_rate": 1.948870649065321e-07, "loss": 0.811, "step": 11386 }, { "epoch": 0.91, "grad_norm": 0.7418941784714713, "learning_rate": 1.9452802295867047e-07, "loss": 1.0383, "step": 11387 }, { "epoch": 0.91, "grad_norm": 1.5325691043304228, "learning_rate": 1.9416930548726453e-07, "loss": 0.7722, "step": 11388 }, { "epoch": 0.91, "grad_norm": 1.6224231126453805, "learning_rate": 1.9381091251653717e-07, "loss": 0.7764, "step": 11389 }, { "epoch": 0.91, "grad_norm": 1.4410075295578681, "learning_rate": 1.9345284407068677e-07, "loss": 0.7782, "step": 11390 }, { "epoch": 0.91, "grad_norm": 1.474499161180752, "learning_rate": 1.930951001738901e-07, "loss": 0.7641, "step": 11391 }, { "epoch": 0.91, "grad_norm": 1.5108285936493677, "learning_rate": 1.9273768085030508e-07, "loss": 0.6794, "step": 11392 }, { "epoch": 0.91, "grad_norm": 1.5307231143686597, "learning_rate": 1.9238058612406408e-07, "loss": 0.7439, "step": 11393 }, { "epoch": 0.91, "grad_norm": 1.4680332193654813, "learning_rate": 1.920238160192789e-07, "loss": 0.6644, "step": 11394 }, { "epoch": 0.91, "grad_norm": 0.7656016827979132, "learning_rate": 1.9166737056004025e-07, "loss": 1.0204, "step": 11395 }, { "epoch": 0.91, "grad_norm": 1.3834336412804957, "learning_rate": 1.9131124977041616e-07, "loss": 0.7062, "step": 11396 }, { "epoch": 0.91, "grad_norm": 1.590309511905686, "learning_rate": 1.909554536744518e-07, "loss": 0.7402, "step": 11397 }, { "epoch": 0.91, "grad_norm": 1.4822310325950672, "learning_rate": 1.9059998229617072e-07, "loss": 0.7234, "step": 11398 }, { "epoch": 0.91, "grad_norm": 1.4564739918891403, "learning_rate": 1.9024483565957707e-07, "loss": 0.6794, "step": 11399 }, { "epoch": 0.91, "grad_norm": 1.8429645304930484, "learning_rate": 1.8989001378865058e-07, "loss": 0.7239, "step": 11400 }, { "epoch": 0.91, "grad_norm": 1.5391680154779082, "learning_rate": 1.8953551670734816e-07, "loss": 0.7659, "step": 11401 }, { "epoch": 0.91, "grad_norm": 0.7575024262552371, "learning_rate": 1.8918134443960844e-07, "loss": 1.1141, "step": 11402 }, { "epoch": 0.91, "grad_norm": 1.584139506191855, "learning_rate": 1.888274970093451e-07, "loss": 0.8057, "step": 11403 }, { "epoch": 0.91, "grad_norm": 1.6322459930469535, "learning_rate": 1.8847397444045013e-07, "loss": 0.799, "step": 11404 }, { "epoch": 0.92, "grad_norm": 1.5485833049923485, "learning_rate": 1.881207767567944e-07, "loss": 0.7613, "step": 11405 }, { "epoch": 0.92, "grad_norm": 1.5269753627444194, "learning_rate": 1.8776790398222722e-07, "loss": 0.7603, "step": 11406 }, { "epoch": 0.92, "grad_norm": 1.5910138953860988, "learning_rate": 1.8741535614057505e-07, "loss": 0.7769, "step": 11407 }, { "epoch": 0.92, "grad_norm": 1.590937775856611, "learning_rate": 1.8706313325564274e-07, "loss": 0.7356, "step": 11408 }, { "epoch": 0.92, "grad_norm": 0.7633210468185405, "learning_rate": 1.8671123535121294e-07, "loss": 1.0654, "step": 11409 }, { "epoch": 0.92, "grad_norm": 1.7067924136317525, "learning_rate": 1.8635966245104663e-07, "loss": 0.8412, "step": 11410 }, { "epoch": 0.92, "grad_norm": 1.6043094717052977, "learning_rate": 1.8600841457888264e-07, "loss": 0.8123, "step": 11411 }, { "epoch": 0.92, "grad_norm": 1.4873829583076292, "learning_rate": 1.8565749175843916e-07, "loss": 0.7361, "step": 11412 }, { "epoch": 0.92, "grad_norm": 1.6174578101058774, "learning_rate": 1.8530689401341006e-07, "loss": 0.862, "step": 11413 }, { "epoch": 0.92, "grad_norm": 1.5642273546638945, "learning_rate": 1.8495662136746916e-07, "loss": 0.7116, "step": 11414 }, { "epoch": 0.92, "grad_norm": 1.5601130418088414, "learning_rate": 1.84606673844267e-07, "loss": 0.8246, "step": 11415 }, { "epoch": 0.92, "grad_norm": 0.7310380754671386, "learning_rate": 1.8425705146743355e-07, "loss": 1.0731, "step": 11416 }, { "epoch": 0.92, "grad_norm": 1.577411879267811, "learning_rate": 1.8390775426057604e-07, "loss": 0.7092, "step": 11417 }, { "epoch": 0.92, "grad_norm": 1.5208825002750896, "learning_rate": 1.8355878224728008e-07, "loss": 0.7129, "step": 11418 }, { "epoch": 0.92, "grad_norm": 1.5348885302444468, "learning_rate": 1.8321013545110788e-07, "loss": 0.7849, "step": 11419 }, { "epoch": 0.92, "grad_norm": 0.7471209462423316, "learning_rate": 1.8286181389560176e-07, "loss": 1.0848, "step": 11420 }, { "epoch": 0.92, "grad_norm": 0.7289920642512187, "learning_rate": 1.825138176042812e-07, "loss": 1.0694, "step": 11421 }, { "epoch": 0.92, "grad_norm": 1.5342736798233756, "learning_rate": 1.8216614660064246e-07, "loss": 0.783, "step": 11422 }, { "epoch": 0.92, "grad_norm": 0.7287690144027845, "learning_rate": 1.818188009081634e-07, "loss": 1.0852, "step": 11423 }, { "epoch": 0.92, "grad_norm": 0.7280327765079978, "learning_rate": 1.814717805502958e-07, "loss": 1.0823, "step": 11424 }, { "epoch": 0.92, "grad_norm": 1.5351236312759993, "learning_rate": 1.8112508555047149e-07, "loss": 0.6963, "step": 11425 }, { "epoch": 0.92, "grad_norm": 1.5109366756364304, "learning_rate": 1.8077871593210116e-07, "loss": 0.7441, "step": 11426 }, { "epoch": 0.92, "grad_norm": 1.5252065508654027, "learning_rate": 1.804326717185717e-07, "loss": 0.7719, "step": 11427 }, { "epoch": 0.92, "grad_norm": 1.535205607011373, "learning_rate": 1.800869529332483e-07, "loss": 0.666, "step": 11428 }, { "epoch": 0.92, "grad_norm": 1.4664660078046563, "learning_rate": 1.7974155959947614e-07, "loss": 0.6822, "step": 11429 }, { "epoch": 0.92, "grad_norm": 1.5804398644730142, "learning_rate": 1.793964917405755e-07, "loss": 0.8742, "step": 11430 }, { "epoch": 0.92, "grad_norm": 1.7745488719338536, "learning_rate": 1.790517493798466e-07, "loss": 0.7553, "step": 11431 }, { "epoch": 0.92, "grad_norm": 1.5029140423058138, "learning_rate": 1.7870733254056692e-07, "loss": 0.7652, "step": 11432 }, { "epoch": 0.92, "grad_norm": 1.727022547733256, "learning_rate": 1.7836324124599348e-07, "loss": 0.8645, "step": 11433 }, { "epoch": 0.92, "grad_norm": 1.5004434146490058, "learning_rate": 1.780194755193593e-07, "loss": 0.7657, "step": 11434 }, { "epoch": 0.92, "grad_norm": 1.419702684106565, "learning_rate": 1.7767603538387523e-07, "loss": 0.7247, "step": 11435 }, { "epoch": 0.92, "grad_norm": 1.381237635860276, "learning_rate": 1.7733292086273336e-07, "loss": 0.7743, "step": 11436 }, { "epoch": 0.92, "grad_norm": 1.4854012378178016, "learning_rate": 1.7699013197909954e-07, "loss": 0.705, "step": 11437 }, { "epoch": 0.92, "grad_norm": 1.5441834987042333, "learning_rate": 1.7664766875612137e-07, "loss": 0.8339, "step": 11438 }, { "epoch": 0.92, "grad_norm": 1.533571948357021, "learning_rate": 1.7630553121692097e-07, "loss": 0.701, "step": 11439 }, { "epoch": 0.92, "grad_norm": 1.5596171382716337, "learning_rate": 1.75963719384602e-07, "loss": 0.7969, "step": 11440 }, { "epoch": 0.92, "grad_norm": 1.478467838908533, "learning_rate": 1.7562223328224327e-07, "loss": 0.6741, "step": 11441 }, { "epoch": 0.92, "grad_norm": 1.531512562772538, "learning_rate": 1.752810729329024e-07, "loss": 0.7, "step": 11442 }, { "epoch": 0.92, "grad_norm": 1.5911478878946343, "learning_rate": 1.7494023835961604e-07, "loss": 0.793, "step": 11443 }, { "epoch": 0.92, "grad_norm": 0.7508469808723031, "learning_rate": 1.745997295853985e-07, "loss": 1.0568, "step": 11444 }, { "epoch": 0.92, "grad_norm": 1.5729371124251879, "learning_rate": 1.7425954663324085e-07, "loss": 0.8133, "step": 11445 }, { "epoch": 0.92, "grad_norm": 1.4916463496518486, "learning_rate": 1.7391968952611304e-07, "loss": 0.7611, "step": 11446 }, { "epoch": 0.92, "grad_norm": 0.7475668508850616, "learning_rate": 1.735801582869634e-07, "loss": 1.0877, "step": 11447 }, { "epoch": 0.92, "grad_norm": 1.642997472506341, "learning_rate": 1.73240952938718e-07, "loss": 0.8914, "step": 11448 }, { "epoch": 0.92, "grad_norm": 1.520179586908612, "learning_rate": 1.7290207350428024e-07, "loss": 0.7442, "step": 11449 }, { "epoch": 0.92, "grad_norm": 1.4933845113980004, "learning_rate": 1.725635200065323e-07, "loss": 0.7784, "step": 11450 }, { "epoch": 0.92, "grad_norm": 1.5030162891800423, "learning_rate": 1.722252924683343e-07, "loss": 0.6874, "step": 11451 }, { "epoch": 0.92, "grad_norm": 1.504863120299521, "learning_rate": 1.7188739091252405e-07, "loss": 0.692, "step": 11452 }, { "epoch": 0.92, "grad_norm": 1.5257174758740994, "learning_rate": 1.7154981536191718e-07, "loss": 0.7823, "step": 11453 }, { "epoch": 0.92, "grad_norm": 1.4887136748046441, "learning_rate": 1.7121256583930824e-07, "loss": 0.7152, "step": 11454 }, { "epoch": 0.92, "grad_norm": 1.5918879692774812, "learning_rate": 1.708756423674679e-07, "loss": 0.6986, "step": 11455 }, { "epoch": 0.92, "grad_norm": 1.6225856745691278, "learning_rate": 1.7053904496914632e-07, "loss": 0.7048, "step": 11456 }, { "epoch": 0.92, "grad_norm": 1.4147256629482556, "learning_rate": 1.7020277366707193e-07, "loss": 0.7272, "step": 11457 }, { "epoch": 0.92, "grad_norm": 1.5811459998931663, "learning_rate": 1.6986682848395053e-07, "loss": 0.7987, "step": 11458 }, { "epoch": 0.92, "grad_norm": 1.48517917347595, "learning_rate": 1.6953120944246503e-07, "loss": 0.739, "step": 11459 }, { "epoch": 0.92, "grad_norm": 1.493051272487321, "learning_rate": 1.6919591656527846e-07, "loss": 0.7188, "step": 11460 }, { "epoch": 0.92, "grad_norm": 1.4409274952811308, "learning_rate": 1.6886094987502987e-07, "loss": 0.6987, "step": 11461 }, { "epoch": 0.92, "grad_norm": 1.5051182837032335, "learning_rate": 1.685263093943368e-07, "loss": 0.6591, "step": 11462 }, { "epoch": 0.92, "grad_norm": 1.5481860752983105, "learning_rate": 1.6819199514579553e-07, "loss": 0.7234, "step": 11463 }, { "epoch": 0.92, "grad_norm": 0.7621709945666707, "learning_rate": 1.678580071519792e-07, "loss": 1.0852, "step": 11464 }, { "epoch": 0.92, "grad_norm": 1.4285512874484294, "learning_rate": 1.6752434543543917e-07, "loss": 0.741, "step": 11465 }, { "epoch": 0.92, "grad_norm": 0.7571139393318409, "learning_rate": 1.671910100187052e-07, "loss": 1.0345, "step": 11466 }, { "epoch": 0.92, "grad_norm": 1.5117682035717561, "learning_rate": 1.6685800092428595e-07, "loss": 0.7861, "step": 11467 }, { "epoch": 0.92, "grad_norm": 0.7401157623599486, "learning_rate": 1.6652531817466566e-07, "loss": 1.0679, "step": 11468 }, { "epoch": 0.92, "grad_norm": 1.5346386629059132, "learning_rate": 1.6619296179230859e-07, "loss": 0.7865, "step": 11469 }, { "epoch": 0.92, "grad_norm": 1.6011090696668449, "learning_rate": 1.6586093179965513e-07, "loss": 0.7928, "step": 11470 }, { "epoch": 0.92, "grad_norm": 0.7512501314124169, "learning_rate": 1.655292282191262e-07, "loss": 1.0424, "step": 11471 }, { "epoch": 0.92, "grad_norm": 1.4579883700896463, "learning_rate": 1.651978510731189e-07, "loss": 0.7104, "step": 11472 }, { "epoch": 0.92, "grad_norm": 1.4351079828639386, "learning_rate": 1.648668003840076e-07, "loss": 0.6896, "step": 11473 }, { "epoch": 0.92, "grad_norm": 2.1237431096594652, "learning_rate": 1.6453607617414603e-07, "loss": 0.7255, "step": 11474 }, { "epoch": 0.92, "grad_norm": 1.5396295854504067, "learning_rate": 1.6420567846586577e-07, "loss": 0.734, "step": 11475 }, { "epoch": 0.92, "grad_norm": 1.3840271584890722, "learning_rate": 1.6387560728147512e-07, "loss": 0.8165, "step": 11476 }, { "epoch": 0.92, "grad_norm": 1.4694661420840485, "learning_rate": 1.635458626432629e-07, "loss": 0.7257, "step": 11477 }, { "epoch": 0.92, "grad_norm": 1.6232443236922922, "learning_rate": 1.6321644457349294e-07, "loss": 0.7529, "step": 11478 }, { "epoch": 0.92, "grad_norm": 1.620190345586592, "learning_rate": 1.6288735309440863e-07, "loss": 0.8184, "step": 11479 }, { "epoch": 0.92, "grad_norm": 1.4371915734488587, "learning_rate": 1.6255858822823044e-07, "loss": 0.6873, "step": 11480 }, { "epoch": 0.92, "grad_norm": 1.6035948310596557, "learning_rate": 1.6223014999715847e-07, "loss": 0.7394, "step": 11481 }, { "epoch": 0.92, "grad_norm": 1.5069007876758964, "learning_rate": 1.6190203842336882e-07, "loss": 0.7166, "step": 11482 }, { "epoch": 0.92, "grad_norm": 1.5006330195248903, "learning_rate": 1.6157425352901602e-07, "loss": 0.6951, "step": 11483 }, { "epoch": 0.92, "grad_norm": 1.4940483553666957, "learning_rate": 1.6124679533623456e-07, "loss": 0.6348, "step": 11484 }, { "epoch": 0.92, "grad_norm": 1.5270409123424928, "learning_rate": 1.6091966386713342e-07, "loss": 0.7084, "step": 11485 }, { "epoch": 0.92, "grad_norm": 1.5582611788860636, "learning_rate": 1.6059285914380164e-07, "loss": 0.754, "step": 11486 }, { "epoch": 0.92, "grad_norm": 0.7752840448714791, "learning_rate": 1.602663811883054e-07, "loss": 1.0279, "step": 11487 }, { "epoch": 0.92, "grad_norm": 1.3511309722943619, "learning_rate": 1.5994023002269043e-07, "loss": 0.7303, "step": 11488 }, { "epoch": 0.92, "grad_norm": 1.5511290017045107, "learning_rate": 1.5961440566897913e-07, "loss": 0.7698, "step": 11489 }, { "epoch": 0.92, "grad_norm": 1.5806244139689942, "learning_rate": 1.5928890814916997e-07, "loss": 0.7871, "step": 11490 }, { "epoch": 0.92, "grad_norm": 1.4312077983675053, "learning_rate": 1.5896373748524375e-07, "loss": 0.7092, "step": 11491 }, { "epoch": 0.92, "grad_norm": 1.4761960861286887, "learning_rate": 1.5863889369915564e-07, "loss": 0.7281, "step": 11492 }, { "epoch": 0.92, "grad_norm": 1.5174314626407077, "learning_rate": 1.5831437681283924e-07, "loss": 0.7342, "step": 11493 }, { "epoch": 0.92, "grad_norm": 1.570956077400536, "learning_rate": 1.5799018684820756e-07, "loss": 0.7139, "step": 11494 }, { "epoch": 0.92, "grad_norm": 1.4660625720569322, "learning_rate": 1.5766632382715084e-07, "loss": 0.6747, "step": 11495 }, { "epoch": 0.92, "grad_norm": 1.551357353272507, "learning_rate": 1.573427877715361e-07, "loss": 0.7003, "step": 11496 }, { "epoch": 0.92, "grad_norm": 1.6342115826928392, "learning_rate": 1.5701957870321026e-07, "loss": 0.7643, "step": 11497 }, { "epoch": 0.92, "grad_norm": 1.5214767706236938, "learning_rate": 1.5669669664399645e-07, "loss": 0.7309, "step": 11498 }, { "epoch": 0.92, "grad_norm": 1.4477134552366848, "learning_rate": 1.5637414161569663e-07, "loss": 0.6941, "step": 11499 }, { "epoch": 0.92, "grad_norm": 1.4450729044845816, "learning_rate": 1.5605191364008954e-07, "loss": 0.7229, "step": 11500 }, { "epoch": 0.92, "grad_norm": 1.5317795901649498, "learning_rate": 1.557300127389344e-07, "loss": 0.6943, "step": 11501 }, { "epoch": 0.92, "grad_norm": 1.4936487279546737, "learning_rate": 1.554084389339655e-07, "loss": 0.76, "step": 11502 }, { "epoch": 0.92, "grad_norm": 1.7766714721370767, "learning_rate": 1.5508719224689716e-07, "loss": 0.7396, "step": 11503 }, { "epoch": 0.92, "grad_norm": 1.470656583729276, "learning_rate": 1.5476627269941925e-07, "loss": 0.8036, "step": 11504 }, { "epoch": 0.92, "grad_norm": 1.5772042853455661, "learning_rate": 1.5444568031320272e-07, "loss": 0.7142, "step": 11505 }, { "epoch": 0.92, "grad_norm": 1.8829054092821704, "learning_rate": 1.5412541510989364e-07, "loss": 0.7398, "step": 11506 }, { "epoch": 0.92, "grad_norm": 1.5853339349726727, "learning_rate": 1.538054771111175e-07, "loss": 0.7857, "step": 11507 }, { "epoch": 0.92, "grad_norm": 1.5359688014008768, "learning_rate": 1.5348586633847695e-07, "loss": 0.7898, "step": 11508 }, { "epoch": 0.92, "grad_norm": 1.4844990476034075, "learning_rate": 1.5316658281355313e-07, "loss": 0.74, "step": 11509 }, { "epoch": 0.92, "grad_norm": 1.5395766919084806, "learning_rate": 1.5284762655790374e-07, "loss": 0.8287, "step": 11510 }, { "epoch": 0.92, "grad_norm": 1.6214939415226979, "learning_rate": 1.5252899759306716e-07, "loss": 0.7061, "step": 11511 }, { "epoch": 0.92, "grad_norm": 0.7619562953047476, "learning_rate": 1.522106959405567e-07, "loss": 1.0944, "step": 11512 }, { "epoch": 0.92, "grad_norm": 1.5728129142286058, "learning_rate": 1.518927216218652e-07, "loss": 0.7533, "step": 11513 }, { "epoch": 0.92, "grad_norm": 1.502451237987401, "learning_rate": 1.5157507465846267e-07, "loss": 0.6511, "step": 11514 }, { "epoch": 0.92, "grad_norm": 1.488325766313183, "learning_rate": 1.5125775507179806e-07, "loss": 0.7312, "step": 11515 }, { "epoch": 0.92, "grad_norm": 1.7712915684782717, "learning_rate": 1.5094076288329762e-07, "loss": 0.729, "step": 11516 }, { "epoch": 0.92, "grad_norm": 1.4699292089255036, "learning_rate": 1.5062409811436474e-07, "loss": 0.7306, "step": 11517 }, { "epoch": 0.92, "grad_norm": 1.4309383359486156, "learning_rate": 1.503077607863812e-07, "loss": 0.7297, "step": 11518 }, { "epoch": 0.92, "grad_norm": 1.423943505346532, "learning_rate": 1.4999175092070716e-07, "loss": 0.763, "step": 11519 }, { "epoch": 0.92, "grad_norm": 0.7360472015572849, "learning_rate": 1.4967606853868056e-07, "loss": 1.0295, "step": 11520 }, { "epoch": 0.92, "grad_norm": 1.5082824794770961, "learning_rate": 1.4936071366161598e-07, "loss": 0.7838, "step": 11521 }, { "epoch": 0.92, "grad_norm": 0.7686269211682747, "learning_rate": 1.4904568631080807e-07, "loss": 1.0536, "step": 11522 }, { "epoch": 0.92, "grad_norm": 1.5122478384894131, "learning_rate": 1.4873098650752815e-07, "loss": 0.7676, "step": 11523 }, { "epoch": 0.92, "grad_norm": 0.7489220062439181, "learning_rate": 1.484166142730248e-07, "loss": 1.0754, "step": 11524 }, { "epoch": 0.92, "grad_norm": 1.88312768089819, "learning_rate": 1.4810256962852543e-07, "loss": 0.7102, "step": 11525 }, { "epoch": 0.92, "grad_norm": 1.6164030281615904, "learning_rate": 1.4778885259523535e-07, "loss": 0.7318, "step": 11526 }, { "epoch": 0.92, "grad_norm": 1.4930876423843507, "learning_rate": 1.4747546319433702e-07, "loss": 0.7382, "step": 11527 }, { "epoch": 0.92, "grad_norm": 1.5738756583285554, "learning_rate": 1.471624014469919e-07, "loss": 0.6884, "step": 11528 }, { "epoch": 0.92, "grad_norm": 1.5184991791901412, "learning_rate": 1.4684966737433748e-07, "loss": 0.7916, "step": 11529 }, { "epoch": 0.93, "grad_norm": 1.6068277119774252, "learning_rate": 1.4653726099749133e-07, "loss": 0.861, "step": 11530 }, { "epoch": 0.93, "grad_norm": 1.612661183723356, "learning_rate": 1.4622518233754713e-07, "loss": 0.7134, "step": 11531 }, { "epoch": 0.93, "grad_norm": 1.4472523931138574, "learning_rate": 1.4591343141557746e-07, "loss": 0.7751, "step": 11532 }, { "epoch": 0.93, "grad_norm": 1.5380144683059969, "learning_rate": 1.456020082526327e-07, "loss": 0.8304, "step": 11533 }, { "epoch": 0.93, "grad_norm": 1.5151271295596551, "learning_rate": 1.4529091286973994e-07, "loss": 0.7453, "step": 11534 }, { "epoch": 0.93, "grad_norm": 0.7450856670748359, "learning_rate": 1.4498014528790628e-07, "loss": 1.0703, "step": 11535 }, { "epoch": 0.93, "grad_norm": 1.460660110049066, "learning_rate": 1.4466970552811488e-07, "loss": 0.7464, "step": 11536 }, { "epoch": 0.93, "grad_norm": 0.7236001373892466, "learning_rate": 1.4435959361132735e-07, "loss": 1.0751, "step": 11537 }, { "epoch": 0.93, "grad_norm": 1.4578068009337162, "learning_rate": 1.4404980955848246e-07, "loss": 0.707, "step": 11538 }, { "epoch": 0.93, "grad_norm": 1.6139453857157324, "learning_rate": 1.4374035339049908e-07, "loss": 0.7078, "step": 11539 }, { "epoch": 0.93, "grad_norm": 1.5165889812499682, "learning_rate": 1.4343122512827102e-07, "loss": 0.7671, "step": 11540 }, { "epoch": 0.93, "grad_norm": 1.429989095557889, "learning_rate": 1.4312242479267213e-07, "loss": 0.7091, "step": 11541 }, { "epoch": 0.93, "grad_norm": 1.5183989451795163, "learning_rate": 1.4281395240455353e-07, "loss": 0.8018, "step": 11542 }, { "epoch": 0.93, "grad_norm": 1.4837379586155433, "learning_rate": 1.4250580798474301e-07, "loss": 0.688, "step": 11543 }, { "epoch": 0.93, "grad_norm": 1.4264785878017912, "learning_rate": 1.4219799155404778e-07, "loss": 0.755, "step": 11544 }, { "epoch": 0.93, "grad_norm": 1.7540764389059333, "learning_rate": 1.4189050313325126e-07, "loss": 0.7595, "step": 11545 }, { "epoch": 0.93, "grad_norm": 1.6029274298480944, "learning_rate": 1.4158334274311791e-07, "loss": 0.8616, "step": 11546 }, { "epoch": 0.93, "grad_norm": 0.7457878076437932, "learning_rate": 1.4127651040438618e-07, "loss": 1.0556, "step": 11547 }, { "epoch": 0.93, "grad_norm": 1.530962641062409, "learning_rate": 1.4097000613777445e-07, "loss": 0.775, "step": 11548 }, { "epoch": 0.93, "grad_norm": 0.7647828382705414, "learning_rate": 1.4066382996397898e-07, "loss": 1.0516, "step": 11549 }, { "epoch": 0.93, "grad_norm": 1.5597439567025544, "learning_rate": 1.4035798190367322e-07, "loss": 0.7381, "step": 11550 }, { "epoch": 0.93, "grad_norm": 1.4642254267128032, "learning_rate": 1.40052461977509e-07, "loss": 0.763, "step": 11551 }, { "epoch": 0.93, "grad_norm": 0.7313468943551944, "learning_rate": 1.3974727020611534e-07, "loss": 1.0644, "step": 11552 }, { "epoch": 0.93, "grad_norm": 0.7382761449142697, "learning_rate": 1.3944240661009968e-07, "loss": 1.0681, "step": 11553 }, { "epoch": 0.93, "grad_norm": 1.464398057092571, "learning_rate": 1.3913787121004717e-07, "loss": 0.7665, "step": 11554 }, { "epoch": 0.93, "grad_norm": 1.4887305471778307, "learning_rate": 1.3883366402652032e-07, "loss": 0.7714, "step": 11555 }, { "epoch": 0.93, "grad_norm": 1.5986057351770586, "learning_rate": 1.3852978508006044e-07, "loss": 0.7464, "step": 11556 }, { "epoch": 0.93, "grad_norm": 1.5698639474155487, "learning_rate": 1.3822623439118556e-07, "loss": 0.7659, "step": 11557 }, { "epoch": 0.93, "grad_norm": 1.501359788032596, "learning_rate": 1.379230119803926e-07, "loss": 0.7362, "step": 11558 }, { "epoch": 0.93, "grad_norm": 1.4830455050215656, "learning_rate": 1.3762011786815576e-07, "loss": 0.7035, "step": 11559 }, { "epoch": 0.93, "grad_norm": 1.5145768472936716, "learning_rate": 1.3731755207492703e-07, "loss": 0.6618, "step": 11560 }, { "epoch": 0.93, "grad_norm": 1.5104530912108303, "learning_rate": 1.370153146211367e-07, "loss": 0.7293, "step": 11561 }, { "epoch": 0.93, "grad_norm": 0.7482113044089997, "learning_rate": 1.367134055271918e-07, "loss": 1.0175, "step": 11562 }, { "epoch": 0.93, "grad_norm": 1.498538637472767, "learning_rate": 1.364118248134788e-07, "loss": 0.729, "step": 11563 }, { "epoch": 0.93, "grad_norm": 1.6119710991041687, "learning_rate": 1.361105725003603e-07, "loss": 0.7223, "step": 11564 }, { "epoch": 0.93, "grad_norm": 1.5062787926744414, "learning_rate": 1.358096486081778e-07, "loss": 0.6824, "step": 11565 }, { "epoch": 0.93, "grad_norm": 1.5693797918692383, "learning_rate": 1.3550905315725061e-07, "loss": 0.6722, "step": 11566 }, { "epoch": 0.93, "grad_norm": 0.735448322485531, "learning_rate": 1.3520878616787525e-07, "loss": 1.0911, "step": 11567 }, { "epoch": 0.93, "grad_norm": 1.4064885760342318, "learning_rate": 1.349088476603272e-07, "loss": 0.6831, "step": 11568 }, { "epoch": 0.93, "grad_norm": 1.475807224312743, "learning_rate": 1.3460923765485745e-07, "loss": 0.7511, "step": 11569 }, { "epoch": 0.93, "grad_norm": 1.4950323557007452, "learning_rate": 1.3430995617169817e-07, "loss": 0.6697, "step": 11570 }, { "epoch": 0.93, "grad_norm": 0.7628161873871482, "learning_rate": 1.340110032310571e-07, "loss": 1.0825, "step": 11571 }, { "epoch": 0.93, "grad_norm": 1.4528555469083737, "learning_rate": 1.3371237885311916e-07, "loss": 0.684, "step": 11572 }, { "epoch": 0.93, "grad_norm": 1.703514470748917, "learning_rate": 1.3341408305804938e-07, "loss": 0.7711, "step": 11573 }, { "epoch": 0.93, "grad_norm": 0.762305753271549, "learning_rate": 1.3311611586598828e-07, "loss": 1.048, "step": 11574 }, { "epoch": 0.93, "grad_norm": 1.6370509409793772, "learning_rate": 1.3281847729705589e-07, "loss": 0.816, "step": 11575 }, { "epoch": 0.93, "grad_norm": 1.6320169244200504, "learning_rate": 1.3252116737134946e-07, "loss": 0.7536, "step": 11576 }, { "epoch": 0.93, "grad_norm": 1.5704851102391233, "learning_rate": 1.3222418610894405e-07, "loss": 0.7761, "step": 11577 }, { "epoch": 0.93, "grad_norm": 1.4843012711189025, "learning_rate": 1.3192753352989252e-07, "loss": 0.7631, "step": 11578 }, { "epoch": 0.93, "grad_norm": 1.766732575021258, "learning_rate": 1.3163120965422494e-07, "loss": 0.6944, "step": 11579 }, { "epoch": 0.93, "grad_norm": 1.5028483984168686, "learning_rate": 1.3133521450195086e-07, "loss": 0.7177, "step": 11580 }, { "epoch": 0.93, "grad_norm": 0.7506150602632006, "learning_rate": 1.3103954809305596e-07, "loss": 1.0868, "step": 11581 }, { "epoch": 0.93, "grad_norm": 1.453024776622909, "learning_rate": 1.3074421044750375e-07, "loss": 0.6995, "step": 11582 }, { "epoch": 0.93, "grad_norm": 1.7279100129985856, "learning_rate": 1.3044920158523766e-07, "loss": 0.7605, "step": 11583 }, { "epoch": 0.93, "grad_norm": 1.5481841179736573, "learning_rate": 1.301545215261757e-07, "loss": 0.7632, "step": 11584 }, { "epoch": 0.93, "grad_norm": 0.7375625349562257, "learning_rate": 1.2986017029021692e-07, "loss": 1.0305, "step": 11585 }, { "epoch": 0.93, "grad_norm": 1.491996027660803, "learning_rate": 1.2956614789723433e-07, "loss": 0.7341, "step": 11586 }, { "epoch": 0.93, "grad_norm": 1.818206201928402, "learning_rate": 1.2927245436708314e-07, "loss": 0.7471, "step": 11587 }, { "epoch": 0.93, "grad_norm": 2.256319607334584, "learning_rate": 1.2897908971959307e-07, "loss": 0.8297, "step": 11588 }, { "epoch": 0.93, "grad_norm": 1.3751614442131928, "learning_rate": 1.2868605397457324e-07, "loss": 0.6566, "step": 11589 }, { "epoch": 0.93, "grad_norm": 1.5240284079646402, "learning_rate": 1.2839334715181005e-07, "loss": 0.8412, "step": 11590 }, { "epoch": 0.93, "grad_norm": 1.4800894079046318, "learning_rate": 1.2810096927106773e-07, "loss": 0.6998, "step": 11591 }, { "epoch": 0.93, "grad_norm": 1.5495902247994005, "learning_rate": 1.278089203520877e-07, "loss": 0.7096, "step": 11592 }, { "epoch": 0.93, "grad_norm": 1.4875233499089704, "learning_rate": 1.275172004145908e-07, "loss": 0.6832, "step": 11593 }, { "epoch": 0.93, "grad_norm": 1.3464019484190954, "learning_rate": 1.272258094782741e-07, "loss": 0.7559, "step": 11594 }, { "epoch": 0.93, "grad_norm": 0.7619689046036213, "learning_rate": 1.26934747562813e-07, "loss": 1.048, "step": 11595 }, { "epoch": 0.93, "grad_norm": 1.515918322230553, "learning_rate": 1.2664401468786114e-07, "loss": 0.7986, "step": 11596 }, { "epoch": 0.93, "grad_norm": 1.4228711042862285, "learning_rate": 1.2635361087304844e-07, "loss": 0.7471, "step": 11597 }, { "epoch": 0.93, "grad_norm": 1.5307554784079018, "learning_rate": 1.2606353613798418e-07, "loss": 0.6432, "step": 11598 }, { "epoch": 0.93, "grad_norm": 0.71454343275002, "learning_rate": 1.2577379050225491e-07, "loss": 1.0501, "step": 11599 }, { "epoch": 0.93, "grad_norm": 1.5409218078554658, "learning_rate": 1.2548437398542502e-07, "loss": 0.7137, "step": 11600 }, { "epoch": 0.93, "grad_norm": 1.678614326946253, "learning_rate": 1.2519528660703663e-07, "loss": 0.6968, "step": 11601 }, { "epoch": 0.93, "grad_norm": 2.0663517715771924, "learning_rate": 1.249065283866091e-07, "loss": 0.8285, "step": 11602 }, { "epoch": 0.93, "grad_norm": 0.7523775099071293, "learning_rate": 1.246180993436402e-07, "loss": 1.0747, "step": 11603 }, { "epoch": 0.93, "grad_norm": 0.7576258466765845, "learning_rate": 1.243299994976055e-07, "loss": 1.0566, "step": 11604 }, { "epoch": 0.93, "grad_norm": 1.8168605808587195, "learning_rate": 1.240422288679588e-07, "loss": 0.7224, "step": 11605 }, { "epoch": 0.93, "grad_norm": 1.5584679345055115, "learning_rate": 1.2375478747413017e-07, "loss": 0.706, "step": 11606 }, { "epoch": 0.93, "grad_norm": 1.4582320735744128, "learning_rate": 1.2346767533552795e-07, "loss": 0.7576, "step": 11607 }, { "epoch": 0.93, "grad_norm": 1.6141420662043873, "learning_rate": 1.231808924715394e-07, "loss": 0.7484, "step": 11608 }, { "epoch": 0.93, "grad_norm": 1.4935690414434502, "learning_rate": 1.2289443890152852e-07, "loss": 0.7857, "step": 11609 }, { "epoch": 0.93, "grad_norm": 1.5721604342563642, "learning_rate": 1.226083146448376e-07, "loss": 0.7694, "step": 11610 }, { "epoch": 0.93, "grad_norm": 0.7151994843761079, "learning_rate": 1.2232251972078558e-07, "loss": 1.0367, "step": 11611 }, { "epoch": 0.93, "grad_norm": 0.7651664023690908, "learning_rate": 1.2203705414867097e-07, "loss": 1.0586, "step": 11612 }, { "epoch": 0.93, "grad_norm": 1.5360545154623024, "learning_rate": 1.2175191794776775e-07, "loss": 0.7562, "step": 11613 }, { "epoch": 0.93, "grad_norm": 1.564692319699779, "learning_rate": 1.2146711113733055e-07, "loss": 0.7384, "step": 11614 }, { "epoch": 0.93, "grad_norm": 1.449899796210899, "learning_rate": 1.2118263373658956e-07, "loss": 0.7087, "step": 11615 }, { "epoch": 0.93, "grad_norm": 1.4829106428918795, "learning_rate": 1.2089848576475327e-07, "loss": 0.82, "step": 11616 }, { "epoch": 0.93, "grad_norm": 1.4995414997408316, "learning_rate": 1.20614667241008e-07, "loss": 0.7471, "step": 11617 }, { "epoch": 0.93, "grad_norm": 1.474125184970781, "learning_rate": 1.2033117818451734e-07, "loss": 0.7759, "step": 11618 }, { "epoch": 0.93, "grad_norm": 1.5862116446521561, "learning_rate": 1.2004801861442373e-07, "loss": 0.8041, "step": 11619 }, { "epoch": 0.93, "grad_norm": 1.5568671946989048, "learning_rate": 1.197651885498463e-07, "loss": 0.6689, "step": 11620 }, { "epoch": 0.93, "grad_norm": 1.6924081969267455, "learning_rate": 1.1948268800988317e-07, "loss": 0.75, "step": 11621 }, { "epoch": 0.93, "grad_norm": 0.7575881843739094, "learning_rate": 1.1920051701360902e-07, "loss": 1.0565, "step": 11622 }, { "epoch": 0.93, "grad_norm": 1.5289170285323785, "learning_rate": 1.1891867558007586e-07, "loss": 0.7796, "step": 11623 }, { "epoch": 0.93, "grad_norm": 1.5852809974989661, "learning_rate": 1.1863716372831513e-07, "loss": 0.7703, "step": 11624 }, { "epoch": 0.93, "grad_norm": 1.4779618454892063, "learning_rate": 1.1835598147733552e-07, "loss": 0.7801, "step": 11625 }, { "epoch": 0.93, "grad_norm": 1.5679329794102597, "learning_rate": 1.1807512884612238e-07, "loss": 0.7333, "step": 11626 }, { "epoch": 0.93, "grad_norm": 1.5041057881219142, "learning_rate": 1.1779460585363945e-07, "loss": 0.8837, "step": 11627 }, { "epoch": 0.93, "grad_norm": 1.6718821377989863, "learning_rate": 1.1751441251882878e-07, "loss": 0.737, "step": 11628 }, { "epoch": 0.93, "grad_norm": 1.5036640463753483, "learning_rate": 1.1723454886061026e-07, "loss": 0.7223, "step": 11629 }, { "epoch": 0.93, "grad_norm": 1.5509103320002373, "learning_rate": 1.1695501489787875e-07, "loss": 0.7911, "step": 11630 }, { "epoch": 0.93, "grad_norm": 1.5525405654021054, "learning_rate": 1.1667581064951139e-07, "loss": 0.7675, "step": 11631 }, { "epoch": 0.93, "grad_norm": 1.5766100412179762, "learning_rate": 1.1639693613435921e-07, "loss": 0.7956, "step": 11632 }, { "epoch": 0.93, "grad_norm": 0.7983363624813645, "learning_rate": 1.161183913712527e-07, "loss": 1.0588, "step": 11633 }, { "epoch": 0.93, "grad_norm": 1.5884298783633715, "learning_rate": 1.158401763790007e-07, "loss": 0.7725, "step": 11634 }, { "epoch": 0.93, "grad_norm": 0.7581949641237834, "learning_rate": 1.1556229117638818e-07, "loss": 1.0706, "step": 11635 }, { "epoch": 0.93, "grad_norm": 1.6017286776264739, "learning_rate": 1.1528473578217847e-07, "loss": 0.8001, "step": 11636 }, { "epoch": 0.93, "grad_norm": 1.626937196023273, "learning_rate": 1.1500751021511269e-07, "loss": 0.7518, "step": 11637 }, { "epoch": 0.93, "grad_norm": 1.562771297310525, "learning_rate": 1.1473061449391032e-07, "loss": 0.853, "step": 11638 }, { "epoch": 0.93, "grad_norm": 1.6042007113071641, "learning_rate": 1.1445404863726806e-07, "loss": 0.8139, "step": 11639 }, { "epoch": 0.93, "grad_norm": 0.7356215844187232, "learning_rate": 1.1417781266385986e-07, "loss": 1.0623, "step": 11640 }, { "epoch": 0.93, "grad_norm": 1.6939681652147394, "learning_rate": 1.1390190659233746e-07, "loss": 0.6997, "step": 11641 }, { "epoch": 0.93, "grad_norm": 0.7326529089180442, "learning_rate": 1.1362633044133153e-07, "loss": 1.0348, "step": 11642 }, { "epoch": 0.93, "grad_norm": 1.5102708604273685, "learning_rate": 1.1335108422944884e-07, "loss": 0.7754, "step": 11643 }, { "epoch": 0.93, "grad_norm": 1.6296167812020075, "learning_rate": 1.1307616797527454e-07, "loss": 0.817, "step": 11644 }, { "epoch": 0.93, "grad_norm": 1.5427396611259516, "learning_rate": 1.1280158169737265e-07, "loss": 0.783, "step": 11645 }, { "epoch": 0.93, "grad_norm": 1.4716251201785033, "learning_rate": 1.125273254142828e-07, "loss": 0.7673, "step": 11646 }, { "epoch": 0.93, "grad_norm": 1.4133705990483192, "learning_rate": 1.1225339914452349e-07, "loss": 0.6856, "step": 11647 }, { "epoch": 0.93, "grad_norm": 0.7767174657172488, "learning_rate": 1.1197980290659216e-07, "loss": 1.0576, "step": 11648 }, { "epoch": 0.93, "grad_norm": 0.7560863160825426, "learning_rate": 1.1170653671896125e-07, "loss": 1.0631, "step": 11649 }, { "epoch": 0.93, "grad_norm": 1.6036443781579655, "learning_rate": 1.1143360060008268e-07, "loss": 0.7592, "step": 11650 }, { "epoch": 0.93, "grad_norm": 1.5682676971547131, "learning_rate": 1.1116099456838558e-07, "loss": 0.7581, "step": 11651 }, { "epoch": 0.93, "grad_norm": 1.4463084254141356, "learning_rate": 1.1088871864227745e-07, "loss": 0.7311, "step": 11652 }, { "epoch": 0.93, "grad_norm": 1.5830524820119454, "learning_rate": 1.1061677284014305e-07, "loss": 0.7903, "step": 11653 }, { "epoch": 0.94, "grad_norm": 1.3679279255993526, "learning_rate": 1.1034515718034321e-07, "loss": 0.7699, "step": 11654 }, { "epoch": 0.94, "grad_norm": 1.4763963006796172, "learning_rate": 1.1007387168121997e-07, "loss": 0.7237, "step": 11655 }, { "epoch": 0.94, "grad_norm": 1.5955436492884354, "learning_rate": 1.0980291636109031e-07, "loss": 0.7934, "step": 11656 }, { "epoch": 0.94, "grad_norm": 1.647589204024626, "learning_rate": 1.095322912382496e-07, "loss": 0.7575, "step": 11657 }, { "epoch": 0.94, "grad_norm": 1.5130994375861957, "learning_rate": 1.0926199633097156e-07, "loss": 0.7561, "step": 11658 }, { "epoch": 0.94, "grad_norm": 1.4894364273882432, "learning_rate": 1.0899203165750716e-07, "loss": 0.7607, "step": 11659 }, { "epoch": 0.94, "grad_norm": 1.6005623521984795, "learning_rate": 1.0872239723608457e-07, "loss": 0.7551, "step": 11660 }, { "epoch": 0.94, "grad_norm": 1.7202470614442928, "learning_rate": 1.0845309308491036e-07, "loss": 0.8608, "step": 11661 }, { "epoch": 0.94, "grad_norm": 1.5052670296196762, "learning_rate": 1.0818411922216832e-07, "loss": 0.8036, "step": 11662 }, { "epoch": 0.94, "grad_norm": 1.596561042550473, "learning_rate": 1.0791547566602056e-07, "loss": 0.775, "step": 11663 }, { "epoch": 0.94, "grad_norm": 1.4948386271658631, "learning_rate": 1.0764716243460594e-07, "loss": 0.7221, "step": 11664 }, { "epoch": 0.94, "grad_norm": 1.4812744827755475, "learning_rate": 1.0737917954604216e-07, "loss": 0.8114, "step": 11665 }, { "epoch": 0.94, "grad_norm": 1.5219337879150776, "learning_rate": 1.071115270184242e-07, "loss": 0.6932, "step": 11666 }, { "epoch": 0.94, "grad_norm": 1.571391280105402, "learning_rate": 1.0684420486982316e-07, "loss": 0.7971, "step": 11667 }, { "epoch": 0.94, "grad_norm": 1.4406504200464427, "learning_rate": 1.0657721311829128e-07, "loss": 0.7413, "step": 11668 }, { "epoch": 0.94, "grad_norm": 1.5071843342454418, "learning_rate": 1.0631055178185523e-07, "loss": 0.722, "step": 11669 }, { "epoch": 0.94, "grad_norm": 1.4840383478285324, "learning_rate": 1.0604422087852063e-07, "loss": 0.7412, "step": 11670 }, { "epoch": 0.94, "grad_norm": 1.517206568814955, "learning_rate": 1.0577822042627084e-07, "loss": 0.6418, "step": 11671 }, { "epoch": 0.94, "grad_norm": 1.437331734191744, "learning_rate": 1.0551255044306707e-07, "loss": 0.7035, "step": 11672 }, { "epoch": 0.94, "grad_norm": 1.477440188740493, "learning_rate": 1.0524721094684831e-07, "loss": 0.7532, "step": 11673 }, { "epoch": 0.94, "grad_norm": 1.5720278056210752, "learning_rate": 1.0498220195552966e-07, "loss": 0.7326, "step": 11674 }, { "epoch": 0.94, "grad_norm": 0.7383120522287375, "learning_rate": 1.0471752348700625e-07, "loss": 1.0968, "step": 11675 }, { "epoch": 0.94, "grad_norm": 0.7283716498415547, "learning_rate": 1.0445317555914992e-07, "loss": 1.0843, "step": 11676 }, { "epoch": 0.94, "grad_norm": 0.754663957061084, "learning_rate": 1.0418915818980857e-07, "loss": 1.0235, "step": 11677 }, { "epoch": 0.94, "grad_norm": 1.4402222363095767, "learning_rate": 1.039254713968102e-07, "loss": 0.7515, "step": 11678 }, { "epoch": 0.94, "grad_norm": 1.5575922438225422, "learning_rate": 1.0366211519795999e-07, "loss": 0.7632, "step": 11679 }, { "epoch": 0.94, "grad_norm": 1.565138290457019, "learning_rate": 1.0339908961103984e-07, "loss": 0.7136, "step": 11680 }, { "epoch": 0.94, "grad_norm": 1.6135672842189217, "learning_rate": 1.0313639465380998e-07, "loss": 0.751, "step": 11681 }, { "epoch": 0.94, "grad_norm": 1.5675855260024139, "learning_rate": 1.0287403034400789e-07, "loss": 0.8167, "step": 11682 }, { "epoch": 0.94, "grad_norm": 1.5635533743025227, "learning_rate": 1.0261199669934941e-07, "loss": 0.7103, "step": 11683 }, { "epoch": 0.94, "grad_norm": 1.5562356871555558, "learning_rate": 1.0235029373752758e-07, "loss": 0.7826, "step": 11684 }, { "epoch": 0.94, "grad_norm": 1.491671569523729, "learning_rate": 1.020889214762133e-07, "loss": 0.7948, "step": 11685 }, { "epoch": 0.94, "grad_norm": 1.6927158954051493, "learning_rate": 1.018278799330541e-07, "loss": 0.8548, "step": 11686 }, { "epoch": 0.94, "grad_norm": 1.4646273274852137, "learning_rate": 1.0156716912567755e-07, "loss": 0.7548, "step": 11687 }, { "epoch": 0.94, "grad_norm": 1.640934436190595, "learning_rate": 1.0130678907168568e-07, "loss": 0.8405, "step": 11688 }, { "epoch": 0.94, "grad_norm": 1.7008136356663095, "learning_rate": 1.0104673978866164e-07, "loss": 0.7203, "step": 11689 }, { "epoch": 0.94, "grad_norm": 1.5631924363582475, "learning_rate": 1.0078702129416362e-07, "loss": 0.7474, "step": 11690 }, { "epoch": 0.94, "grad_norm": 1.4361952670313323, "learning_rate": 1.005276336057287e-07, "loss": 0.8235, "step": 11691 }, { "epoch": 0.94, "grad_norm": 1.759478753749927, "learning_rate": 1.002685767408712e-07, "loss": 0.7807, "step": 11692 }, { "epoch": 0.94, "grad_norm": 1.4679536496463905, "learning_rate": 1.000098507170838e-07, "loss": 0.8096, "step": 11693 }, { "epoch": 0.94, "grad_norm": 1.4268991337632444, "learning_rate": 9.97514555518353e-08, "loss": 0.7725, "step": 11694 }, { "epoch": 0.94, "grad_norm": 1.6825621810625369, "learning_rate": 9.949339126257396e-08, "loss": 0.7562, "step": 11695 }, { "epoch": 0.94, "grad_norm": 0.7696151699666551, "learning_rate": 9.923565786672418e-08, "loss": 1.084, "step": 11696 }, { "epoch": 0.94, "grad_norm": 1.65942114914688, "learning_rate": 9.897825538168926e-08, "loss": 0.79, "step": 11697 }, { "epoch": 0.94, "grad_norm": 1.4115584379576358, "learning_rate": 9.872118382484918e-08, "loss": 0.7058, "step": 11698 }, { "epoch": 0.94, "grad_norm": 1.6377153500107164, "learning_rate": 9.84644432135623e-08, "loss": 0.7047, "step": 11699 }, { "epoch": 0.94, "grad_norm": 0.7526202265906664, "learning_rate": 9.820803356516472e-08, "loss": 1.0592, "step": 11700 }, { "epoch": 0.94, "grad_norm": 1.4973918869544123, "learning_rate": 9.795195489696874e-08, "loss": 0.7236, "step": 11701 }, { "epoch": 0.94, "grad_norm": 1.4071118366236521, "learning_rate": 9.769620722626605e-08, "loss": 0.6988, "step": 11702 }, { "epoch": 0.94, "grad_norm": 1.5142132814618308, "learning_rate": 9.744079057032563e-08, "loss": 0.7094, "step": 11703 }, { "epoch": 0.94, "grad_norm": 1.457555360873698, "learning_rate": 9.718570494639312e-08, "loss": 0.7352, "step": 11704 }, { "epoch": 0.94, "grad_norm": 0.7536889764557025, "learning_rate": 9.693095037169254e-08, "loss": 1.0516, "step": 11705 }, { "epoch": 0.94, "grad_norm": 1.5300935630667072, "learning_rate": 9.667652686342621e-08, "loss": 0.7693, "step": 11706 }, { "epoch": 0.94, "grad_norm": 1.5335702734497474, "learning_rate": 9.642243443877264e-08, "loss": 0.7715, "step": 11707 }, { "epoch": 0.94, "grad_norm": 0.7402598132782229, "learning_rate": 9.61686731148892e-08, "loss": 1.0319, "step": 11708 }, { "epoch": 0.94, "grad_norm": 1.44513957444645, "learning_rate": 9.591524290890996e-08, "loss": 0.7283, "step": 11709 }, { "epoch": 0.94, "grad_norm": 0.7497276782012292, "learning_rate": 9.566214383794736e-08, "loss": 1.0732, "step": 11710 }, { "epoch": 0.94, "grad_norm": 1.6702294475564803, "learning_rate": 9.54093759190916e-08, "loss": 0.8138, "step": 11711 }, { "epoch": 0.94, "grad_norm": 1.4267981502015816, "learning_rate": 9.515693916940961e-08, "loss": 0.7001, "step": 11712 }, { "epoch": 0.94, "grad_norm": 1.7620638907709514, "learning_rate": 9.49048336059466e-08, "loss": 0.8312, "step": 11713 }, { "epoch": 0.94, "grad_norm": 1.5493834322913107, "learning_rate": 9.465305924572566e-08, "loss": 0.7315, "step": 11714 }, { "epoch": 0.94, "grad_norm": 1.421446190792139, "learning_rate": 9.44016161057465e-08, "loss": 0.6651, "step": 11715 }, { "epoch": 0.94, "grad_norm": 1.5454016833831585, "learning_rate": 9.415050420298777e-08, "loss": 0.7884, "step": 11716 }, { "epoch": 0.94, "grad_norm": 1.6827834355275006, "learning_rate": 9.389972355440535e-08, "loss": 0.7933, "step": 11717 }, { "epoch": 0.94, "grad_norm": 1.5605163666960093, "learning_rate": 9.364927417693181e-08, "loss": 0.7742, "step": 11718 }, { "epoch": 0.94, "grad_norm": 0.7468549194798393, "learning_rate": 9.339915608747807e-08, "loss": 1.0954, "step": 11719 }, { "epoch": 0.94, "grad_norm": 1.5365899628746467, "learning_rate": 9.314936930293283e-08, "loss": 0.8506, "step": 11720 }, { "epoch": 0.94, "grad_norm": 1.5644233425427656, "learning_rate": 9.289991384016262e-08, "loss": 0.7252, "step": 11721 }, { "epoch": 0.94, "grad_norm": 1.6111768938064022, "learning_rate": 9.26507897160106e-08, "loss": 0.8175, "step": 11722 }, { "epoch": 0.94, "grad_norm": 1.4939074605879605, "learning_rate": 9.240199694729946e-08, "loss": 0.758, "step": 11723 }, { "epoch": 0.94, "grad_norm": 1.5073270860292758, "learning_rate": 9.215353555082685e-08, "loss": 0.7041, "step": 11724 }, { "epoch": 0.94, "grad_norm": 1.5646367091803335, "learning_rate": 9.19054055433699e-08, "loss": 0.7921, "step": 11725 }, { "epoch": 0.94, "grad_norm": 0.7594709582291391, "learning_rate": 9.165760694168302e-08, "loss": 1.0629, "step": 11726 }, { "epoch": 0.94, "grad_norm": 1.5213469211719661, "learning_rate": 9.141013976249835e-08, "loss": 0.7832, "step": 11727 }, { "epoch": 0.94, "grad_norm": 1.5029727825529715, "learning_rate": 9.116300402252476e-08, "loss": 0.7058, "step": 11728 }, { "epoch": 0.94, "grad_norm": 1.459059259712718, "learning_rate": 9.091619973845056e-08, "loss": 0.8228, "step": 11729 }, { "epoch": 0.94, "grad_norm": 1.5044771140903666, "learning_rate": 9.066972692693966e-08, "loss": 0.7632, "step": 11730 }, { "epoch": 0.94, "grad_norm": 1.354584204528621, "learning_rate": 9.042358560463427e-08, "loss": 0.6751, "step": 11731 }, { "epoch": 0.94, "grad_norm": 1.4213141956802156, "learning_rate": 9.0177775788155e-08, "loss": 0.7574, "step": 11732 }, { "epoch": 0.94, "grad_norm": 1.4762576893190678, "learning_rate": 8.993229749409915e-08, "loss": 0.7532, "step": 11733 }, { "epoch": 0.94, "grad_norm": 1.5659915854633282, "learning_rate": 8.968715073904232e-08, "loss": 0.7847, "step": 11734 }, { "epoch": 0.94, "grad_norm": 1.521450188874236, "learning_rate": 8.944233553953741e-08, "loss": 0.7387, "step": 11735 }, { "epoch": 0.94, "grad_norm": 1.6296541462549299, "learning_rate": 8.919785191211395e-08, "loss": 0.753, "step": 11736 }, { "epoch": 0.94, "grad_norm": 1.405896126873497, "learning_rate": 8.89536998732815e-08, "loss": 0.7359, "step": 11737 }, { "epoch": 0.94, "grad_norm": 1.4416633891187987, "learning_rate": 8.87098794395247e-08, "loss": 0.6953, "step": 11738 }, { "epoch": 0.94, "grad_norm": 1.4364479772065941, "learning_rate": 8.8466390627307e-08, "loss": 0.7225, "step": 11739 }, { "epoch": 0.94, "grad_norm": 1.542870236938211, "learning_rate": 8.822323345306971e-08, "loss": 0.77, "step": 11740 }, { "epoch": 0.94, "grad_norm": 1.405822696575828, "learning_rate": 8.798040793323137e-08, "loss": 0.694, "step": 11741 }, { "epoch": 0.94, "grad_norm": 1.4648410254790962, "learning_rate": 8.773791408418775e-08, "loss": 0.729, "step": 11742 }, { "epoch": 0.94, "grad_norm": 1.5016611829850137, "learning_rate": 8.74957519223124e-08, "loss": 0.8453, "step": 11743 }, { "epoch": 0.94, "grad_norm": 0.7536762483976661, "learning_rate": 8.725392146395728e-08, "loss": 1.0755, "step": 11744 }, { "epoch": 0.94, "grad_norm": 1.5275084427509664, "learning_rate": 8.701242272545151e-08, "loss": 0.7887, "step": 11745 }, { "epoch": 0.94, "grad_norm": 1.5887238417834932, "learning_rate": 8.677125572310041e-08, "loss": 0.7894, "step": 11746 }, { "epoch": 0.94, "grad_norm": 1.6900224027099913, "learning_rate": 8.653042047318927e-08, "loss": 0.7691, "step": 11747 }, { "epoch": 0.94, "grad_norm": 0.7309195717792222, "learning_rate": 8.628991699197953e-08, "loss": 1.039, "step": 11748 }, { "epoch": 0.94, "grad_norm": 1.5591826654440557, "learning_rate": 8.604974529571042e-08, "loss": 0.7408, "step": 11749 }, { "epoch": 0.94, "grad_norm": 1.4437597580533539, "learning_rate": 8.580990540059896e-08, "loss": 0.7517, "step": 11750 }, { "epoch": 0.94, "grad_norm": 1.5659668206839303, "learning_rate": 8.557039732283945e-08, "loss": 0.7728, "step": 11751 }, { "epoch": 0.94, "grad_norm": 1.637517525048418, "learning_rate": 8.533122107860448e-08, "loss": 0.8054, "step": 11752 }, { "epoch": 0.94, "grad_norm": 0.7650628139697091, "learning_rate": 8.50923766840428e-08, "loss": 1.0245, "step": 11753 }, { "epoch": 0.94, "grad_norm": 1.5802260661457388, "learning_rate": 8.485386415528318e-08, "loss": 0.7142, "step": 11754 }, { "epoch": 0.94, "grad_norm": 1.6172581696048949, "learning_rate": 8.461568350842997e-08, "loss": 0.6795, "step": 11755 }, { "epoch": 0.94, "grad_norm": 1.433323643050015, "learning_rate": 8.437783475956473e-08, "loss": 0.7556, "step": 11756 }, { "epoch": 0.94, "grad_norm": 1.4810190263797607, "learning_rate": 8.414031792474908e-08, "loss": 0.7069, "step": 11757 }, { "epoch": 0.94, "grad_norm": 0.7883920303387122, "learning_rate": 8.390313302001962e-08, "loss": 1.0397, "step": 11758 }, { "epoch": 0.94, "grad_norm": 0.7647436816102453, "learning_rate": 8.366628006139243e-08, "loss": 1.0707, "step": 11759 }, { "epoch": 0.94, "grad_norm": 1.3451805958147889, "learning_rate": 8.342975906485918e-08, "loss": 0.7395, "step": 11760 }, { "epoch": 0.94, "grad_norm": 0.7429381294556304, "learning_rate": 8.31935700463915e-08, "loss": 1.0591, "step": 11761 }, { "epoch": 0.94, "grad_norm": 0.7493206279580875, "learning_rate": 8.295771302193723e-08, "loss": 1.0598, "step": 11762 }, { "epoch": 0.94, "grad_norm": 1.5036452488215097, "learning_rate": 8.272218800742083e-08, "loss": 0.7285, "step": 11763 }, { "epoch": 0.94, "grad_norm": 1.5667500417700027, "learning_rate": 8.24869950187468e-08, "loss": 0.6764, "step": 11764 }, { "epoch": 0.94, "grad_norm": 1.5100358619090153, "learning_rate": 8.225213407179522e-08, "loss": 0.7183, "step": 11765 }, { "epoch": 0.94, "grad_norm": 1.4856117843247338, "learning_rate": 8.201760518242452e-08, "loss": 0.7752, "step": 11766 }, { "epoch": 0.94, "grad_norm": 0.763072008276388, "learning_rate": 8.178340836647147e-08, "loss": 1.0634, "step": 11767 }, { "epoch": 0.94, "grad_norm": 1.5330096045685622, "learning_rate": 8.154954363974843e-08, "loss": 0.7716, "step": 11768 }, { "epoch": 0.94, "grad_norm": 0.7297756673796976, "learning_rate": 8.13160110180472e-08, "loss": 1.0294, "step": 11769 }, { "epoch": 0.94, "grad_norm": 1.5234862438734786, "learning_rate": 8.108281051713519e-08, "loss": 0.8424, "step": 11770 }, { "epoch": 0.94, "grad_norm": 1.3784341346016933, "learning_rate": 8.084994215276031e-08, "loss": 0.6756, "step": 11771 }, { "epoch": 0.94, "grad_norm": 1.4456075443285483, "learning_rate": 8.061740594064615e-08, "loss": 0.7322, "step": 11772 }, { "epoch": 0.94, "grad_norm": 0.7633586193682407, "learning_rate": 8.03852018964929e-08, "loss": 1.0205, "step": 11773 }, { "epoch": 0.94, "grad_norm": 1.6930761463925912, "learning_rate": 8.015333003598025e-08, "loss": 0.81, "step": 11774 }, { "epoch": 0.94, "grad_norm": 1.4759540470342736, "learning_rate": 7.992179037476511e-08, "loss": 0.7099, "step": 11775 }, { "epoch": 0.94, "grad_norm": 1.488881180583393, "learning_rate": 7.969058292848108e-08, "loss": 0.6745, "step": 11776 }, { "epoch": 0.94, "grad_norm": 1.529987281848464, "learning_rate": 7.945970771273903e-08, "loss": 0.7346, "step": 11777 }, { "epoch": 0.94, "grad_norm": 0.7631228291735633, "learning_rate": 7.922916474312981e-08, "loss": 1.0691, "step": 11778 }, { "epoch": 0.95, "grad_norm": 0.7597206052154587, "learning_rate": 7.899895403521928e-08, "loss": 1.0515, "step": 11779 }, { "epoch": 0.95, "grad_norm": 1.48563065674476, "learning_rate": 7.876907560455116e-08, "loss": 0.7739, "step": 11780 }, { "epoch": 0.95, "grad_norm": 1.5482618933332608, "learning_rate": 7.853952946664911e-08, "loss": 0.7047, "step": 11781 }, { "epoch": 0.95, "grad_norm": 1.4722832610193968, "learning_rate": 7.831031563701131e-08, "loss": 0.819, "step": 11782 }, { "epoch": 0.95, "grad_norm": 1.471805380998007, "learning_rate": 7.808143413111535e-08, "loss": 0.774, "step": 11783 }, { "epoch": 0.95, "grad_norm": 1.5841009706294713, "learning_rate": 7.7852884964415e-08, "loss": 0.7523, "step": 11784 }, { "epoch": 0.95, "grad_norm": 0.7869525292017979, "learning_rate": 7.762466815234349e-08, "loss": 1.078, "step": 11785 }, { "epoch": 0.95, "grad_norm": 0.7448045129001342, "learning_rate": 7.739678371031013e-08, "loss": 1.0402, "step": 11786 }, { "epoch": 0.95, "grad_norm": 1.4946439987848343, "learning_rate": 7.716923165370204e-08, "loss": 0.6985, "step": 11787 }, { "epoch": 0.95, "grad_norm": 1.5327709730214734, "learning_rate": 7.69420119978842e-08, "loss": 0.794, "step": 11788 }, { "epoch": 0.95, "grad_norm": 1.4973269304359622, "learning_rate": 7.671512475819876e-08, "loss": 0.6748, "step": 11789 }, { "epoch": 0.95, "grad_norm": 1.6621814856259682, "learning_rate": 7.648856994996568e-08, "loss": 0.7956, "step": 11790 }, { "epoch": 0.95, "grad_norm": 1.6820457244043674, "learning_rate": 7.626234758848272e-08, "loss": 0.7497, "step": 11791 }, { "epoch": 0.95, "grad_norm": 1.6471086436213207, "learning_rate": 7.603645768902546e-08, "loss": 0.7746, "step": 11792 }, { "epoch": 0.95, "grad_norm": 1.670613391628085, "learning_rate": 7.581090026684556e-08, "loss": 0.7717, "step": 11793 }, { "epoch": 0.95, "grad_norm": 0.7322210122207348, "learning_rate": 7.558567533717365e-08, "loss": 1.1143, "step": 11794 }, { "epoch": 0.95, "grad_norm": 1.503369080556754, "learning_rate": 7.536078291521698e-08, "loss": 0.7096, "step": 11795 }, { "epoch": 0.95, "grad_norm": 0.7570594101176908, "learning_rate": 7.51362230161612e-08, "loss": 1.0491, "step": 11796 }, { "epoch": 0.95, "grad_norm": 1.4491774827244222, "learning_rate": 7.491199565516916e-08, "loss": 0.7693, "step": 11797 }, { "epoch": 0.95, "grad_norm": 1.4031622879402925, "learning_rate": 7.468810084738099e-08, "loss": 0.7359, "step": 11798 }, { "epoch": 0.95, "grad_norm": 1.4828331438888935, "learning_rate": 7.446453860791458e-08, "loss": 0.7295, "step": 11799 }, { "epoch": 0.95, "grad_norm": 0.7605032363006203, "learning_rate": 7.424130895186565e-08, "loss": 1.05, "step": 11800 }, { "epoch": 0.95, "grad_norm": 1.3871194432115348, "learning_rate": 7.401841189430659e-08, "loss": 0.7131, "step": 11801 }, { "epoch": 0.95, "grad_norm": 1.613260509150101, "learning_rate": 7.379584745028867e-08, "loss": 0.7826, "step": 11802 }, { "epoch": 0.95, "grad_norm": 1.9685827129665163, "learning_rate": 7.357361563483934e-08, "loss": 0.7757, "step": 11803 }, { "epoch": 0.95, "grad_norm": 1.660722804320975, "learning_rate": 7.335171646296436e-08, "loss": 0.6767, "step": 11804 }, { "epoch": 0.95, "grad_norm": 1.435894053211387, "learning_rate": 7.313014994964729e-08, "loss": 0.6453, "step": 11805 }, { "epoch": 0.95, "grad_norm": 1.778537895212343, "learning_rate": 7.29089161098484e-08, "loss": 0.7194, "step": 11806 }, { "epoch": 0.95, "grad_norm": 1.4889314270757301, "learning_rate": 7.268801495850575e-08, "loss": 0.6992, "step": 11807 }, { "epoch": 0.95, "grad_norm": 0.7263062675888676, "learning_rate": 7.246744651053573e-08, "loss": 1.0713, "step": 11808 }, { "epoch": 0.95, "grad_norm": 1.6231280791850418, "learning_rate": 7.224721078083086e-08, "loss": 0.7586, "step": 11809 }, { "epoch": 0.95, "grad_norm": 1.6767833076080592, "learning_rate": 7.202730778426204e-08, "loss": 0.842, "step": 11810 }, { "epoch": 0.95, "grad_norm": 1.5541955677301218, "learning_rate": 7.180773753567793e-08, "loss": 0.7208, "step": 11811 }, { "epoch": 0.95, "grad_norm": 0.7636402699805047, "learning_rate": 7.158850004990503e-08, "loss": 1.059, "step": 11812 }, { "epoch": 0.95, "grad_norm": 1.5619522565460409, "learning_rate": 7.136959534174592e-08, "loss": 0.6761, "step": 11813 }, { "epoch": 0.95, "grad_norm": 1.5351168819064709, "learning_rate": 7.115102342598101e-08, "loss": 0.7013, "step": 11814 }, { "epoch": 0.95, "grad_norm": 1.6007756214991877, "learning_rate": 7.093278431737016e-08, "loss": 0.7931, "step": 11815 }, { "epoch": 0.95, "grad_norm": 1.5443329083230197, "learning_rate": 7.07148780306488e-08, "loss": 0.7653, "step": 11816 }, { "epoch": 0.95, "grad_norm": 1.6489810493605999, "learning_rate": 7.049730458053017e-08, "loss": 0.8171, "step": 11817 }, { "epoch": 0.95, "grad_norm": 1.605251702096956, "learning_rate": 7.028006398170528e-08, "loss": 0.7482, "step": 11818 }, { "epoch": 0.95, "grad_norm": 0.7316970400325731, "learning_rate": 7.006315624884296e-08, "loss": 1.0807, "step": 11819 }, { "epoch": 0.95, "grad_norm": 1.6823894590673796, "learning_rate": 6.984658139658984e-08, "loss": 0.7445, "step": 11820 }, { "epoch": 0.95, "grad_norm": 1.6008308001096743, "learning_rate": 6.963033943956866e-08, "loss": 0.7123, "step": 11821 }, { "epoch": 0.95, "grad_norm": 1.4605018128225222, "learning_rate": 6.94144303923805e-08, "loss": 0.7786, "step": 11822 }, { "epoch": 0.95, "grad_norm": 1.5153186239937928, "learning_rate": 6.919885426960538e-08, "loss": 0.7537, "step": 11823 }, { "epoch": 0.95, "grad_norm": 0.7578658994123622, "learning_rate": 6.898361108579832e-08, "loss": 1.0276, "step": 11824 }, { "epoch": 0.95, "grad_norm": 1.6558304940929784, "learning_rate": 6.87687008554927e-08, "loss": 0.7044, "step": 11825 }, { "epoch": 0.95, "grad_norm": 1.4244957382143164, "learning_rate": 6.85541235932008e-08, "loss": 0.7569, "step": 11826 }, { "epoch": 0.95, "grad_norm": 1.571055713405727, "learning_rate": 6.833987931341046e-08, "loss": 0.7225, "step": 11827 }, { "epoch": 0.95, "grad_norm": 1.55827626802485, "learning_rate": 6.812596803058902e-08, "loss": 0.7051, "step": 11828 }, { "epoch": 0.95, "grad_norm": 1.5682387513698197, "learning_rate": 6.791238975917935e-08, "loss": 0.6608, "step": 11829 }, { "epoch": 0.95, "grad_norm": 0.7503477238160616, "learning_rate": 6.769914451360327e-08, "loss": 1.0621, "step": 11830 }, { "epoch": 0.95, "grad_norm": 1.4760381737277677, "learning_rate": 6.748623230825868e-08, "loss": 0.7645, "step": 11831 }, { "epoch": 0.95, "grad_norm": 0.738219894819387, "learning_rate": 6.727365315752299e-08, "loss": 1.0536, "step": 11832 }, { "epoch": 0.95, "grad_norm": 0.7562067209889934, "learning_rate": 6.706140707574971e-08, "loss": 1.078, "step": 11833 }, { "epoch": 0.95, "grad_norm": 1.5788411561381988, "learning_rate": 6.684949407727015e-08, "loss": 0.7997, "step": 11834 }, { "epoch": 0.95, "grad_norm": 1.4785188416419548, "learning_rate": 6.663791417639287e-08, "loss": 0.7535, "step": 11835 }, { "epoch": 0.95, "grad_norm": 1.5261526960380205, "learning_rate": 6.642666738740422e-08, "loss": 0.7497, "step": 11836 }, { "epoch": 0.95, "grad_norm": 1.6229806474519988, "learning_rate": 6.62157537245689e-08, "loss": 0.7638, "step": 11837 }, { "epoch": 0.95, "grad_norm": 1.4880118524546733, "learning_rate": 6.600517320212719e-08, "loss": 0.7491, "step": 11838 }, { "epoch": 0.95, "grad_norm": 0.7295032853882354, "learning_rate": 6.57949258342988e-08, "loss": 1.0625, "step": 11839 }, { "epoch": 0.95, "grad_norm": 1.5428007236647525, "learning_rate": 6.558501163527964e-08, "loss": 0.7688, "step": 11840 }, { "epoch": 0.95, "grad_norm": 0.7253240834836505, "learning_rate": 6.537543061924334e-08, "loss": 1.0715, "step": 11841 }, { "epoch": 0.95, "grad_norm": 1.5211454557679596, "learning_rate": 6.516618280034192e-08, "loss": 0.8785, "step": 11842 }, { "epoch": 0.95, "grad_norm": 1.6932149778271643, "learning_rate": 6.49572681927041e-08, "loss": 0.761, "step": 11843 }, { "epoch": 0.95, "grad_norm": 1.463600828220428, "learning_rate": 6.474868681043578e-08, "loss": 0.7455, "step": 11844 }, { "epoch": 0.95, "grad_norm": 1.429585529211455, "learning_rate": 6.454043866762128e-08, "loss": 0.6949, "step": 11845 }, { "epoch": 0.95, "grad_norm": 1.44658496207876, "learning_rate": 6.433252377832267e-08, "loss": 0.6835, "step": 11846 }, { "epoch": 0.95, "grad_norm": 1.5407830540850413, "learning_rate": 6.41249421565776e-08, "loss": 0.7493, "step": 11847 }, { "epoch": 0.95, "grad_norm": 0.7480594566474317, "learning_rate": 6.391769381640268e-08, "loss": 1.0832, "step": 11848 }, { "epoch": 0.95, "grad_norm": 1.4741967470020076, "learning_rate": 6.371077877179277e-08, "loss": 0.6312, "step": 11849 }, { "epoch": 0.95, "grad_norm": 1.4219572412207049, "learning_rate": 6.350419703671896e-08, "loss": 0.7435, "step": 11850 }, { "epoch": 0.95, "grad_norm": 1.6989914135557433, "learning_rate": 6.329794862512895e-08, "loss": 0.782, "step": 11851 }, { "epoch": 0.95, "grad_norm": 1.5478382487931646, "learning_rate": 6.309203355094995e-08, "loss": 0.7662, "step": 11852 }, { "epoch": 0.95, "grad_norm": 1.5865679651948137, "learning_rate": 6.288645182808583e-08, "loss": 0.7619, "step": 11853 }, { "epoch": 0.95, "grad_norm": 1.4610707411945616, "learning_rate": 6.268120347041829e-08, "loss": 0.6829, "step": 11854 }, { "epoch": 0.95, "grad_norm": 0.7724639742333462, "learning_rate": 6.24762884918051e-08, "loss": 1.0535, "step": 11855 }, { "epoch": 0.95, "grad_norm": 0.7353164079383536, "learning_rate": 6.227170690608353e-08, "loss": 1.066, "step": 11856 }, { "epoch": 0.95, "grad_norm": 1.4653647413841036, "learning_rate": 6.206745872706754e-08, "loss": 0.762, "step": 11857 }, { "epoch": 0.95, "grad_norm": 1.5894842309074142, "learning_rate": 6.186354396854776e-08, "loss": 0.8143, "step": 11858 }, { "epoch": 0.95, "grad_norm": 1.4327530580489907, "learning_rate": 6.165996264429264e-08, "loss": 0.7615, "step": 11859 }, { "epoch": 0.95, "grad_norm": 0.7592551944699043, "learning_rate": 6.145671476804948e-08, "loss": 1.0752, "step": 11860 }, { "epoch": 0.95, "grad_norm": 1.428390695972985, "learning_rate": 6.125380035354179e-08, "loss": 0.7861, "step": 11861 }, { "epoch": 0.95, "grad_norm": 1.5184525491224714, "learning_rate": 6.105121941447024e-08, "loss": 0.7518, "step": 11862 }, { "epoch": 0.95, "grad_norm": 0.7307610400806844, "learning_rate": 6.084897196451445e-08, "loss": 1.083, "step": 11863 }, { "epoch": 0.95, "grad_norm": 1.489314247619539, "learning_rate": 6.064705801732962e-08, "loss": 0.7702, "step": 11864 }, { "epoch": 0.95, "grad_norm": 1.5085468312501191, "learning_rate": 6.044547758654983e-08, "loss": 0.7568, "step": 11865 }, { "epoch": 0.95, "grad_norm": 0.7302487957318256, "learning_rate": 6.0244230685787e-08, "loss": 1.0335, "step": 11866 }, { "epoch": 0.95, "grad_norm": 1.4364162069238844, "learning_rate": 6.004331732862856e-08, "loss": 0.7216, "step": 11867 }, { "epoch": 0.95, "grad_norm": 1.5065270305792933, "learning_rate": 5.9842737528642e-08, "loss": 0.6913, "step": 11868 }, { "epoch": 0.95, "grad_norm": 0.7652646780345472, "learning_rate": 5.964249129936927e-08, "loss": 1.0619, "step": 11869 }, { "epoch": 0.95, "grad_norm": 1.4935031705324187, "learning_rate": 5.9442578654332895e-08, "loss": 0.754, "step": 11870 }, { "epoch": 0.95, "grad_norm": 1.5362796411753907, "learning_rate": 5.9242999607030396e-08, "loss": 0.8099, "step": 11871 }, { "epoch": 0.95, "grad_norm": 1.556900369970313, "learning_rate": 5.904375417093877e-08, "loss": 0.6864, "step": 11872 }, { "epoch": 0.95, "grad_norm": 1.4441505435825608, "learning_rate": 5.8844842359511155e-08, "loss": 0.7893, "step": 11873 }, { "epoch": 0.95, "grad_norm": 1.5689316608983594, "learning_rate": 5.8646264186177914e-08, "loss": 0.7376, "step": 11874 }, { "epoch": 0.95, "grad_norm": 1.6377251779978368, "learning_rate": 5.844801966434832e-08, "loss": 0.7669, "step": 11875 }, { "epoch": 0.95, "grad_norm": 1.4442666505215322, "learning_rate": 5.8250108807407777e-08, "loss": 0.7123, "step": 11876 }, { "epoch": 0.95, "grad_norm": 1.5905658606002402, "learning_rate": 5.805253162872004e-08, "loss": 0.7203, "step": 11877 }, { "epoch": 0.95, "grad_norm": 1.5143451941449155, "learning_rate": 5.785528814162555e-08, "loss": 0.7638, "step": 11878 }, { "epoch": 0.95, "grad_norm": 0.7165477432974746, "learning_rate": 5.7658378359443104e-08, "loss": 1.0629, "step": 11879 }, { "epoch": 0.95, "grad_norm": 1.5146432856326892, "learning_rate": 5.746180229546816e-08, "loss": 0.7553, "step": 11880 }, { "epoch": 0.95, "grad_norm": 1.6041348213634052, "learning_rate": 5.726555996297456e-08, "loss": 0.8156, "step": 11881 }, { "epoch": 0.95, "grad_norm": 1.42502399154685, "learning_rate": 5.7069651375212255e-08, "loss": 0.7099, "step": 11882 }, { "epoch": 0.95, "grad_norm": 1.6198292179986065, "learning_rate": 5.687407654540955e-08, "loss": 0.7805, "step": 11883 }, { "epoch": 0.95, "grad_norm": 1.5593460687913763, "learning_rate": 5.667883548677311e-08, "loss": 0.744, "step": 11884 }, { "epoch": 0.95, "grad_norm": 0.7458700879376744, "learning_rate": 5.648392821248461e-08, "loss": 1.0394, "step": 11885 }, { "epoch": 0.95, "grad_norm": 1.5247179507537945, "learning_rate": 5.628935473570518e-08, "loss": 0.6685, "step": 11886 }, { "epoch": 0.95, "grad_norm": 1.4687403438170146, "learning_rate": 5.6095115069573216e-08, "loss": 0.7159, "step": 11887 }, { "epoch": 0.95, "grad_norm": 1.4950160674669732, "learning_rate": 5.590120922720432e-08, "loss": 0.7446, "step": 11888 }, { "epoch": 0.95, "grad_norm": 1.4818913538848177, "learning_rate": 5.5707637221690815e-08, "loss": 0.8281, "step": 11889 }, { "epoch": 0.95, "grad_norm": 2.2057876418645366, "learning_rate": 5.551439906610334e-08, "loss": 0.6878, "step": 11890 }, { "epoch": 0.95, "grad_norm": 1.5490490633282796, "learning_rate": 5.532149477349036e-08, "loss": 0.6937, "step": 11891 }, { "epoch": 0.95, "grad_norm": 1.490267414347693, "learning_rate": 5.512892435687645e-08, "loss": 0.7975, "step": 11892 }, { "epoch": 0.95, "grad_norm": 1.6173505439428486, "learning_rate": 5.493668782926453e-08, "loss": 0.7398, "step": 11893 }, { "epoch": 0.95, "grad_norm": 0.7588969226169844, "learning_rate": 5.4744785203635355e-08, "loss": 1.0928, "step": 11894 }, { "epoch": 0.95, "grad_norm": 0.7583972150800963, "learning_rate": 5.4553216492946317e-08, "loss": 1.0491, "step": 11895 }, { "epoch": 0.95, "grad_norm": 1.5299857730422597, "learning_rate": 5.436198171013207e-08, "loss": 0.7267, "step": 11896 }, { "epoch": 0.95, "grad_norm": 1.6182229180564647, "learning_rate": 5.417108086810618e-08, "loss": 0.832, "step": 11897 }, { "epoch": 0.95, "grad_norm": 0.7658880206484194, "learning_rate": 5.3980513979758344e-08, "loss": 1.0478, "step": 11898 }, { "epoch": 0.95, "grad_norm": 1.4170249597618734, "learning_rate": 5.379028105795547e-08, "loss": 0.7398, "step": 11899 }, { "epoch": 0.95, "grad_norm": 1.493441539105011, "learning_rate": 5.360038211554286e-08, "loss": 0.7791, "step": 11900 }, { "epoch": 0.95, "grad_norm": 1.5595171473853402, "learning_rate": 5.3410817165343576e-08, "loss": 0.8202, "step": 11901 }, { "epoch": 0.95, "grad_norm": 0.7438996169881661, "learning_rate": 5.322158622015683e-08, "loss": 1.0532, "step": 11902 }, { "epoch": 0.95, "grad_norm": 1.5188510827706196, "learning_rate": 5.3032689292760176e-08, "loss": 0.8028, "step": 11903 }, { "epoch": 0.96, "grad_norm": 1.7411912264654605, "learning_rate": 5.284412639590786e-08, "loss": 0.7786, "step": 11904 }, { "epoch": 0.96, "grad_norm": 1.549813520505425, "learning_rate": 5.265589754233302e-08, "loss": 0.8436, "step": 11905 }, { "epoch": 0.96, "grad_norm": 1.4828792420680912, "learning_rate": 5.246800274474439e-08, "loss": 0.6895, "step": 11906 }, { "epoch": 0.96, "grad_norm": 0.7681539552428225, "learning_rate": 5.22804420158296e-08, "loss": 1.0521, "step": 11907 }, { "epoch": 0.96, "grad_norm": 1.5616601959942615, "learning_rate": 5.209321536825351e-08, "loss": 0.7376, "step": 11908 }, { "epoch": 0.96, "grad_norm": 1.4866465643175135, "learning_rate": 5.190632281465713e-08, "loss": 0.6735, "step": 11909 }, { "epoch": 0.96, "grad_norm": 1.5063916260380588, "learning_rate": 5.17197643676598e-08, "loss": 0.7231, "step": 11910 }, { "epoch": 0.96, "grad_norm": 1.656488460358884, "learning_rate": 5.153354003985922e-08, "loss": 0.848, "step": 11911 }, { "epoch": 0.96, "grad_norm": 0.7358928921710034, "learning_rate": 5.134764984382978e-08, "loss": 1.0635, "step": 11912 }, { "epoch": 0.96, "grad_norm": 1.562811942513881, "learning_rate": 5.116209379212256e-08, "loss": 0.7965, "step": 11913 }, { "epoch": 0.96, "grad_norm": 1.5415302469121195, "learning_rate": 5.0976871897266966e-08, "loss": 0.8139, "step": 11914 }, { "epoch": 0.96, "grad_norm": 1.5080493925235028, "learning_rate": 5.079198417176967e-08, "loss": 0.7226, "step": 11915 }, { "epoch": 0.96, "grad_norm": 1.5259115828397163, "learning_rate": 5.060743062811457e-08, "loss": 0.7683, "step": 11916 }, { "epoch": 0.96, "grad_norm": 0.741953001199755, "learning_rate": 5.042321127876337e-08, "loss": 1.044, "step": 11917 }, { "epoch": 0.96, "grad_norm": 1.5949082969816555, "learning_rate": 5.0239326136154454e-08, "loss": 0.7461, "step": 11918 }, { "epoch": 0.96, "grad_norm": 1.4631116214277187, "learning_rate": 5.0055775212705107e-08, "loss": 0.6662, "step": 11919 }, { "epoch": 0.96, "grad_norm": 1.52818776203499, "learning_rate": 4.9872558520807635e-08, "loss": 0.7775, "step": 11920 }, { "epoch": 0.96, "grad_norm": 1.5479289744028941, "learning_rate": 4.968967607283493e-08, "loss": 0.8117, "step": 11921 }, { "epoch": 0.96, "grad_norm": 1.5863330323715512, "learning_rate": 4.9507127881134876e-08, "loss": 0.7693, "step": 11922 }, { "epoch": 0.96, "grad_norm": 1.5258386282299399, "learning_rate": 4.932491395803318e-08, "loss": 0.7536, "step": 11923 }, { "epoch": 0.96, "grad_norm": 1.672003749123705, "learning_rate": 4.914303431583389e-08, "loss": 0.8013, "step": 11924 }, { "epoch": 0.96, "grad_norm": 0.7836663401862163, "learning_rate": 4.896148896681774e-08, "loss": 1.0473, "step": 11925 }, { "epoch": 0.96, "grad_norm": 1.4106046622418669, "learning_rate": 4.8780277923243244e-08, "loss": 0.7594, "step": 11926 }, { "epoch": 0.96, "grad_norm": 1.68338462395273, "learning_rate": 4.859940119734563e-08, "loss": 0.7371, "step": 11927 }, { "epoch": 0.96, "grad_norm": 1.4645087854832732, "learning_rate": 4.8418858801339007e-08, "loss": 0.7456, "step": 11928 }, { "epoch": 0.96, "grad_norm": 1.6195679909651803, "learning_rate": 4.823865074741307e-08, "loss": 0.8092, "step": 11929 }, { "epoch": 0.96, "grad_norm": 1.3812763119736708, "learning_rate": 4.8058777047736427e-08, "loss": 0.7511, "step": 11930 }, { "epoch": 0.96, "grad_norm": 0.7567138053457433, "learning_rate": 4.787923771445435e-08, "loss": 1.0452, "step": 11931 }, { "epoch": 0.96, "grad_norm": 1.6407055102804997, "learning_rate": 4.7700032759690484e-08, "loss": 0.688, "step": 11932 }, { "epoch": 0.96, "grad_norm": 1.5418341145876793, "learning_rate": 4.752116219554403e-08, "loss": 0.6797, "step": 11933 }, { "epoch": 0.96, "grad_norm": 1.4994473307634746, "learning_rate": 4.734262603409312e-08, "loss": 0.791, "step": 11934 }, { "epoch": 0.96, "grad_norm": 1.6392252038486956, "learning_rate": 4.71644242873931e-08, "loss": 0.8438, "step": 11935 }, { "epoch": 0.96, "grad_norm": 1.4149812640305448, "learning_rate": 4.698655696747656e-08, "loss": 0.7777, "step": 11936 }, { "epoch": 0.96, "grad_norm": 1.5725697353290597, "learning_rate": 4.680902408635335e-08, "loss": 0.7694, "step": 11937 }, { "epoch": 0.96, "grad_norm": 1.5184439599193844, "learning_rate": 4.66318256560111e-08, "loss": 0.7753, "step": 11938 }, { "epoch": 0.96, "grad_norm": 0.7333901239806647, "learning_rate": 4.645496168841468e-08, "loss": 1.0329, "step": 11939 }, { "epoch": 0.96, "grad_norm": 1.6960488922416148, "learning_rate": 4.627843219550621e-08, "loss": 0.8026, "step": 11940 }, { "epoch": 0.96, "grad_norm": 0.7358954413653698, "learning_rate": 4.6102237189205036e-08, "loss": 1.0411, "step": 11941 }, { "epoch": 0.96, "grad_norm": 0.7709938868977536, "learning_rate": 4.592637668140887e-08, "loss": 1.0877, "step": 11942 }, { "epoch": 0.96, "grad_norm": 0.7571276015230615, "learning_rate": 4.575085068399154e-08, "loss": 1.0449, "step": 11943 }, { "epoch": 0.96, "grad_norm": 1.4326696685120044, "learning_rate": 4.55756592088058e-08, "loss": 0.7484, "step": 11944 }, { "epoch": 0.96, "grad_norm": 1.530256857722689, "learning_rate": 4.5400802267679955e-08, "loss": 0.7647, "step": 11945 }, { "epoch": 0.96, "grad_norm": 1.5259967522541673, "learning_rate": 4.5226279872421784e-08, "loss": 0.7557, "step": 11946 }, { "epoch": 0.96, "grad_norm": 1.7238977806797886, "learning_rate": 4.50520920348152e-08, "loss": 0.7986, "step": 11947 }, { "epoch": 0.96, "grad_norm": 1.4636137136076974, "learning_rate": 4.4878238766620784e-08, "loss": 0.7481, "step": 11948 }, { "epoch": 0.96, "grad_norm": 1.6645420255325523, "learning_rate": 4.4704720079579155e-08, "loss": 0.7524, "step": 11949 }, { "epoch": 0.96, "grad_norm": 0.7606768903387768, "learning_rate": 4.453153598540538e-08, "loss": 1.0753, "step": 11950 }, { "epoch": 0.96, "grad_norm": 1.5547758855050897, "learning_rate": 4.4358686495793444e-08, "loss": 0.7759, "step": 11951 }, { "epoch": 0.96, "grad_norm": 0.7611789257486212, "learning_rate": 4.418617162241512e-08, "loss": 1.0382, "step": 11952 }, { "epoch": 0.96, "grad_norm": 1.525104812367159, "learning_rate": 4.4013991376918306e-08, "loss": 0.7631, "step": 11953 }, { "epoch": 0.96, "grad_norm": 1.6968682697981758, "learning_rate": 4.384214577092927e-08, "loss": 0.8147, "step": 11954 }, { "epoch": 0.96, "grad_norm": 1.5701560272377886, "learning_rate": 4.3670634816052046e-08, "loss": 0.7426, "step": 11955 }, { "epoch": 0.96, "grad_norm": 0.738403380793251, "learning_rate": 4.349945852386628e-08, "loss": 1.0553, "step": 11956 }, { "epoch": 0.96, "grad_norm": 1.490444927169988, "learning_rate": 4.3328616905931595e-08, "loss": 0.6965, "step": 11957 }, { "epoch": 0.96, "grad_norm": 1.5948105701882, "learning_rate": 4.315810997378212e-08, "loss": 0.7251, "step": 11958 }, { "epoch": 0.96, "grad_norm": 1.5583364533491446, "learning_rate": 4.29879377389314e-08, "loss": 0.7971, "step": 11959 }, { "epoch": 0.96, "grad_norm": 0.7287752849192682, "learning_rate": 4.281810021287081e-08, "loss": 1.0379, "step": 11960 }, { "epoch": 0.96, "grad_norm": 1.5911090162255506, "learning_rate": 4.264859740706673e-08, "loss": 0.7685, "step": 11961 }, { "epoch": 0.96, "grad_norm": 1.4605787117398492, "learning_rate": 4.2479429332965006e-08, "loss": 0.7763, "step": 11962 }, { "epoch": 0.96, "grad_norm": 1.5430515702741172, "learning_rate": 4.2310596001988725e-08, "loss": 0.7806, "step": 11963 }, { "epoch": 0.96, "grad_norm": 1.58108609579862, "learning_rate": 4.21420974255371e-08, "loss": 0.7083, "step": 11964 }, { "epoch": 0.96, "grad_norm": 0.739197160727827, "learning_rate": 4.1973933614987693e-08, "loss": 1.0845, "step": 11965 }, { "epoch": 0.96, "grad_norm": 1.4802605234572235, "learning_rate": 4.180610458169587e-08, "loss": 0.7017, "step": 11966 }, { "epoch": 0.96, "grad_norm": 1.631052052292385, "learning_rate": 4.163861033699368e-08, "loss": 0.7641, "step": 11967 }, { "epoch": 0.96, "grad_norm": 1.669491476714782, "learning_rate": 4.147145089218985e-08, "loss": 0.7324, "step": 11968 }, { "epoch": 0.96, "grad_norm": 1.4257264678704684, "learning_rate": 4.130462625857257e-08, "loss": 0.6853, "step": 11969 }, { "epoch": 0.96, "grad_norm": 1.4832146218789752, "learning_rate": 4.1138136447405606e-08, "loss": 0.7328, "step": 11970 }, { "epoch": 0.96, "grad_norm": 1.6500276753073948, "learning_rate": 4.097198146993053e-08, "loss": 0.7906, "step": 11971 }, { "epoch": 0.96, "grad_norm": 1.4924071764496711, "learning_rate": 4.08061613373667e-08, "loss": 0.7789, "step": 11972 }, { "epoch": 0.96, "grad_norm": 1.488221216646638, "learning_rate": 4.0640676060911267e-08, "loss": 0.7905, "step": 11973 }, { "epoch": 0.96, "grad_norm": 1.7242425339719398, "learning_rate": 4.047552565173751e-08, "loss": 0.7885, "step": 11974 }, { "epoch": 0.96, "grad_norm": 1.656934514029331, "learning_rate": 4.0310710120995966e-08, "loss": 0.8449, "step": 11975 }, { "epoch": 0.96, "grad_norm": 1.487160068651432, "learning_rate": 4.014622947981717e-08, "loss": 0.7569, "step": 11976 }, { "epoch": 0.96, "grad_norm": 1.7039889842019553, "learning_rate": 3.998208373930612e-08, "loss": 0.8199, "step": 11977 }, { "epoch": 0.96, "grad_norm": 1.5409773225870376, "learning_rate": 3.981827291054674e-08, "loss": 0.7206, "step": 11978 }, { "epoch": 0.96, "grad_norm": 1.481048774691378, "learning_rate": 3.965479700459962e-08, "loss": 0.803, "step": 11979 }, { "epoch": 0.96, "grad_norm": 1.567369732938257, "learning_rate": 3.949165603250316e-08, "loss": 0.7681, "step": 11980 }, { "epoch": 0.96, "grad_norm": 1.5258270200374175, "learning_rate": 3.932885000527298e-08, "loss": 0.8035, "step": 11981 }, { "epoch": 0.96, "grad_norm": 1.5182144855270965, "learning_rate": 3.916637893390196e-08, "loss": 0.7164, "step": 11982 }, { "epoch": 0.96, "grad_norm": 1.5313258106643597, "learning_rate": 3.900424282936077e-08, "loss": 0.7806, "step": 11983 }, { "epoch": 0.96, "grad_norm": 1.4531249492882428, "learning_rate": 3.884244170259732e-08, "loss": 0.7364, "step": 11984 }, { "epoch": 0.96, "grad_norm": 1.5484451732470077, "learning_rate": 3.8680975564536206e-08, "loss": 0.7606, "step": 11985 }, { "epoch": 0.96, "grad_norm": 0.74215290038469, "learning_rate": 3.851984442608036e-08, "loss": 1.0606, "step": 11986 }, { "epoch": 0.96, "grad_norm": 1.6473463094015097, "learning_rate": 3.835904829810999e-08, "loss": 0.7448, "step": 11987 }, { "epoch": 0.96, "grad_norm": 1.5249144636943242, "learning_rate": 3.8198587191481394e-08, "loss": 0.7682, "step": 11988 }, { "epoch": 0.96, "grad_norm": 1.564264827407454, "learning_rate": 3.8038461117030914e-08, "loss": 0.7725, "step": 11989 }, { "epoch": 0.96, "grad_norm": 1.5875614394893987, "learning_rate": 3.787867008556934e-08, "loss": 0.8201, "step": 11990 }, { "epoch": 0.96, "grad_norm": 1.4470535230395765, "learning_rate": 3.771921410788693e-08, "loss": 0.7402, "step": 11991 }, { "epoch": 0.96, "grad_norm": 1.6477725145761186, "learning_rate": 3.756009319474951e-08, "loss": 0.7028, "step": 11992 }, { "epoch": 0.96, "grad_norm": 1.419199782037499, "learning_rate": 3.7401307356902395e-08, "loss": 0.7247, "step": 11993 }, { "epoch": 0.96, "grad_norm": 1.676819214103522, "learning_rate": 3.724285660506699e-08, "loss": 0.7165, "step": 11994 }, { "epoch": 0.96, "grad_norm": 1.538935410488219, "learning_rate": 3.708474094994141e-08, "loss": 0.6726, "step": 11995 }, { "epoch": 0.96, "grad_norm": 1.5708417726003436, "learning_rate": 3.6926960402202674e-08, "loss": 0.814, "step": 11996 }, { "epoch": 0.96, "grad_norm": 0.755790789578418, "learning_rate": 3.676951497250447e-08, "loss": 1.0644, "step": 11997 }, { "epoch": 0.96, "grad_norm": 1.5481452784040748, "learning_rate": 3.6612404671477197e-08, "loss": 0.7383, "step": 11998 }, { "epoch": 0.96, "grad_norm": 1.5774879174507113, "learning_rate": 3.645562950973014e-08, "loss": 0.7276, "step": 11999 }, { "epoch": 0.96, "grad_norm": 1.4950879757144344, "learning_rate": 3.629918949784872e-08, "loss": 0.7425, "step": 12000 }, { "epoch": 0.96, "grad_norm": 1.4720948102130558, "learning_rate": 3.6143084646396156e-08, "loss": 0.7873, "step": 12001 }, { "epoch": 0.96, "grad_norm": 1.5572405382743781, "learning_rate": 3.5987314965913456e-08, "loss": 0.7572, "step": 12002 }, { "epoch": 0.96, "grad_norm": 1.6108717533855452, "learning_rate": 3.583188046691777e-08, "loss": 0.7542, "step": 12003 }, { "epoch": 0.96, "grad_norm": 1.4656890129282163, "learning_rate": 3.567678115990514e-08, "loss": 0.6733, "step": 12004 }, { "epoch": 0.96, "grad_norm": 1.5758244132348622, "learning_rate": 3.552201705534775e-08, "loss": 0.7518, "step": 12005 }, { "epoch": 0.96, "grad_norm": 1.5796032396367683, "learning_rate": 3.5367588163695566e-08, "loss": 0.7415, "step": 12006 }, { "epoch": 0.96, "grad_norm": 1.459730184716668, "learning_rate": 3.5213494495376364e-08, "loss": 0.7294, "step": 12007 }, { "epoch": 0.96, "grad_norm": 0.7585135262931583, "learning_rate": 3.505973606079405e-08, "loss": 1.0586, "step": 12008 }, { "epoch": 0.96, "grad_norm": 1.5394306903499744, "learning_rate": 3.4906312870331973e-08, "loss": 0.7943, "step": 12009 }, { "epoch": 0.96, "grad_norm": 1.5911551930270873, "learning_rate": 3.475322493434907e-08, "loss": 0.7899, "step": 12010 }, { "epoch": 0.96, "grad_norm": 1.6514995022182055, "learning_rate": 3.460047226318208e-08, "loss": 0.822, "step": 12011 }, { "epoch": 0.96, "grad_norm": 1.4984882179802255, "learning_rate": 3.4448054867144424e-08, "loss": 0.7139, "step": 12012 }, { "epoch": 0.96, "grad_norm": 1.5631710317787282, "learning_rate": 3.429597275652952e-08, "loss": 0.7907, "step": 12013 }, { "epoch": 0.96, "grad_norm": 1.4462195018317614, "learning_rate": 3.414422594160527e-08, "loss": 0.7945, "step": 12014 }, { "epoch": 0.96, "grad_norm": 1.5420659129850651, "learning_rate": 3.399281443261793e-08, "loss": 0.752, "step": 12015 }, { "epoch": 0.96, "grad_norm": 1.485810319178031, "learning_rate": 3.384173823979098e-08, "loss": 0.7221, "step": 12016 }, { "epoch": 0.96, "grad_norm": 0.7425265740157266, "learning_rate": 3.3690997373325705e-08, "loss": 1.0642, "step": 12017 }, { "epoch": 0.96, "grad_norm": 1.5021346667771052, "learning_rate": 3.354059184340064e-08, "loss": 0.7374, "step": 12018 }, { "epoch": 0.96, "grad_norm": 1.4789291484268914, "learning_rate": 3.339052166017098e-08, "loss": 0.8289, "step": 12019 }, { "epoch": 0.96, "grad_norm": 1.6791615753343079, "learning_rate": 3.32407868337703e-08, "loss": 0.7754, "step": 12020 }, { "epoch": 0.96, "grad_norm": 1.6482472975226365, "learning_rate": 3.309138737430884e-08, "loss": 0.7562, "step": 12021 }, { "epoch": 0.96, "grad_norm": 0.7575972704545427, "learning_rate": 3.294232329187408e-08, "loss": 1.0659, "step": 12022 }, { "epoch": 0.96, "grad_norm": 0.7483060896071535, "learning_rate": 3.279359459653242e-08, "loss": 1.0567, "step": 12023 }, { "epoch": 0.96, "grad_norm": 1.6227054438209503, "learning_rate": 3.264520129832471e-08, "loss": 0.7142, "step": 12024 }, { "epoch": 0.96, "grad_norm": 1.5648380980115593, "learning_rate": 3.2497143407271837e-08, "loss": 0.7769, "step": 12025 }, { "epoch": 0.96, "grad_norm": 0.770718914632636, "learning_rate": 3.234942093337079e-08, "loss": 1.07, "step": 12026 }, { "epoch": 0.96, "grad_norm": 1.511612786798848, "learning_rate": 3.220203388659582e-08, "loss": 0.7516, "step": 12027 }, { "epoch": 0.97, "grad_norm": 1.6309551109201752, "learning_rate": 3.2054982276899516e-08, "loss": 0.8533, "step": 12028 }, { "epoch": 0.97, "grad_norm": 1.5443701878247627, "learning_rate": 3.1908266114210054e-08, "loss": 0.7216, "step": 12029 }, { "epoch": 0.97, "grad_norm": 1.7726090847214648, "learning_rate": 3.1761885408435055e-08, "loss": 0.7975, "step": 12030 }, { "epoch": 0.97, "grad_norm": 1.5886782185557369, "learning_rate": 3.161584016945829e-08, "loss": 0.75, "step": 12031 }, { "epoch": 0.97, "grad_norm": 1.5973790357198578, "learning_rate": 3.147013040714075e-08, "loss": 0.7522, "step": 12032 }, { "epoch": 0.97, "grad_norm": 1.5721669701526224, "learning_rate": 3.1324756131320685e-08, "loss": 0.8757, "step": 12033 }, { "epoch": 0.97, "grad_norm": 1.4695380893574135, "learning_rate": 3.1179717351815245e-08, "loss": 0.6312, "step": 12034 }, { "epoch": 0.97, "grad_norm": 1.6217474470555753, "learning_rate": 3.1035014078417136e-08, "loss": 0.7235, "step": 12035 }, { "epoch": 0.97, "grad_norm": 1.7516883031973185, "learning_rate": 3.089064632089689e-08, "loss": 0.7184, "step": 12036 }, { "epoch": 0.97, "grad_norm": 1.6957071983562524, "learning_rate": 3.0746614089002814e-08, "loss": 0.6764, "step": 12037 }, { "epoch": 0.97, "grad_norm": 1.4528770045594799, "learning_rate": 3.0602917392460463e-08, "loss": 0.7174, "step": 12038 }, { "epoch": 0.97, "grad_norm": 1.5121904655596383, "learning_rate": 3.0459556240972635e-08, "loss": 0.7968, "step": 12039 }, { "epoch": 0.97, "grad_norm": 1.5496453186608181, "learning_rate": 3.03165306442188e-08, "loss": 0.7928, "step": 12040 }, { "epoch": 0.97, "grad_norm": 1.512845943940522, "learning_rate": 3.017384061185624e-08, "loss": 0.8261, "step": 12041 }, { "epoch": 0.97, "grad_norm": 1.538267013249084, "learning_rate": 3.003148615352058e-08, "loss": 0.7503, "step": 12042 }, { "epoch": 0.97, "grad_norm": 1.5608459940011419, "learning_rate": 2.988946727882303e-08, "loss": 0.7148, "step": 12043 }, { "epoch": 0.97, "grad_norm": 1.4595637563485164, "learning_rate": 2.974778399735423e-08, "loss": 0.7502, "step": 12044 }, { "epoch": 0.97, "grad_norm": 1.5753283212260905, "learning_rate": 2.9606436318679878e-08, "loss": 0.718, "step": 12045 }, { "epoch": 0.97, "grad_norm": 1.5020035950143162, "learning_rate": 2.9465424252343998e-08, "loss": 0.7015, "step": 12046 }, { "epoch": 0.97, "grad_norm": 1.4379269748535652, "learning_rate": 2.932474780786898e-08, "loss": 0.7363, "step": 12047 }, { "epoch": 0.97, "grad_norm": 1.5917438528305938, "learning_rate": 2.9184406994753335e-08, "loss": 0.6887, "step": 12048 }, { "epoch": 0.97, "grad_norm": 1.5500904830223958, "learning_rate": 2.904440182247281e-08, "loss": 0.7487, "step": 12049 }, { "epoch": 0.97, "grad_norm": 0.7342263536085256, "learning_rate": 2.8904732300480965e-08, "loss": 1.0791, "step": 12050 }, { "epoch": 0.97, "grad_norm": 1.779008490156847, "learning_rate": 2.8765398438208582e-08, "loss": 0.8237, "step": 12051 }, { "epoch": 0.97, "grad_norm": 1.5132382132552953, "learning_rate": 2.8626400245064247e-08, "loss": 0.7223, "step": 12052 }, { "epoch": 0.97, "grad_norm": 1.667518437491313, "learning_rate": 2.8487737730432674e-08, "loss": 0.7882, "step": 12053 }, { "epoch": 0.97, "grad_norm": 1.8040965769087123, "learning_rate": 2.8349410903677488e-08, "loss": 0.7262, "step": 12054 }, { "epoch": 0.97, "grad_norm": 1.495068151923647, "learning_rate": 2.8211419774137882e-08, "loss": 0.7767, "step": 12055 }, { "epoch": 0.97, "grad_norm": 1.2951634006422768, "learning_rate": 2.8073764351132517e-08, "loss": 0.5863, "step": 12056 }, { "epoch": 0.97, "grad_norm": 0.759360951724486, "learning_rate": 2.7936444643955063e-08, "loss": 1.0661, "step": 12057 }, { "epoch": 0.97, "grad_norm": 1.5143809292066703, "learning_rate": 2.7799460661878663e-08, "loss": 0.7292, "step": 12058 }, { "epoch": 0.97, "grad_norm": 1.6715454818568098, "learning_rate": 2.766281241415203e-08, "loss": 0.7805, "step": 12059 }, { "epoch": 0.97, "grad_norm": 1.7298366674264836, "learning_rate": 2.7526499910002225e-08, "loss": 0.7579, "step": 12060 }, { "epoch": 0.97, "grad_norm": 1.5370114786502842, "learning_rate": 2.7390523158633552e-08, "loss": 0.7856, "step": 12061 }, { "epoch": 0.97, "grad_norm": 1.529890359233118, "learning_rate": 2.7254882169227002e-08, "loss": 0.6484, "step": 12062 }, { "epoch": 0.97, "grad_norm": 1.4655259074787756, "learning_rate": 2.7119576950941918e-08, "loss": 0.8372, "step": 12063 }, { "epoch": 0.97, "grad_norm": 1.4905858429465033, "learning_rate": 2.698460751291432e-08, "loss": 0.7676, "step": 12064 }, { "epoch": 0.97, "grad_norm": 0.7281693143567856, "learning_rate": 2.684997386425692e-08, "loss": 1.0332, "step": 12065 }, { "epoch": 0.97, "grad_norm": 1.5131673673512678, "learning_rate": 2.6715676014061887e-08, "loss": 0.8068, "step": 12066 }, { "epoch": 0.97, "grad_norm": 1.562162478433678, "learning_rate": 2.658171397139586e-08, "loss": 0.7659, "step": 12067 }, { "epoch": 0.97, "grad_norm": 1.4347873939509137, "learning_rate": 2.644808774530494e-08, "loss": 0.7743, "step": 12068 }, { "epoch": 0.97, "grad_norm": 1.5621475830483658, "learning_rate": 2.631479734481246e-08, "loss": 0.807, "step": 12069 }, { "epoch": 0.97, "grad_norm": 1.5821912201303974, "learning_rate": 2.6181842778917332e-08, "loss": 0.7561, "step": 12070 }, { "epoch": 0.97, "grad_norm": 1.4940135049611913, "learning_rate": 2.6049224056597933e-08, "loss": 0.754, "step": 12071 }, { "epoch": 0.97, "grad_norm": 0.7337815418747571, "learning_rate": 2.591694118680821e-08, "loss": 1.063, "step": 12072 }, { "epoch": 0.97, "grad_norm": 1.5872876443814612, "learning_rate": 2.578499417848046e-08, "loss": 0.685, "step": 12073 }, { "epoch": 0.97, "grad_norm": 1.5030384380613944, "learning_rate": 2.5653383040524228e-08, "loss": 0.8115, "step": 12074 }, { "epoch": 0.97, "grad_norm": 1.6400037322774594, "learning_rate": 2.552210778182629e-08, "loss": 0.7785, "step": 12075 }, { "epoch": 0.97, "grad_norm": 1.6309772703087977, "learning_rate": 2.5391168411250665e-08, "loss": 0.7908, "step": 12076 }, { "epoch": 0.97, "grad_norm": 0.7310115041380982, "learning_rate": 2.526056493763751e-08, "loss": 1.061, "step": 12077 }, { "epoch": 0.97, "grad_norm": 1.5721089136938338, "learning_rate": 2.5130297369807543e-08, "loss": 0.751, "step": 12078 }, { "epoch": 0.97, "grad_norm": 1.6283089586074007, "learning_rate": 2.5000365716554843e-08, "loss": 0.6954, "step": 12079 }, { "epoch": 0.97, "grad_norm": 1.6143667127654806, "learning_rate": 2.4870769986654054e-08, "loss": 0.8307, "step": 12080 }, { "epoch": 0.97, "grad_norm": 1.544125946752776, "learning_rate": 2.4741510188854843e-08, "loss": 0.7587, "step": 12081 }, { "epoch": 0.97, "grad_norm": 0.7368203431129738, "learning_rate": 2.461258633188579e-08, "loss": 1.0404, "step": 12082 }, { "epoch": 0.97, "grad_norm": 1.4762284314298035, "learning_rate": 2.4483998424451593e-08, "loss": 0.8304, "step": 12083 }, { "epoch": 0.97, "grad_norm": 0.7566756581564131, "learning_rate": 2.4355746475234755e-08, "loss": 1.0532, "step": 12084 }, { "epoch": 0.97, "grad_norm": 0.7545129706382248, "learning_rate": 2.422783049289612e-08, "loss": 1.0522, "step": 12085 }, { "epoch": 0.97, "grad_norm": 1.6314778991217525, "learning_rate": 2.4100250486071565e-08, "loss": 0.7771, "step": 12086 }, { "epoch": 0.97, "grad_norm": 0.7445415248824638, "learning_rate": 2.3973006463376412e-08, "loss": 1.0722, "step": 12087 }, { "epoch": 0.97, "grad_norm": 0.7472520441798517, "learning_rate": 2.384609843340213e-08, "loss": 1.0784, "step": 12088 }, { "epoch": 0.97, "grad_norm": 1.4437707071227226, "learning_rate": 2.3719526404717975e-08, "loss": 0.7269, "step": 12089 }, { "epoch": 0.97, "grad_norm": 1.6031409134867034, "learning_rate": 2.3593290385870436e-08, "loss": 0.6971, "step": 12090 }, { "epoch": 0.97, "grad_norm": 1.5957027219055668, "learning_rate": 2.3467390385382706e-08, "loss": 0.6955, "step": 12091 }, { "epoch": 0.97, "grad_norm": 1.4825727413774954, "learning_rate": 2.3341826411756863e-08, "loss": 0.757, "step": 12092 }, { "epoch": 0.97, "grad_norm": 1.5795420642215066, "learning_rate": 2.3216598473470575e-08, "loss": 0.8322, "step": 12093 }, { "epoch": 0.97, "grad_norm": 1.6821250601939293, "learning_rate": 2.3091706578979857e-08, "loss": 0.7579, "step": 12094 }, { "epoch": 0.97, "grad_norm": 1.4766261286680555, "learning_rate": 2.2967150736717402e-08, "loss": 0.7179, "step": 12095 }, { "epoch": 0.97, "grad_norm": 1.5280060479286353, "learning_rate": 2.2842930955093158e-08, "loss": 0.7735, "step": 12096 }, { "epoch": 0.97, "grad_norm": 1.5691790528506049, "learning_rate": 2.2719047242495406e-08, "loss": 0.7902, "step": 12097 }, { "epoch": 0.97, "grad_norm": 1.5957527229822552, "learning_rate": 2.2595499607289127e-08, "loss": 0.7689, "step": 12098 }, { "epoch": 0.97, "grad_norm": 1.5255819363272125, "learning_rate": 2.2472288057815984e-08, "loss": 0.6977, "step": 12099 }, { "epoch": 0.97, "grad_norm": 1.6159646744970761, "learning_rate": 2.234941260239598e-08, "loss": 0.7433, "step": 12100 }, { "epoch": 0.97, "grad_norm": 1.3920847198574284, "learning_rate": 2.2226873249325263e-08, "loss": 0.6379, "step": 12101 }, { "epoch": 0.97, "grad_norm": 1.5910744886051125, "learning_rate": 2.2104670006878883e-08, "loss": 0.7379, "step": 12102 }, { "epoch": 0.97, "grad_norm": 1.4659846137387946, "learning_rate": 2.1982802883307453e-08, "loss": 0.7469, "step": 12103 }, { "epoch": 0.97, "grad_norm": 1.500338275173877, "learning_rate": 2.1861271886840508e-08, "loss": 0.7952, "step": 12104 }, { "epoch": 0.97, "grad_norm": 1.4366635451948462, "learning_rate": 2.1740077025683703e-08, "loss": 0.7385, "step": 12105 }, { "epoch": 0.97, "grad_norm": 1.586521187657662, "learning_rate": 2.161921830801994e-08, "loss": 0.75, "step": 12106 }, { "epoch": 0.97, "grad_norm": 1.564629514885037, "learning_rate": 2.149869574201047e-08, "loss": 0.7362, "step": 12107 }, { "epoch": 0.97, "grad_norm": 0.7391356847167303, "learning_rate": 2.1378509335793217e-08, "loss": 1.1054, "step": 12108 }, { "epoch": 0.97, "grad_norm": 1.588425342510197, "learning_rate": 2.125865909748337e-08, "loss": 0.7634, "step": 12109 }, { "epoch": 0.97, "grad_norm": 1.4743592309817515, "learning_rate": 2.1139145035173336e-08, "loss": 0.7177, "step": 12110 }, { "epoch": 0.97, "grad_norm": 0.7408690567085984, "learning_rate": 2.1019967156932774e-08, "loss": 1.0512, "step": 12111 }, { "epoch": 0.97, "grad_norm": 1.4660455431411379, "learning_rate": 2.090112547080969e-08, "loss": 0.7893, "step": 12112 }, { "epoch": 0.97, "grad_norm": 1.49794075140457, "learning_rate": 2.0782619984827667e-08, "loss": 0.7573, "step": 12113 }, { "epoch": 0.97, "grad_norm": 1.5478142221315458, "learning_rate": 2.0664450706988636e-08, "loss": 0.7745, "step": 12114 }, { "epoch": 0.97, "grad_norm": 1.5592176632428518, "learning_rate": 2.0546617645272325e-08, "loss": 0.7687, "step": 12115 }, { "epoch": 0.97, "grad_norm": 1.520778902736634, "learning_rate": 2.0429120807634595e-08, "loss": 0.7783, "step": 12116 }, { "epoch": 0.97, "grad_norm": 1.4993376085191434, "learning_rate": 2.0311960202009097e-08, "loss": 0.7079, "step": 12117 }, { "epoch": 0.97, "grad_norm": 1.5637398314716326, "learning_rate": 2.0195135836306168e-08, "loss": 0.7597, "step": 12118 }, { "epoch": 0.97, "grad_norm": 1.4371945917201197, "learning_rate": 2.0078647718415058e-08, "loss": 0.7039, "step": 12119 }, { "epoch": 0.97, "grad_norm": 1.5112828324028913, "learning_rate": 1.9962495856201135e-08, "loss": 0.81, "step": 12120 }, { "epoch": 0.97, "grad_norm": 1.478274913631058, "learning_rate": 1.984668025750647e-08, "loss": 0.7395, "step": 12121 }, { "epoch": 0.97, "grad_norm": 1.4730426256989468, "learning_rate": 1.9731200930152572e-08, "loss": 0.769, "step": 12122 }, { "epoch": 0.97, "grad_norm": 0.7249290090168846, "learning_rate": 1.9616057881935436e-08, "loss": 1.0065, "step": 12123 }, { "epoch": 0.97, "grad_norm": 1.934300895769558, "learning_rate": 1.950125112063106e-08, "loss": 0.6933, "step": 12124 }, { "epoch": 0.97, "grad_norm": 1.5452861105453484, "learning_rate": 1.9386780653989913e-08, "loss": 0.8033, "step": 12125 }, { "epoch": 0.97, "grad_norm": 1.5322404185331482, "learning_rate": 1.927264648974303e-08, "loss": 0.7366, "step": 12126 }, { "epoch": 0.97, "grad_norm": 1.5599745376133634, "learning_rate": 1.9158848635595915e-08, "loss": 0.7927, "step": 12127 }, { "epoch": 0.97, "grad_norm": 0.732465566611681, "learning_rate": 1.9045387099232425e-08, "loss": 1.048, "step": 12128 }, { "epoch": 0.97, "grad_norm": 0.7429380353699753, "learning_rate": 1.8932261888314207e-08, "loss": 1.0279, "step": 12129 }, { "epoch": 0.97, "grad_norm": 0.7733017992885716, "learning_rate": 1.8819473010479594e-08, "loss": 1.0631, "step": 12130 }, { "epoch": 0.97, "grad_norm": 1.4892529781677015, "learning_rate": 1.8707020473344163e-08, "loss": 0.7897, "step": 12131 }, { "epoch": 0.97, "grad_norm": 1.562627988393497, "learning_rate": 1.8594904284501282e-08, "loss": 0.8471, "step": 12132 }, { "epoch": 0.97, "grad_norm": 1.4155780910907019, "learning_rate": 1.8483124451521005e-08, "loss": 0.7685, "step": 12133 }, { "epoch": 0.97, "grad_norm": 1.730907653826536, "learning_rate": 1.837168098195119e-08, "loss": 0.8345, "step": 12134 }, { "epoch": 0.97, "grad_norm": 1.5351954783836654, "learning_rate": 1.8260573883316924e-08, "loss": 0.793, "step": 12135 }, { "epoch": 0.97, "grad_norm": 0.7594083629174347, "learning_rate": 1.8149803163119984e-08, "loss": 1.0336, "step": 12136 }, { "epoch": 0.97, "grad_norm": 1.4819924222313472, "learning_rate": 1.8039368828839942e-08, "loss": 0.7892, "step": 12137 }, { "epoch": 0.97, "grad_norm": 1.4968492206310025, "learning_rate": 1.7929270887933615e-08, "loss": 0.7435, "step": 12138 }, { "epoch": 0.97, "grad_norm": 1.5138212975706289, "learning_rate": 1.781950934783505e-08, "loss": 0.7165, "step": 12139 }, { "epoch": 0.97, "grad_norm": 1.9518202903729691, "learning_rate": 1.7710084215956104e-08, "loss": 0.784, "step": 12140 }, { "epoch": 0.97, "grad_norm": 1.4884694456253735, "learning_rate": 1.7600995499684193e-08, "loss": 0.7749, "step": 12141 }, { "epoch": 0.97, "grad_norm": 1.47020519026117, "learning_rate": 1.749224320638676e-08, "loss": 0.7358, "step": 12142 }, { "epoch": 0.97, "grad_norm": 1.6770860230917108, "learning_rate": 1.7383827343405712e-08, "loss": 0.7223, "step": 12143 }, { "epoch": 0.97, "grad_norm": 1.6592490153695485, "learning_rate": 1.7275747918062414e-08, "loss": 0.8116, "step": 12144 }, { "epoch": 0.97, "grad_norm": 1.4850221318698864, "learning_rate": 1.7168004937653803e-08, "loss": 0.7656, "step": 12145 }, { "epoch": 0.97, "grad_norm": 1.5254932174204305, "learning_rate": 1.7060598409456286e-08, "loss": 0.7162, "step": 12146 }, { "epoch": 0.97, "grad_norm": 1.6630279755455382, "learning_rate": 1.6953528340720726e-08, "loss": 0.7856, "step": 12147 }, { "epoch": 0.97, "grad_norm": 1.4404833276850257, "learning_rate": 1.684679473867745e-08, "loss": 0.7861, "step": 12148 }, { "epoch": 0.97, "grad_norm": 1.5288606432333816, "learning_rate": 1.6740397610533477e-08, "loss": 0.7445, "step": 12149 }, { "epoch": 0.97, "grad_norm": 1.5270304084888775, "learning_rate": 1.66343369634725e-08, "loss": 0.7462, "step": 12150 }, { "epoch": 0.97, "grad_norm": 1.4014274947959138, "learning_rate": 1.6528612804656565e-08, "loss": 0.6737, "step": 12151 }, { "epoch": 0.97, "grad_norm": 0.75149451056459, "learning_rate": 1.6423225141223854e-08, "loss": 1.0709, "step": 12152 }, { "epoch": 0.98, "grad_norm": 1.5714300105812147, "learning_rate": 1.631817398029034e-08, "loss": 0.7818, "step": 12153 }, { "epoch": 0.98, "grad_norm": 0.761393438369206, "learning_rate": 1.6213459328950355e-08, "loss": 1.0507, "step": 12154 }, { "epoch": 0.98, "grad_norm": 1.4792679649349916, "learning_rate": 1.6109081194273235e-08, "loss": 0.7382, "step": 12155 }, { "epoch": 0.98, "grad_norm": 1.615248952856287, "learning_rate": 1.6005039583307237e-08, "loss": 0.8374, "step": 12156 }, { "epoch": 0.98, "grad_norm": 1.4979722703922982, "learning_rate": 1.5901334503077294e-08, "loss": 0.6995, "step": 12157 }, { "epoch": 0.98, "grad_norm": 1.4820566431298845, "learning_rate": 1.5797965960586693e-08, "loss": 0.8018, "step": 12158 }, { "epoch": 0.98, "grad_norm": 1.5412998835312395, "learning_rate": 1.5694933962814295e-08, "loss": 0.8163, "step": 12159 }, { "epoch": 0.98, "grad_norm": 1.473489735798859, "learning_rate": 1.5592238516717317e-08, "loss": 0.81, "step": 12160 }, { "epoch": 0.98, "grad_norm": 1.5761408308232585, "learning_rate": 1.5489879629229654e-08, "loss": 0.741, "step": 12161 }, { "epoch": 0.98, "grad_norm": 0.7337510317941677, "learning_rate": 1.5387857307262998e-08, "loss": 1.0419, "step": 12162 }, { "epoch": 0.98, "grad_norm": 1.6268546975060227, "learning_rate": 1.528617155770684e-08, "loss": 0.716, "step": 12163 }, { "epoch": 0.98, "grad_norm": 1.586119697029699, "learning_rate": 1.5184822387426246e-08, "loss": 0.8348, "step": 12164 }, { "epoch": 0.98, "grad_norm": 1.5635134674236129, "learning_rate": 1.508380980326518e-08, "loss": 0.8243, "step": 12165 }, { "epoch": 0.98, "grad_norm": 1.629279359497004, "learning_rate": 1.4983133812043193e-08, "loss": 0.7169, "step": 12166 }, { "epoch": 0.98, "grad_norm": 1.5289622337579658, "learning_rate": 1.4882794420559843e-08, "loss": 0.8071, "step": 12167 }, { "epoch": 0.98, "grad_norm": 1.5628548278801926, "learning_rate": 1.4782791635588601e-08, "loss": 0.7987, "step": 12168 }, { "epoch": 0.98, "grad_norm": 1.399331753968418, "learning_rate": 1.4683125463882952e-08, "loss": 0.7418, "step": 12169 }, { "epoch": 0.98, "grad_norm": 1.5046578779024695, "learning_rate": 1.4583795912172516e-08, "loss": 0.7832, "step": 12170 }, { "epoch": 0.98, "grad_norm": 1.4167722045976292, "learning_rate": 1.4484802987164147e-08, "loss": 0.7759, "step": 12171 }, { "epoch": 0.98, "grad_norm": 1.4340380497918501, "learning_rate": 1.4386146695541947e-08, "loss": 0.7545, "step": 12172 }, { "epoch": 0.98, "grad_norm": 1.538427771093233, "learning_rate": 1.4287827043966696e-08, "loss": 0.7348, "step": 12173 }, { "epoch": 0.98, "grad_norm": 1.4646971790107552, "learning_rate": 1.4189844039078638e-08, "loss": 0.783, "step": 12174 }, { "epoch": 0.98, "grad_norm": 0.7407695081387405, "learning_rate": 1.4092197687492481e-08, "loss": 1.0788, "step": 12175 }, { "epoch": 0.98, "grad_norm": 1.557378403086291, "learning_rate": 1.3994887995802397e-08, "loss": 0.7435, "step": 12176 }, { "epoch": 0.98, "grad_norm": 0.7582511984515554, "learning_rate": 1.3897914970578685e-08, "loss": 1.0737, "step": 12177 }, { "epoch": 0.98, "grad_norm": 1.5429132344068073, "learning_rate": 1.380127861836944e-08, "loss": 0.7072, "step": 12178 }, { "epoch": 0.98, "grad_norm": 0.7238813197452245, "learning_rate": 1.3704978945698888e-08, "loss": 1.0689, "step": 12179 }, { "epoch": 0.98, "grad_norm": 0.7547551122783296, "learning_rate": 1.360901595907016e-08, "loss": 1.0833, "step": 12180 }, { "epoch": 0.98, "grad_norm": 1.5796905791865619, "learning_rate": 1.3513389664963073e-08, "loss": 0.7853, "step": 12181 }, { "epoch": 0.98, "grad_norm": 1.5687554019722012, "learning_rate": 1.3418100069834128e-08, "loss": 0.7726, "step": 12182 }, { "epoch": 0.98, "grad_norm": 1.7966044576258473, "learning_rate": 1.3323147180117624e-08, "loss": 0.8358, "step": 12183 }, { "epoch": 0.98, "grad_norm": 1.4957682004706594, "learning_rate": 1.3228531002224543e-08, "loss": 0.7773, "step": 12184 }, { "epoch": 0.98, "grad_norm": 1.5078880755380488, "learning_rate": 1.3134251542544774e-08, "loss": 0.7344, "step": 12185 }, { "epoch": 0.98, "grad_norm": 1.6518981545978477, "learning_rate": 1.304030880744267e-08, "loss": 0.7499, "step": 12186 }, { "epoch": 0.98, "grad_norm": 1.4220032050240292, "learning_rate": 1.29467028032626e-08, "loss": 0.7305, "step": 12187 }, { "epoch": 0.98, "grad_norm": 1.5724802340640325, "learning_rate": 1.2853433536324512e-08, "loss": 0.7265, "step": 12188 }, { "epoch": 0.98, "grad_norm": 0.7606724698007911, "learning_rate": 1.2760501012926696e-08, "loss": 1.035, "step": 12189 }, { "epoch": 0.98, "grad_norm": 1.8088481903441134, "learning_rate": 1.2667905239343581e-08, "loss": 0.7663, "step": 12190 }, { "epoch": 0.98, "grad_norm": 1.704325475671645, "learning_rate": 1.2575646221828497e-08, "loss": 0.7927, "step": 12191 }, { "epoch": 0.98, "grad_norm": 0.7422223512058327, "learning_rate": 1.248372396660924e-08, "loss": 1.012, "step": 12192 }, { "epoch": 0.98, "grad_norm": 1.5346367435299728, "learning_rate": 1.2392138479894178e-08, "loss": 0.6972, "step": 12193 }, { "epoch": 0.98, "grad_norm": 1.604897177068113, "learning_rate": 1.2300889767866697e-08, "loss": 0.772, "step": 12194 }, { "epoch": 0.98, "grad_norm": 1.5386245259233673, "learning_rate": 1.220997783668798e-08, "loss": 0.71, "step": 12195 }, { "epoch": 0.98, "grad_norm": 1.5163094757362094, "learning_rate": 1.2119402692497007e-08, "loss": 0.7425, "step": 12196 }, { "epoch": 0.98, "grad_norm": 1.5600418730945607, "learning_rate": 1.2029164341409438e-08, "loss": 0.755, "step": 12197 }, { "epoch": 0.98, "grad_norm": 1.5438999322038198, "learning_rate": 1.193926278951818e-08, "loss": 0.7851, "step": 12198 }, { "epoch": 0.98, "grad_norm": 1.9039441628649647, "learning_rate": 1.1849698042893932e-08, "loss": 0.7971, "step": 12199 }, { "epoch": 0.98, "grad_norm": 1.5244553125306592, "learning_rate": 1.1760470107584077e-08, "loss": 0.669, "step": 12200 }, { "epoch": 0.98, "grad_norm": 1.6571796498573543, "learning_rate": 1.1671578989613796e-08, "loss": 0.8851, "step": 12201 }, { "epoch": 0.98, "grad_norm": 1.5792420161315612, "learning_rate": 1.1583024694984956e-08, "loss": 0.8281, "step": 12202 }, { "epoch": 0.98, "grad_norm": 1.4954007442504227, "learning_rate": 1.1494807229677218e-08, "loss": 0.7824, "step": 12203 }, { "epoch": 0.98, "grad_norm": 1.5326699272644617, "learning_rate": 1.1406926599646373e-08, "loss": 0.7484, "step": 12204 }, { "epoch": 0.98, "grad_norm": 1.4790226908789161, "learning_rate": 1.1319382810827673e-08, "loss": 0.7237, "step": 12205 }, { "epoch": 0.98, "grad_norm": 1.6044290431390875, "learning_rate": 1.1232175869130835e-08, "loss": 0.7247, "step": 12206 }, { "epoch": 0.98, "grad_norm": 1.4967648529623887, "learning_rate": 1.1145305780445036e-08, "loss": 0.735, "step": 12207 }, { "epoch": 0.98, "grad_norm": 1.399727849332947, "learning_rate": 1.1058772550636699e-08, "loss": 0.6726, "step": 12208 }, { "epoch": 0.98, "grad_norm": 1.469642876023869, "learning_rate": 1.0972576185547256e-08, "loss": 0.682, "step": 12209 }, { "epoch": 0.98, "grad_norm": 1.5457970431485608, "learning_rate": 1.0886716690997612e-08, "loss": 0.7142, "step": 12210 }, { "epoch": 0.98, "grad_norm": 0.7337792759909446, "learning_rate": 1.0801194072785348e-08, "loss": 1.0753, "step": 12211 }, { "epoch": 0.98, "grad_norm": 1.5445360428739587, "learning_rate": 1.0716008336684736e-08, "loss": 0.8391, "step": 12212 }, { "epoch": 0.98, "grad_norm": 1.4321594692713804, "learning_rate": 1.0631159488448395e-08, "loss": 0.6916, "step": 12213 }, { "epoch": 0.98, "grad_norm": 1.5586477823684421, "learning_rate": 1.0546647533804521e-08, "loss": 0.7221, "step": 12214 }, { "epoch": 0.98, "grad_norm": 1.6090382372569705, "learning_rate": 1.0462472478460217e-08, "loss": 0.7399, "step": 12215 }, { "epoch": 0.98, "grad_norm": 0.764550963972967, "learning_rate": 1.0378634328099268e-08, "loss": 1.0562, "step": 12216 }, { "epoch": 0.98, "grad_norm": 0.7500419281453206, "learning_rate": 1.0295133088382147e-08, "loss": 1.0978, "step": 12217 }, { "epoch": 0.98, "grad_norm": 1.53997796571693, "learning_rate": 1.0211968764947122e-08, "loss": 0.7174, "step": 12218 }, { "epoch": 0.98, "grad_norm": 1.467213267723149, "learning_rate": 1.0129141363410256e-08, "loss": 0.757, "step": 12219 }, { "epoch": 0.98, "grad_norm": 1.561631613941726, "learning_rate": 1.0046650889363741e-08, "loss": 0.7382, "step": 12220 }, { "epoch": 0.98, "grad_norm": 1.5240943044044848, "learning_rate": 9.964497348377012e-09, "loss": 0.7563, "step": 12221 }, { "epoch": 0.98, "grad_norm": 0.752855470745556, "learning_rate": 9.882680745998408e-09, "loss": 1.0589, "step": 12222 }, { "epoch": 0.98, "grad_norm": 1.625856349735403, "learning_rate": 9.801201087751843e-09, "loss": 0.65, "step": 12223 }, { "epoch": 0.98, "grad_norm": 1.4978713995477744, "learning_rate": 9.720058379138474e-09, "loss": 0.7269, "step": 12224 }, { "epoch": 0.98, "grad_norm": 1.6235485307082411, "learning_rate": 9.639252625638363e-09, "loss": 0.7029, "step": 12225 }, { "epoch": 0.98, "grad_norm": 1.456299330964164, "learning_rate": 9.558783832706586e-09, "loss": 0.6858, "step": 12226 }, { "epoch": 0.98, "grad_norm": 1.4815290204210438, "learning_rate": 9.478652005777134e-09, "loss": 0.7101, "step": 12227 }, { "epoch": 0.98, "grad_norm": 1.5480540597804195, "learning_rate": 9.398857150260676e-09, "loss": 0.7437, "step": 12228 }, { "epoch": 0.98, "grad_norm": 1.504194315596324, "learning_rate": 9.319399271545126e-09, "loss": 0.7913, "step": 12229 }, { "epoch": 0.98, "grad_norm": 0.7205311415841864, "learning_rate": 9.240278374995637e-09, "loss": 1.0272, "step": 12230 }, { "epoch": 0.98, "grad_norm": 0.7565575528862646, "learning_rate": 9.161494465954046e-09, "loss": 1.0857, "step": 12231 }, { "epoch": 0.98, "grad_norm": 0.7223992819351541, "learning_rate": 9.083047549741098e-09, "loss": 1.0589, "step": 12232 }, { "epoch": 0.98, "grad_norm": 0.7398082890804571, "learning_rate": 9.004937631653664e-09, "loss": 1.0458, "step": 12233 }, { "epoch": 0.98, "grad_norm": 1.473484726937989, "learning_rate": 8.927164716964754e-09, "loss": 0.7676, "step": 12234 }, { "epoch": 0.98, "grad_norm": 1.4150576374942574, "learning_rate": 8.849728810926273e-09, "loss": 0.7829, "step": 12235 }, { "epoch": 0.98, "grad_norm": 0.7437032756234213, "learning_rate": 8.772629918767927e-09, "loss": 1.0891, "step": 12236 }, { "epoch": 0.98, "grad_norm": 1.55966802403685, "learning_rate": 8.695868045693889e-09, "loss": 0.7449, "step": 12237 }, { "epoch": 0.98, "grad_norm": 0.7276919885787497, "learning_rate": 8.6194431968889e-09, "loss": 1.0696, "step": 12238 }, { "epoch": 0.98, "grad_norm": 1.6172237678374088, "learning_rate": 8.543355377512164e-09, "loss": 0.7746, "step": 12239 }, { "epoch": 0.98, "grad_norm": 3.827159983902288, "learning_rate": 8.467604592701795e-09, "loss": 0.6425, "step": 12240 }, { "epoch": 0.98, "grad_norm": 1.6346821276296246, "learning_rate": 8.392190847572035e-09, "loss": 0.7592, "step": 12241 }, { "epoch": 0.98, "grad_norm": 1.505159639603996, "learning_rate": 8.317114147216587e-09, "loss": 0.7164, "step": 12242 }, { "epoch": 0.98, "grad_norm": 1.5455081144281047, "learning_rate": 8.242374496703065e-09, "loss": 0.717, "step": 12243 }, { "epoch": 0.98, "grad_norm": 1.4721416534375171, "learning_rate": 8.167971901079097e-09, "loss": 0.707, "step": 12244 }, { "epoch": 0.98, "grad_norm": 1.4624194382802544, "learning_rate": 8.093906365367888e-09, "loss": 0.7247, "step": 12245 }, { "epoch": 0.98, "grad_norm": 0.733955807661556, "learning_rate": 8.02017789457099e-09, "loss": 1.0643, "step": 12246 }, { "epoch": 0.98, "grad_norm": 1.5876674612042119, "learning_rate": 7.946786493666647e-09, "loss": 0.7661, "step": 12247 }, { "epoch": 0.98, "grad_norm": 1.4459884898297926, "learning_rate": 7.873732167609782e-09, "loss": 0.6675, "step": 12248 }, { "epoch": 0.98, "grad_norm": 1.4128138476298862, "learning_rate": 7.801014921334227e-09, "loss": 0.7762, "step": 12249 }, { "epoch": 0.98, "grad_norm": 1.5298272637513937, "learning_rate": 7.728634759749387e-09, "loss": 0.6915, "step": 12250 }, { "epoch": 0.98, "grad_norm": 1.5177313017399514, "learning_rate": 7.656591687742465e-09, "loss": 0.7556, "step": 12251 }, { "epoch": 0.98, "grad_norm": 1.5280165687731564, "learning_rate": 7.584885710178457e-09, "loss": 0.721, "step": 12252 }, { "epoch": 0.98, "grad_norm": 1.6622074214088207, "learning_rate": 7.513516831898493e-09, "loss": 0.8115, "step": 12253 }, { "epoch": 0.98, "grad_norm": 1.4622559665760364, "learning_rate": 7.442485057722048e-09, "loss": 0.7728, "step": 12254 }, { "epoch": 0.98, "grad_norm": 1.4417249800598522, "learning_rate": 7.371790392445288e-09, "loss": 0.7786, "step": 12255 }, { "epoch": 0.98, "grad_norm": 1.5158115866071773, "learning_rate": 7.301432840841061e-09, "loss": 0.7685, "step": 12256 }, { "epoch": 0.98, "grad_norm": 0.7558309309576888, "learning_rate": 7.2314124076611205e-09, "loss": 1.0598, "step": 12257 }, { "epoch": 0.98, "grad_norm": 1.606740303559623, "learning_rate": 7.161729097632797e-09, "loss": 0.8409, "step": 12258 }, { "epoch": 0.98, "grad_norm": 1.512776199757214, "learning_rate": 7.092382915461215e-09, "loss": 0.7463, "step": 12259 }, { "epoch": 0.98, "grad_norm": 1.519617019758592, "learning_rate": 7.0233738658292974e-09, "loss": 0.7287, "step": 12260 }, { "epoch": 0.98, "grad_norm": 1.4857528512043399, "learning_rate": 6.954701953395538e-09, "loss": 0.7144, "step": 12261 }, { "epoch": 0.98, "grad_norm": 1.4078448056642565, "learning_rate": 6.886367182798448e-09, "loss": 0.7416, "step": 12262 }, { "epoch": 0.98, "grad_norm": 1.5805433124971253, "learning_rate": 6.81836955865045e-09, "loss": 0.7657, "step": 12263 }, { "epoch": 0.98, "grad_norm": 1.4551513256776019, "learning_rate": 6.750709085544538e-09, "loss": 0.6867, "step": 12264 }, { "epoch": 0.98, "grad_norm": 0.7461464435757642, "learning_rate": 6.683385768047612e-09, "loss": 1.0363, "step": 12265 }, { "epoch": 0.98, "grad_norm": 1.505883029816718, "learning_rate": 6.616399610707147e-09, "loss": 0.693, "step": 12266 }, { "epoch": 0.98, "grad_norm": 1.4967490856542023, "learning_rate": 6.5497506180450806e-09, "loss": 0.8166, "step": 12267 }, { "epoch": 0.98, "grad_norm": 0.7696148995996944, "learning_rate": 6.483438794562258e-09, "loss": 1.0609, "step": 12268 }, { "epoch": 0.98, "grad_norm": 1.6188971110285095, "learning_rate": 6.417464144736208e-09, "loss": 0.8396, "step": 12269 }, { "epoch": 0.98, "grad_norm": 1.4369398514815348, "learning_rate": 6.351826673021144e-09, "loss": 0.8023, "step": 12270 }, { "epoch": 0.98, "grad_norm": 1.744131829892106, "learning_rate": 6.286526383849078e-09, "loss": 0.7583, "step": 12271 }, { "epoch": 0.98, "grad_norm": 1.4962403839258664, "learning_rate": 6.221563281630372e-09, "loss": 0.7545, "step": 12272 }, { "epoch": 0.98, "grad_norm": 1.5675149444466239, "learning_rate": 6.156937370750405e-09, "loss": 0.7732, "step": 12273 }, { "epoch": 0.98, "grad_norm": 1.6276991159329492, "learning_rate": 6.092648655572908e-09, "loss": 0.7693, "step": 12274 }, { "epoch": 0.98, "grad_norm": 1.420451375620458, "learning_rate": 6.028697140438855e-09, "loss": 0.7683, "step": 12275 }, { "epoch": 0.98, "grad_norm": 1.449157585532989, "learning_rate": 5.965082829667013e-09, "loss": 0.8051, "step": 12276 }, { "epoch": 0.98, "grad_norm": 1.5036171976180839, "learning_rate": 5.901805727552279e-09, "loss": 0.7015, "step": 12277 }, { "epoch": 0.99, "grad_norm": 1.5565950931177601, "learning_rate": 5.838865838366792e-09, "loss": 0.6848, "step": 12278 }, { "epoch": 0.99, "grad_norm": 1.5244985592092153, "learning_rate": 5.7762631663615955e-09, "loss": 0.7202, "step": 12279 }, { "epoch": 0.99, "grad_norm": 1.4919569470831324, "learning_rate": 5.713997715762754e-09, "loss": 0.8281, "step": 12280 }, { "epoch": 0.99, "grad_norm": 0.723380816698547, "learning_rate": 5.652069490775236e-09, "loss": 1.0816, "step": 12281 }, { "epoch": 0.99, "grad_norm": 1.3695862600501325, "learning_rate": 5.590478495580143e-09, "loss": 0.7026, "step": 12282 }, { "epoch": 0.99, "grad_norm": 0.7490532668940209, "learning_rate": 5.529224734335814e-09, "loss": 1.036, "step": 12283 }, { "epoch": 0.99, "grad_norm": 1.4354644292201042, "learning_rate": 5.468308211179496e-09, "loss": 0.6877, "step": 12284 }, { "epoch": 0.99, "grad_norm": 1.627537603888248, "learning_rate": 5.407728930223455e-09, "loss": 0.7639, "step": 12285 }, { "epoch": 0.99, "grad_norm": 1.5356280054695592, "learning_rate": 5.347486895558307e-09, "loss": 0.7435, "step": 12286 }, { "epoch": 0.99, "grad_norm": 1.417226413121078, "learning_rate": 5.2875821112513544e-09, "loss": 0.6819, "step": 12287 }, { "epoch": 0.99, "grad_norm": 0.7496895079320679, "learning_rate": 5.228014581348806e-09, "loss": 1.0641, "step": 12288 }, { "epoch": 0.99, "grad_norm": 1.7009314785075278, "learning_rate": 5.168784309871333e-09, "loss": 0.7172, "step": 12289 }, { "epoch": 0.99, "grad_norm": 1.6564218583516788, "learning_rate": 5.10989130081907e-09, "loss": 0.7294, "step": 12290 }, { "epoch": 0.99, "grad_norm": 0.7624380478789122, "learning_rate": 5.051335558168835e-09, "loss": 1.0592, "step": 12291 }, { "epoch": 0.99, "grad_norm": 1.4539798483309492, "learning_rate": 4.993117085873578e-09, "loss": 0.6984, "step": 12292 }, { "epoch": 0.99, "grad_norm": 1.5996949254168764, "learning_rate": 4.935235887865153e-09, "loss": 0.7422, "step": 12293 }, { "epoch": 0.99, "grad_norm": 1.6242460162536, "learning_rate": 4.877691968051545e-09, "loss": 0.7289, "step": 12294 }, { "epoch": 0.99, "grad_norm": 1.627077630921616, "learning_rate": 4.820485330317981e-09, "loss": 0.7909, "step": 12295 }, { "epoch": 0.99, "grad_norm": 1.6660590276334288, "learning_rate": 4.763615978526925e-09, "loss": 0.7124, "step": 12296 }, { "epoch": 0.99, "grad_norm": 1.4695998113360933, "learning_rate": 4.70708391651975e-09, "loss": 0.7634, "step": 12297 }, { "epoch": 0.99, "grad_norm": 1.7557761399981202, "learning_rate": 4.650889148112292e-09, "loss": 0.819, "step": 12298 }, { "epoch": 0.99, "grad_norm": 1.5096338109168732, "learning_rate": 4.595031677099293e-09, "loss": 0.7463, "step": 12299 }, { "epoch": 0.99, "grad_norm": 1.4398946937485608, "learning_rate": 4.539511507252181e-09, "loss": 0.8178, "step": 12300 }, { "epoch": 0.99, "grad_norm": 0.7334287062633463, "learning_rate": 4.484328642320734e-09, "loss": 1.0539, "step": 12301 }, { "epoch": 0.99, "grad_norm": 1.614745375485301, "learning_rate": 4.429483086029751e-09, "loss": 0.8122, "step": 12302 }, { "epoch": 0.99, "grad_norm": 1.5355098288615516, "learning_rate": 4.37497484208349e-09, "loss": 0.7743, "step": 12303 }, { "epoch": 0.99, "grad_norm": 0.7420222044383968, "learning_rate": 4.320803914162341e-09, "loss": 1.0718, "step": 12304 }, { "epoch": 0.99, "grad_norm": 1.5042719620185068, "learning_rate": 4.266970305923379e-09, "loss": 0.7242, "step": 12305 }, { "epoch": 0.99, "grad_norm": 1.488958591933096, "learning_rate": 4.213474021002029e-09, "loss": 0.739, "step": 12306 }, { "epoch": 0.99, "grad_norm": 1.9413089085789392, "learning_rate": 4.1603150630104005e-09, "loss": 0.7143, "step": 12307 }, { "epoch": 0.99, "grad_norm": 1.5291371954056623, "learning_rate": 4.1074934355384015e-09, "loss": 0.664, "step": 12308 }, { "epoch": 0.99, "grad_norm": 1.5226469832861347, "learning_rate": 4.055009142152066e-09, "loss": 0.7468, "step": 12309 }, { "epoch": 0.99, "grad_norm": 1.472435045984113, "learning_rate": 4.002862186395229e-09, "loss": 0.7149, "step": 12310 }, { "epoch": 0.99, "grad_norm": 1.551233199829396, "learning_rate": 3.95105257178896e-09, "loss": 0.6887, "step": 12311 }, { "epoch": 0.99, "grad_norm": 1.4505864695931416, "learning_rate": 3.8995803018321285e-09, "loss": 0.7775, "step": 12312 }, { "epoch": 0.99, "grad_norm": 1.9558651676799548, "learning_rate": 3.848445380000288e-09, "loss": 0.757, "step": 12313 }, { "epoch": 0.99, "grad_norm": 1.4798073500410793, "learning_rate": 3.7976478097451196e-09, "loss": 0.8093, "step": 12314 }, { "epoch": 0.99, "grad_norm": 0.7611462589757173, "learning_rate": 3.74718759449777e-09, "loss": 1.0348, "step": 12315 }, { "epoch": 0.99, "grad_norm": 1.5488027863679448, "learning_rate": 3.697064737664402e-09, "loss": 0.7572, "step": 12316 }, { "epoch": 0.99, "grad_norm": 1.5590785671528982, "learning_rate": 3.6472792426306413e-09, "loss": 0.7432, "step": 12317 }, { "epoch": 0.99, "grad_norm": 1.4510692780082382, "learning_rate": 3.597831112757133e-09, "loss": 0.6941, "step": 12318 }, { "epoch": 0.99, "grad_norm": 1.4500311150031706, "learning_rate": 3.548720351382873e-09, "loss": 0.8135, "step": 12319 }, { "epoch": 0.99, "grad_norm": 0.7558188513076751, "learning_rate": 3.499946961824097e-09, "loss": 1.0481, "step": 12320 }, { "epoch": 0.99, "grad_norm": 1.4804567419309809, "learning_rate": 3.4515109473742815e-09, "loss": 0.7222, "step": 12321 }, { "epoch": 0.99, "grad_norm": 1.5024753755205242, "learning_rate": 3.4034123113035888e-09, "loss": 0.7078, "step": 12322 }, { "epoch": 0.99, "grad_norm": 1.5108404869825345, "learning_rate": 3.3556510568599763e-09, "loss": 0.7315, "step": 12323 }, { "epoch": 0.99, "grad_norm": 0.7361429246047747, "learning_rate": 3.308227187268642e-09, "loss": 1.056, "step": 12324 }, { "epoch": 0.99, "grad_norm": 1.635077365001119, "learning_rate": 3.261140705730914e-09, "loss": 0.7758, "step": 12325 }, { "epoch": 0.99, "grad_norm": 0.7740550873080957, "learning_rate": 3.214391615427026e-09, "loss": 1.0498, "step": 12326 }, { "epoch": 0.99, "grad_norm": 0.731086193580534, "learning_rate": 3.167979919512787e-09, "loss": 1.0604, "step": 12327 }, { "epoch": 0.99, "grad_norm": 1.4093174091429432, "learning_rate": 3.1219056211229117e-09, "loss": 0.7245, "step": 12328 }, { "epoch": 0.99, "grad_norm": 1.4871815680536813, "learning_rate": 3.0761687233682445e-09, "loss": 0.7547, "step": 12329 }, { "epoch": 0.99, "grad_norm": 1.7943795299152325, "learning_rate": 3.0307692293363165e-09, "loss": 0.7, "step": 12330 }, { "epoch": 0.99, "grad_norm": 1.4706743874043986, "learning_rate": 2.9857071420935636e-09, "loss": 0.7739, "step": 12331 }, { "epoch": 0.99, "grad_norm": 1.4529138111504463, "learning_rate": 2.9409824646819962e-09, "loss": 0.7916, "step": 12332 }, { "epoch": 0.99, "grad_norm": 1.4086396144610462, "learning_rate": 2.8965952001214217e-09, "loss": 0.7088, "step": 12333 }, { "epoch": 0.99, "grad_norm": 1.590369034122501, "learning_rate": 2.8525453514099966e-09, "loss": 0.7223, "step": 12334 }, { "epoch": 0.99, "grad_norm": 1.503029607013171, "learning_rate": 2.808832921520899e-09, "loss": 0.7441, "step": 12335 }, { "epoch": 0.99, "grad_norm": 0.7512640571878971, "learning_rate": 2.765457913406211e-09, "loss": 1.0645, "step": 12336 }, { "epoch": 0.99, "grad_norm": 1.3814650380745912, "learning_rate": 2.7224203299947017e-09, "loss": 0.8012, "step": 12337 }, { "epoch": 0.99, "grad_norm": 1.5183323609979942, "learning_rate": 2.6797201741923797e-09, "loss": 0.7867, "step": 12338 }, { "epoch": 0.99, "grad_norm": 1.4902387076345842, "learning_rate": 2.637357448882494e-09, "loss": 0.7897, "step": 12339 }, { "epoch": 0.99, "grad_norm": 1.7513237719229644, "learning_rate": 2.595332156925534e-09, "loss": 0.8393, "step": 12340 }, { "epoch": 0.99, "grad_norm": 1.5716354831052264, "learning_rate": 2.5536443011586753e-09, "loss": 0.7885, "step": 12341 }, { "epoch": 0.99, "grad_norm": 0.7228462327458982, "learning_rate": 2.512293884396888e-09, "loss": 1.0603, "step": 12342 }, { "epoch": 0.99, "grad_norm": 1.4718607566052602, "learning_rate": 2.4712809094329382e-09, "loss": 0.8277, "step": 12343 }, { "epoch": 0.99, "grad_norm": 1.5376839158606592, "learning_rate": 2.430605379035722e-09, "loss": 0.703, "step": 12344 }, { "epoch": 0.99, "grad_norm": 1.5045276046640932, "learning_rate": 2.3902672959513763e-09, "loss": 0.7565, "step": 12345 }, { "epoch": 0.99, "grad_norm": 1.51695777515481, "learning_rate": 2.350266662903833e-09, "loss": 0.7423, "step": 12346 }, { "epoch": 0.99, "grad_norm": 1.5991948080269145, "learning_rate": 2.3106034825942647e-09, "loss": 0.6999, "step": 12347 }, { "epoch": 0.99, "grad_norm": 1.5413705772405415, "learning_rate": 2.2712777577005297e-09, "loss": 0.8805, "step": 12348 }, { "epoch": 0.99, "grad_norm": 1.5083754354977248, "learning_rate": 2.232289490878281e-09, "loss": 0.7604, "step": 12349 }, { "epoch": 0.99, "grad_norm": 1.5105730557216628, "learning_rate": 2.193638684759858e-09, "loss": 0.6205, "step": 12350 }, { "epoch": 0.99, "grad_norm": 1.5449658289786963, "learning_rate": 2.1553253419553943e-09, "loss": 0.7027, "step": 12351 }, { "epoch": 0.99, "grad_norm": 1.4684063082632595, "learning_rate": 2.117349465051155e-09, "loss": 0.7107, "step": 12352 }, { "epoch": 0.99, "grad_norm": 1.6665053839933863, "learning_rate": 2.079711056611755e-09, "loss": 0.777, "step": 12353 }, { "epoch": 0.99, "grad_norm": 1.4839248520617672, "learning_rate": 2.04241011917905e-09, "loss": 0.7693, "step": 12354 }, { "epoch": 0.99, "grad_norm": 1.486740195732885, "learning_rate": 2.005446655271581e-09, "loss": 0.7065, "step": 12355 }, { "epoch": 0.99, "grad_norm": 1.493732473635201, "learning_rate": 1.9688206673845746e-09, "loss": 0.749, "step": 12356 }, { "epoch": 0.99, "grad_norm": 1.690767987170318, "learning_rate": 1.9325321579916066e-09, "loss": 0.7226, "step": 12357 }, { "epoch": 0.99, "grad_norm": 1.4783019805803208, "learning_rate": 1.8965811295423854e-09, "loss": 0.6912, "step": 12358 }, { "epoch": 0.99, "grad_norm": 0.7579287411647264, "learning_rate": 1.8609675844655228e-09, "loss": 1.0781, "step": 12359 }, { "epoch": 0.99, "grad_norm": 1.406472877462492, "learning_rate": 1.8256915251646524e-09, "loss": 0.7407, "step": 12360 }, { "epoch": 0.99, "grad_norm": 1.5482737380476663, "learning_rate": 1.7907529540223123e-09, "loss": 0.7179, "step": 12361 }, { "epoch": 0.99, "grad_norm": 1.383327947631836, "learning_rate": 1.7561518733977267e-09, "loss": 0.7153, "step": 12362 }, { "epoch": 0.99, "grad_norm": 1.5136647498613403, "learning_rate": 1.7218882856262498e-09, "loss": 0.7669, "step": 12363 }, { "epoch": 0.99, "grad_norm": 1.6695650755174116, "learning_rate": 1.6879621930226963e-09, "loss": 0.6661, "step": 12364 }, { "epoch": 0.99, "grad_norm": 2.3339222890611686, "learning_rate": 1.6543735978769016e-09, "loss": 0.7798, "step": 12365 }, { "epoch": 0.99, "grad_norm": 1.4379200470380373, "learning_rate": 1.621122502457606e-09, "loss": 0.7277, "step": 12366 }, { "epoch": 0.99, "grad_norm": 1.5596792388106255, "learning_rate": 1.5882089090091257e-09, "loss": 0.807, "step": 12367 }, { "epoch": 0.99, "grad_norm": 1.5379239496515134, "learning_rate": 1.555632819754682e-09, "loss": 0.7234, "step": 12368 }, { "epoch": 0.99, "grad_norm": 1.472812001735286, "learning_rate": 1.523394236893072e-09, "loss": 0.7131, "step": 12369 }, { "epoch": 0.99, "grad_norm": 1.5090647393358532, "learning_rate": 1.491493162601998e-09, "loss": 0.7962, "step": 12370 }, { "epoch": 0.99, "grad_norm": 1.554072329024694, "learning_rate": 1.4599295990352924e-09, "loss": 0.7458, "step": 12371 }, { "epoch": 0.99, "grad_norm": 1.5105888924677444, "learning_rate": 1.4287035483234734e-09, "loss": 0.7906, "step": 12372 }, { "epoch": 0.99, "grad_norm": 0.755455342623131, "learning_rate": 1.3978150125759649e-09, "loss": 1.0282, "step": 12373 }, { "epoch": 0.99, "grad_norm": 1.615004117411443, "learning_rate": 1.3672639938777655e-09, "loss": 0.705, "step": 12374 }, { "epoch": 0.99, "grad_norm": 1.4652872621022046, "learning_rate": 1.3370504942922246e-09, "loss": 0.7823, "step": 12375 }, { "epoch": 0.99, "grad_norm": 1.6091127744552964, "learning_rate": 1.3071745158588222e-09, "loss": 0.7278, "step": 12376 }, { "epoch": 0.99, "grad_norm": 1.5352274209639358, "learning_rate": 1.2776360605953887e-09, "loss": 0.7839, "step": 12377 }, { "epoch": 0.99, "grad_norm": 1.4072945170315727, "learning_rate": 1.2484351304958841e-09, "loss": 0.6929, "step": 12378 }, { "epoch": 0.99, "grad_norm": 1.5433956173817382, "learning_rate": 1.2195717275326201e-09, "loss": 0.8055, "step": 12379 }, { "epoch": 0.99, "grad_norm": 1.4568118489246025, "learning_rate": 1.1910458536545932e-09, "loss": 0.7475, "step": 12380 }, { "epoch": 0.99, "grad_norm": 1.554751420041771, "learning_rate": 1.16285751078693e-09, "loss": 0.7412, "step": 12381 }, { "epoch": 0.99, "grad_norm": 1.385779361034528, "learning_rate": 1.135006700834218e-09, "loss": 0.7669, "step": 12382 }, { "epoch": 0.99, "grad_norm": 1.5270886371657446, "learning_rate": 1.107493425676065e-09, "loss": 0.7365, "step": 12383 }, { "epoch": 0.99, "grad_norm": 1.419052047834366, "learning_rate": 1.080317687170984e-09, "loss": 0.6503, "step": 12384 }, { "epoch": 0.99, "grad_norm": 1.5640293123782238, "learning_rate": 1.0534794871536192e-09, "loss": 0.748, "step": 12385 }, { "epoch": 0.99, "grad_norm": 1.3438032628627603, "learning_rate": 1.026978827435854e-09, "loss": 0.7754, "step": 12386 }, { "epoch": 0.99, "grad_norm": 1.5077917461770538, "learning_rate": 1.0008157098073678e-09, "loss": 0.8437, "step": 12387 }, { "epoch": 0.99, "grad_norm": 1.3589242801510977, "learning_rate": 9.749901360345259e-10, "loss": 0.7538, "step": 12388 }, { "epoch": 0.99, "grad_norm": 1.5695586354462252, "learning_rate": 9.495021078614885e-10, "loss": 0.7758, "step": 12389 }, { "epoch": 0.99, "grad_norm": 1.4540572797076856, "learning_rate": 9.243516270091013e-10, "loss": 0.7178, "step": 12390 }, { "epoch": 0.99, "grad_norm": 1.5701355305410813, "learning_rate": 8.995386951754503e-10, "loss": 0.7607, "step": 12391 }, { "epoch": 0.99, "grad_norm": 1.5546246274554316, "learning_rate": 8.750633140364173e-10, "loss": 0.6916, "step": 12392 }, { "epoch": 0.99, "grad_norm": 1.508841982687282, "learning_rate": 8.509254852440141e-10, "loss": 0.8504, "step": 12393 }, { "epoch": 0.99, "grad_norm": 1.5253749977451414, "learning_rate": 8.271252104286032e-10, "loss": 0.7436, "step": 12394 }, { "epoch": 0.99, "grad_norm": 1.4940266725044131, "learning_rate": 8.036624911966773e-10, "loss": 0.6499, "step": 12395 }, { "epoch": 0.99, "grad_norm": 1.5403224290178037, "learning_rate": 7.805373291330797e-10, "loss": 0.7586, "step": 12396 }, { "epoch": 0.99, "grad_norm": 0.7716576136903933, "learning_rate": 7.577497257987842e-10, "loss": 1.0623, "step": 12397 }, { "epoch": 0.99, "grad_norm": 1.5126627112817044, "learning_rate": 7.352996827325598e-10, "loss": 0.7789, "step": 12398 }, { "epoch": 0.99, "grad_norm": 1.46529754912899, "learning_rate": 7.131872014509711e-10, "loss": 0.713, "step": 12399 }, { "epoch": 0.99, "grad_norm": 1.5364920105125914, "learning_rate": 6.914122834461578e-10, "loss": 0.7531, "step": 12400 }, { "epoch": 0.99, "grad_norm": 1.5392092102316044, "learning_rate": 6.699749301886105e-10, "loss": 0.7368, "step": 12401 }, { "epoch": 1.0, "grad_norm": 1.563051643212222, "learning_rate": 6.488751431266149e-10, "loss": 0.7665, "step": 12402 }, { "epoch": 1.0, "grad_norm": 1.5778020215977786, "learning_rate": 6.281129236834771e-10, "loss": 0.7361, "step": 12403 }, { "epoch": 1.0, "grad_norm": 1.5131734586385042, "learning_rate": 6.076882732625189e-10, "loss": 0.7586, "step": 12404 }, { "epoch": 1.0, "grad_norm": 1.5682417403069193, "learning_rate": 5.876011932420822e-10, "loss": 0.7201, "step": 12405 }, { "epoch": 1.0, "grad_norm": 1.4273772435684127, "learning_rate": 5.678516849788596e-10, "loss": 0.7603, "step": 12406 }, { "epoch": 1.0, "grad_norm": 1.6289370658945002, "learning_rate": 5.484397498056737e-10, "loss": 0.7714, "step": 12407 }, { "epoch": 1.0, "grad_norm": 1.581494306729302, "learning_rate": 5.293653890342531e-10, "loss": 0.6706, "step": 12408 }, { "epoch": 1.0, "grad_norm": 1.5645787385282115, "learning_rate": 5.106286039519015e-10, "loss": 0.7714, "step": 12409 }, { "epoch": 1.0, "grad_norm": 1.4123927399625205, "learning_rate": 4.922293958237179e-10, "loss": 0.7601, "step": 12410 }, { "epoch": 1.0, "grad_norm": 0.7379264995326732, "learning_rate": 4.74167765892597e-10, "loss": 1.0322, "step": 12411 }, { "epoch": 1.0, "grad_norm": 0.7369683261807992, "learning_rate": 4.5644371537756363e-10, "loss": 1.0672, "step": 12412 }, { "epoch": 1.0, "grad_norm": 1.649078715418298, "learning_rate": 4.390572454759934e-10, "loss": 0.8638, "step": 12413 }, { "epoch": 1.0, "grad_norm": 0.7412885669364163, "learning_rate": 4.220083573608369e-10, "loss": 1.032, "step": 12414 }, { "epoch": 1.0, "grad_norm": 1.4385200448089004, "learning_rate": 4.0529705218450565e-10, "loss": 0.6634, "step": 12415 }, { "epoch": 1.0, "grad_norm": 0.7443068490867211, "learning_rate": 3.8892333107443114e-10, "loss": 1.0158, "step": 12416 }, { "epoch": 1.0, "grad_norm": 1.6079441955540794, "learning_rate": 3.7288719513639547e-10, "loss": 0.7567, "step": 12417 }, { "epoch": 1.0, "grad_norm": 1.4130708091305078, "learning_rate": 3.5718864545397636e-10, "loss": 0.6372, "step": 12418 }, { "epoch": 1.0, "grad_norm": 1.5859746330420321, "learning_rate": 3.4182768308577143e-10, "loss": 0.7583, "step": 12419 }, { "epoch": 1.0, "grad_norm": 1.4701033062344357, "learning_rate": 3.268043090703943e-10, "loss": 0.703, "step": 12420 }, { "epoch": 1.0, "grad_norm": 1.4179732202659425, "learning_rate": 3.121185244214786e-10, "loss": 0.7286, "step": 12421 }, { "epoch": 1.0, "grad_norm": 0.7472972937964611, "learning_rate": 2.9777033013045354e-10, "loss": 1.025, "step": 12422 }, { "epoch": 1.0, "grad_norm": 1.5326423140331222, "learning_rate": 2.8375972716709887e-10, "loss": 0.7196, "step": 12423 }, { "epoch": 1.0, "grad_norm": 1.5818943315646454, "learning_rate": 2.7008671647621443e-10, "loss": 0.818, "step": 12424 }, { "epoch": 1.0, "grad_norm": 1.5629073842611907, "learning_rate": 2.5675129898206086e-10, "loss": 0.706, "step": 12425 }, { "epoch": 1.0, "grad_norm": 1.5064179116304723, "learning_rate": 2.437534755844739e-10, "loss": 0.7921, "step": 12426 }, { "epoch": 1.0, "grad_norm": 1.435174155150378, "learning_rate": 2.3109324716108494e-10, "loss": 0.7652, "step": 12427 }, { "epoch": 1.0, "grad_norm": 0.7633479043862755, "learning_rate": 2.1877061456732073e-10, "loss": 1.0864, "step": 12428 }, { "epoch": 1.0, "grad_norm": 1.607472226370612, "learning_rate": 2.0678557863473837e-10, "loss": 0.901, "step": 12429 }, { "epoch": 1.0, "grad_norm": 2.040320903586556, "learning_rate": 1.951381401726904e-10, "loss": 0.7566, "step": 12430 }, { "epoch": 1.0, "grad_norm": 1.4686642639889502, "learning_rate": 1.8382829996776986e-10, "loss": 0.7557, "step": 12431 }, { "epoch": 1.0, "grad_norm": 1.46272487402183, "learning_rate": 1.7285605878325507e-10, "loss": 0.789, "step": 12432 }, { "epoch": 1.0, "grad_norm": 1.591254863078844, "learning_rate": 1.622214173602199e-10, "loss": 0.6589, "step": 12433 }, { "epoch": 1.0, "grad_norm": 1.604645232425831, "learning_rate": 1.5192437641753376e-10, "loss": 0.6246, "step": 12434 }, { "epoch": 1.0, "grad_norm": 1.3995279883125502, "learning_rate": 1.4196493664908607e-10, "loss": 0.6972, "step": 12435 }, { "epoch": 1.0, "grad_norm": 1.453823331467375, "learning_rate": 1.3234309872822703e-10, "loss": 0.7527, "step": 12436 }, { "epoch": 1.0, "grad_norm": 1.8064464675817895, "learning_rate": 1.2305886330443716e-10, "loss": 0.8389, "step": 12437 }, { "epoch": 1.0, "grad_norm": 1.4556165441019453, "learning_rate": 1.1411223100443735e-10, "loss": 0.6374, "step": 12438 }, { "epoch": 1.0, "grad_norm": 0.7506123674036117, "learning_rate": 1.0550320243274403e-10, "loss": 1.0391, "step": 12439 }, { "epoch": 1.0, "grad_norm": 1.428876842869932, "learning_rate": 9.723177817055896e-11, "loss": 0.6849, "step": 12440 }, { "epoch": 1.0, "grad_norm": 1.450156566962703, "learning_rate": 8.92979587757692e-11, "loss": 0.6504, "step": 12441 }, { "epoch": 1.0, "grad_norm": 1.5118516833768456, "learning_rate": 8.170174478516756e-11, "loss": 0.845, "step": 12442 }, { "epoch": 1.0, "grad_norm": 1.6105020279930118, "learning_rate": 7.44431367105669e-11, "loss": 0.7797, "step": 12443 }, { "epoch": 1.0, "grad_norm": 1.6155013061660637, "learning_rate": 6.752213504268579e-11, "loss": 0.8018, "step": 12444 }, { "epoch": 1.0, "grad_norm": 1.7001580338741185, "learning_rate": 6.09387402489281e-11, "loss": 0.7511, "step": 12445 }, { "epoch": 1.0, "grad_norm": 1.3942888866988503, "learning_rate": 5.4692952773938155e-11, "loss": 0.704, "step": 12446 }, { "epoch": 1.0, "grad_norm": 1.5279643983982922, "learning_rate": 4.87847730384905e-11, "loss": 0.7237, "step": 12447 }, { "epoch": 1.0, "grad_norm": 1.605312948576224, "learning_rate": 4.32142014422654e-11, "loss": 0.77, "step": 12448 }, { "epoch": 1.0, "grad_norm": 1.6121600648041308, "learning_rate": 3.798123836162848e-11, "loss": 0.7497, "step": 12449 }, { "epoch": 1.0, "grad_norm": 1.6002711686780942, "learning_rate": 3.3085884149630654e-11, "loss": 0.803, "step": 12450 }, { "epoch": 1.0, "grad_norm": 1.5475687904048794, "learning_rate": 2.8528139136563272e-11, "loss": 0.7028, "step": 12451 }, { "epoch": 1.0, "grad_norm": 1.4948529313309675, "learning_rate": 2.4308003630513222e-11, "loss": 0.7782, "step": 12452 }, { "epoch": 1.0, "grad_norm": 1.519321225948849, "learning_rate": 2.042547791625271e-11, "loss": 0.7944, "step": 12453 }, { "epoch": 1.0, "grad_norm": 1.6002422473651947, "learning_rate": 1.6880562255794375e-11, "loss": 0.7763, "step": 12454 }, { "epoch": 1.0, "grad_norm": 1.580476020350656, "learning_rate": 1.3673256889501496e-11, "loss": 0.7203, "step": 12455 }, { "epoch": 1.0, "grad_norm": 1.4724385027106994, "learning_rate": 1.080356203220223e-11, "loss": 0.6918, "step": 12456 }, { "epoch": 1.0, "grad_norm": 1.4346772718229068, "learning_rate": 8.27147787929583e-12, "loss": 0.7416, "step": 12457 }, { "epoch": 1.0, "grad_norm": 1.6017589775414862, "learning_rate": 6.077004600646419e-12, "loss": 0.7453, "step": 12458 }, { "epoch": 1.0, "grad_norm": 1.5739089051251347, "learning_rate": 4.220142345023881e-12, "loss": 0.714, "step": 12459 }, { "epoch": 1.0, "grad_norm": 1.6094207268184746, "learning_rate": 2.700891237328307e-12, "loss": 0.725, "step": 12460 }, { "epoch": 1.0, "grad_norm": 1.5980993674607418, "learning_rate": 1.5192513808104381e-12, "loss": 0.8227, "step": 12461 }, { "epoch": 1.0, "grad_norm": 1.5140374223815996, "learning_rate": 6.752228548512208e-13, "loss": 0.7531, "step": 12462 }, { "epoch": 1.0, "grad_norm": 0.7687265906311198, "learning_rate": 1.6880571662714064e-13, "loss": 1.0721, "step": 12463 }, { "epoch": 1.0, "grad_norm": 1.4548610715303427, "learning_rate": 0.0, "loss": 0.6391, "step": 12464 }, { "epoch": 1.0, "step": 12464, "total_flos": 7038884969086976.0, "train_loss": 0.8383206710774112, "train_runtime": 88593.6851, "train_samples_per_second": 36.017, "train_steps_per_second": 0.141 } ], "logging_steps": 1.0, "max_steps": 12464, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 7038884969086976.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }