{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.98961937716263, "eval_steps": 500, "global_step": 3897, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002306805074971165, "grad_norm": 0.0, "learning_rate": 1.7094017094017097e-07, "loss": 8.4508, "step": 1 }, { "epoch": 0.00461361014994233, "grad_norm": 0.0, "learning_rate": 3.4188034188034194e-07, "loss": 8.0474, "step": 2 }, { "epoch": 0.006920415224913495, "grad_norm": 0.0, "learning_rate": 5.128205128205128e-07, "loss": 8.3708, "step": 3 }, { "epoch": 0.00922722029988466, "grad_norm": 0.0, "learning_rate": 6.837606837606839e-07, "loss": 8.4639, "step": 4 }, { "epoch": 0.011534025374855825, "grad_norm": 0.0, "learning_rate": 8.547008547008548e-07, "loss": 8.0678, "step": 5 }, { "epoch": 0.01384083044982699, "grad_norm": 0.0, "learning_rate": 1.0256410256410257e-06, "loss": 7.6616, "step": 6 }, { "epoch": 0.016147635524798153, "grad_norm": 0.0, "learning_rate": 1.1965811965811968e-06, "loss": 8.9771, "step": 7 }, { "epoch": 0.01845444059976932, "grad_norm": 0.0, "learning_rate": 1.3675213675213678e-06, "loss": 8.5794, "step": 8 }, { "epoch": 0.020761245674740483, "grad_norm": 0.0, "learning_rate": 1.5384615384615387e-06, "loss": 8.0316, "step": 9 }, { "epoch": 0.02306805074971165, "grad_norm": 0.0, "learning_rate": 1.7094017094017097e-06, "loss": 8.0619, "step": 10 }, { "epoch": 0.025374855824682813, "grad_norm": 0.0, "learning_rate": 1.8803418803418804e-06, "loss": 7.922, "step": 11 }, { "epoch": 0.02768166089965398, "grad_norm": 0.0, "learning_rate": 2.0512820512820513e-06, "loss": 8.1813, "step": 12 }, { "epoch": 0.029988465974625143, "grad_norm": 0.0, "learning_rate": 2.222222222222222e-06, "loss": 7.2392, "step": 13 }, { "epoch": 0.03229527104959631, "grad_norm": 0.0, "learning_rate": 2.3931623931623937e-06, "loss": 6.1259, "step": 14 }, { "epoch": 0.03460207612456748, "grad_norm": 0.0, "learning_rate": 2.564102564102564e-06, "loss": 6.2235, "step": 15 }, { "epoch": 0.03690888119953864, "grad_norm": 0.0, "learning_rate": 2.7350427350427355e-06, "loss": 5.5032, "step": 16 }, { "epoch": 0.0392156862745098, "grad_norm": 0.0, "learning_rate": 2.9059829059829063e-06, "loss": 5.3191, "step": 17 }, { "epoch": 0.04152249134948097, "grad_norm": 0.0, "learning_rate": 3.0769230769230774e-06, "loss": 5.3464, "step": 18 }, { "epoch": 0.04382929642445214, "grad_norm": 0.0, "learning_rate": 3.247863247863248e-06, "loss": 4.8332, "step": 19 }, { "epoch": 0.0461361014994233, "grad_norm": 0.0, "learning_rate": 3.4188034188034193e-06, "loss": 4.3945, "step": 20 }, { "epoch": 0.04844290657439446, "grad_norm": 0.0, "learning_rate": 3.58974358974359e-06, "loss": 4.1624, "step": 21 }, { "epoch": 0.05074971164936563, "grad_norm": 0.0, "learning_rate": 3.760683760683761e-06, "loss": 2.8312, "step": 22 }, { "epoch": 0.0530565167243368, "grad_norm": 0.0, "learning_rate": 3.9316239316239315e-06, "loss": 2.5994, "step": 23 }, { "epoch": 0.05536332179930796, "grad_norm": 0.0, "learning_rate": 4.102564102564103e-06, "loss": 2.7753, "step": 24 }, { "epoch": 0.05767012687427912, "grad_norm": 0.0, "learning_rate": 4.273504273504274e-06, "loss": 2.7568, "step": 25 }, { "epoch": 0.05997693194925029, "grad_norm": 0.0, "learning_rate": 4.444444444444444e-06, "loss": 2.5088, "step": 26 }, { "epoch": 0.06228373702422145, "grad_norm": 0.0, "learning_rate": 4.615384615384616e-06, "loss": 2.3828, "step": 27 }, { "epoch": 0.06459054209919261, "grad_norm": 0.0, "learning_rate": 4.786324786324787e-06, "loss": 2.2915, "step": 28 }, { "epoch": 0.06689734717416378, "grad_norm": 0.0, "learning_rate": 4.957264957264958e-06, "loss": 2.2262, "step": 29 }, { "epoch": 0.06920415224913495, "grad_norm": 0.0, "learning_rate": 5.128205128205128e-06, "loss": 3.11, "step": 30 }, { "epoch": 0.07151095732410612, "grad_norm": 0.0, "learning_rate": 5.2991452991453e-06, "loss": 2.3123, "step": 31 }, { "epoch": 0.07381776239907728, "grad_norm": 0.0, "learning_rate": 5.470085470085471e-06, "loss": 2.3687, "step": 32 }, { "epoch": 0.07612456747404844, "grad_norm": 0.0, "learning_rate": 5.641025641025641e-06, "loss": 2.1863, "step": 33 }, { "epoch": 0.0784313725490196, "grad_norm": 0.0, "learning_rate": 5.8119658119658126e-06, "loss": 2.8675, "step": 34 }, { "epoch": 0.08073817762399077, "grad_norm": 0.0, "learning_rate": 5.982905982905983e-06, "loss": 1.8582, "step": 35 }, { "epoch": 0.08304498269896193, "grad_norm": 0.0, "learning_rate": 6.153846153846155e-06, "loss": 2.0706, "step": 36 }, { "epoch": 0.0853517877739331, "grad_norm": 0.0, "learning_rate": 6.324786324786325e-06, "loss": 2.4735, "step": 37 }, { "epoch": 0.08765859284890427, "grad_norm": 0.0, "learning_rate": 6.495726495726496e-06, "loss": 2.0614, "step": 38 }, { "epoch": 0.08996539792387544, "grad_norm": 0.0, "learning_rate": 6.666666666666667e-06, "loss": 2.5187, "step": 39 }, { "epoch": 0.0922722029988466, "grad_norm": 0.0, "learning_rate": 6.837606837606839e-06, "loss": 1.9505, "step": 40 }, { "epoch": 0.09457900807381776, "grad_norm": 0.0, "learning_rate": 7.008547008547009e-06, "loss": 1.9036, "step": 41 }, { "epoch": 0.09688581314878893, "grad_norm": 0.0, "learning_rate": 7.17948717948718e-06, "loss": 1.8737, "step": 42 }, { "epoch": 0.09919261822376009, "grad_norm": 0.0, "learning_rate": 7.350427350427351e-06, "loss": 2.0507, "step": 43 }, { "epoch": 0.10149942329873125, "grad_norm": 0.0, "learning_rate": 7.521367521367522e-06, "loss": 1.3861, "step": 44 }, { "epoch": 0.10380622837370242, "grad_norm": 0.0, "learning_rate": 7.692307692307694e-06, "loss": 1.7961, "step": 45 }, { "epoch": 0.1061130334486736, "grad_norm": 0.0, "learning_rate": 7.863247863247863e-06, "loss": 1.8537, "step": 46 }, { "epoch": 0.10841983852364476, "grad_norm": 0.0, "learning_rate": 8.034188034188036e-06, "loss": 2.1222, "step": 47 }, { "epoch": 0.11072664359861592, "grad_norm": 0.0, "learning_rate": 8.205128205128205e-06, "loss": 1.7843, "step": 48 }, { "epoch": 0.11303344867358708, "grad_norm": 0.0, "learning_rate": 8.376068376068377e-06, "loss": 1.607, "step": 49 }, { "epoch": 0.11534025374855825, "grad_norm": 0.0, "learning_rate": 8.547008547008548e-06, "loss": 2.1408, "step": 50 }, { "epoch": 0.11764705882352941, "grad_norm": 0.0, "learning_rate": 8.717948717948719e-06, "loss": 1.9771, "step": 51 }, { "epoch": 0.11995386389850057, "grad_norm": 0.0, "learning_rate": 8.888888888888888e-06, "loss": 1.6614, "step": 52 }, { "epoch": 0.12226066897347174, "grad_norm": 0.0, "learning_rate": 9.059829059829061e-06, "loss": 1.3775, "step": 53 }, { "epoch": 0.1245674740484429, "grad_norm": 0.0, "learning_rate": 9.230769230769232e-06, "loss": 1.7402, "step": 54 }, { "epoch": 0.12687427912341406, "grad_norm": 0.0, "learning_rate": 9.401709401709402e-06, "loss": 1.6679, "step": 55 }, { "epoch": 0.12918108419838523, "grad_norm": 0.0, "learning_rate": 9.572649572649575e-06, "loss": 1.6464, "step": 56 }, { "epoch": 0.1314878892733564, "grad_norm": 0.0, "learning_rate": 9.743589743589744e-06, "loss": 1.4238, "step": 57 }, { "epoch": 0.13379469434832755, "grad_norm": 0.0, "learning_rate": 9.914529914529915e-06, "loss": 1.7369, "step": 58 }, { "epoch": 0.13610149942329874, "grad_norm": 0.0, "learning_rate": 1.0085470085470086e-05, "loss": 1.0986, "step": 59 }, { "epoch": 0.1384083044982699, "grad_norm": 0.0, "learning_rate": 1.0256410256410256e-05, "loss": 1.5508, "step": 60 }, { "epoch": 0.14071510957324107, "grad_norm": 0.0, "learning_rate": 1.0427350427350429e-05, "loss": 1.1049, "step": 61 }, { "epoch": 0.14302191464821223, "grad_norm": 0.0, "learning_rate": 1.05982905982906e-05, "loss": 1.6256, "step": 62 }, { "epoch": 0.1453287197231834, "grad_norm": 0.0, "learning_rate": 1.076923076923077e-05, "loss": 1.4749, "step": 63 }, { "epoch": 0.14763552479815456, "grad_norm": 0.0, "learning_rate": 1.0940170940170942e-05, "loss": 1.6895, "step": 64 }, { "epoch": 0.14994232987312572, "grad_norm": 0.0, "learning_rate": 1.1111111111111113e-05, "loss": 1.8539, "step": 65 }, { "epoch": 0.1522491349480969, "grad_norm": 0.0, "learning_rate": 1.1282051282051283e-05, "loss": 1.5428, "step": 66 }, { "epoch": 0.15455594002306805, "grad_norm": 0.0, "learning_rate": 1.1452991452991454e-05, "loss": 1.1057, "step": 67 }, { "epoch": 0.1568627450980392, "grad_norm": 0.0, "learning_rate": 1.1623931623931625e-05, "loss": 1.4131, "step": 68 }, { "epoch": 0.15916955017301038, "grad_norm": 0.0, "learning_rate": 1.1794871794871796e-05, "loss": 1.6288, "step": 69 }, { "epoch": 0.16147635524798154, "grad_norm": 0.0, "learning_rate": 1.1965811965811966e-05, "loss": 1.6981, "step": 70 }, { "epoch": 0.1637831603229527, "grad_norm": 0.0, "learning_rate": 1.2136752136752137e-05, "loss": 1.5066, "step": 71 }, { "epoch": 0.16608996539792387, "grad_norm": 0.0, "learning_rate": 1.230769230769231e-05, "loss": 2.0456, "step": 72 }, { "epoch": 0.16839677047289503, "grad_norm": 0.0, "learning_rate": 1.247863247863248e-05, "loss": 1.2475, "step": 73 }, { "epoch": 0.1707035755478662, "grad_norm": 0.0, "learning_rate": 1.264957264957265e-05, "loss": 1.7418, "step": 74 }, { "epoch": 0.17301038062283736, "grad_norm": 0.0, "learning_rate": 1.2820512820512823e-05, "loss": 1.444, "step": 75 }, { "epoch": 0.17531718569780855, "grad_norm": 0.0, "learning_rate": 1.2991452991452993e-05, "loss": 1.688, "step": 76 }, { "epoch": 0.1776239907727797, "grad_norm": 0.0, "learning_rate": 1.3162393162393164e-05, "loss": 1.4214, "step": 77 }, { "epoch": 0.17993079584775087, "grad_norm": 0.0, "learning_rate": 1.3333333333333333e-05, "loss": 1.9668, "step": 78 }, { "epoch": 0.18223760092272204, "grad_norm": 0.0, "learning_rate": 1.3504273504273506e-05, "loss": 1.748, "step": 79 }, { "epoch": 0.1845444059976932, "grad_norm": 0.0, "learning_rate": 1.3675213675213677e-05, "loss": 1.4736, "step": 80 }, { "epoch": 0.18685121107266436, "grad_norm": 0.0, "learning_rate": 1.3846153846153847e-05, "loss": 1.6399, "step": 81 }, { "epoch": 0.18915801614763553, "grad_norm": 0.0, "learning_rate": 1.4017094017094018e-05, "loss": 1.1332, "step": 82 }, { "epoch": 0.1914648212226067, "grad_norm": 0.0, "learning_rate": 1.4188034188034189e-05, "loss": 1.4893, "step": 83 }, { "epoch": 0.19377162629757785, "grad_norm": 0.0, "learning_rate": 1.435897435897436e-05, "loss": 1.3734, "step": 84 }, { "epoch": 0.19607843137254902, "grad_norm": 0.0, "learning_rate": 1.4529914529914531e-05, "loss": 1.3934, "step": 85 }, { "epoch": 0.19838523644752018, "grad_norm": 0.0, "learning_rate": 1.4700854700854703e-05, "loss": 1.3689, "step": 86 }, { "epoch": 0.20069204152249134, "grad_norm": 0.0, "learning_rate": 1.4871794871794874e-05, "loss": 1.0738, "step": 87 }, { "epoch": 0.2029988465974625, "grad_norm": 0.0, "learning_rate": 1.5042735042735043e-05, "loss": 1.2578, "step": 88 }, { "epoch": 0.20530565167243367, "grad_norm": 0.0, "learning_rate": 1.5213675213675214e-05, "loss": 1.6239, "step": 89 }, { "epoch": 0.20761245674740483, "grad_norm": 0.0, "learning_rate": 1.5384615384615387e-05, "loss": 1.7462, "step": 90 }, { "epoch": 0.209919261822376, "grad_norm": 0.0, "learning_rate": 1.555555555555556e-05, "loss": 1.2939, "step": 91 }, { "epoch": 0.2122260668973472, "grad_norm": 0.0, "learning_rate": 1.5726495726495726e-05, "loss": 1.2444, "step": 92 }, { "epoch": 0.21453287197231835, "grad_norm": 0.0, "learning_rate": 1.5897435897435897e-05, "loss": 1.4019, "step": 93 }, { "epoch": 0.21683967704728951, "grad_norm": 0.0, "learning_rate": 1.6068376068376072e-05, "loss": 1.3926, "step": 94 }, { "epoch": 0.21914648212226068, "grad_norm": 0.0, "learning_rate": 1.623931623931624e-05, "loss": 1.4956, "step": 95 }, { "epoch": 0.22145328719723184, "grad_norm": 0.0, "learning_rate": 1.641025641025641e-05, "loss": 1.4025, "step": 96 }, { "epoch": 0.223760092272203, "grad_norm": 0.0, "learning_rate": 1.6581196581196585e-05, "loss": 2.0986, "step": 97 }, { "epoch": 0.22606689734717417, "grad_norm": 0.0, "learning_rate": 1.6752136752136753e-05, "loss": 1.3982, "step": 98 }, { "epoch": 0.22837370242214533, "grad_norm": 0.0, "learning_rate": 1.6923076923076924e-05, "loss": 1.6585, "step": 99 }, { "epoch": 0.2306805074971165, "grad_norm": 0.0, "learning_rate": 1.7094017094017095e-05, "loss": 1.5725, "step": 100 }, { "epoch": 0.23298731257208766, "grad_norm": 0.0, "learning_rate": 1.7264957264957267e-05, "loss": 1.3001, "step": 101 }, { "epoch": 0.23529411764705882, "grad_norm": 0.0, "learning_rate": 1.7435897435897438e-05, "loss": 1.4395, "step": 102 }, { "epoch": 0.23760092272202998, "grad_norm": 0.0, "learning_rate": 1.760683760683761e-05, "loss": 1.7317, "step": 103 }, { "epoch": 0.23990772779700115, "grad_norm": 0.0, "learning_rate": 1.7777777777777777e-05, "loss": 1.5907, "step": 104 }, { "epoch": 0.2422145328719723, "grad_norm": 0.0, "learning_rate": 1.794871794871795e-05, "loss": 1.7229, "step": 105 }, { "epoch": 0.24452133794694347, "grad_norm": 0.0, "learning_rate": 1.8119658119658122e-05, "loss": 1.7672, "step": 106 }, { "epoch": 0.24682814302191464, "grad_norm": 0.0, "learning_rate": 1.829059829059829e-05, "loss": 1.511, "step": 107 }, { "epoch": 0.2491349480968858, "grad_norm": 0.0, "learning_rate": 1.8461538461538465e-05, "loss": 1.6003, "step": 108 }, { "epoch": 0.25144175317185696, "grad_norm": 0.0, "learning_rate": 1.8632478632478636e-05, "loss": 1.2508, "step": 109 }, { "epoch": 0.2537485582468281, "grad_norm": 0.0, "learning_rate": 1.8803418803418804e-05, "loss": 1.2233, "step": 110 }, { "epoch": 0.2560553633217993, "grad_norm": 0.0, "learning_rate": 1.8974358974358975e-05, "loss": 1.4052, "step": 111 }, { "epoch": 0.25836216839677045, "grad_norm": 0.0, "learning_rate": 1.914529914529915e-05, "loss": 1.1971, "step": 112 }, { "epoch": 0.2606689734717416, "grad_norm": 0.0, "learning_rate": 1.9316239316239317e-05, "loss": 1.4098, "step": 113 }, { "epoch": 0.2629757785467128, "grad_norm": 0.0, "learning_rate": 1.9487179487179488e-05, "loss": 1.4005, "step": 114 }, { "epoch": 0.26528258362168394, "grad_norm": 0.0, "learning_rate": 1.965811965811966e-05, "loss": 1.5905, "step": 115 }, { "epoch": 0.2675893886966551, "grad_norm": 0.0, "learning_rate": 1.982905982905983e-05, "loss": 1.2455, "step": 116 }, { "epoch": 0.2698961937716263, "grad_norm": 0.0, "learning_rate": 2e-05, "loss": 0.967, "step": 117 }, { "epoch": 0.2722029988465975, "grad_norm": 0.0, "learning_rate": 1.9999996546287957e-05, "loss": 2.0659, "step": 118 }, { "epoch": 0.27450980392156865, "grad_norm": 0.0, "learning_rate": 1.9999986185154213e-05, "loss": 1.5323, "step": 119 }, { "epoch": 0.2768166089965398, "grad_norm": 0.0, "learning_rate": 1.999996891660592e-05, "loss": 1.1827, "step": 120 }, { "epoch": 0.279123414071511, "grad_norm": 0.0, "learning_rate": 1.9999944740655016e-05, "loss": 1.4273, "step": 121 }, { "epoch": 0.28143021914648214, "grad_norm": 0.0, "learning_rate": 1.999991365731819e-05, "loss": 1.4416, "step": 122 }, { "epoch": 0.2837370242214533, "grad_norm": 0.0, "learning_rate": 1.9999875666616918e-05, "loss": 1.9135, "step": 123 }, { "epoch": 0.28604382929642447, "grad_norm": 0.0, "learning_rate": 1.9999830768577445e-05, "loss": 1.7693, "step": 124 }, { "epoch": 0.28835063437139563, "grad_norm": 0.0, "learning_rate": 1.9999778963230775e-05, "loss": 1.6087, "step": 125 }, { "epoch": 0.2906574394463668, "grad_norm": 0.0, "learning_rate": 1.99997202506127e-05, "loss": 1.473, "step": 126 }, { "epoch": 0.29296424452133796, "grad_norm": 0.0, "learning_rate": 1.999965463076377e-05, "loss": 1.4247, "step": 127 }, { "epoch": 0.2952710495963091, "grad_norm": 0.0, "learning_rate": 1.9999582103729316e-05, "loss": 1.2409, "step": 128 }, { "epoch": 0.2975778546712803, "grad_norm": 0.0, "learning_rate": 1.9999502669559432e-05, "loss": 1.5903, "step": 129 }, { "epoch": 0.29988465974625145, "grad_norm": 0.0, "learning_rate": 1.999941632830899e-05, "loss": 1.2961, "step": 130 }, { "epoch": 0.3021914648212226, "grad_norm": 0.0, "learning_rate": 1.9999323080037623e-05, "loss": 1.3102, "step": 131 }, { "epoch": 0.3044982698961938, "grad_norm": 0.0, "learning_rate": 1.999922292480975e-05, "loss": 1.5295, "step": 132 }, { "epoch": 0.30680507497116494, "grad_norm": 0.0, "learning_rate": 1.9999115862694547e-05, "loss": 1.2908, "step": 133 }, { "epoch": 0.3091118800461361, "grad_norm": 0.0, "learning_rate": 1.999900189376597e-05, "loss": 1.0529, "step": 134 }, { "epoch": 0.31141868512110726, "grad_norm": 0.0, "learning_rate": 1.9998881018102735e-05, "loss": 1.6819, "step": 135 }, { "epoch": 0.3137254901960784, "grad_norm": 0.0, "learning_rate": 1.9998753235788345e-05, "loss": 1.2558, "step": 136 }, { "epoch": 0.3160322952710496, "grad_norm": 0.0, "learning_rate": 1.999861854691106e-05, "loss": 1.5775, "step": 137 }, { "epoch": 0.31833910034602075, "grad_norm": 0.0, "learning_rate": 1.9998476951563914e-05, "loss": 1.4106, "step": 138 }, { "epoch": 0.3206459054209919, "grad_norm": 0.0, "learning_rate": 1.9998328449844715e-05, "loss": 1.4993, "step": 139 }, { "epoch": 0.3229527104959631, "grad_norm": 0.0, "learning_rate": 1.9998173041856042e-05, "loss": 1.4359, "step": 140 }, { "epoch": 0.32525951557093424, "grad_norm": 0.0, "learning_rate": 1.9998010727705237e-05, "loss": 1.2593, "step": 141 }, { "epoch": 0.3275663206459054, "grad_norm": 0.0, "learning_rate": 1.999784150750442e-05, "loss": 1.1497, "step": 142 }, { "epoch": 0.32987312572087657, "grad_norm": 0.0, "learning_rate": 1.9997665381370477e-05, "loss": 1.279, "step": 143 }, { "epoch": 0.33217993079584773, "grad_norm": 0.0, "learning_rate": 1.999748234942507e-05, "loss": 1.1369, "step": 144 }, { "epoch": 0.3344867358708189, "grad_norm": 0.0, "learning_rate": 1.999729241179462e-05, "loss": 1.5372, "step": 145 }, { "epoch": 0.33679354094579006, "grad_norm": 0.0, "learning_rate": 1.9997095568610326e-05, "loss": 0.9758, "step": 146 }, { "epoch": 0.3391003460207612, "grad_norm": 0.0, "learning_rate": 1.9996891820008165e-05, "loss": 1.4776, "step": 147 }, { "epoch": 0.3414071510957324, "grad_norm": 0.0, "learning_rate": 1.9996681166128862e-05, "loss": 1.3023, "step": 148 }, { "epoch": 0.34371395617070355, "grad_norm": 0.0, "learning_rate": 1.999646360711794e-05, "loss": 1.3596, "step": 149 }, { "epoch": 0.3460207612456747, "grad_norm": 0.0, "learning_rate": 1.999623914312566e-05, "loss": 1.426, "step": 150 }, { "epoch": 0.34832756632064593, "grad_norm": 0.0, "learning_rate": 1.9996007774307077e-05, "loss": 1.3939, "step": 151 }, { "epoch": 0.3506343713956171, "grad_norm": 0.0, "learning_rate": 1.9995769500822007e-05, "loss": 0.88, "step": 152 }, { "epoch": 0.35294117647058826, "grad_norm": 0.0, "learning_rate": 1.9995524322835035e-05, "loss": 1.4004, "step": 153 }, { "epoch": 0.3552479815455594, "grad_norm": 0.0, "learning_rate": 1.9995272240515515e-05, "loss": 1.2727, "step": 154 }, { "epoch": 0.3575547866205306, "grad_norm": 0.0, "learning_rate": 1.9995013254037574e-05, "loss": 1.2857, "step": 155 }, { "epoch": 0.35986159169550175, "grad_norm": 0.0, "learning_rate": 1.99947473635801e-05, "loss": 1.0745, "step": 156 }, { "epoch": 0.3621683967704729, "grad_norm": 0.0, "learning_rate": 1.999447456932676e-05, "loss": 1.232, "step": 157 }, { "epoch": 0.3644752018454441, "grad_norm": 0.0, "learning_rate": 1.9994194871465978e-05, "loss": 1.6852, "step": 158 }, { "epoch": 0.36678200692041524, "grad_norm": 0.0, "learning_rate": 1.999390827019096e-05, "loss": 1.257, "step": 159 }, { "epoch": 0.3690888119953864, "grad_norm": 0.0, "learning_rate": 1.999361476569967e-05, "loss": 1.0449, "step": 160 }, { "epoch": 0.37139561707035756, "grad_norm": 0.0, "learning_rate": 1.9993314358194843e-05, "loss": 1.0744, "step": 161 }, { "epoch": 0.3737024221453287, "grad_norm": 0.0, "learning_rate": 1.9993007047883988e-05, "loss": 1.5147, "step": 162 }, { "epoch": 0.3760092272202999, "grad_norm": 0.0, "learning_rate": 1.999269283497937e-05, "loss": 1.5317, "step": 163 }, { "epoch": 0.37831603229527105, "grad_norm": 0.0, "learning_rate": 1.999237171969804e-05, "loss": 1.0542, "step": 164 }, { "epoch": 0.3806228373702422, "grad_norm": 0.0, "learning_rate": 1.9992043702261795e-05, "loss": 1.4167, "step": 165 }, { "epoch": 0.3829296424452134, "grad_norm": 0.0, "learning_rate": 1.9991708782897214e-05, "loss": 1.3442, "step": 166 }, { "epoch": 0.38523644752018454, "grad_norm": 0.0, "learning_rate": 1.9991366961835643e-05, "loss": 1.2548, "step": 167 }, { "epoch": 0.3875432525951557, "grad_norm": 0.0, "learning_rate": 1.999101823931319e-05, "loss": 1.3884, "step": 168 }, { "epoch": 0.38985005767012687, "grad_norm": 0.0, "learning_rate": 1.999066261557073e-05, "loss": 1.3718, "step": 169 }, { "epoch": 0.39215686274509803, "grad_norm": 0.0, "learning_rate": 1.9990300090853917e-05, "loss": 1.7129, "step": 170 }, { "epoch": 0.3944636678200692, "grad_norm": 0.0, "learning_rate": 1.9989930665413148e-05, "loss": 1.31, "step": 171 }, { "epoch": 0.39677047289504036, "grad_norm": 0.0, "learning_rate": 1.9989554339503612e-05, "loss": 1.1476, "step": 172 }, { "epoch": 0.3990772779700115, "grad_norm": 0.0, "learning_rate": 1.998917111338525e-05, "loss": 1.0301, "step": 173 }, { "epoch": 0.4013840830449827, "grad_norm": 0.0, "learning_rate": 1.998878098732277e-05, "loss": 1.1074, "step": 174 }, { "epoch": 0.40369088811995385, "grad_norm": 0.0, "learning_rate": 1.9988383961585646e-05, "loss": 1.1239, "step": 175 }, { "epoch": 0.405997693194925, "grad_norm": 0.0, "learning_rate": 1.998798003644813e-05, "loss": 1.6276, "step": 176 }, { "epoch": 0.4083044982698962, "grad_norm": 0.0, "learning_rate": 1.9987569212189224e-05, "loss": 1.4676, "step": 177 }, { "epoch": 0.41061130334486734, "grad_norm": 0.0, "learning_rate": 1.9987151489092707e-05, "loss": 1.2155, "step": 178 }, { "epoch": 0.4129181084198385, "grad_norm": 0.0, "learning_rate": 1.998672686744711e-05, "loss": 1.6428, "step": 179 }, { "epoch": 0.41522491349480967, "grad_norm": 0.0, "learning_rate": 1.9986295347545738e-05, "loss": 1.1825, "step": 180 }, { "epoch": 0.41753171856978083, "grad_norm": 0.0, "learning_rate": 1.998585692968667e-05, "loss": 1.2577, "step": 181 }, { "epoch": 0.419838523644752, "grad_norm": 0.0, "learning_rate": 1.9985411614172728e-05, "loss": 1.1877, "step": 182 }, { "epoch": 0.42214532871972316, "grad_norm": 0.0, "learning_rate": 1.998495940131152e-05, "loss": 1.3028, "step": 183 }, { "epoch": 0.4244521337946944, "grad_norm": 0.0, "learning_rate": 1.9984500291415402e-05, "loss": 1.0642, "step": 184 }, { "epoch": 0.42675893886966554, "grad_norm": 0.0, "learning_rate": 1.99840342848015e-05, "loss": 1.7197, "step": 185 }, { "epoch": 0.4290657439446367, "grad_norm": 0.0, "learning_rate": 1.998356138179171e-05, "loss": 1.1801, "step": 186 }, { "epoch": 0.43137254901960786, "grad_norm": 0.0, "learning_rate": 1.9983081582712684e-05, "loss": 1.3857, "step": 187 }, { "epoch": 0.43367935409457903, "grad_norm": 0.0, "learning_rate": 1.9982594887895837e-05, "loss": 1.4002, "step": 188 }, { "epoch": 0.4359861591695502, "grad_norm": 0.0, "learning_rate": 1.998210129767735e-05, "loss": 1.0701, "step": 189 }, { "epoch": 0.43829296424452135, "grad_norm": 0.0, "learning_rate": 1.9981600812398175e-05, "loss": 1.2737, "step": 190 }, { "epoch": 0.4405997693194925, "grad_norm": 0.0, "learning_rate": 1.9981093432404006e-05, "loss": 1.0084, "step": 191 }, { "epoch": 0.4429065743944637, "grad_norm": 0.0, "learning_rate": 1.9980579158045322e-05, "loss": 1.3017, "step": 192 }, { "epoch": 0.44521337946943484, "grad_norm": 0.0, "learning_rate": 1.9980057989677345e-05, "loss": 1.0023, "step": 193 }, { "epoch": 0.447520184544406, "grad_norm": 0.0, "learning_rate": 1.9979529927660076e-05, "loss": 1.226, "step": 194 }, { "epoch": 0.44982698961937717, "grad_norm": 0.0, "learning_rate": 1.9978994972358265e-05, "loss": 0.7007, "step": 195 }, { "epoch": 0.45213379469434833, "grad_norm": 0.0, "learning_rate": 1.997845312414143e-05, "loss": 1.0334, "step": 196 }, { "epoch": 0.4544405997693195, "grad_norm": 0.0, "learning_rate": 1.997790438338385e-05, "loss": 1.2425, "step": 197 }, { "epoch": 0.45674740484429066, "grad_norm": 0.0, "learning_rate": 1.997734875046456e-05, "loss": 1.3561, "step": 198 }, { "epoch": 0.4590542099192618, "grad_norm": 0.0, "learning_rate": 1.9976786225767365e-05, "loss": 1.2625, "step": 199 }, { "epoch": 0.461361014994233, "grad_norm": 0.0, "learning_rate": 1.997621680968082e-05, "loss": 1.1393, "step": 200 }, { "epoch": 0.46366782006920415, "grad_norm": 0.0, "learning_rate": 1.9975640502598243e-05, "loss": 1.1219, "step": 201 }, { "epoch": 0.4659746251441753, "grad_norm": 0.0, "learning_rate": 1.997505730491772e-05, "loss": 1.2967, "step": 202 }, { "epoch": 0.4682814302191465, "grad_norm": 0.0, "learning_rate": 1.9974467217042086e-05, "loss": 1.2347, "step": 203 }, { "epoch": 0.47058823529411764, "grad_norm": 0.0, "learning_rate": 1.9973870239378938e-05, "loss": 1.2762, "step": 204 }, { "epoch": 0.4728950403690888, "grad_norm": 0.0, "learning_rate": 1.9973266372340638e-05, "loss": 0.8454, "step": 205 }, { "epoch": 0.47520184544405997, "grad_norm": 0.0, "learning_rate": 1.9972655616344303e-05, "loss": 1.4203, "step": 206 }, { "epoch": 0.47750865051903113, "grad_norm": 0.0, "learning_rate": 1.9972037971811802e-05, "loss": 1.4224, "step": 207 }, { "epoch": 0.4798154555940023, "grad_norm": 0.0, "learning_rate": 1.9971413439169777e-05, "loss": 1.4759, "step": 208 }, { "epoch": 0.48212226066897346, "grad_norm": 0.0, "learning_rate": 1.997078201884961e-05, "loss": 1.3751, "step": 209 }, { "epoch": 0.4844290657439446, "grad_norm": 0.0, "learning_rate": 1.997014371128746e-05, "loss": 1.4652, "step": 210 }, { "epoch": 0.4867358708189158, "grad_norm": 0.0, "learning_rate": 1.996949851692422e-05, "loss": 0.9827, "step": 211 }, { "epoch": 0.48904267589388695, "grad_norm": 0.0, "learning_rate": 1.9968846436205566e-05, "loss": 1.4189, "step": 212 }, { "epoch": 0.4913494809688581, "grad_norm": 0.0, "learning_rate": 1.996818746958191e-05, "loss": 1.1802, "step": 213 }, { "epoch": 0.4936562860438293, "grad_norm": 0.0, "learning_rate": 1.996752161750843e-05, "loss": 0.8799, "step": 214 }, { "epoch": 0.49596309111880044, "grad_norm": 0.0, "learning_rate": 1.996684888044506e-05, "loss": 1.6296, "step": 215 }, { "epoch": 0.4982698961937716, "grad_norm": 0.0, "learning_rate": 1.9966169258856488e-05, "loss": 0.8003, "step": 216 }, { "epoch": 0.5005767012687428, "grad_norm": 0.0, "learning_rate": 1.9965482753212154e-05, "loss": 1.4662, "step": 217 }, { "epoch": 0.5028835063437139, "grad_norm": 0.0, "learning_rate": 1.9964789363986262e-05, "loss": 1.6058, "step": 218 }, { "epoch": 0.5051903114186851, "grad_norm": 0.0, "learning_rate": 1.996408909165776e-05, "loss": 1.3763, "step": 219 }, { "epoch": 0.5074971164936563, "grad_norm": 0.0, "learning_rate": 1.996338193671036e-05, "loss": 1.097, "step": 220 }, { "epoch": 0.5098039215686274, "grad_norm": 0.0, "learning_rate": 1.996266789963252e-05, "loss": 0.9124, "step": 221 }, { "epoch": 0.5121107266435986, "grad_norm": 0.0, "learning_rate": 1.9961946980917457e-05, "loss": 1.6222, "step": 222 }, { "epoch": 0.5144175317185697, "grad_norm": 0.0, "learning_rate": 1.996121918106314e-05, "loss": 1.1872, "step": 223 }, { "epoch": 0.5167243367935409, "grad_norm": 0.0, "learning_rate": 1.9960484500572293e-05, "loss": 0.9698, "step": 224 }, { "epoch": 0.5190311418685121, "grad_norm": 0.0, "learning_rate": 1.9959742939952393e-05, "loss": 1.5913, "step": 225 }, { "epoch": 0.5213379469434832, "grad_norm": 0.0, "learning_rate": 1.995899449971566e-05, "loss": 1.1774, "step": 226 }, { "epoch": 0.5236447520184544, "grad_norm": 0.0, "learning_rate": 1.995823918037908e-05, "loss": 0.9025, "step": 227 }, { "epoch": 0.5259515570934256, "grad_norm": 0.0, "learning_rate": 1.9957476982464382e-05, "loss": 0.9416, "step": 228 }, { "epoch": 0.5282583621683967, "grad_norm": 0.0, "learning_rate": 1.9956707906498046e-05, "loss": 1.2601, "step": 229 }, { "epoch": 0.5305651672433679, "grad_norm": 0.0, "learning_rate": 1.995593195301131e-05, "loss": 1.2503, "step": 230 }, { "epoch": 0.532871972318339, "grad_norm": 0.0, "learning_rate": 1.995514912254015e-05, "loss": 0.93, "step": 231 }, { "epoch": 0.5351787773933102, "grad_norm": 0.0, "learning_rate": 1.9954359415625313e-05, "loss": 1.0196, "step": 232 }, { "epoch": 0.5374855824682814, "grad_norm": 0.0, "learning_rate": 1.995356283281227e-05, "loss": 1.5443, "step": 233 }, { "epoch": 0.5397923875432526, "grad_norm": 0.0, "learning_rate": 1.9952759374651266e-05, "loss": 0.9655, "step": 234 }, { "epoch": 0.5420991926182238, "grad_norm": 0.0, "learning_rate": 1.9951949041697272e-05, "loss": 0.8188, "step": 235 }, { "epoch": 0.544405997693195, "grad_norm": 0.0, "learning_rate": 1.9951131834510034e-05, "loss": 0.9194, "step": 236 }, { "epoch": 0.5467128027681661, "grad_norm": 0.0, "learning_rate": 1.9950307753654016e-05, "loss": 1.3431, "step": 237 }, { "epoch": 0.5490196078431373, "grad_norm": 0.0, "learning_rate": 1.9949476799698453e-05, "loss": 1.4364, "step": 238 }, { "epoch": 0.5513264129181085, "grad_norm": 0.0, "learning_rate": 1.9948638973217324e-05, "loss": 0.7999, "step": 239 }, { "epoch": 0.5536332179930796, "grad_norm": 0.0, "learning_rate": 1.994779427478934e-05, "loss": 0.947, "step": 240 }, { "epoch": 0.5559400230680508, "grad_norm": 0.0, "learning_rate": 1.9946942704997982e-05, "loss": 0.8431, "step": 241 }, { "epoch": 0.558246828143022, "grad_norm": 0.0, "learning_rate": 1.994608426443146e-05, "loss": 1.3369, "step": 242 }, { "epoch": 0.5605536332179931, "grad_norm": 0.0, "learning_rate": 1.9945218953682736e-05, "loss": 1.0996, "step": 243 }, { "epoch": 0.5628604382929643, "grad_norm": 0.0, "learning_rate": 1.9944346773349515e-05, "loss": 1.5973, "step": 244 }, { "epoch": 0.5651672433679354, "grad_norm": 0.0, "learning_rate": 1.9943467724034252e-05, "loss": 1.2742, "step": 245 }, { "epoch": 0.5674740484429066, "grad_norm": 0.0, "learning_rate": 1.994258180634414e-05, "loss": 0.9267, "step": 246 }, { "epoch": 0.5697808535178778, "grad_norm": 0.0, "learning_rate": 1.994168902089112e-05, "loss": 1.3129, "step": 247 }, { "epoch": 0.5720876585928489, "grad_norm": 0.0, "learning_rate": 1.9940789368291888e-05, "loss": 1.3963, "step": 248 }, { "epoch": 0.5743944636678201, "grad_norm": 0.0, "learning_rate": 1.9939882849167853e-05, "loss": 1.3136, "step": 249 }, { "epoch": 0.5767012687427913, "grad_norm": 0.0, "learning_rate": 1.99389694641452e-05, "loss": 1.5613, "step": 250 }, { "epoch": 0.5790080738177624, "grad_norm": 0.0, "learning_rate": 1.993804921385484e-05, "loss": 1.54, "step": 251 }, { "epoch": 0.5813148788927336, "grad_norm": 0.0, "learning_rate": 1.9937122098932428e-05, "loss": 1.3285, "step": 252 }, { "epoch": 0.5836216839677048, "grad_norm": 0.0, "learning_rate": 1.993618812001836e-05, "loss": 1.0518, "step": 253 }, { "epoch": 0.5859284890426759, "grad_norm": 0.0, "learning_rate": 1.9935247277757777e-05, "loss": 1.4438, "step": 254 }, { "epoch": 0.5882352941176471, "grad_norm": 0.0, "learning_rate": 1.9934299572800556e-05, "loss": 1.1655, "step": 255 }, { "epoch": 0.5905420991926182, "grad_norm": 0.0, "learning_rate": 1.9933345005801323e-05, "loss": 1.3974, "step": 256 }, { "epoch": 0.5928489042675894, "grad_norm": 0.0, "learning_rate": 1.9932383577419432e-05, "loss": 1.3321, "step": 257 }, { "epoch": 0.5951557093425606, "grad_norm": 0.0, "learning_rate": 1.9931415288318985e-05, "loss": 1.5007, "step": 258 }, { "epoch": 0.5974625144175317, "grad_norm": 0.0, "learning_rate": 1.993044013916882e-05, "loss": 1.536, "step": 259 }, { "epoch": 0.5997693194925029, "grad_norm": 0.0, "learning_rate": 1.992945813064251e-05, "loss": 0.936, "step": 260 }, { "epoch": 0.6020761245674741, "grad_norm": 0.0, "learning_rate": 1.9928469263418376e-05, "loss": 1.1215, "step": 261 }, { "epoch": 0.6043829296424452, "grad_norm": 0.0, "learning_rate": 1.9927473538179467e-05, "loss": 1.1639, "step": 262 }, { "epoch": 0.6066897347174164, "grad_norm": 0.0, "learning_rate": 1.9926470955613573e-05, "loss": 1.0756, "step": 263 }, { "epoch": 0.6089965397923875, "grad_norm": 0.0, "learning_rate": 1.9925461516413224e-05, "loss": 1.0336, "step": 264 }, { "epoch": 0.6113033448673587, "grad_norm": 0.0, "learning_rate": 1.9924445221275673e-05, "loss": 0.8772, "step": 265 }, { "epoch": 0.6136101499423299, "grad_norm": 0.0, "learning_rate": 1.9923422070902932e-05, "loss": 1.2074, "step": 266 }, { "epoch": 0.615916955017301, "grad_norm": 0.0, "learning_rate": 1.9922392066001724e-05, "loss": 1.0987, "step": 267 }, { "epoch": 0.6182237600922722, "grad_norm": 0.0, "learning_rate": 1.992135520728352e-05, "loss": 1.0906, "step": 268 }, { "epoch": 0.6205305651672434, "grad_norm": 0.0, "learning_rate": 1.992031149546452e-05, "loss": 1.2893, "step": 269 }, { "epoch": 0.6228373702422145, "grad_norm": 0.0, "learning_rate": 1.9919260931265666e-05, "loss": 0.9538, "step": 270 }, { "epoch": 0.6251441753171857, "grad_norm": 0.0, "learning_rate": 1.9918203515412616e-05, "loss": 1.1872, "step": 271 }, { "epoch": 0.6274509803921569, "grad_norm": 0.0, "learning_rate": 1.9917139248635788e-05, "loss": 0.8881, "step": 272 }, { "epoch": 0.629757785467128, "grad_norm": 0.0, "learning_rate": 1.9916068131670302e-05, "loss": 0.8143, "step": 273 }, { "epoch": 0.6320645905420992, "grad_norm": 0.0, "learning_rate": 1.9914990165256034e-05, "loss": 0.9903, "step": 274 }, { "epoch": 0.6343713956170703, "grad_norm": 0.0, "learning_rate": 1.9913905350137575e-05, "loss": 0.9394, "step": 275 }, { "epoch": 0.6366782006920415, "grad_norm": 0.0, "learning_rate": 1.9912813687064255e-05, "loss": 1.3582, "step": 276 }, { "epoch": 0.6389850057670127, "grad_norm": 0.0, "learning_rate": 1.991171517679013e-05, "loss": 1.3316, "step": 277 }, { "epoch": 0.6412918108419838, "grad_norm": 0.0, "learning_rate": 1.9910609820073986e-05, "loss": 1.006, "step": 278 }, { "epoch": 0.643598615916955, "grad_norm": 0.0, "learning_rate": 1.990949761767935e-05, "loss": 1.4049, "step": 279 }, { "epoch": 0.6459054209919262, "grad_norm": 0.0, "learning_rate": 1.9908378570374457e-05, "loss": 0.9906, "step": 280 }, { "epoch": 0.6482122260668973, "grad_norm": 0.0, "learning_rate": 1.990725267893228e-05, "loss": 1.1612, "step": 281 }, { "epoch": 0.6505190311418685, "grad_norm": 0.0, "learning_rate": 1.9906119944130527e-05, "loss": 1.0548, "step": 282 }, { "epoch": 0.6528258362168397, "grad_norm": 0.0, "learning_rate": 1.9904980366751624e-05, "loss": 1.3317, "step": 283 }, { "epoch": 0.6551326412918108, "grad_norm": 0.0, "learning_rate": 1.9903833947582722e-05, "loss": 1.3214, "step": 284 }, { "epoch": 0.657439446366782, "grad_norm": 0.0, "learning_rate": 1.9902680687415704e-05, "loss": 1.3262, "step": 285 }, { "epoch": 0.6597462514417531, "grad_norm": 0.0, "learning_rate": 1.9901520587047172e-05, "loss": 0.9366, "step": 286 }, { "epoch": 0.6620530565167243, "grad_norm": 0.0, "learning_rate": 1.9900353647278466e-05, "loss": 1.0506, "step": 287 }, { "epoch": 0.6643598615916955, "grad_norm": 0.0, "learning_rate": 1.989917986891563e-05, "loss": 1.3567, "step": 288 }, { "epoch": 0.6666666666666666, "grad_norm": 0.0, "learning_rate": 1.989799925276945e-05, "loss": 1.3788, "step": 289 }, { "epoch": 0.6689734717416378, "grad_norm": 0.0, "learning_rate": 1.989681179965542e-05, "loss": 1.284, "step": 290 }, { "epoch": 0.671280276816609, "grad_norm": 0.0, "learning_rate": 1.9895617510393773e-05, "loss": 0.6017, "step": 291 }, { "epoch": 0.6735870818915801, "grad_norm": 0.0, "learning_rate": 1.9894416385809444e-05, "loss": 1.0031, "step": 292 }, { "epoch": 0.6758938869665513, "grad_norm": 0.0, "learning_rate": 1.9893208426732115e-05, "loss": 1.3046, "step": 293 }, { "epoch": 0.6782006920415224, "grad_norm": 0.0, "learning_rate": 1.9891993633996164e-05, "loss": 0.8548, "step": 294 }, { "epoch": 0.6805074971164936, "grad_norm": 0.0, "learning_rate": 1.9890772008440703e-05, "loss": 0.9578, "step": 295 }, { "epoch": 0.6828143021914648, "grad_norm": 0.0, "learning_rate": 1.9889543550909562e-05, "loss": 0.8643, "step": 296 }, { "epoch": 0.6851211072664359, "grad_norm": 0.0, "learning_rate": 1.9888308262251286e-05, "loss": 1.0212, "step": 297 }, { "epoch": 0.6874279123414071, "grad_norm": 0.0, "learning_rate": 1.9887066143319145e-05, "loss": 0.8272, "step": 298 }, { "epoch": 0.6897347174163783, "grad_norm": 0.0, "learning_rate": 1.9885817194971116e-05, "loss": 1.0797, "step": 299 }, { "epoch": 0.6920415224913494, "grad_norm": 0.0, "learning_rate": 1.988456141806991e-05, "loss": 1.1806, "step": 300 }, { "epoch": 0.6943483275663207, "grad_norm": 0.0, "learning_rate": 1.988329881348294e-05, "loss": 1.3157, "step": 301 }, { "epoch": 0.6966551326412919, "grad_norm": 0.0, "learning_rate": 1.9882029382082342e-05, "loss": 0.932, "step": 302 }, { "epoch": 0.698961937716263, "grad_norm": 0.0, "learning_rate": 1.9880753124744964e-05, "loss": 1.2185, "step": 303 }, { "epoch": 0.7012687427912342, "grad_norm": 0.0, "learning_rate": 1.9879470042352372e-05, "loss": 1.4559, "step": 304 }, { "epoch": 0.7035755478662054, "grad_norm": 0.0, "learning_rate": 1.9878180135790844e-05, "loss": 1.3831, "step": 305 }, { "epoch": 0.7058823529411765, "grad_norm": 0.0, "learning_rate": 1.9876883405951378e-05, "loss": 1.0567, "step": 306 }, { "epoch": 0.7081891580161477, "grad_norm": 0.0, "learning_rate": 1.9875579853729677e-05, "loss": 0.7896, "step": 307 }, { "epoch": 0.7104959630911188, "grad_norm": 0.0, "learning_rate": 1.987426948002616e-05, "loss": 0.958, "step": 308 }, { "epoch": 0.71280276816609, "grad_norm": 0.0, "learning_rate": 1.9872952285745958e-05, "loss": 1.0281, "step": 309 }, { "epoch": 0.7151095732410612, "grad_norm": 0.0, "learning_rate": 1.987162827179891e-05, "loss": 1.0784, "step": 310 }, { "epoch": 0.7174163783160323, "grad_norm": 0.0, "learning_rate": 1.9870297439099576e-05, "loss": 1.2599, "step": 311 }, { "epoch": 0.7197231833910035, "grad_norm": 0.0, "learning_rate": 1.9868959788567213e-05, "loss": 1.4855, "step": 312 }, { "epoch": 0.7220299884659747, "grad_norm": 0.0, "learning_rate": 1.9867615321125796e-05, "loss": 1.1607, "step": 313 }, { "epoch": 0.7243367935409458, "grad_norm": 0.0, "learning_rate": 1.9866264037703996e-05, "loss": 1.1057, "step": 314 }, { "epoch": 0.726643598615917, "grad_norm": 0.0, "learning_rate": 1.9864905939235215e-05, "loss": 0.9996, "step": 315 }, { "epoch": 0.7289504036908881, "grad_norm": 0.0, "learning_rate": 1.9863541026657542e-05, "loss": 1.5419, "step": 316 }, { "epoch": 0.7312572087658593, "grad_norm": 0.0, "learning_rate": 1.9862169300913784e-05, "loss": 1.1848, "step": 317 }, { "epoch": 0.7335640138408305, "grad_norm": 0.0, "learning_rate": 1.9860790762951447e-05, "loss": 1.1695, "step": 318 }, { "epoch": 0.7358708189158016, "grad_norm": 0.0, "learning_rate": 1.9859405413722745e-05, "loss": 0.9904, "step": 319 }, { "epoch": 0.7381776239907728, "grad_norm": 0.0, "learning_rate": 1.9858013254184597e-05, "loss": 0.9056, "step": 320 }, { "epoch": 0.740484429065744, "grad_norm": 0.0, "learning_rate": 1.985661428529863e-05, "loss": 1.01, "step": 321 }, { "epoch": 0.7427912341407151, "grad_norm": 0.0, "learning_rate": 1.9855208508031173e-05, "loss": 1.3457, "step": 322 }, { "epoch": 0.7450980392156863, "grad_norm": 0.0, "learning_rate": 1.985379592335325e-05, "loss": 0.9593, "step": 323 }, { "epoch": 0.7474048442906575, "grad_norm": 0.0, "learning_rate": 1.9852376532240594e-05, "loss": 0.7881, "step": 324 }, { "epoch": 0.7497116493656286, "grad_norm": 0.0, "learning_rate": 1.985095033567364e-05, "loss": 0.9659, "step": 325 }, { "epoch": 0.7520184544405998, "grad_norm": 0.0, "learning_rate": 1.9849517334637527e-05, "loss": 0.7841, "step": 326 }, { "epoch": 0.754325259515571, "grad_norm": 0.0, "learning_rate": 1.9848077530122083e-05, "loss": 1.3307, "step": 327 }, { "epoch": 0.7566320645905421, "grad_norm": 0.0, "learning_rate": 1.9846630923121845e-05, "loss": 1.2489, "step": 328 }, { "epoch": 0.7589388696655133, "grad_norm": 0.0, "learning_rate": 1.984517751463604e-05, "loss": 1.2523, "step": 329 }, { "epoch": 0.7612456747404844, "grad_norm": 0.0, "learning_rate": 1.984371730566861e-05, "loss": 1.1361, "step": 330 }, { "epoch": 0.7635524798154556, "grad_norm": 0.0, "learning_rate": 1.984225029722818e-05, "loss": 0.9844, "step": 331 }, { "epoch": 0.7658592848904268, "grad_norm": 0.0, "learning_rate": 1.9840776490328067e-05, "loss": 1.0674, "step": 332 }, { "epoch": 0.7681660899653979, "grad_norm": 0.0, "learning_rate": 1.98392958859863e-05, "loss": 0.9208, "step": 333 }, { "epoch": 0.7704728950403691, "grad_norm": 0.0, "learning_rate": 1.983780848522559e-05, "loss": 1.2463, "step": 334 }, { "epoch": 0.7727797001153403, "grad_norm": 0.0, "learning_rate": 1.983631428907335e-05, "loss": 0.805, "step": 335 }, { "epoch": 0.7750865051903114, "grad_norm": 0.0, "learning_rate": 1.9834813298561686e-05, "loss": 1.2016, "step": 336 }, { "epoch": 0.7773933102652826, "grad_norm": 0.0, "learning_rate": 1.9833305514727396e-05, "loss": 0.7911, "step": 337 }, { "epoch": 0.7797001153402537, "grad_norm": 0.0, "learning_rate": 1.983179093861197e-05, "loss": 1.2782, "step": 338 }, { "epoch": 0.7820069204152249, "grad_norm": 0.0, "learning_rate": 1.9830269571261585e-05, "loss": 1.3264, "step": 339 }, { "epoch": 0.7843137254901961, "grad_norm": 0.0, "learning_rate": 1.9828741413727118e-05, "loss": 1.096, "step": 340 }, { "epoch": 0.7866205305651672, "grad_norm": 0.0, "learning_rate": 1.9827206467064133e-05, "loss": 0.6352, "step": 341 }, { "epoch": 0.7889273356401384, "grad_norm": 0.0, "learning_rate": 1.9825664732332886e-05, "loss": 1.0133, "step": 342 }, { "epoch": 0.7912341407151096, "grad_norm": 0.0, "learning_rate": 1.982411621059831e-05, "loss": 1.1626, "step": 343 }, { "epoch": 0.7935409457900807, "grad_norm": 0.0, "learning_rate": 1.982256090293004e-05, "loss": 1.4643, "step": 344 }, { "epoch": 0.7958477508650519, "grad_norm": 0.0, "learning_rate": 1.982099881040239e-05, "loss": 0.9051, "step": 345 }, { "epoch": 0.798154555940023, "grad_norm": 0.0, "learning_rate": 1.9819429934094366e-05, "loss": 1.2828, "step": 346 }, { "epoch": 0.8004613610149942, "grad_norm": 0.0, "learning_rate": 1.981785427508966e-05, "loss": 1.1372, "step": 347 }, { "epoch": 0.8027681660899654, "grad_norm": 0.0, "learning_rate": 1.9816271834476642e-05, "loss": 1.165, "step": 348 }, { "epoch": 0.8050749711649365, "grad_norm": 0.0, "learning_rate": 1.981468261334837e-05, "loss": 1.2639, "step": 349 }, { "epoch": 0.8073817762399077, "grad_norm": 0.0, "learning_rate": 1.981308661280259e-05, "loss": 0.8564, "step": 350 }, { "epoch": 0.8096885813148789, "grad_norm": 0.0, "learning_rate": 1.9811483833941726e-05, "loss": 0.787, "step": 351 }, { "epoch": 0.81199538638985, "grad_norm": 0.0, "learning_rate": 1.9809874277872886e-05, "loss": 1.2472, "step": 352 }, { "epoch": 0.8143021914648212, "grad_norm": 0.0, "learning_rate": 1.980825794570786e-05, "loss": 1.0271, "step": 353 }, { "epoch": 0.8166089965397924, "grad_norm": 0.0, "learning_rate": 1.9806634838563113e-05, "loss": 1.3831, "step": 354 }, { "epoch": 0.8189158016147635, "grad_norm": 0.0, "learning_rate": 1.9805004957559795e-05, "loss": 0.9824, "step": 355 }, { "epoch": 0.8212226066897347, "grad_norm": 0.0, "learning_rate": 1.9803368303823735e-05, "loss": 1.3427, "step": 356 }, { "epoch": 0.8235294117647058, "grad_norm": 0.0, "learning_rate": 1.9801724878485438e-05, "loss": 0.9904, "step": 357 }, { "epoch": 0.825836216839677, "grad_norm": 0.0, "learning_rate": 1.980007468268009e-05, "loss": 0.8121, "step": 358 }, { "epoch": 0.8281430219146482, "grad_norm": 0.0, "learning_rate": 1.9798417717547552e-05, "loss": 1.3321, "step": 359 }, { "epoch": 0.8304498269896193, "grad_norm": 0.0, "learning_rate": 1.9796753984232357e-05, "loss": 1.1041, "step": 360 }, { "epoch": 0.8327566320645905, "grad_norm": 0.0, "learning_rate": 1.9795083483883715e-05, "loss": 0.4604, "step": 361 }, { "epoch": 0.8350634371395617, "grad_norm": 0.0, "learning_rate": 1.9793406217655516e-05, "loss": 0.9785, "step": 362 }, { "epoch": 0.8373702422145328, "grad_norm": 0.0, "learning_rate": 1.9791722186706318e-05, "loss": 0.9659, "step": 363 }, { "epoch": 0.839677047289504, "grad_norm": 0.0, "learning_rate": 1.9790031392199348e-05, "loss": 0.7622, "step": 364 }, { "epoch": 0.8419838523644751, "grad_norm": 0.0, "learning_rate": 1.9788333835302512e-05, "loss": 1.3065, "step": 365 }, { "epoch": 0.8442906574394463, "grad_norm": 0.0, "learning_rate": 1.9786629517188384e-05, "loss": 0.9951, "step": 366 }, { "epoch": 0.8465974625144176, "grad_norm": 0.0, "learning_rate": 1.9784918439034216e-05, "loss": 1.1547, "step": 367 }, { "epoch": 0.8489042675893888, "grad_norm": 0.0, "learning_rate": 1.9783200602021912e-05, "loss": 0.9821, "step": 368 }, { "epoch": 0.8512110726643599, "grad_norm": 0.0, "learning_rate": 1.9781476007338058e-05, "loss": 1.0311, "step": 369 }, { "epoch": 0.8535178777393311, "grad_norm": 0.0, "learning_rate": 1.9779744656173907e-05, "loss": 1.4481, "step": 370 }, { "epoch": 0.8558246828143022, "grad_norm": 0.0, "learning_rate": 1.9778006549725375e-05, "loss": 1.1409, "step": 371 }, { "epoch": 0.8581314878892734, "grad_norm": 0.0, "learning_rate": 1.977626168919305e-05, "loss": 0.988, "step": 372 }, { "epoch": 0.8604382929642446, "grad_norm": 0.0, "learning_rate": 1.977451007578217e-05, "loss": 0.792, "step": 373 }, { "epoch": 0.8627450980392157, "grad_norm": 0.0, "learning_rate": 1.9772751710702663e-05, "loss": 1.0579, "step": 374 }, { "epoch": 0.8650519031141869, "grad_norm": 0.0, "learning_rate": 1.9770986595169096e-05, "loss": 0.8852, "step": 375 }, { "epoch": 0.8673587081891581, "grad_norm": 0.0, "learning_rate": 1.976921473040071e-05, "loss": 1.3128, "step": 376 }, { "epoch": 0.8696655132641292, "grad_norm": 0.0, "learning_rate": 1.9767436117621416e-05, "loss": 1.6096, "step": 377 }, { "epoch": 0.8719723183391004, "grad_norm": 0.0, "learning_rate": 1.9765650758059766e-05, "loss": 0.9685, "step": 378 }, { "epoch": 0.8742791234140715, "grad_norm": 0.0, "learning_rate": 1.9763858652948992e-05, "loss": 1.2446, "step": 379 }, { "epoch": 0.8765859284890427, "grad_norm": 0.0, "learning_rate": 1.976205980352697e-05, "loss": 0.9631, "step": 380 }, { "epoch": 0.8788927335640139, "grad_norm": 0.0, "learning_rate": 1.9760254211036245e-05, "loss": 1.0765, "step": 381 }, { "epoch": 0.881199538638985, "grad_norm": 0.0, "learning_rate": 1.975844187672402e-05, "loss": 1.3127, "step": 382 }, { "epoch": 0.8835063437139562, "grad_norm": 0.0, "learning_rate": 1.9756622801842144e-05, "loss": 1.0501, "step": 383 }, { "epoch": 0.8858131487889274, "grad_norm": 0.0, "learning_rate": 1.9754796987647134e-05, "loss": 0.9982, "step": 384 }, { "epoch": 0.8881199538638985, "grad_norm": 0.0, "learning_rate": 1.9752964435400156e-05, "loss": 0.8997, "step": 385 }, { "epoch": 0.8904267589388697, "grad_norm": 0.0, "learning_rate": 1.9751125146367036e-05, "loss": 1.0249, "step": 386 }, { "epoch": 0.8927335640138409, "grad_norm": 0.0, "learning_rate": 1.9749279121818235e-05, "loss": 0.8368, "step": 387 }, { "epoch": 0.895040369088812, "grad_norm": 0.0, "learning_rate": 1.9747426363028897e-05, "loss": 1.2199, "step": 388 }, { "epoch": 0.8973471741637832, "grad_norm": 0.0, "learning_rate": 1.9745566871278794e-05, "loss": 1.4212, "step": 389 }, { "epoch": 0.8996539792387543, "grad_norm": 0.0, "learning_rate": 1.9743700647852356e-05, "loss": 0.9981, "step": 390 }, { "epoch": 0.9019607843137255, "grad_norm": 0.0, "learning_rate": 1.974182769403866e-05, "loss": 0.5841, "step": 391 }, { "epoch": 0.9042675893886967, "grad_norm": 0.0, "learning_rate": 1.9739948011131438e-05, "loss": 1.1561, "step": 392 }, { "epoch": 0.9065743944636678, "grad_norm": 0.0, "learning_rate": 1.9738061600429062e-05, "loss": 1.0792, "step": 393 }, { "epoch": 0.908881199538639, "grad_norm": 0.0, "learning_rate": 1.9736168463234565e-05, "loss": 0.7952, "step": 394 }, { "epoch": 0.9111880046136102, "grad_norm": 0.0, "learning_rate": 1.9734268600855612e-05, "loss": 0.9505, "step": 395 }, { "epoch": 0.9134948096885813, "grad_norm": 0.0, "learning_rate": 1.9732362014604515e-05, "loss": 0.819, "step": 396 }, { "epoch": 0.9158016147635525, "grad_norm": 0.0, "learning_rate": 1.973044870579824e-05, "loss": 0.7531, "step": 397 }, { "epoch": 0.9181084198385236, "grad_norm": 0.0, "learning_rate": 1.972852867575839e-05, "loss": 1.0519, "step": 398 }, { "epoch": 0.9204152249134948, "grad_norm": 0.0, "learning_rate": 1.9726601925811204e-05, "loss": 1.2648, "step": 399 }, { "epoch": 0.922722029988466, "grad_norm": 0.0, "learning_rate": 1.9724668457287576e-05, "loss": 0.9938, "step": 400 }, { "epoch": 0.9250288350634371, "grad_norm": 0.0, "learning_rate": 1.9722728271523035e-05, "loss": 1.5296, "step": 401 }, { "epoch": 0.9273356401384083, "grad_norm": 0.0, "learning_rate": 1.9720781369857747e-05, "loss": 0.925, "step": 402 }, { "epoch": 0.9296424452133795, "grad_norm": 0.0, "learning_rate": 1.9718827753636522e-05, "loss": 1.3531, "step": 403 }, { "epoch": 0.9319492502883506, "grad_norm": 0.0, "learning_rate": 1.9716867424208805e-05, "loss": 0.9067, "step": 404 }, { "epoch": 0.9342560553633218, "grad_norm": 0.0, "learning_rate": 1.9714900382928674e-05, "loss": 1.0162, "step": 405 }, { "epoch": 0.936562860438293, "grad_norm": 0.0, "learning_rate": 1.9712926631154857e-05, "loss": 0.9944, "step": 406 }, { "epoch": 0.9388696655132641, "grad_norm": 0.0, "learning_rate": 1.9710946170250702e-05, "loss": 1.1977, "step": 407 }, { "epoch": 0.9411764705882353, "grad_norm": 0.0, "learning_rate": 1.9708959001584197e-05, "loss": 0.9416, "step": 408 }, { "epoch": 0.9434832756632064, "grad_norm": 0.0, "learning_rate": 1.9706965126527963e-05, "loss": 0.7721, "step": 409 }, { "epoch": 0.9457900807381776, "grad_norm": 0.0, "learning_rate": 1.9704964546459257e-05, "loss": 1.0792, "step": 410 }, { "epoch": 0.9480968858131488, "grad_norm": 0.0, "learning_rate": 1.9702957262759964e-05, "loss": 1.0404, "step": 411 }, { "epoch": 0.9504036908881199, "grad_norm": 0.0, "learning_rate": 1.9700943276816602e-05, "loss": 1.043, "step": 412 }, { "epoch": 0.9527104959630911, "grad_norm": 0.0, "learning_rate": 1.9698922590020314e-05, "loss": 1.162, "step": 413 }, { "epoch": 0.9550173010380623, "grad_norm": 0.0, "learning_rate": 1.969689520376687e-05, "loss": 1.1567, "step": 414 }, { "epoch": 0.9573241061130334, "grad_norm": 0.0, "learning_rate": 1.9694861119456677e-05, "loss": 0.952, "step": 415 }, { "epoch": 0.9596309111880046, "grad_norm": 0.0, "learning_rate": 1.9692820338494766e-05, "loss": 1.2744, "step": 416 }, { "epoch": 0.9619377162629758, "grad_norm": 0.0, "learning_rate": 1.969077286229078e-05, "loss": 1.0971, "step": 417 }, { "epoch": 0.9642445213379469, "grad_norm": 0.0, "learning_rate": 1.9688718692259007e-05, "loss": 0.9815, "step": 418 }, { "epoch": 0.9665513264129181, "grad_norm": 0.0, "learning_rate": 1.9686657829818353e-05, "loss": 1.19, "step": 419 }, { "epoch": 0.9688581314878892, "grad_norm": 0.0, "learning_rate": 1.968459027639233e-05, "loss": 0.9618, "step": 420 }, { "epoch": 0.9711649365628604, "grad_norm": 0.0, "learning_rate": 1.9682516033409094e-05, "loss": 1.2316, "step": 421 }, { "epoch": 0.9734717416378316, "grad_norm": 0.0, "learning_rate": 1.9680435102301412e-05, "loss": 1.2232, "step": 422 }, { "epoch": 0.9757785467128027, "grad_norm": 0.0, "learning_rate": 1.9678347484506667e-05, "loss": 0.9157, "step": 423 }, { "epoch": 0.9780853517877739, "grad_norm": 0.0, "learning_rate": 1.967625318146687e-05, "loss": 1.1693, "step": 424 }, { "epoch": 0.9803921568627451, "grad_norm": 0.0, "learning_rate": 1.967415219462864e-05, "loss": 1.3266, "step": 425 }, { "epoch": 0.9826989619377162, "grad_norm": 0.0, "learning_rate": 1.9672044525443222e-05, "loss": 0.9772, "step": 426 }, { "epoch": 0.9850057670126874, "grad_norm": 0.0, "learning_rate": 1.9669930175366474e-05, "loss": 1.2391, "step": 427 }, { "epoch": 0.9873125720876585, "grad_norm": 0.0, "learning_rate": 1.9667809145858863e-05, "loss": 1.3138, "step": 428 }, { "epoch": 0.9896193771626297, "grad_norm": 0.0, "learning_rate": 1.9665681438385475e-05, "loss": 0.8797, "step": 429 }, { "epoch": 0.9919261822376009, "grad_norm": 0.0, "learning_rate": 1.9663547054416006e-05, "loss": 1.0686, "step": 430 }, { "epoch": 0.994232987312572, "grad_norm": 0.0, "learning_rate": 1.966140599542477e-05, "loss": 1.3218, "step": 431 }, { "epoch": 0.9965397923875432, "grad_norm": 0.0, "learning_rate": 1.9659258262890683e-05, "loss": 1.0326, "step": 432 }, { "epoch": 0.9988465974625144, "grad_norm": 0.0, "learning_rate": 1.965710385829728e-05, "loss": 1.0026, "step": 433 }, { "epoch": 1.0011534025374855, "grad_norm": 0.0, "learning_rate": 1.9654942783132696e-05, "loss": 1.0721, "step": 434 }, { "epoch": 1.0034602076124568, "grad_norm": 0.0, "learning_rate": 1.9652775038889676e-05, "loss": 0.8152, "step": 435 }, { "epoch": 1.0057670126874279, "grad_norm": 0.0, "learning_rate": 1.9650600627065573e-05, "loss": 1.1702, "step": 436 }, { "epoch": 1.0080738177623991, "grad_norm": 0.0, "learning_rate": 1.964841954916235e-05, "loss": 0.9148, "step": 437 }, { "epoch": 1.0103806228373702, "grad_norm": 0.0, "learning_rate": 1.9646231806686566e-05, "loss": 0.8995, "step": 438 }, { "epoch": 1.0126874279123415, "grad_norm": 0.0, "learning_rate": 1.964403740114939e-05, "loss": 1.0761, "step": 439 }, { "epoch": 1.0149942329873125, "grad_norm": 0.0, "learning_rate": 1.964183633406659e-05, "loss": 0.9926, "step": 440 }, { "epoch": 1.0173010380622838, "grad_norm": 0.0, "learning_rate": 1.9639628606958535e-05, "loss": 0.653, "step": 441 }, { "epoch": 1.0196078431372548, "grad_norm": 0.0, "learning_rate": 1.9637414221350198e-05, "loss": 0.7684, "step": 442 }, { "epoch": 1.021914648212226, "grad_norm": 0.0, "learning_rate": 1.9635193178771144e-05, "loss": 0.5708, "step": 443 }, { "epoch": 1.0242214532871972, "grad_norm": 0.0, "learning_rate": 1.963296548075555e-05, "loss": 0.5926, "step": 444 }, { "epoch": 1.0265282583621684, "grad_norm": 0.0, "learning_rate": 1.963073112884217e-05, "loss": 0.594, "step": 445 }, { "epoch": 1.0288350634371395, "grad_norm": 0.0, "learning_rate": 1.962849012457438e-05, "loss": 0.5756, "step": 446 }, { "epoch": 1.0311418685121108, "grad_norm": 0.0, "learning_rate": 1.962624246950012e-05, "loss": 0.5881, "step": 447 }, { "epoch": 1.0334486735870818, "grad_norm": 0.0, "learning_rate": 1.9623988165171958e-05, "loss": 0.5506, "step": 448 }, { "epoch": 1.035755478662053, "grad_norm": 0.0, "learning_rate": 1.9621727213147025e-05, "loss": 0.8203, "step": 449 }, { "epoch": 1.0380622837370241, "grad_norm": 0.0, "learning_rate": 1.961945961498706e-05, "loss": 0.8565, "step": 450 }, { "epoch": 1.0403690888119954, "grad_norm": 0.0, "learning_rate": 1.961718537225839e-05, "loss": 0.9169, "step": 451 }, { "epoch": 1.0426758938869665, "grad_norm": 0.0, "learning_rate": 1.9614904486531935e-05, "loss": 0.8638, "step": 452 }, { "epoch": 1.0449826989619377, "grad_norm": 0.0, "learning_rate": 1.961261695938319e-05, "loss": 0.8872, "step": 453 }, { "epoch": 1.0472895040369088, "grad_norm": 0.0, "learning_rate": 1.9610322792392256e-05, "loss": 0.7931, "step": 454 }, { "epoch": 1.04959630911188, "grad_norm": 0.0, "learning_rate": 1.9608021987143805e-05, "loss": 1.0525, "step": 455 }, { "epoch": 1.0519031141868511, "grad_norm": 0.0, "learning_rate": 1.9605714545227105e-05, "loss": 0.6539, "step": 456 }, { "epoch": 1.0542099192618224, "grad_norm": 0.0, "learning_rate": 1.9603400468236e-05, "loss": 0.6141, "step": 457 }, { "epoch": 1.0565167243367934, "grad_norm": 0.0, "learning_rate": 1.9601079757768926e-05, "loss": 0.5129, "step": 458 }, { "epoch": 1.0588235294117647, "grad_norm": 0.0, "learning_rate": 1.9598752415428893e-05, "loss": 0.9438, "step": 459 }, { "epoch": 1.0611303344867358, "grad_norm": 0.0, "learning_rate": 1.9596418442823495e-05, "loss": 0.7448, "step": 460 }, { "epoch": 1.063437139561707, "grad_norm": 0.0, "learning_rate": 1.9594077841564905e-05, "loss": 1.0895, "step": 461 }, { "epoch": 1.065743944636678, "grad_norm": 0.0, "learning_rate": 1.9591730613269878e-05, "loss": 0.7897, "step": 462 }, { "epoch": 1.0680507497116494, "grad_norm": 0.0, "learning_rate": 1.9589376759559747e-05, "loss": 0.9522, "step": 463 }, { "epoch": 1.0703575547866206, "grad_norm": 0.0, "learning_rate": 1.958701628206041e-05, "loss": 0.9497, "step": 464 }, { "epoch": 1.0726643598615917, "grad_norm": 0.0, "learning_rate": 1.9584649182402358e-05, "loss": 0.6935, "step": 465 }, { "epoch": 1.0749711649365628, "grad_norm": 0.0, "learning_rate": 1.958227546222064e-05, "loss": 0.8873, "step": 466 }, { "epoch": 1.077277970011534, "grad_norm": 0.0, "learning_rate": 1.957989512315489e-05, "loss": 0.7781, "step": 467 }, { "epoch": 1.0795847750865053, "grad_norm": 0.0, "learning_rate": 1.9577508166849308e-05, "loss": 0.7, "step": 468 }, { "epoch": 1.0818915801614764, "grad_norm": 0.0, "learning_rate": 1.9575114594952662e-05, "loss": 0.7893, "step": 469 }, { "epoch": 1.0841983852364474, "grad_norm": 0.0, "learning_rate": 1.9572714409118297e-05, "loss": 0.9558, "step": 470 }, { "epoch": 1.0865051903114187, "grad_norm": 0.0, "learning_rate": 1.9570307611004124e-05, "loss": 0.5241, "step": 471 }, { "epoch": 1.08881199538639, "grad_norm": 0.0, "learning_rate": 1.9567894202272623e-05, "loss": 0.7681, "step": 472 }, { "epoch": 1.091118800461361, "grad_norm": 0.0, "learning_rate": 1.9565474184590827e-05, "loss": 0.7164, "step": 473 }, { "epoch": 1.0934256055363323, "grad_norm": 0.0, "learning_rate": 1.9563047559630356e-05, "loss": 0.7642, "step": 474 }, { "epoch": 1.0957324106113033, "grad_norm": 0.0, "learning_rate": 1.956061432906738e-05, "loss": 0.8846, "step": 475 }, { "epoch": 1.0980392156862746, "grad_norm": 0.0, "learning_rate": 1.955817449458263e-05, "loss": 1.1322, "step": 476 }, { "epoch": 1.1003460207612457, "grad_norm": 0.0, "learning_rate": 1.955572805786141e-05, "loss": 0.8562, "step": 477 }, { "epoch": 1.102652825836217, "grad_norm": 0.0, "learning_rate": 1.9553275020593573e-05, "loss": 0.8604, "step": 478 }, { "epoch": 1.104959630911188, "grad_norm": 0.0, "learning_rate": 1.9550815384473534e-05, "loss": 0.8703, "step": 479 }, { "epoch": 1.1072664359861593, "grad_norm": 0.0, "learning_rate": 1.9548349151200275e-05, "loss": 0.9451, "step": 480 }, { "epoch": 1.1095732410611303, "grad_norm": 0.0, "learning_rate": 1.954587632247732e-05, "loss": 0.7471, "step": 481 }, { "epoch": 1.1118800461361016, "grad_norm": 0.0, "learning_rate": 1.9543396900012763e-05, "loss": 0.6657, "step": 482 }, { "epoch": 1.1141868512110726, "grad_norm": 0.0, "learning_rate": 1.9540910885519243e-05, "loss": 0.8245, "step": 483 }, { "epoch": 1.116493656286044, "grad_norm": 0.0, "learning_rate": 1.9538418280713958e-05, "loss": 0.8052, "step": 484 }, { "epoch": 1.118800461361015, "grad_norm": 0.0, "learning_rate": 1.9535919087318654e-05, "loss": 0.7168, "step": 485 }, { "epoch": 1.1211072664359862, "grad_norm": 0.0, "learning_rate": 1.953341330705963e-05, "loss": 0.8016, "step": 486 }, { "epoch": 1.1234140715109573, "grad_norm": 0.0, "learning_rate": 1.9530900941667733e-05, "loss": 0.8135, "step": 487 }, { "epoch": 1.1257208765859286, "grad_norm": 0.0, "learning_rate": 1.9528381992878362e-05, "loss": 0.6256, "step": 488 }, { "epoch": 1.1280276816608996, "grad_norm": 0.0, "learning_rate": 1.9525856462431463e-05, "loss": 0.6397, "step": 489 }, { "epoch": 1.130334486735871, "grad_norm": 0.0, "learning_rate": 1.9523324352071527e-05, "loss": 0.7365, "step": 490 }, { "epoch": 1.132641291810842, "grad_norm": 0.0, "learning_rate": 1.9520785663547586e-05, "loss": 1.0175, "step": 491 }, { "epoch": 1.1349480968858132, "grad_norm": 0.0, "learning_rate": 1.9518240398613226e-05, "loss": 1.0185, "step": 492 }, { "epoch": 1.1372549019607843, "grad_norm": 0.0, "learning_rate": 1.9515688559026564e-05, "loss": 0.8635, "step": 493 }, { "epoch": 1.1395617070357555, "grad_norm": 0.0, "learning_rate": 1.9513130146550266e-05, "loss": 0.592, "step": 494 }, { "epoch": 1.1418685121107266, "grad_norm": 0.0, "learning_rate": 1.9510565162951538e-05, "loss": 0.855, "step": 495 }, { "epoch": 1.1441753171856979, "grad_norm": 0.0, "learning_rate": 1.9507993610002118e-05, "loss": 0.903, "step": 496 }, { "epoch": 1.146482122260669, "grad_norm": 0.0, "learning_rate": 1.9505415489478293e-05, "loss": 0.854, "step": 497 }, { "epoch": 1.1487889273356402, "grad_norm": 0.0, "learning_rate": 1.9502830803160873e-05, "loss": 0.6826, "step": 498 }, { "epoch": 1.1510957324106112, "grad_norm": 0.0, "learning_rate": 1.9500239552835216e-05, "loss": 0.826, "step": 499 }, { "epoch": 1.1534025374855825, "grad_norm": 0.0, "learning_rate": 1.9497641740291207e-05, "loss": 0.8879, "step": 500 }, { "epoch": 1.1557093425605536, "grad_norm": 0.0, "learning_rate": 1.9495037367323264e-05, "loss": 0.9472, "step": 501 }, { "epoch": 1.1580161476355249, "grad_norm": 0.0, "learning_rate": 1.949242643573034e-05, "loss": 0.7107, "step": 502 }, { "epoch": 1.160322952710496, "grad_norm": 0.0, "learning_rate": 1.9489808947315915e-05, "loss": 1.0408, "step": 503 }, { "epoch": 1.1626297577854672, "grad_norm": 0.0, "learning_rate": 1.9487184903888e-05, "loss": 0.8161, "step": 504 }, { "epoch": 1.1649365628604382, "grad_norm": 0.0, "learning_rate": 1.948455430725913e-05, "loss": 0.7252, "step": 505 }, { "epoch": 1.1672433679354095, "grad_norm": 0.0, "learning_rate": 1.9481917159246375e-05, "loss": 1.1161, "step": 506 }, { "epoch": 1.1695501730103806, "grad_norm": 0.0, "learning_rate": 1.947927346167132e-05, "loss": 0.773, "step": 507 }, { "epoch": 1.1718569780853518, "grad_norm": 0.0, "learning_rate": 1.947662321636008e-05, "loss": 1.0159, "step": 508 }, { "epoch": 1.1741637831603229, "grad_norm": 0.0, "learning_rate": 1.9473966425143292e-05, "loss": 0.684, "step": 509 }, { "epoch": 1.1764705882352942, "grad_norm": 0.0, "learning_rate": 1.947130308985612e-05, "loss": 0.6892, "step": 510 }, { "epoch": 1.1787773933102652, "grad_norm": 0.0, "learning_rate": 1.9468633212338236e-05, "loss": 0.8094, "step": 511 }, { "epoch": 1.1810841983852365, "grad_norm": 0.0, "learning_rate": 1.9465956794433837e-05, "loss": 0.9673, "step": 512 }, { "epoch": 1.1833910034602075, "grad_norm": 0.0, "learning_rate": 1.9463273837991643e-05, "loss": 0.9528, "step": 513 }, { "epoch": 1.1856978085351788, "grad_norm": 0.0, "learning_rate": 1.9460584344864885e-05, "loss": 0.6999, "step": 514 }, { "epoch": 1.1880046136101499, "grad_norm": 0.0, "learning_rate": 1.9457888316911305e-05, "loss": 0.8349, "step": 515 }, { "epoch": 1.1903114186851211, "grad_norm": 0.0, "learning_rate": 1.945518575599317e-05, "loss": 0.9279, "step": 516 }, { "epoch": 1.1926182237600922, "grad_norm": 0.0, "learning_rate": 1.945247666397725e-05, "loss": 0.6596, "step": 517 }, { "epoch": 1.1949250288350635, "grad_norm": 0.0, "learning_rate": 1.944976104273483e-05, "loss": 0.6871, "step": 518 }, { "epoch": 1.1972318339100345, "grad_norm": 0.0, "learning_rate": 1.9447038894141707e-05, "loss": 0.9282, "step": 519 }, { "epoch": 1.1995386389850058, "grad_norm": 0.0, "learning_rate": 1.944431022007818e-05, "loss": 0.8709, "step": 520 }, { "epoch": 1.2018454440599768, "grad_norm": 0.0, "learning_rate": 1.9441575022429065e-05, "loss": 0.4996, "step": 521 }, { "epoch": 1.2041522491349481, "grad_norm": 0.0, "learning_rate": 1.9438833303083677e-05, "loss": 0.8457, "step": 522 }, { "epoch": 1.2064590542099192, "grad_norm": 0.0, "learning_rate": 1.9436085063935837e-05, "loss": 0.7837, "step": 523 }, { "epoch": 1.2087658592848904, "grad_norm": 0.0, "learning_rate": 1.943333030688387e-05, "loss": 0.7091, "step": 524 }, { "epoch": 1.2110726643598615, "grad_norm": 0.0, "learning_rate": 1.9430569033830606e-05, "loss": 0.7262, "step": 525 }, { "epoch": 1.2133794694348328, "grad_norm": 0.0, "learning_rate": 1.942780124668337e-05, "loss": 0.88, "step": 526 }, { "epoch": 1.215686274509804, "grad_norm": 0.0, "learning_rate": 1.9425026947353994e-05, "loss": 0.6826, "step": 527 }, { "epoch": 1.217993079584775, "grad_norm": 0.0, "learning_rate": 1.94222461377588e-05, "loss": 1.0498, "step": 528 }, { "epoch": 1.2202998846597461, "grad_norm": 0.0, "learning_rate": 1.9419458819818617e-05, "loss": 0.7679, "step": 529 }, { "epoch": 1.2226066897347174, "grad_norm": 0.0, "learning_rate": 1.9416664995458756e-05, "loss": 0.8326, "step": 530 }, { "epoch": 1.2249134948096887, "grad_norm": 0.0, "learning_rate": 1.9413864666609036e-05, "loss": 0.902, "step": 531 }, { "epoch": 1.2272202998846597, "grad_norm": 0.0, "learning_rate": 1.9411057835203756e-05, "loss": 0.7393, "step": 532 }, { "epoch": 1.2295271049596308, "grad_norm": 0.0, "learning_rate": 1.9408244503181723e-05, "loss": 0.7971, "step": 533 }, { "epoch": 1.231833910034602, "grad_norm": 0.0, "learning_rate": 1.9405424672486218e-05, "loss": 0.651, "step": 534 }, { "epoch": 1.2341407151095733, "grad_norm": 0.0, "learning_rate": 1.940259834506502e-05, "loss": 0.799, "step": 535 }, { "epoch": 1.2364475201845444, "grad_norm": 0.0, "learning_rate": 1.939976552287039e-05, "loss": 0.9757, "step": 536 }, { "epoch": 1.2387543252595155, "grad_norm": 0.0, "learning_rate": 1.9396926207859085e-05, "loss": 0.9407, "step": 537 }, { "epoch": 1.2410611303344867, "grad_norm": 0.0, "learning_rate": 1.9394080401992336e-05, "loss": 0.7027, "step": 538 }, { "epoch": 1.243367935409458, "grad_norm": 0.0, "learning_rate": 1.939122810723586e-05, "loss": 0.7995, "step": 539 }, { "epoch": 1.245674740484429, "grad_norm": 0.0, "learning_rate": 1.9388369325559862e-05, "loss": 0.8117, "step": 540 }, { "epoch": 1.2479815455594, "grad_norm": 0.0, "learning_rate": 1.9385504058939023e-05, "loss": 0.6787, "step": 541 }, { "epoch": 1.2502883506343714, "grad_norm": 0.0, "learning_rate": 1.9382632309352503e-05, "loss": 0.8654, "step": 542 }, { "epoch": 1.2525951557093427, "grad_norm": 0.0, "learning_rate": 1.937975407878394e-05, "loss": 0.8878, "step": 543 }, { "epoch": 1.2549019607843137, "grad_norm": 0.0, "learning_rate": 1.937686936922145e-05, "loss": 0.7783, "step": 544 }, { "epoch": 1.2572087658592848, "grad_norm": 0.0, "learning_rate": 1.9373978182657628e-05, "loss": 1.1215, "step": 545 }, { "epoch": 1.259515570934256, "grad_norm": 0.0, "learning_rate": 1.9371080521089536e-05, "loss": 0.8044, "step": 546 }, { "epoch": 1.2618223760092273, "grad_norm": 0.0, "learning_rate": 1.936817638651871e-05, "loss": 0.7656, "step": 547 }, { "epoch": 1.2641291810841984, "grad_norm": 0.0, "learning_rate": 1.9365265780951167e-05, "loss": 0.8099, "step": 548 }, { "epoch": 1.2664359861591694, "grad_norm": 0.0, "learning_rate": 1.9362348706397374e-05, "loss": 0.9516, "step": 549 }, { "epoch": 1.2687427912341407, "grad_norm": 0.0, "learning_rate": 1.9359425164872285e-05, "loss": 0.7079, "step": 550 }, { "epoch": 1.271049596309112, "grad_norm": 0.0, "learning_rate": 1.9356495158395317e-05, "loss": 0.6025, "step": 551 }, { "epoch": 1.273356401384083, "grad_norm": 0.0, "learning_rate": 1.935355868899034e-05, "loss": 0.8121, "step": 552 }, { "epoch": 1.2756632064590543, "grad_norm": 0.0, "learning_rate": 1.935061575868571e-05, "loss": 0.5483, "step": 553 }, { "epoch": 1.2779700115340253, "grad_norm": 0.0, "learning_rate": 1.9347666369514225e-05, "loss": 0.8122, "step": 554 }, { "epoch": 1.2802768166089966, "grad_norm": 0.0, "learning_rate": 1.9344710523513157e-05, "loss": 0.8315, "step": 555 }, { "epoch": 1.2825836216839677, "grad_norm": 0.0, "learning_rate": 1.9341748222724233e-05, "loss": 0.818, "step": 556 }, { "epoch": 1.284890426758939, "grad_norm": 0.0, "learning_rate": 1.9338779469193638e-05, "loss": 0.802, "step": 557 }, { "epoch": 1.28719723183391, "grad_norm": 0.0, "learning_rate": 1.9335804264972018e-05, "loss": 0.527, "step": 558 }, { "epoch": 1.2895040369088813, "grad_norm": 0.0, "learning_rate": 1.9332822612114474e-05, "loss": 0.8959, "step": 559 }, { "epoch": 1.2918108419838523, "grad_norm": 0.0, "learning_rate": 1.9329834512680558e-05, "loss": 1.0901, "step": 560 }, { "epoch": 1.2941176470588236, "grad_norm": 0.0, "learning_rate": 1.9326839968734278e-05, "loss": 1.0168, "step": 561 }, { "epoch": 1.2964244521337946, "grad_norm": 0.0, "learning_rate": 1.9323838982344092e-05, "loss": 0.6239, "step": 562 }, { "epoch": 1.298731257208766, "grad_norm": 0.0, "learning_rate": 1.9320831555582908e-05, "loss": 0.7308, "step": 563 }, { "epoch": 1.301038062283737, "grad_norm": 0.0, "learning_rate": 1.9317817690528086e-05, "loss": 0.8554, "step": 564 }, { "epoch": 1.3033448673587082, "grad_norm": 0.0, "learning_rate": 1.9314797389261426e-05, "loss": 1.0442, "step": 565 }, { "epoch": 1.3056516724336793, "grad_norm": 0.0, "learning_rate": 1.931177065386918e-05, "loss": 1.1273, "step": 566 }, { "epoch": 1.3079584775086506, "grad_norm": 0.0, "learning_rate": 1.9308737486442045e-05, "loss": 0.5567, "step": 567 }, { "epoch": 1.3102652825836216, "grad_norm": 0.0, "learning_rate": 1.9305697889075155e-05, "loss": 0.6401, "step": 568 }, { "epoch": 1.312572087658593, "grad_norm": 0.0, "learning_rate": 1.9302651863868093e-05, "loss": 0.667, "step": 569 }, { "epoch": 1.314878892733564, "grad_norm": 0.0, "learning_rate": 1.929959941292487e-05, "loss": 0.8023, "step": 570 }, { "epoch": 1.3171856978085352, "grad_norm": 0.0, "learning_rate": 1.9296540538353948e-05, "loss": 0.9496, "step": 571 }, { "epoch": 1.3194925028835063, "grad_norm": 0.0, "learning_rate": 1.9293475242268224e-05, "loss": 0.5982, "step": 572 }, { "epoch": 1.3217993079584776, "grad_norm": 0.0, "learning_rate": 1.9290403526785025e-05, "loss": 0.8044, "step": 573 }, { "epoch": 1.3241061130334486, "grad_norm": 0.0, "learning_rate": 1.928732539402612e-05, "loss": 0.5639, "step": 574 }, { "epoch": 1.3264129181084199, "grad_norm": 0.0, "learning_rate": 1.9284240846117698e-05, "loss": 0.9358, "step": 575 }, { "epoch": 1.328719723183391, "grad_norm": 0.0, "learning_rate": 1.928114988519039e-05, "loss": 0.5317, "step": 576 }, { "epoch": 1.3310265282583622, "grad_norm": 0.0, "learning_rate": 1.9278052513379256e-05, "loss": 0.629, "step": 577 }, { "epoch": 1.3333333333333333, "grad_norm": 0.0, "learning_rate": 1.927494873282378e-05, "loss": 1.0959, "step": 578 }, { "epoch": 1.3356401384083045, "grad_norm": 0.0, "learning_rate": 1.9271838545667876e-05, "loss": 0.5953, "step": 579 }, { "epoch": 1.3379469434832756, "grad_norm": 0.0, "learning_rate": 1.9268721954059878e-05, "loss": 0.9326, "step": 580 }, { "epoch": 1.3402537485582469, "grad_norm": 0.0, "learning_rate": 1.9265598960152556e-05, "loss": 0.6775, "step": 581 }, { "epoch": 1.342560553633218, "grad_norm": 0.0, "learning_rate": 1.926246956610309e-05, "loss": 0.6612, "step": 582 }, { "epoch": 1.3448673587081892, "grad_norm": 0.0, "learning_rate": 1.9259333774073084e-05, "loss": 0.7408, "step": 583 }, { "epoch": 1.3471741637831602, "grad_norm": 0.0, "learning_rate": 1.925619158622856e-05, "loss": 0.7277, "step": 584 }, { "epoch": 1.3494809688581315, "grad_norm": 0.0, "learning_rate": 1.9253043004739967e-05, "loss": 0.8082, "step": 585 }, { "epoch": 1.3517877739331028, "grad_norm": 0.0, "learning_rate": 1.924988803178216e-05, "loss": 0.9178, "step": 586 }, { "epoch": 1.3540945790080738, "grad_norm": 0.0, "learning_rate": 1.9246726669534416e-05, "loss": 0.677, "step": 587 }, { "epoch": 1.356401384083045, "grad_norm": 0.0, "learning_rate": 1.9243558920180417e-05, "loss": 0.7831, "step": 588 }, { "epoch": 1.3587081891580162, "grad_norm": 0.0, "learning_rate": 1.9240384785908267e-05, "loss": 0.9029, "step": 589 }, { "epoch": 1.3610149942329874, "grad_norm": 0.0, "learning_rate": 1.923720426891047e-05, "loss": 0.7002, "step": 590 }, { "epoch": 1.3633217993079585, "grad_norm": 0.0, "learning_rate": 1.9234017371383946e-05, "loss": 0.7205, "step": 591 }, { "epoch": 1.3656286043829295, "grad_norm": 0.0, "learning_rate": 1.923082409553002e-05, "loss": 0.713, "step": 592 }, { "epoch": 1.3679354094579008, "grad_norm": 0.0, "learning_rate": 1.9227624443554425e-05, "loss": 0.712, "step": 593 }, { "epoch": 1.370242214532872, "grad_norm": 0.0, "learning_rate": 1.9224418417667295e-05, "loss": 1.0618, "step": 594 }, { "epoch": 1.3725490196078431, "grad_norm": 0.0, "learning_rate": 1.9221206020083166e-05, "loss": 1.0404, "step": 595 }, { "epoch": 1.3748558246828142, "grad_norm": 0.0, "learning_rate": 1.9217987253020982e-05, "loss": 0.8067, "step": 596 }, { "epoch": 1.3771626297577855, "grad_norm": 0.0, "learning_rate": 1.921476211870408e-05, "loss": 0.7118, "step": 597 }, { "epoch": 1.3794694348327567, "grad_norm": 0.0, "learning_rate": 1.9211530619360194e-05, "loss": 0.6202, "step": 598 }, { "epoch": 1.3817762399077278, "grad_norm": 0.0, "learning_rate": 1.920829275722146e-05, "loss": 1.0615, "step": 599 }, { "epoch": 1.3840830449826989, "grad_norm": 0.0, "learning_rate": 1.9205048534524405e-05, "loss": 0.7915, "step": 600 }, { "epoch": 1.3863898500576701, "grad_norm": 0.0, "learning_rate": 1.9201797953509954e-05, "loss": 0.9722, "step": 601 }, { "epoch": 1.3886966551326414, "grad_norm": 0.0, "learning_rate": 1.919854101642342e-05, "loss": 0.8785, "step": 602 }, { "epoch": 1.3910034602076125, "grad_norm": 0.0, "learning_rate": 1.919527772551451e-05, "loss": 0.7116, "step": 603 }, { "epoch": 1.3933102652825835, "grad_norm": 0.0, "learning_rate": 1.9192008083037314e-05, "loss": 0.7417, "step": 604 }, { "epoch": 1.3956170703575548, "grad_norm": 0.0, "learning_rate": 1.918873209125031e-05, "loss": 0.865, "step": 605 }, { "epoch": 1.397923875432526, "grad_norm": 0.0, "learning_rate": 1.9185449752416367e-05, "loss": 0.7466, "step": 606 }, { "epoch": 1.400230680507497, "grad_norm": 0.0, "learning_rate": 1.9182161068802742e-05, "loss": 0.9286, "step": 607 }, { "epoch": 1.4025374855824682, "grad_norm": 0.0, "learning_rate": 1.9178866042681062e-05, "loss": 1.0603, "step": 608 }, { "epoch": 1.4048442906574394, "grad_norm": 0.0, "learning_rate": 1.917556467632734e-05, "loss": 1.0477, "step": 609 }, { "epoch": 1.4071510957324107, "grad_norm": 0.0, "learning_rate": 1.917225697202197e-05, "loss": 0.8019, "step": 610 }, { "epoch": 1.4094579008073818, "grad_norm": 0.0, "learning_rate": 1.916894293204973e-05, "loss": 0.9148, "step": 611 }, { "epoch": 1.4117647058823528, "grad_norm": 0.0, "learning_rate": 1.9165622558699763e-05, "loss": 0.6256, "step": 612 }, { "epoch": 1.414071510957324, "grad_norm": 0.0, "learning_rate": 1.9162295854265593e-05, "loss": 0.6784, "step": 613 }, { "epoch": 1.4163783160322954, "grad_norm": 0.0, "learning_rate": 1.9158962821045113e-05, "loss": 0.8205, "step": 614 }, { "epoch": 1.4186851211072664, "grad_norm": 0.0, "learning_rate": 1.9155623461340594e-05, "loss": 0.6162, "step": 615 }, { "epoch": 1.4209919261822375, "grad_norm": 0.0, "learning_rate": 1.9152277777458667e-05, "loss": 0.6434, "step": 616 }, { "epoch": 1.4232987312572087, "grad_norm": 0.0, "learning_rate": 1.9148925771710347e-05, "loss": 0.8216, "step": 617 }, { "epoch": 1.42560553633218, "grad_norm": 0.0, "learning_rate": 1.9145567446411003e-05, "loss": 0.8651, "step": 618 }, { "epoch": 1.427912341407151, "grad_norm": 0.0, "learning_rate": 1.9142202803880373e-05, "loss": 0.8115, "step": 619 }, { "epoch": 1.4302191464821223, "grad_norm": 0.0, "learning_rate": 1.913883184644255e-05, "loss": 0.9731, "step": 620 }, { "epoch": 1.4325259515570934, "grad_norm": 0.0, "learning_rate": 1.913545457642601e-05, "loss": 0.4795, "step": 621 }, { "epoch": 1.4348327566320647, "grad_norm": 0.0, "learning_rate": 1.913207099616357e-05, "loss": 1.016, "step": 622 }, { "epoch": 1.4371395617070357, "grad_norm": 0.0, "learning_rate": 1.9128681107992415e-05, "loss": 0.8133, "step": 623 }, { "epoch": 1.439446366782007, "grad_norm": 0.0, "learning_rate": 1.912528491425408e-05, "loss": 0.7121, "step": 624 }, { "epoch": 1.441753171856978, "grad_norm": 0.0, "learning_rate": 1.9121882417294462e-05, "loss": 0.8303, "step": 625 }, { "epoch": 1.4440599769319493, "grad_norm": 0.0, "learning_rate": 1.9118473619463813e-05, "loss": 0.5516, "step": 626 }, { "epoch": 1.4463667820069204, "grad_norm": 0.0, "learning_rate": 1.9115058523116734e-05, "loss": 0.5597, "step": 627 }, { "epoch": 1.4486735870818916, "grad_norm": 0.0, "learning_rate": 1.9111637130612172e-05, "loss": 0.6982, "step": 628 }, { "epoch": 1.4509803921568627, "grad_norm": 0.0, "learning_rate": 1.9108209444313432e-05, "loss": 0.7925, "step": 629 }, { "epoch": 1.453287197231834, "grad_norm": 0.0, "learning_rate": 1.9104775466588162e-05, "loss": 0.7492, "step": 630 }, { "epoch": 1.455594002306805, "grad_norm": 0.0, "learning_rate": 1.9101335199808352e-05, "loss": 1.0452, "step": 631 }, { "epoch": 1.4579008073817763, "grad_norm": 0.0, "learning_rate": 1.9097888646350347e-05, "loss": 0.8255, "step": 632 }, { "epoch": 1.4602076124567474, "grad_norm": 0.0, "learning_rate": 1.9094435808594823e-05, "loss": 0.8823, "step": 633 }, { "epoch": 1.4625144175317186, "grad_norm": 0.0, "learning_rate": 1.9090976688926802e-05, "loss": 0.8228, "step": 634 }, { "epoch": 1.4648212226066897, "grad_norm": 0.0, "learning_rate": 1.9087511289735646e-05, "loss": 0.7167, "step": 635 }, { "epoch": 1.467128027681661, "grad_norm": 0.0, "learning_rate": 1.9084039613415052e-05, "loss": 0.6188, "step": 636 }, { "epoch": 1.469434832756632, "grad_norm": 0.0, "learning_rate": 1.908056166236305e-05, "loss": 0.5321, "step": 637 }, { "epoch": 1.4717416378316033, "grad_norm": 0.0, "learning_rate": 1.9077077438982016e-05, "loss": 0.5406, "step": 638 }, { "epoch": 1.4740484429065743, "grad_norm": 0.0, "learning_rate": 1.907358694567865e-05, "loss": 0.8034, "step": 639 }, { "epoch": 1.4763552479815456, "grad_norm": 0.0, "learning_rate": 1.907009018486398e-05, "loss": 0.7912, "step": 640 }, { "epoch": 1.4786620530565167, "grad_norm": 0.0, "learning_rate": 1.906658715895337e-05, "loss": 0.9115, "step": 641 }, { "epoch": 1.480968858131488, "grad_norm": 0.0, "learning_rate": 1.9063077870366504e-05, "loss": 0.6487, "step": 642 }, { "epoch": 1.483275663206459, "grad_norm": 0.0, "learning_rate": 1.9059562321527397e-05, "loss": 0.8266, "step": 643 }, { "epoch": 1.4855824682814303, "grad_norm": 0.0, "learning_rate": 1.905604051486439e-05, "loss": 0.637, "step": 644 }, { "epoch": 1.4878892733564013, "grad_norm": 0.0, "learning_rate": 1.905251245281015e-05, "loss": 0.9449, "step": 645 }, { "epoch": 1.4901960784313726, "grad_norm": 0.0, "learning_rate": 1.904897813780165e-05, "loss": 0.87, "step": 646 }, { "epoch": 1.4925028835063436, "grad_norm": 0.0, "learning_rate": 1.9045437572280193e-05, "loss": 1.0397, "step": 647 }, { "epoch": 1.494809688581315, "grad_norm": 0.0, "learning_rate": 1.9041890758691403e-05, "loss": 1.0072, "step": 648 }, { "epoch": 1.497116493656286, "grad_norm": 0.0, "learning_rate": 1.9038337699485207e-05, "loss": 0.7953, "step": 649 }, { "epoch": 1.4994232987312572, "grad_norm": 0.0, "learning_rate": 1.903477839711586e-05, "loss": 0.9631, "step": 650 }, { "epoch": 1.5017301038062283, "grad_norm": 0.0, "learning_rate": 1.903121285404192e-05, "loss": 0.7595, "step": 651 }, { "epoch": 1.5040369088811996, "grad_norm": 0.0, "learning_rate": 1.902764107272626e-05, "loss": 0.8466, "step": 652 }, { "epoch": 1.5063437139561708, "grad_norm": 0.0, "learning_rate": 1.9024063055636057e-05, "loss": 0.6512, "step": 653 }, { "epoch": 1.508650519031142, "grad_norm": 0.0, "learning_rate": 1.9020478805242803e-05, "loss": 0.6777, "step": 654 }, { "epoch": 1.510957324106113, "grad_norm": 0.0, "learning_rate": 1.9016888324022294e-05, "loss": 0.5648, "step": 655 }, { "epoch": 1.5132641291810842, "grad_norm": 0.0, "learning_rate": 1.9013291614454622e-05, "loss": 0.593, "step": 656 }, { "epoch": 1.5155709342560555, "grad_norm": 0.0, "learning_rate": 1.900968867902419e-05, "loss": 1.0741, "step": 657 }, { "epoch": 1.5178777393310265, "grad_norm": 0.0, "learning_rate": 1.90060795202197e-05, "loss": 0.596, "step": 658 }, { "epoch": 1.5201845444059976, "grad_norm": 0.0, "learning_rate": 1.9002464140534148e-05, "loss": 0.641, "step": 659 }, { "epoch": 1.5224913494809689, "grad_norm": 0.0, "learning_rate": 1.8998842542464833e-05, "loss": 0.5225, "step": 660 }, { "epoch": 1.5247981545559401, "grad_norm": 0.0, "learning_rate": 1.899521472851334e-05, "loss": 1.047, "step": 661 }, { "epoch": 1.5271049596309112, "grad_norm": 0.0, "learning_rate": 1.8991580701185564e-05, "loss": 1.0123, "step": 662 }, { "epoch": 1.5294117647058822, "grad_norm": 0.0, "learning_rate": 1.8987940462991673e-05, "loss": 0.6392, "step": 663 }, { "epoch": 1.5317185697808535, "grad_norm": 0.0, "learning_rate": 1.8984294016446135e-05, "loss": 0.6204, "step": 664 }, { "epoch": 1.5340253748558248, "grad_norm": 0.0, "learning_rate": 1.8980641364067708e-05, "loss": 0.5901, "step": 665 }, { "epoch": 1.5363321799307958, "grad_norm": 0.0, "learning_rate": 1.8976982508379436e-05, "loss": 0.5011, "step": 666 }, { "epoch": 1.538638985005767, "grad_norm": 0.0, "learning_rate": 1.8973317451908642e-05, "loss": 0.7011, "step": 667 }, { "epoch": 1.5409457900807382, "grad_norm": 0.0, "learning_rate": 1.8969646197186934e-05, "loss": 0.8344, "step": 668 }, { "epoch": 1.5432525951557095, "grad_norm": 0.0, "learning_rate": 1.896596874675021e-05, "loss": 0.6678, "step": 669 }, { "epoch": 1.5455594002306805, "grad_norm": 0.0, "learning_rate": 1.8962285103138637e-05, "loss": 0.7885, "step": 670 }, { "epoch": 1.5478662053056516, "grad_norm": 0.0, "learning_rate": 1.8958595268896662e-05, "loss": 0.7285, "step": 671 }, { "epoch": 1.5501730103806228, "grad_norm": 0.0, "learning_rate": 1.895489924657301e-05, "loss": 0.8522, "step": 672 }, { "epoch": 1.552479815455594, "grad_norm": 0.0, "learning_rate": 1.895119703872069e-05, "loss": 0.8042, "step": 673 }, { "epoch": 1.5547866205305652, "grad_norm": 0.0, "learning_rate": 1.894748864789696e-05, "loss": 0.581, "step": 674 }, { "epoch": 1.5570934256055362, "grad_norm": 0.0, "learning_rate": 1.8943774076663372e-05, "loss": 0.9724, "step": 675 }, { "epoch": 1.5594002306805075, "grad_norm": 0.0, "learning_rate": 1.8940053327585733e-05, "loss": 0.6206, "step": 676 }, { "epoch": 1.5617070357554788, "grad_norm": 0.0, "learning_rate": 1.8936326403234125e-05, "loss": 0.7367, "step": 677 }, { "epoch": 1.5640138408304498, "grad_norm": 0.0, "learning_rate": 1.893259330618289e-05, "loss": 0.5564, "step": 678 }, { "epoch": 1.5663206459054209, "grad_norm": 0.0, "learning_rate": 1.8928854039010643e-05, "loss": 0.4865, "step": 679 }, { "epoch": 1.5686274509803921, "grad_norm": 0.0, "learning_rate": 1.8925108604300245e-05, "loss": 0.671, "step": 680 }, { "epoch": 1.5709342560553634, "grad_norm": 0.0, "learning_rate": 1.8921357004638837e-05, "loss": 0.6144, "step": 681 }, { "epoch": 1.5732410611303345, "grad_norm": 0.0, "learning_rate": 1.8917599242617796e-05, "loss": 0.9663, "step": 682 }, { "epoch": 1.5755478662053055, "grad_norm": 0.0, "learning_rate": 1.8913835320832778e-05, "loss": 0.9865, "step": 683 }, { "epoch": 1.5778546712802768, "grad_norm": 0.0, "learning_rate": 1.891006524188368e-05, "loss": 0.9708, "step": 684 }, { "epoch": 1.580161476355248, "grad_norm": 0.0, "learning_rate": 1.8906289008374654e-05, "loss": 0.9549, "step": 685 }, { "epoch": 1.5824682814302191, "grad_norm": 0.0, "learning_rate": 1.8902506622914105e-05, "loss": 0.577, "step": 686 }, { "epoch": 1.5847750865051902, "grad_norm": 0.0, "learning_rate": 1.8898718088114688e-05, "loss": 0.6999, "step": 687 }, { "epoch": 1.5870818915801614, "grad_norm": 0.0, "learning_rate": 1.8894923406593305e-05, "loss": 1.0039, "step": 688 }, { "epoch": 1.5893886966551327, "grad_norm": 0.0, "learning_rate": 1.88911225809711e-05, "loss": 1.2906, "step": 689 }, { "epoch": 1.5916955017301038, "grad_norm": 0.0, "learning_rate": 1.888731561387347e-05, "loss": 0.6353, "step": 690 }, { "epoch": 1.5940023068050748, "grad_norm": 0.0, "learning_rate": 1.8883502507930044e-05, "loss": 0.5049, "step": 691 }, { "epoch": 1.596309111880046, "grad_norm": 0.0, "learning_rate": 1.8879683265774695e-05, "loss": 0.7501, "step": 692 }, { "epoch": 1.5986159169550174, "grad_norm": 0.0, "learning_rate": 1.8875857890045544e-05, "loss": 0.8276, "step": 693 }, { "epoch": 1.6009227220299884, "grad_norm": 0.0, "learning_rate": 1.887202638338493e-05, "loss": 0.6369, "step": 694 }, { "epoch": 1.6032295271049595, "grad_norm": 0.0, "learning_rate": 1.8868188748439444e-05, "loss": 0.8369, "step": 695 }, { "epoch": 1.6055363321799307, "grad_norm": 0.0, "learning_rate": 1.8864344987859898e-05, "loss": 0.8344, "step": 696 }, { "epoch": 1.607843137254902, "grad_norm": 0.0, "learning_rate": 1.8860495104301346e-05, "loss": 0.8726, "step": 697 }, { "epoch": 1.610149942329873, "grad_norm": 0.0, "learning_rate": 1.8856639100423062e-05, "loss": 0.9648, "step": 698 }, { "epoch": 1.6124567474048441, "grad_norm": 0.0, "learning_rate": 1.885277697888855e-05, "loss": 0.6344, "step": 699 }, { "epoch": 1.6147635524798154, "grad_norm": 0.0, "learning_rate": 1.8848908742365547e-05, "loss": 0.6824, "step": 700 }, { "epoch": 1.6170703575547867, "grad_norm": 0.0, "learning_rate": 1.8845034393526005e-05, "loss": 0.628, "step": 701 }, { "epoch": 1.6193771626297577, "grad_norm": 0.0, "learning_rate": 1.8841153935046098e-05, "loss": 0.6739, "step": 702 }, { "epoch": 1.621683967704729, "grad_norm": 0.0, "learning_rate": 1.8837267369606228e-05, "loss": 0.5553, "step": 703 }, { "epoch": 1.6239907727797003, "grad_norm": 0.0, "learning_rate": 1.883337469989101e-05, "loss": 0.5648, "step": 704 }, { "epoch": 1.6262975778546713, "grad_norm": 0.0, "learning_rate": 1.8829475928589272e-05, "loss": 0.6747, "step": 705 }, { "epoch": 1.6286043829296424, "grad_norm": 0.0, "learning_rate": 1.882557105839406e-05, "loss": 0.9867, "step": 706 }, { "epoch": 1.6309111880046137, "grad_norm": 0.0, "learning_rate": 1.8821660092002642e-05, "loss": 0.843, "step": 707 }, { "epoch": 1.633217993079585, "grad_norm": 0.0, "learning_rate": 1.8817743032116483e-05, "loss": 0.5533, "step": 708 }, { "epoch": 1.635524798154556, "grad_norm": 0.0, "learning_rate": 1.881381988144126e-05, "loss": 0.8924, "step": 709 }, { "epoch": 1.637831603229527, "grad_norm": 0.0, "learning_rate": 1.8809890642686862e-05, "loss": 1.0735, "step": 710 }, { "epoch": 1.6401384083044983, "grad_norm": 0.0, "learning_rate": 1.880595531856738e-05, "loss": 0.6316, "step": 711 }, { "epoch": 1.6424452133794696, "grad_norm": 0.0, "learning_rate": 1.880201391180111e-05, "loss": 1.0137, "step": 712 }, { "epoch": 1.6447520184544406, "grad_norm": 0.0, "learning_rate": 1.879806642511055e-05, "loss": 0.4879, "step": 713 }, { "epoch": 1.6470588235294117, "grad_norm": 0.0, "learning_rate": 1.87941128612224e-05, "loss": 0.8189, "step": 714 }, { "epoch": 1.649365628604383, "grad_norm": 0.0, "learning_rate": 1.879015322286754e-05, "loss": 1.1396, "step": 715 }, { "epoch": 1.6516724336793542, "grad_norm": 0.0, "learning_rate": 1.878618751278108e-05, "loss": 1.1921, "step": 716 }, { "epoch": 1.6539792387543253, "grad_norm": 0.0, "learning_rate": 1.8782215733702286e-05, "loss": 0.6635, "step": 717 }, { "epoch": 1.6562860438292963, "grad_norm": 0.0, "learning_rate": 1.8778237888374647e-05, "loss": 0.5903, "step": 718 }, { "epoch": 1.6585928489042676, "grad_norm": 0.0, "learning_rate": 1.877425397954582e-05, "loss": 0.7721, "step": 719 }, { "epoch": 1.6608996539792389, "grad_norm": 0.0, "learning_rate": 1.8770264009967667e-05, "loss": 0.8322, "step": 720 }, { "epoch": 1.66320645905421, "grad_norm": 0.0, "learning_rate": 1.8766267982396225e-05, "loss": 0.8399, "step": 721 }, { "epoch": 1.665513264129181, "grad_norm": 0.0, "learning_rate": 1.8762265899591724e-05, "loss": 0.773, "step": 722 }, { "epoch": 1.6678200692041523, "grad_norm": 0.0, "learning_rate": 1.8758257764318566e-05, "loss": 0.8928, "step": 723 }, { "epoch": 1.6701268742791235, "grad_norm": 0.0, "learning_rate": 1.8754243579345347e-05, "loss": 0.7347, "step": 724 }, { "epoch": 1.6724336793540946, "grad_norm": 0.0, "learning_rate": 1.875022334744483e-05, "loss": 0.7234, "step": 725 }, { "epoch": 1.6747404844290656, "grad_norm": 0.0, "learning_rate": 1.874619707139396e-05, "loss": 1.0121, "step": 726 }, { "epoch": 1.677047289504037, "grad_norm": 0.0, "learning_rate": 1.874216475397386e-05, "loss": 0.7666, "step": 727 }, { "epoch": 1.6793540945790082, "grad_norm": 0.0, "learning_rate": 1.8738126397969818e-05, "loss": 0.6755, "step": 728 }, { "epoch": 1.6816608996539792, "grad_norm": 0.0, "learning_rate": 1.87340820061713e-05, "loss": 0.7993, "step": 729 }, { "epoch": 1.6839677047289503, "grad_norm": 0.0, "learning_rate": 1.873003158137194e-05, "loss": 1.0176, "step": 730 }, { "epoch": 1.6862745098039216, "grad_norm": 0.0, "learning_rate": 1.8725975126369535e-05, "loss": 0.6397, "step": 731 }, { "epoch": 1.6885813148788928, "grad_norm": 0.0, "learning_rate": 1.8721912643966055e-05, "loss": 0.8066, "step": 732 }, { "epoch": 1.690888119953864, "grad_norm": 0.0, "learning_rate": 1.8717844136967626e-05, "loss": 0.7193, "step": 733 }, { "epoch": 1.693194925028835, "grad_norm": 0.0, "learning_rate": 1.871376960818454e-05, "loss": 0.5262, "step": 734 }, { "epoch": 1.6955017301038062, "grad_norm": 0.0, "learning_rate": 1.8709689060431242e-05, "loss": 0.8603, "step": 735 }, { "epoch": 1.6978085351787775, "grad_norm": 0.0, "learning_rate": 1.8705602496526344e-05, "loss": 0.6934, "step": 736 }, { "epoch": 1.7001153402537486, "grad_norm": 0.0, "learning_rate": 1.870150991929261e-05, "loss": 1.194, "step": 737 }, { "epoch": 1.7024221453287196, "grad_norm": 0.0, "learning_rate": 1.8697411331556958e-05, "loss": 0.8173, "step": 738 }, { "epoch": 1.7047289504036909, "grad_norm": 0.0, "learning_rate": 1.8693306736150445e-05, "loss": 0.373, "step": 739 }, { "epoch": 1.7070357554786622, "grad_norm": 0.0, "learning_rate": 1.8689196135908303e-05, "loss": 0.8672, "step": 740 }, { "epoch": 1.7093425605536332, "grad_norm": 0.0, "learning_rate": 1.868507953366989e-05, "loss": 0.871, "step": 741 }, { "epoch": 1.7116493656286043, "grad_norm": 0.0, "learning_rate": 1.868095693227872e-05, "loss": 0.6499, "step": 742 }, { "epoch": 1.7139561707035755, "grad_norm": 0.0, "learning_rate": 1.867682833458245e-05, "loss": 0.8416, "step": 743 }, { "epoch": 1.7162629757785468, "grad_norm": 0.0, "learning_rate": 1.8672693743432875e-05, "loss": 0.5984, "step": 744 }, { "epoch": 1.7185697808535179, "grad_norm": 0.0, "learning_rate": 1.8668553161685932e-05, "loss": 0.7311, "step": 745 }, { "epoch": 1.720876585928489, "grad_norm": 0.0, "learning_rate": 1.86644065922017e-05, "loss": 0.8443, "step": 746 }, { "epoch": 1.7231833910034602, "grad_norm": 0.0, "learning_rate": 1.866025403784439e-05, "loss": 0.7564, "step": 747 }, { "epoch": 1.7254901960784315, "grad_norm": 0.0, "learning_rate": 1.8656095501482342e-05, "loss": 0.7704, "step": 748 }, { "epoch": 1.7277970011534025, "grad_norm": 0.0, "learning_rate": 1.8651930985988037e-05, "loss": 0.5496, "step": 749 }, { "epoch": 1.7301038062283736, "grad_norm": 0.0, "learning_rate": 1.8647760494238082e-05, "loss": 0.8499, "step": 750 }, { "epoch": 1.7324106113033448, "grad_norm": 0.0, "learning_rate": 1.8643584029113215e-05, "loss": 1.0669, "step": 751 }, { "epoch": 1.7347174163783161, "grad_norm": 0.0, "learning_rate": 1.86394015934983e-05, "loss": 0.7644, "step": 752 }, { "epoch": 1.7370242214532872, "grad_norm": 0.0, "learning_rate": 1.8635213190282312e-05, "loss": 0.8404, "step": 753 }, { "epoch": 1.7393310265282582, "grad_norm": 0.0, "learning_rate": 1.8631018822358363e-05, "loss": 0.6913, "step": 754 }, { "epoch": 1.7416378316032295, "grad_norm": 0.0, "learning_rate": 1.8626818492623688e-05, "loss": 0.7011, "step": 755 }, { "epoch": 1.7439446366782008, "grad_norm": 0.0, "learning_rate": 1.8622612203979628e-05, "loss": 0.5566, "step": 756 }, { "epoch": 1.7462514417531718, "grad_norm": 0.0, "learning_rate": 1.8618399959331642e-05, "loss": 1.0118, "step": 757 }, { "epoch": 1.7485582468281429, "grad_norm": 0.0, "learning_rate": 1.861418176158931e-05, "loss": 0.448, "step": 758 }, { "epoch": 1.7508650519031141, "grad_norm": 0.0, "learning_rate": 1.8609957613666316e-05, "loss": 0.7561, "step": 759 }, { "epoch": 1.7531718569780854, "grad_norm": 0.0, "learning_rate": 1.8605727518480462e-05, "loss": 0.7707, "step": 760 }, { "epoch": 1.7554786620530565, "grad_norm": 0.0, "learning_rate": 1.860149147895366e-05, "loss": 0.7954, "step": 761 }, { "epoch": 1.7577854671280275, "grad_norm": 0.0, "learning_rate": 1.8597249498011906e-05, "loss": 0.7691, "step": 762 }, { "epoch": 1.7600922722029988, "grad_norm": 0.0, "learning_rate": 1.8593001578585325e-05, "loss": 0.6448, "step": 763 }, { "epoch": 1.76239907727797, "grad_norm": 0.0, "learning_rate": 1.858874772360814e-05, "loss": 0.7916, "step": 764 }, { "epoch": 1.7647058823529411, "grad_norm": 0.0, "learning_rate": 1.8584487936018663e-05, "loss": 0.7074, "step": 765 }, { "epoch": 1.7670126874279122, "grad_norm": 0.0, "learning_rate": 1.8580222218759312e-05, "loss": 0.7928, "step": 766 }, { "epoch": 1.7693194925028837, "grad_norm": 0.0, "learning_rate": 1.8575950574776595e-05, "loss": 0.6985, "step": 767 }, { "epoch": 1.7716262975778547, "grad_norm": 0.0, "learning_rate": 1.8571673007021124e-05, "loss": 0.6274, "step": 768 }, { "epoch": 1.7739331026528258, "grad_norm": 0.0, "learning_rate": 1.856738951844759e-05, "loss": 0.7821, "step": 769 }, { "epoch": 1.776239907727797, "grad_norm": 0.0, "learning_rate": 1.856310011201479e-05, "loss": 0.6322, "step": 770 }, { "epoch": 1.7785467128027683, "grad_norm": 0.0, "learning_rate": 1.855880479068559e-05, "loss": 0.7361, "step": 771 }, { "epoch": 1.7808535178777394, "grad_norm": 0.0, "learning_rate": 1.855450355742695e-05, "loss": 0.7843, "step": 772 }, { "epoch": 1.7831603229527104, "grad_norm": 0.0, "learning_rate": 1.8550196415209916e-05, "loss": 0.6768, "step": 773 }, { "epoch": 1.7854671280276817, "grad_norm": 0.0, "learning_rate": 1.854588336700962e-05, "loss": 0.8734, "step": 774 }, { "epoch": 1.787773933102653, "grad_norm": 0.0, "learning_rate": 1.854156441580526e-05, "loss": 0.3777, "step": 775 }, { "epoch": 1.790080738177624, "grad_norm": 0.0, "learning_rate": 1.8537239564580117e-05, "loss": 0.7942, "step": 776 }, { "epoch": 1.792387543252595, "grad_norm": 0.0, "learning_rate": 1.8532908816321557e-05, "loss": 0.5416, "step": 777 }, { "epoch": 1.7946943483275664, "grad_norm": 0.0, "learning_rate": 1.852857217402101e-05, "loss": 0.7822, "step": 778 }, { "epoch": 1.7970011534025376, "grad_norm": 0.0, "learning_rate": 1.8524229640673974e-05, "loss": 0.5352, "step": 779 }, { "epoch": 1.7993079584775087, "grad_norm": 0.0, "learning_rate": 1.851988121928002e-05, "loss": 0.7994, "step": 780 }, { "epoch": 1.8016147635524797, "grad_norm": 0.0, "learning_rate": 1.8515526912842796e-05, "loss": 0.6714, "step": 781 }, { "epoch": 1.803921568627451, "grad_norm": 0.0, "learning_rate": 1.8511166724369997e-05, "loss": 0.6343, "step": 782 }, { "epoch": 1.8062283737024223, "grad_norm": 0.0, "learning_rate": 1.8506800656873397e-05, "loss": 0.5991, "step": 783 }, { "epoch": 1.8085351787773933, "grad_norm": 0.0, "learning_rate": 1.8502428713368818e-05, "loss": 0.722, "step": 784 }, { "epoch": 1.8108419838523644, "grad_norm": 0.0, "learning_rate": 1.8498050896876152e-05, "loss": 1.0617, "step": 785 }, { "epoch": 1.8131487889273357, "grad_norm": 0.0, "learning_rate": 1.8493667210419337e-05, "loss": 0.6679, "step": 786 }, { "epoch": 1.815455594002307, "grad_norm": 0.0, "learning_rate": 1.8489277657026377e-05, "loss": 1.0058, "step": 787 }, { "epoch": 1.817762399077278, "grad_norm": 0.0, "learning_rate": 1.8484882239729315e-05, "loss": 0.6065, "step": 788 }, { "epoch": 1.820069204152249, "grad_norm": 0.0, "learning_rate": 1.848048096156426e-05, "loss": 0.8293, "step": 789 }, { "epoch": 1.8223760092272203, "grad_norm": 0.0, "learning_rate": 1.847607382557136e-05, "loss": 0.5846, "step": 790 }, { "epoch": 1.8246828143021916, "grad_norm": 0.0, "learning_rate": 1.8471660834794807e-05, "loss": 0.6811, "step": 791 }, { "epoch": 1.8269896193771626, "grad_norm": 0.0, "learning_rate": 1.8467241992282842e-05, "loss": 0.7027, "step": 792 }, { "epoch": 1.8292964244521337, "grad_norm": 0.0, "learning_rate": 1.846281730108775e-05, "loss": 0.8191, "step": 793 }, { "epoch": 1.831603229527105, "grad_norm": 0.0, "learning_rate": 1.8458386764265852e-05, "loss": 0.8021, "step": 794 }, { "epoch": 1.8339100346020762, "grad_norm": 0.0, "learning_rate": 1.8453950384877504e-05, "loss": 0.6784, "step": 795 }, { "epoch": 1.8362168396770473, "grad_norm": 0.0, "learning_rate": 1.8449508165987106e-05, "loss": 0.6081, "step": 796 }, { "epoch": 1.8385236447520183, "grad_norm": 0.0, "learning_rate": 1.844506011066308e-05, "loss": 0.9829, "step": 797 }, { "epoch": 1.8408304498269896, "grad_norm": 0.0, "learning_rate": 1.8440606221977893e-05, "loss": 0.8106, "step": 798 }, { "epoch": 1.843137254901961, "grad_norm": 0.0, "learning_rate": 1.8436146503008036e-05, "loss": 0.559, "step": 799 }, { "epoch": 1.845444059976932, "grad_norm": 0.0, "learning_rate": 1.843168095683402e-05, "loss": 0.3478, "step": 800 }, { "epoch": 1.847750865051903, "grad_norm": 0.0, "learning_rate": 1.8427209586540392e-05, "loss": 0.9071, "step": 801 }, { "epoch": 1.8500576701268743, "grad_norm": 0.0, "learning_rate": 1.8422732395215717e-05, "loss": 0.637, "step": 802 }, { "epoch": 1.8523644752018456, "grad_norm": 0.0, "learning_rate": 1.8418249385952575e-05, "loss": 0.617, "step": 803 }, { "epoch": 1.8546712802768166, "grad_norm": 0.0, "learning_rate": 1.841376056184758e-05, "loss": 0.9638, "step": 804 }, { "epoch": 1.8569780853517877, "grad_norm": 0.0, "learning_rate": 1.8409265926001342e-05, "loss": 0.9811, "step": 805 }, { "epoch": 1.859284890426759, "grad_norm": 0.0, "learning_rate": 1.8404765481518506e-05, "loss": 0.8957, "step": 806 }, { "epoch": 1.8615916955017302, "grad_norm": 0.0, "learning_rate": 1.8400259231507716e-05, "loss": 0.862, "step": 807 }, { "epoch": 1.8638985005767013, "grad_norm": 0.0, "learning_rate": 1.839574717908163e-05, "loss": 0.4463, "step": 808 }, { "epoch": 1.8662053056516723, "grad_norm": 0.0, "learning_rate": 1.8391229327356916e-05, "loss": 0.7919, "step": 809 }, { "epoch": 1.8685121107266436, "grad_norm": 0.0, "learning_rate": 1.8386705679454243e-05, "loss": 0.6183, "step": 810 }, { "epoch": 1.8708189158016149, "grad_norm": 0.0, "learning_rate": 1.8382176238498287e-05, "loss": 0.6153, "step": 811 }, { "epoch": 1.873125720876586, "grad_norm": 0.0, "learning_rate": 1.8377641007617724e-05, "loss": 0.6181, "step": 812 }, { "epoch": 1.875432525951557, "grad_norm": 0.0, "learning_rate": 1.8373099989945236e-05, "loss": 0.6922, "step": 813 }, { "epoch": 1.8777393310265282, "grad_norm": 0.0, "learning_rate": 1.836855318861749e-05, "loss": 0.9716, "step": 814 }, { "epoch": 1.8800461361014995, "grad_norm": 0.0, "learning_rate": 1.8364000606775158e-05, "loss": 1.0532, "step": 815 }, { "epoch": 1.8823529411764706, "grad_norm": 0.0, "learning_rate": 1.8359442247562896e-05, "loss": 0.9168, "step": 816 }, { "epoch": 1.8846597462514416, "grad_norm": 0.0, "learning_rate": 1.8354878114129368e-05, "loss": 0.8284, "step": 817 }, { "epoch": 1.8869665513264129, "grad_norm": 0.0, "learning_rate": 1.8350308209627198e-05, "loss": 0.7451, "step": 818 }, { "epoch": 1.8892733564013842, "grad_norm": 0.0, "learning_rate": 1.834573253721303e-05, "loss": 0.9864, "step": 819 }, { "epoch": 1.8915801614763552, "grad_norm": 0.0, "learning_rate": 1.8341151100047462e-05, "loss": 1.0663, "step": 820 }, { "epoch": 1.8938869665513263, "grad_norm": 0.0, "learning_rate": 1.833656390129509e-05, "loss": 0.9384, "step": 821 }, { "epoch": 1.8961937716262975, "grad_norm": 0.0, "learning_rate": 1.833197094412449e-05, "loss": 0.6839, "step": 822 }, { "epoch": 1.8985005767012688, "grad_norm": 0.0, "learning_rate": 1.832737223170821e-05, "loss": 0.4785, "step": 823 }, { "epoch": 1.9008073817762399, "grad_norm": 0.0, "learning_rate": 1.832276776722278e-05, "loss": 0.5989, "step": 824 }, { "epoch": 1.903114186851211, "grad_norm": 0.0, "learning_rate": 1.8318157553848694e-05, "loss": 0.7849, "step": 825 }, { "epoch": 1.9054209919261822, "grad_norm": 0.0, "learning_rate": 1.8313541594770417e-05, "loss": 0.7207, "step": 826 }, { "epoch": 1.9077277970011535, "grad_norm": 0.0, "learning_rate": 1.8308919893176397e-05, "loss": 0.7589, "step": 827 }, { "epoch": 1.9100346020761245, "grad_norm": 0.0, "learning_rate": 1.8304292452259037e-05, "loss": 0.6873, "step": 828 }, { "epoch": 1.9123414071510956, "grad_norm": 0.0, "learning_rate": 1.8299659275214708e-05, "loss": 0.7201, "step": 829 }, { "epoch": 1.9146482122260668, "grad_norm": 0.0, "learning_rate": 1.8295020365243736e-05, "loss": 0.4706, "step": 830 }, { "epoch": 1.9169550173010381, "grad_norm": 0.0, "learning_rate": 1.8290375725550417e-05, "loss": 0.73, "step": 831 }, { "epoch": 1.9192618223760092, "grad_norm": 0.0, "learning_rate": 1.8285725359343e-05, "loss": 0.3995, "step": 832 }, { "epoch": 1.9215686274509802, "grad_norm": 0.0, "learning_rate": 1.8281069269833694e-05, "loss": 0.9151, "step": 833 }, { "epoch": 1.9238754325259517, "grad_norm": 0.0, "learning_rate": 1.827640746023865e-05, "loss": 0.8479, "step": 834 }, { "epoch": 1.9261822376009228, "grad_norm": 0.0, "learning_rate": 1.827173993377798e-05, "loss": 0.663, "step": 835 }, { "epoch": 1.9284890426758938, "grad_norm": 0.0, "learning_rate": 1.8267066693675745e-05, "loss": 0.8283, "step": 836 }, { "epoch": 1.930795847750865, "grad_norm": 0.0, "learning_rate": 1.826238774315995e-05, "loss": 0.9821, "step": 837 }, { "epoch": 1.9331026528258364, "grad_norm": 0.0, "learning_rate": 1.8257703085462542e-05, "loss": 0.7109, "step": 838 }, { "epoch": 1.9354094579008074, "grad_norm": 0.0, "learning_rate": 1.8253012723819417e-05, "loss": 0.7126, "step": 839 }, { "epoch": 1.9377162629757785, "grad_norm": 0.0, "learning_rate": 1.82483166614704e-05, "loss": 0.534, "step": 840 }, { "epoch": 1.9400230680507498, "grad_norm": 0.0, "learning_rate": 1.8243614901659265e-05, "loss": 0.6956, "step": 841 }, { "epoch": 1.942329873125721, "grad_norm": 0.0, "learning_rate": 1.8238907447633716e-05, "loss": 0.6861, "step": 842 }, { "epoch": 1.944636678200692, "grad_norm": 0.0, "learning_rate": 1.8234194302645393e-05, "loss": 1.1298, "step": 843 }, { "epoch": 1.9469434832756631, "grad_norm": 0.0, "learning_rate": 1.8229475469949865e-05, "loss": 0.7579, "step": 844 }, { "epoch": 1.9492502883506344, "grad_norm": 0.0, "learning_rate": 1.8224750952806626e-05, "loss": 0.6374, "step": 845 }, { "epoch": 1.9515570934256057, "grad_norm": 0.0, "learning_rate": 1.8220020754479104e-05, "loss": 0.8151, "step": 846 }, { "epoch": 1.9538638985005767, "grad_norm": 0.0, "learning_rate": 1.8215284878234644e-05, "loss": 0.4476, "step": 847 }, { "epoch": 1.9561707035755478, "grad_norm": 0.0, "learning_rate": 1.8210543327344518e-05, "loss": 0.8777, "step": 848 }, { "epoch": 1.958477508650519, "grad_norm": 0.0, "learning_rate": 1.8205796105083917e-05, "loss": 0.7625, "step": 849 }, { "epoch": 1.9607843137254903, "grad_norm": 0.0, "learning_rate": 1.820104321473195e-05, "loss": 0.7084, "step": 850 }, { "epoch": 1.9630911188004614, "grad_norm": 0.0, "learning_rate": 1.819628465957164e-05, "loss": 0.9646, "step": 851 }, { "epoch": 1.9653979238754324, "grad_norm": 0.0, "learning_rate": 1.819152044288992e-05, "loss": 0.462, "step": 852 }, { "epoch": 1.9677047289504037, "grad_norm": 0.0, "learning_rate": 1.8186750567977638e-05, "loss": 0.8403, "step": 853 }, { "epoch": 1.970011534025375, "grad_norm": 0.0, "learning_rate": 1.818197503812955e-05, "loss": 0.5777, "step": 854 }, { "epoch": 1.972318339100346, "grad_norm": 0.0, "learning_rate": 1.8177193856644315e-05, "loss": 0.5461, "step": 855 }, { "epoch": 1.974625144175317, "grad_norm": 0.0, "learning_rate": 1.8172407026824498e-05, "loss": 0.6516, "step": 856 }, { "epoch": 1.9769319492502884, "grad_norm": 0.0, "learning_rate": 1.816761455197657e-05, "loss": 0.5545, "step": 857 }, { "epoch": 1.9792387543252596, "grad_norm": 0.0, "learning_rate": 1.8162816435410892e-05, "loss": 0.6475, "step": 858 }, { "epoch": 1.9815455594002307, "grad_norm": 0.0, "learning_rate": 1.8158012680441723e-05, "loss": 1.0847, "step": 859 }, { "epoch": 1.9838523644752017, "grad_norm": 0.0, "learning_rate": 1.8153203290387224e-05, "loss": 0.8766, "step": 860 }, { "epoch": 1.986159169550173, "grad_norm": 0.0, "learning_rate": 1.8148388268569453e-05, "loss": 0.8599, "step": 861 }, { "epoch": 1.9884659746251443, "grad_norm": 0.0, "learning_rate": 1.8143567618314336e-05, "loss": 0.8292, "step": 862 }, { "epoch": 1.9907727797001153, "grad_norm": 0.0, "learning_rate": 1.8138741342951706e-05, "loss": 0.8669, "step": 863 }, { "epoch": 1.9930795847750864, "grad_norm": 0.0, "learning_rate": 1.8133909445815277e-05, "loss": 0.4969, "step": 864 }, { "epoch": 1.9953863898500577, "grad_norm": 0.0, "learning_rate": 1.8129071930242648e-05, "loss": 0.6403, "step": 865 }, { "epoch": 1.997693194925029, "grad_norm": 0.0, "learning_rate": 1.8124228799575295e-05, "loss": 0.5471, "step": 866 }, { "epoch": 2.0, "grad_norm": 0.0, "learning_rate": 1.811938005715857e-05, "loss": 0.5799, "step": 867 }, { "epoch": 2.002306805074971, "grad_norm": 0.0, "learning_rate": 1.8114525706341702e-05, "loss": 0.3581, "step": 868 }, { "epoch": 2.0046136101499425, "grad_norm": 0.0, "learning_rate": 1.8109665750477806e-05, "loss": 0.3694, "step": 869 }, { "epoch": 2.0069204152249136, "grad_norm": 0.0, "learning_rate": 1.8104800192923856e-05, "loss": 0.46, "step": 870 }, { "epoch": 2.0092272202998847, "grad_norm": 0.0, "learning_rate": 1.8099929037040695e-05, "loss": 0.5724, "step": 871 }, { "epoch": 2.0115340253748557, "grad_norm": 0.0, "learning_rate": 1.8095052286193044e-05, "loss": 0.3565, "step": 872 }, { "epoch": 2.013840830449827, "grad_norm": 0.0, "learning_rate": 1.8090169943749477e-05, "loss": 0.3936, "step": 873 }, { "epoch": 2.0161476355247983, "grad_norm": 0.0, "learning_rate": 1.8085282013082436e-05, "loss": 0.5218, "step": 874 }, { "epoch": 2.0184544405997693, "grad_norm": 0.0, "learning_rate": 1.808038849756822e-05, "loss": 0.5057, "step": 875 }, { "epoch": 2.0207612456747404, "grad_norm": 0.0, "learning_rate": 1.8075489400586993e-05, "loss": 0.4666, "step": 876 }, { "epoch": 2.023068050749712, "grad_norm": 0.0, "learning_rate": 1.8070584725522763e-05, "loss": 0.4672, "step": 877 }, { "epoch": 2.025374855824683, "grad_norm": 0.0, "learning_rate": 1.8065674475763398e-05, "loss": 0.5051, "step": 878 }, { "epoch": 2.027681660899654, "grad_norm": 0.0, "learning_rate": 1.8060758654700622e-05, "loss": 0.3915, "step": 879 }, { "epoch": 2.029988465974625, "grad_norm": 0.0, "learning_rate": 1.8055837265729996e-05, "loss": 0.4161, "step": 880 }, { "epoch": 2.0322952710495965, "grad_norm": 0.0, "learning_rate": 1.805091031225093e-05, "loss": 0.4157, "step": 881 }, { "epoch": 2.0346020761245676, "grad_norm": 0.0, "learning_rate": 1.8045977797666685e-05, "loss": 0.5167, "step": 882 }, { "epoch": 2.0369088811995386, "grad_norm": 0.0, "learning_rate": 1.804103972538435e-05, "loss": 0.4641, "step": 883 }, { "epoch": 2.0392156862745097, "grad_norm": 0.0, "learning_rate": 1.8036096098814875e-05, "loss": 0.3374, "step": 884 }, { "epoch": 2.041522491349481, "grad_norm": 0.0, "learning_rate": 1.803114692137302e-05, "loss": 0.5364, "step": 885 }, { "epoch": 2.043829296424452, "grad_norm": 0.0, "learning_rate": 1.8026192196477395e-05, "loss": 0.6081, "step": 886 }, { "epoch": 2.0461361014994233, "grad_norm": 0.0, "learning_rate": 1.802123192755044e-05, "loss": 0.5771, "step": 887 }, { "epoch": 2.0484429065743943, "grad_norm": 0.0, "learning_rate": 1.801626611801842e-05, "loss": 0.5026, "step": 888 }, { "epoch": 2.050749711649366, "grad_norm": 0.0, "learning_rate": 1.8011294771311436e-05, "loss": 0.5586, "step": 889 }, { "epoch": 2.053056516724337, "grad_norm": 0.0, "learning_rate": 1.80063178908634e-05, "loss": 0.3922, "step": 890 }, { "epoch": 2.055363321799308, "grad_norm": 0.0, "learning_rate": 1.8001335480112067e-05, "loss": 0.5573, "step": 891 }, { "epoch": 2.057670126874279, "grad_norm": 0.0, "learning_rate": 1.7996347542498983e-05, "loss": 0.3228, "step": 892 }, { "epoch": 2.0599769319492505, "grad_norm": 0.0, "learning_rate": 1.799135408146954e-05, "loss": 0.405, "step": 893 }, { "epoch": 2.0622837370242215, "grad_norm": 0.0, "learning_rate": 1.798635510047293e-05, "loss": 0.5568, "step": 894 }, { "epoch": 2.0645905420991926, "grad_norm": 0.0, "learning_rate": 1.798135060296216e-05, "loss": 0.4499, "step": 895 }, { "epoch": 2.0668973471741636, "grad_norm": 0.0, "learning_rate": 1.797634059239405e-05, "loss": 0.3977, "step": 896 }, { "epoch": 2.069204152249135, "grad_norm": 0.0, "learning_rate": 1.7971325072229227e-05, "loss": 0.4029, "step": 897 }, { "epoch": 2.071510957324106, "grad_norm": 0.0, "learning_rate": 1.7966304045932122e-05, "loss": 0.4059, "step": 898 }, { "epoch": 2.0738177623990772, "grad_norm": 0.0, "learning_rate": 1.796127751697097e-05, "loss": 0.6208, "step": 899 }, { "epoch": 2.0761245674740483, "grad_norm": 0.0, "learning_rate": 1.795624548881781e-05, "loss": 0.5197, "step": 900 }, { "epoch": 2.0784313725490198, "grad_norm": 0.0, "learning_rate": 1.795120796494848e-05, "loss": 0.3885, "step": 901 }, { "epoch": 2.080738177623991, "grad_norm": 0.0, "learning_rate": 1.7946164948842604e-05, "loss": 0.4355, "step": 902 }, { "epoch": 2.083044982698962, "grad_norm": 0.0, "learning_rate": 1.7941116443983612e-05, "loss": 0.3537, "step": 903 }, { "epoch": 2.085351787773933, "grad_norm": 0.0, "learning_rate": 1.7936062453858724e-05, "loss": 0.4278, "step": 904 }, { "epoch": 2.0876585928489044, "grad_norm": 0.0, "learning_rate": 1.7931002981958933e-05, "loss": 0.6057, "step": 905 }, { "epoch": 2.0899653979238755, "grad_norm": 0.0, "learning_rate": 1.7925938031779044e-05, "loss": 0.5894, "step": 906 }, { "epoch": 2.0922722029988465, "grad_norm": 0.0, "learning_rate": 1.7920867606817625e-05, "loss": 0.404, "step": 907 }, { "epoch": 2.0945790080738176, "grad_norm": 0.0, "learning_rate": 1.7915791710577035e-05, "loss": 0.3701, "step": 908 }, { "epoch": 2.096885813148789, "grad_norm": 0.0, "learning_rate": 1.7910710346563417e-05, "loss": 0.4394, "step": 909 }, { "epoch": 2.09919261822376, "grad_norm": 0.0, "learning_rate": 1.7905623518286673e-05, "loss": 0.3412, "step": 910 }, { "epoch": 2.101499423298731, "grad_norm": 0.0, "learning_rate": 1.79005312292605e-05, "loss": 0.3976, "step": 911 }, { "epoch": 2.1038062283737022, "grad_norm": 0.0, "learning_rate": 1.7895433483002356e-05, "loss": 0.5541, "step": 912 }, { "epoch": 2.1061130334486737, "grad_norm": 0.0, "learning_rate": 1.7890330283033467e-05, "loss": 0.4665, "step": 913 }, { "epoch": 2.108419838523645, "grad_norm": 0.0, "learning_rate": 1.7885221632878837e-05, "loss": 0.6224, "step": 914 }, { "epoch": 2.110726643598616, "grad_norm": 0.0, "learning_rate": 1.788010753606722e-05, "loss": 0.5937, "step": 915 }, { "epoch": 2.113033448673587, "grad_norm": 0.0, "learning_rate": 1.7874987996131144e-05, "loss": 0.5993, "step": 916 }, { "epoch": 2.1153402537485584, "grad_norm": 0.0, "learning_rate": 1.7869863016606893e-05, "loss": 0.5363, "step": 917 }, { "epoch": 2.1176470588235294, "grad_norm": 0.0, "learning_rate": 1.78647326010345e-05, "loss": 0.6504, "step": 918 }, { "epoch": 2.1199538638985005, "grad_norm": 0.0, "learning_rate": 1.7859596752957768e-05, "loss": 0.4933, "step": 919 }, { "epoch": 2.1222606689734715, "grad_norm": 0.0, "learning_rate": 1.7854455475924245e-05, "loss": 0.4761, "step": 920 }, { "epoch": 2.124567474048443, "grad_norm": 0.0, "learning_rate": 1.7849308773485226e-05, "loss": 0.3558, "step": 921 }, { "epoch": 2.126874279123414, "grad_norm": 0.0, "learning_rate": 1.784415664919576e-05, "loss": 0.7222, "step": 922 }, { "epoch": 2.129181084198385, "grad_norm": 0.0, "learning_rate": 1.783899910661463e-05, "loss": 0.6698, "step": 923 }, { "epoch": 2.131487889273356, "grad_norm": 0.0, "learning_rate": 1.783383614930438e-05, "loss": 0.3221, "step": 924 }, { "epoch": 2.1337946943483277, "grad_norm": 0.0, "learning_rate": 1.782866778083128e-05, "loss": 0.5143, "step": 925 }, { "epoch": 2.1361014994232987, "grad_norm": 0.0, "learning_rate": 1.7823494004765336e-05, "loss": 0.4885, "step": 926 }, { "epoch": 2.13840830449827, "grad_norm": 0.0, "learning_rate": 1.78183148246803e-05, "loss": 0.4965, "step": 927 }, { "epoch": 2.1407151095732413, "grad_norm": 0.0, "learning_rate": 1.7813130244153648e-05, "loss": 0.3964, "step": 928 }, { "epoch": 2.1430219146482123, "grad_norm": 0.0, "learning_rate": 1.7807940266766595e-05, "loss": 0.4904, "step": 929 }, { "epoch": 2.1453287197231834, "grad_norm": 0.0, "learning_rate": 1.780274489610407e-05, "loss": 0.3475, "step": 930 }, { "epoch": 2.1476355247981544, "grad_norm": 0.0, "learning_rate": 1.7797544135754744e-05, "loss": 0.593, "step": 931 }, { "epoch": 2.1499423298731255, "grad_norm": 0.0, "learning_rate": 1.7792337989311e-05, "loss": 0.4602, "step": 932 }, { "epoch": 2.152249134948097, "grad_norm": 0.0, "learning_rate": 1.778712646036894e-05, "loss": 0.4539, "step": 933 }, { "epoch": 2.154555940023068, "grad_norm": 0.0, "learning_rate": 1.7781909552528395e-05, "loss": 0.733, "step": 934 }, { "epoch": 2.156862745098039, "grad_norm": 0.0, "learning_rate": 1.77766872693929e-05, "loss": 0.5275, "step": 935 }, { "epoch": 2.1591695501730106, "grad_norm": 0.0, "learning_rate": 1.777145961456971e-05, "loss": 0.5843, "step": 936 }, { "epoch": 2.1614763552479817, "grad_norm": 0.0, "learning_rate": 1.7766226591669787e-05, "loss": 0.5257, "step": 937 }, { "epoch": 2.1637831603229527, "grad_norm": 0.0, "learning_rate": 1.7760988204307798e-05, "loss": 0.7901, "step": 938 }, { "epoch": 2.1660899653979238, "grad_norm": 0.0, "learning_rate": 1.7755744456102123e-05, "loss": 0.5024, "step": 939 }, { "epoch": 2.168396770472895, "grad_norm": 0.0, "learning_rate": 1.7750495350674844e-05, "loss": 0.4521, "step": 940 }, { "epoch": 2.1707035755478663, "grad_norm": 0.0, "learning_rate": 1.7745240891651736e-05, "loss": 0.4274, "step": 941 }, { "epoch": 2.1730103806228374, "grad_norm": 0.0, "learning_rate": 1.7739981082662275e-05, "loss": 0.2473, "step": 942 }, { "epoch": 2.1753171856978084, "grad_norm": 0.0, "learning_rate": 1.7734715927339642e-05, "loss": 0.5154, "step": 943 }, { "epoch": 2.17762399077278, "grad_norm": 0.0, "learning_rate": 1.7729445429320696e-05, "loss": 0.2906, "step": 944 }, { "epoch": 2.179930795847751, "grad_norm": 0.0, "learning_rate": 1.7724169592245996e-05, "loss": 0.5342, "step": 945 }, { "epoch": 2.182237600922722, "grad_norm": 0.0, "learning_rate": 1.771888841975979e-05, "loss": 0.357, "step": 946 }, { "epoch": 2.184544405997693, "grad_norm": 0.0, "learning_rate": 1.771360191551e-05, "loss": 0.4143, "step": 947 }, { "epoch": 2.1868512110726646, "grad_norm": 0.0, "learning_rate": 1.7708310083148242e-05, "loss": 0.3994, "step": 948 }, { "epoch": 2.1891580161476356, "grad_norm": 0.0, "learning_rate": 1.7703012926329813e-05, "loss": 0.925, "step": 949 }, { "epoch": 2.1914648212226067, "grad_norm": 0.0, "learning_rate": 1.769771044871368e-05, "loss": 0.5006, "step": 950 }, { "epoch": 2.1937716262975777, "grad_norm": 0.0, "learning_rate": 1.769240265396249e-05, "loss": 0.3427, "step": 951 }, { "epoch": 2.196078431372549, "grad_norm": 0.0, "learning_rate": 1.768708954574256e-05, "loss": 0.6347, "step": 952 }, { "epoch": 2.1983852364475203, "grad_norm": 0.0, "learning_rate": 1.7681771127723883e-05, "loss": 0.5081, "step": 953 }, { "epoch": 2.2006920415224913, "grad_norm": 0.0, "learning_rate": 1.7676447403580114e-05, "loss": 0.3524, "step": 954 }, { "epoch": 2.2029988465974624, "grad_norm": 0.0, "learning_rate": 1.7671118376988575e-05, "loss": 0.3097, "step": 955 }, { "epoch": 2.205305651672434, "grad_norm": 0.0, "learning_rate": 1.766578405163025e-05, "loss": 0.3783, "step": 956 }, { "epoch": 2.207612456747405, "grad_norm": 0.0, "learning_rate": 1.766044443118978e-05, "loss": 0.6402, "step": 957 }, { "epoch": 2.209919261822376, "grad_norm": 0.0, "learning_rate": 1.7655099519355477e-05, "loss": 0.5871, "step": 958 }, { "epoch": 2.212226066897347, "grad_norm": 0.0, "learning_rate": 1.764974931981929e-05, "loss": 0.6701, "step": 959 }, { "epoch": 2.2145328719723185, "grad_norm": 0.0, "learning_rate": 1.7644393836276832e-05, "loss": 0.5957, "step": 960 }, { "epoch": 2.2168396770472896, "grad_norm": 0.0, "learning_rate": 1.7639033072427367e-05, "loss": 0.4668, "step": 961 }, { "epoch": 2.2191464821222606, "grad_norm": 0.0, "learning_rate": 1.7633667031973793e-05, "loss": 0.4917, "step": 962 }, { "epoch": 2.2214532871972317, "grad_norm": 0.0, "learning_rate": 1.7628295718622666e-05, "loss": 0.4671, "step": 963 }, { "epoch": 2.223760092272203, "grad_norm": 0.0, "learning_rate": 1.7622919136084183e-05, "loss": 0.4348, "step": 964 }, { "epoch": 2.2260668973471742, "grad_norm": 0.0, "learning_rate": 1.761753728807217e-05, "loss": 0.2565, "step": 965 }, { "epoch": 2.2283737024221453, "grad_norm": 0.0, "learning_rate": 1.7612150178304102e-05, "loss": 0.5, "step": 966 }, { "epoch": 2.2306805074971163, "grad_norm": 0.0, "learning_rate": 1.760675781050109e-05, "loss": 0.6313, "step": 967 }, { "epoch": 2.232987312572088, "grad_norm": 0.0, "learning_rate": 1.760136018838786e-05, "loss": 0.334, "step": 968 }, { "epoch": 2.235294117647059, "grad_norm": 0.0, "learning_rate": 1.7595957315692782e-05, "loss": 0.5062, "step": 969 }, { "epoch": 2.23760092272203, "grad_norm": 0.0, "learning_rate": 1.7590549196147854e-05, "loss": 0.4561, "step": 970 }, { "epoch": 2.239907727797001, "grad_norm": 0.0, "learning_rate": 1.7585135833488692e-05, "loss": 0.3431, "step": 971 }, { "epoch": 2.2422145328719725, "grad_norm": 0.0, "learning_rate": 1.757971723145453e-05, "loss": 0.4566, "step": 972 }, { "epoch": 2.2445213379469435, "grad_norm": 0.0, "learning_rate": 1.7574293393788236e-05, "loss": 0.5182, "step": 973 }, { "epoch": 2.2468281430219146, "grad_norm": 0.0, "learning_rate": 1.7568864324236276e-05, "loss": 0.4189, "step": 974 }, { "epoch": 2.2491349480968856, "grad_norm": 0.0, "learning_rate": 1.7563430026548737e-05, "loss": 0.3915, "step": 975 }, { "epoch": 2.251441753171857, "grad_norm": 0.0, "learning_rate": 1.7557990504479328e-05, "loss": 0.4564, "step": 976 }, { "epoch": 2.253748558246828, "grad_norm": 0.0, "learning_rate": 1.755254576178535e-05, "loss": 0.5378, "step": 977 }, { "epoch": 2.2560553633217992, "grad_norm": 0.0, "learning_rate": 1.7547095802227723e-05, "loss": 0.421, "step": 978 }, { "epoch": 2.2583621683967703, "grad_norm": 0.0, "learning_rate": 1.754164062957096e-05, "loss": 0.4231, "step": 979 }, { "epoch": 2.260668973471742, "grad_norm": 0.0, "learning_rate": 1.7536180247583182e-05, "loss": 0.3338, "step": 980 }, { "epoch": 2.262975778546713, "grad_norm": 0.0, "learning_rate": 1.7530714660036112e-05, "loss": 0.4675, "step": 981 }, { "epoch": 2.265282583621684, "grad_norm": 0.0, "learning_rate": 1.7525243870705052e-05, "loss": 0.4478, "step": 982 }, { "epoch": 2.267589388696655, "grad_norm": 0.0, "learning_rate": 1.751976788336892e-05, "loss": 0.461, "step": 983 }, { "epoch": 2.2698961937716264, "grad_norm": 0.0, "learning_rate": 1.7514286701810203e-05, "loss": 0.4818, "step": 984 }, { "epoch": 2.2722029988465975, "grad_norm": 0.0, "learning_rate": 1.7508800329814993e-05, "loss": 0.5986, "step": 985 }, { "epoch": 2.2745098039215685, "grad_norm": 0.0, "learning_rate": 1.7503308771172955e-05, "loss": 0.6687, "step": 986 }, { "epoch": 2.2768166089965396, "grad_norm": 0.0, "learning_rate": 1.7497812029677344e-05, "loss": 0.7566, "step": 987 }, { "epoch": 2.279123414071511, "grad_norm": 0.0, "learning_rate": 1.7492310109124992e-05, "loss": 0.5464, "step": 988 }, { "epoch": 2.281430219146482, "grad_norm": 0.0, "learning_rate": 1.74868030133163e-05, "loss": 0.6083, "step": 989 }, { "epoch": 2.283737024221453, "grad_norm": 0.0, "learning_rate": 1.748129074605527e-05, "loss": 0.5148, "step": 990 }, { "epoch": 2.2860438292964247, "grad_norm": 0.0, "learning_rate": 1.7475773311149448e-05, "loss": 0.3181, "step": 991 }, { "epoch": 2.2883506343713957, "grad_norm": 0.0, "learning_rate": 1.7470250712409963e-05, "loss": 0.5571, "step": 992 }, { "epoch": 2.290657439446367, "grad_norm": 0.0, "learning_rate": 1.7464722953651504e-05, "loss": 0.4269, "step": 993 }, { "epoch": 2.292964244521338, "grad_norm": 0.0, "learning_rate": 1.7459190038692333e-05, "loss": 0.4766, "step": 994 }, { "epoch": 2.295271049596309, "grad_norm": 0.0, "learning_rate": 1.7453651971354265e-05, "loss": 0.5265, "step": 995 }, { "epoch": 2.2975778546712804, "grad_norm": 0.0, "learning_rate": 1.7448108755462684e-05, "loss": 0.4346, "step": 996 }, { "epoch": 2.2998846597462514, "grad_norm": 0.0, "learning_rate": 1.7442560394846518e-05, "loss": 0.5165, "step": 997 }, { "epoch": 2.3021914648212225, "grad_norm": 0.0, "learning_rate": 1.743700689333826e-05, "loss": 0.4669, "step": 998 }, { "epoch": 2.304498269896194, "grad_norm": 0.0, "learning_rate": 1.7431448254773943e-05, "loss": 0.3175, "step": 999 }, { "epoch": 2.306805074971165, "grad_norm": 0.0, "learning_rate": 1.742588448299316e-05, "loss": 0.5314, "step": 1000 }, { "epoch": 2.309111880046136, "grad_norm": 0.0, "learning_rate": 1.7420315581839045e-05, "loss": 0.5397, "step": 1001 }, { "epoch": 2.311418685121107, "grad_norm": 0.0, "learning_rate": 1.741474155515827e-05, "loss": 0.4425, "step": 1002 }, { "epoch": 2.313725490196078, "grad_norm": 0.0, "learning_rate": 1.7409162406801053e-05, "loss": 0.5258, "step": 1003 }, { "epoch": 2.3160322952710497, "grad_norm": 0.0, "learning_rate": 1.7403578140621147e-05, "loss": 0.5102, "step": 1004 }, { "epoch": 2.3183391003460208, "grad_norm": 0.0, "learning_rate": 1.7397988760475842e-05, "loss": 0.3762, "step": 1005 }, { "epoch": 2.320645905420992, "grad_norm": 0.0, "learning_rate": 1.739239427022596e-05, "loss": 0.3951, "step": 1006 }, { "epoch": 2.3229527104959633, "grad_norm": 0.0, "learning_rate": 1.738679467373586e-05, "loss": 0.6036, "step": 1007 }, { "epoch": 2.3252595155709344, "grad_norm": 0.0, "learning_rate": 1.738118997487341e-05, "loss": 0.5719, "step": 1008 }, { "epoch": 2.3275663206459054, "grad_norm": 0.0, "learning_rate": 1.7375580177510017e-05, "loss": 0.6319, "step": 1009 }, { "epoch": 2.3298731257208765, "grad_norm": 0.0, "learning_rate": 1.7369965285520606e-05, "loss": 0.5118, "step": 1010 }, { "epoch": 2.3321799307958475, "grad_norm": 0.0, "learning_rate": 1.736434530278362e-05, "loss": 0.5066, "step": 1011 }, { "epoch": 2.334486735870819, "grad_norm": 0.0, "learning_rate": 1.7358720233181023e-05, "loss": 0.5939, "step": 1012 }, { "epoch": 2.33679354094579, "grad_norm": 0.0, "learning_rate": 1.735309008059829e-05, "loss": 0.7063, "step": 1013 }, { "epoch": 2.339100346020761, "grad_norm": 0.0, "learning_rate": 1.73474548489244e-05, "loss": 0.3889, "step": 1014 }, { "epoch": 2.3414071510957326, "grad_norm": 0.0, "learning_rate": 1.7341814542051845e-05, "loss": 0.4144, "step": 1015 }, { "epoch": 2.3437139561707037, "grad_norm": 0.0, "learning_rate": 1.7336169163876637e-05, "loss": 0.467, "step": 1016 }, { "epoch": 2.3460207612456747, "grad_norm": 0.0, "learning_rate": 1.7330518718298263e-05, "loss": 0.6608, "step": 1017 }, { "epoch": 2.3483275663206458, "grad_norm": 0.0, "learning_rate": 1.7324863209219736e-05, "loss": 0.577, "step": 1018 }, { "epoch": 2.3506343713956173, "grad_norm": 0.0, "learning_rate": 1.7319202640547552e-05, "loss": 0.5533, "step": 1019 }, { "epoch": 2.3529411764705883, "grad_norm": 0.0, "learning_rate": 1.7313537016191706e-05, "loss": 0.4271, "step": 1020 }, { "epoch": 2.3552479815455594, "grad_norm": 0.0, "learning_rate": 1.7307866340065684e-05, "loss": 0.4294, "step": 1021 }, { "epoch": 2.3575547866205304, "grad_norm": 0.0, "learning_rate": 1.7302190616086464e-05, "loss": 0.4588, "step": 1022 }, { "epoch": 2.359861591695502, "grad_norm": 0.0, "learning_rate": 1.729650984817451e-05, "loss": 0.5191, "step": 1023 }, { "epoch": 2.362168396770473, "grad_norm": 0.0, "learning_rate": 1.729082404025377e-05, "loss": 0.6365, "step": 1024 }, { "epoch": 2.364475201845444, "grad_norm": 0.0, "learning_rate": 1.7285133196251664e-05, "loss": 0.5164, "step": 1025 }, { "epoch": 2.366782006920415, "grad_norm": 0.0, "learning_rate": 1.727943732009911e-05, "loss": 0.3322, "step": 1026 }, { "epoch": 2.3690888119953866, "grad_norm": 0.0, "learning_rate": 1.7273736415730488e-05, "loss": 0.6997, "step": 1027 }, { "epoch": 2.3713956170703576, "grad_norm": 0.0, "learning_rate": 1.7268030487083654e-05, "loss": 0.3441, "step": 1028 }, { "epoch": 2.3737024221453287, "grad_norm": 0.0, "learning_rate": 1.726231953809993e-05, "loss": 0.4374, "step": 1029 }, { "epoch": 2.3760092272202997, "grad_norm": 0.0, "learning_rate": 1.725660357272412e-05, "loss": 0.4929, "step": 1030 }, { "epoch": 2.378316032295271, "grad_norm": 0.0, "learning_rate": 1.725088259490448e-05, "loss": 0.371, "step": 1031 }, { "epoch": 2.3806228373702423, "grad_norm": 0.0, "learning_rate": 1.7245156608592727e-05, "loss": 0.2966, "step": 1032 }, { "epoch": 2.3829296424452133, "grad_norm": 0.0, "learning_rate": 1.723942561774405e-05, "loss": 0.3369, "step": 1033 }, { "epoch": 2.3852364475201844, "grad_norm": 0.0, "learning_rate": 1.723368962631708e-05, "loss": 0.413, "step": 1034 }, { "epoch": 2.387543252595156, "grad_norm": 0.0, "learning_rate": 1.7227948638273918e-05, "loss": 0.4057, "step": 1035 }, { "epoch": 2.389850057670127, "grad_norm": 0.0, "learning_rate": 1.72222026575801e-05, "loss": 0.319, "step": 1036 }, { "epoch": 2.392156862745098, "grad_norm": 0.0, "learning_rate": 1.7216451688204623e-05, "loss": 0.6871, "step": 1037 }, { "epoch": 2.394463667820069, "grad_norm": 0.0, "learning_rate": 1.7210695734119926e-05, "loss": 0.5843, "step": 1038 }, { "epoch": 2.3967704728950405, "grad_norm": 0.0, "learning_rate": 1.7204934799301883e-05, "loss": 0.6087, "step": 1039 }, { "epoch": 2.3990772779700116, "grad_norm": 0.0, "learning_rate": 1.719916888772983e-05, "loss": 0.3969, "step": 1040 }, { "epoch": 2.4013840830449826, "grad_norm": 0.0, "learning_rate": 1.7193398003386514e-05, "loss": 0.486, "step": 1041 }, { "epoch": 2.4036908881199537, "grad_norm": 0.0, "learning_rate": 1.718762215025813e-05, "loss": 0.4047, "step": 1042 }, { "epoch": 2.405997693194925, "grad_norm": 0.0, "learning_rate": 1.718184133233432e-05, "loss": 0.7963, "step": 1043 }, { "epoch": 2.4083044982698962, "grad_norm": 0.0, "learning_rate": 1.717605555360812e-05, "loss": 0.3697, "step": 1044 }, { "epoch": 2.4106113033448673, "grad_norm": 0.0, "learning_rate": 1.7170264818076027e-05, "loss": 0.3917, "step": 1045 }, { "epoch": 2.4129181084198383, "grad_norm": 0.0, "learning_rate": 1.7164469129737936e-05, "loss": 0.56, "step": 1046 }, { "epoch": 2.41522491349481, "grad_norm": 0.0, "learning_rate": 1.7158668492597186e-05, "loss": 0.4465, "step": 1047 }, { "epoch": 2.417531718569781, "grad_norm": 0.0, "learning_rate": 1.7152862910660516e-05, "loss": 0.5596, "step": 1048 }, { "epoch": 2.419838523644752, "grad_norm": 0.0, "learning_rate": 1.7147052387938094e-05, "loss": 0.4819, "step": 1049 }, { "epoch": 2.422145328719723, "grad_norm": 0.0, "learning_rate": 1.7141236928443482e-05, "loss": 0.3809, "step": 1050 }, { "epoch": 2.4244521337946945, "grad_norm": 0.0, "learning_rate": 1.7135416536193678e-05, "loss": 0.5057, "step": 1051 }, { "epoch": 2.4267589388696655, "grad_norm": 0.0, "learning_rate": 1.712959121520907e-05, "loss": 0.4327, "step": 1052 }, { "epoch": 2.4290657439446366, "grad_norm": 0.0, "learning_rate": 1.712376096951345e-05, "loss": 0.4292, "step": 1053 }, { "epoch": 2.431372549019608, "grad_norm": 0.0, "learning_rate": 1.7117925803134017e-05, "loss": 0.533, "step": 1054 }, { "epoch": 2.433679354094579, "grad_norm": 0.0, "learning_rate": 1.711208572010137e-05, "loss": 0.4858, "step": 1055 }, { "epoch": 2.43598615916955, "grad_norm": 0.0, "learning_rate": 1.7106240724449507e-05, "loss": 0.4681, "step": 1056 }, { "epoch": 2.4382929642445212, "grad_norm": 0.0, "learning_rate": 1.7100390820215805e-05, "loss": 0.6602, "step": 1057 }, { "epoch": 2.4405997693194923, "grad_norm": 0.0, "learning_rate": 1.7094536011441046e-05, "loss": 0.6562, "step": 1058 }, { "epoch": 2.442906574394464, "grad_norm": 0.0, "learning_rate": 1.7088676302169394e-05, "loss": 0.3111, "step": 1059 }, { "epoch": 2.445213379469435, "grad_norm": 0.0, "learning_rate": 1.7082811696448397e-05, "loss": 0.4315, "step": 1060 }, { "epoch": 2.447520184544406, "grad_norm": 0.0, "learning_rate": 1.7076942198328987e-05, "loss": 0.4883, "step": 1061 }, { "epoch": 2.4498269896193774, "grad_norm": 0.0, "learning_rate": 1.7071067811865477e-05, "loss": 0.6466, "step": 1062 }, { "epoch": 2.4521337946943484, "grad_norm": 0.0, "learning_rate": 1.7065188541115554e-05, "loss": 0.4887, "step": 1063 }, { "epoch": 2.4544405997693195, "grad_norm": 0.0, "learning_rate": 1.705930439014028e-05, "loss": 0.6299, "step": 1064 }, { "epoch": 2.4567474048442905, "grad_norm": 0.0, "learning_rate": 1.705341536300409e-05, "loss": 0.3956, "step": 1065 }, { "epoch": 2.4590542099192616, "grad_norm": 0.0, "learning_rate": 1.704752146377478e-05, "loss": 0.4812, "step": 1066 }, { "epoch": 2.461361014994233, "grad_norm": 0.0, "learning_rate": 1.704162269652352e-05, "loss": 0.4718, "step": 1067 }, { "epoch": 2.463667820069204, "grad_norm": 0.0, "learning_rate": 1.7035719065324837e-05, "loss": 0.537, "step": 1068 }, { "epoch": 2.465974625144175, "grad_norm": 0.0, "learning_rate": 1.702981057425662e-05, "loss": 0.3821, "step": 1069 }, { "epoch": 2.4682814302191467, "grad_norm": 0.0, "learning_rate": 1.7023897227400113e-05, "loss": 0.3955, "step": 1070 }, { "epoch": 2.4705882352941178, "grad_norm": 0.0, "learning_rate": 1.7017979028839918e-05, "loss": 0.4731, "step": 1071 }, { "epoch": 2.472895040369089, "grad_norm": 0.0, "learning_rate": 1.701205598266398e-05, "loss": 0.401, "step": 1072 }, { "epoch": 2.47520184544406, "grad_norm": 0.0, "learning_rate": 1.7006128092963604e-05, "loss": 0.4288, "step": 1073 }, { "epoch": 2.477508650519031, "grad_norm": 0.0, "learning_rate": 1.7000195363833434e-05, "loss": 0.5251, "step": 1074 }, { "epoch": 2.4798154555940024, "grad_norm": 0.0, "learning_rate": 1.6994257799371457e-05, "loss": 0.4595, "step": 1075 }, { "epoch": 2.4821222606689735, "grad_norm": 0.0, "learning_rate": 1.6988315403679e-05, "loss": 0.3999, "step": 1076 }, { "epoch": 2.4844290657439445, "grad_norm": 0.0, "learning_rate": 1.698236818086073e-05, "loss": 0.4639, "step": 1077 }, { "epoch": 2.486735870818916, "grad_norm": 0.0, "learning_rate": 1.697641613502464e-05, "loss": 0.4915, "step": 1078 }, { "epoch": 2.489042675893887, "grad_norm": 0.0, "learning_rate": 1.6970459270282068e-05, "loss": 0.273, "step": 1079 }, { "epoch": 2.491349480968858, "grad_norm": 0.0, "learning_rate": 1.696449759074767e-05, "loss": 0.3822, "step": 1080 }, { "epoch": 2.493656286043829, "grad_norm": 0.0, "learning_rate": 1.6958531100539428e-05, "loss": 0.4648, "step": 1081 }, { "epoch": 2.4959630911188, "grad_norm": 0.0, "learning_rate": 1.6952559803778656e-05, "loss": 0.3983, "step": 1082 }, { "epoch": 2.4982698961937717, "grad_norm": 0.0, "learning_rate": 1.6946583704589973e-05, "loss": 0.5093, "step": 1083 }, { "epoch": 2.5005767012687428, "grad_norm": 0.0, "learning_rate": 1.6940602807101335e-05, "loss": 0.6034, "step": 1084 }, { "epoch": 2.502883506343714, "grad_norm": 0.0, "learning_rate": 1.6934617115443993e-05, "loss": 0.3113, "step": 1085 }, { "epoch": 2.5051903114186853, "grad_norm": 0.0, "learning_rate": 1.692862663375252e-05, "loss": 0.652, "step": 1086 }, { "epoch": 2.5074971164936564, "grad_norm": 0.0, "learning_rate": 1.6922631366164795e-05, "loss": 0.4327, "step": 1087 }, { "epoch": 2.5098039215686274, "grad_norm": 0.0, "learning_rate": 1.6916631316822013e-05, "loss": 0.4376, "step": 1088 }, { "epoch": 2.5121107266435985, "grad_norm": 0.0, "learning_rate": 1.691062648986865e-05, "loss": 0.3329, "step": 1089 }, { "epoch": 2.5144175317185695, "grad_norm": 0.0, "learning_rate": 1.6904616889452497e-05, "loss": 0.5643, "step": 1090 }, { "epoch": 2.516724336793541, "grad_norm": 0.0, "learning_rate": 1.6898602519724647e-05, "loss": 0.5856, "step": 1091 }, { "epoch": 2.519031141868512, "grad_norm": 0.0, "learning_rate": 1.689258338483947e-05, "loss": 0.5064, "step": 1092 }, { "epoch": 2.521337946943483, "grad_norm": 0.0, "learning_rate": 1.6886559488954647e-05, "loss": 0.3837, "step": 1093 }, { "epoch": 2.5236447520184546, "grad_norm": 0.0, "learning_rate": 1.6880530836231137e-05, "loss": 0.3516, "step": 1094 }, { "epoch": 2.5259515570934257, "grad_norm": 0.0, "learning_rate": 1.6874497430833182e-05, "loss": 0.3592, "step": 1095 }, { "epoch": 2.5282583621683967, "grad_norm": 0.0, "learning_rate": 1.6868459276928312e-05, "loss": 0.4723, "step": 1096 }, { "epoch": 2.5305651672433678, "grad_norm": 0.0, "learning_rate": 1.686241637868734e-05, "loss": 0.2838, "step": 1097 }, { "epoch": 2.532871972318339, "grad_norm": 0.0, "learning_rate": 1.6856368740284342e-05, "loss": 0.5495, "step": 1098 }, { "epoch": 2.5351787773933103, "grad_norm": 0.0, "learning_rate": 1.6850316365896692e-05, "loss": 0.4764, "step": 1099 }, { "epoch": 2.5374855824682814, "grad_norm": 0.0, "learning_rate": 1.684425925970501e-05, "loss": 0.5037, "step": 1100 }, { "epoch": 2.539792387543253, "grad_norm": 0.0, "learning_rate": 1.68381974258932e-05, "loss": 0.5278, "step": 1101 }, { "epoch": 2.542099192618224, "grad_norm": 0.0, "learning_rate": 1.683213086864843e-05, "loss": 0.5424, "step": 1102 }, { "epoch": 2.544405997693195, "grad_norm": 0.0, "learning_rate": 1.6826059592161136e-05, "loss": 0.6143, "step": 1103 }, { "epoch": 2.546712802768166, "grad_norm": 0.0, "learning_rate": 1.6819983600624986e-05, "loss": 0.6507, "step": 1104 }, { "epoch": 2.549019607843137, "grad_norm": 0.0, "learning_rate": 1.681390289823694e-05, "loss": 0.3845, "step": 1105 }, { "epoch": 2.5513264129181086, "grad_norm": 0.0, "learning_rate": 1.6807817489197192e-05, "loss": 0.4351, "step": 1106 }, { "epoch": 2.5536332179930796, "grad_norm": 0.0, "learning_rate": 1.6801727377709195e-05, "loss": 0.4947, "step": 1107 }, { "epoch": 2.5559400230680507, "grad_norm": 0.0, "learning_rate": 1.6795632567979643e-05, "loss": 0.3902, "step": 1108 }, { "epoch": 2.558246828143022, "grad_norm": 0.0, "learning_rate": 1.6789533064218487e-05, "loss": 0.4433, "step": 1109 }, { "epoch": 2.5605536332179932, "grad_norm": 0.0, "learning_rate": 1.6783428870638904e-05, "loss": 0.5687, "step": 1110 }, { "epoch": 2.5628604382929643, "grad_norm": 0.0, "learning_rate": 1.6777319991457325e-05, "loss": 0.5395, "step": 1111 }, { "epoch": 2.5651672433679353, "grad_norm": 0.0, "learning_rate": 1.6771206430893408e-05, "loss": 0.6602, "step": 1112 }, { "epoch": 2.5674740484429064, "grad_norm": 0.0, "learning_rate": 1.6765088193170055e-05, "loss": 0.4511, "step": 1113 }, { "epoch": 2.569780853517878, "grad_norm": 0.0, "learning_rate": 1.6758965282513383e-05, "loss": 0.2627, "step": 1114 }, { "epoch": 2.572087658592849, "grad_norm": 0.0, "learning_rate": 1.6752837703152754e-05, "loss": 0.4823, "step": 1115 }, { "epoch": 2.57439446366782, "grad_norm": 0.0, "learning_rate": 1.6746705459320746e-05, "loss": 0.4651, "step": 1116 }, { "epoch": 2.5767012687427915, "grad_norm": 0.0, "learning_rate": 1.6740568555253153e-05, "loss": 0.7135, "step": 1117 }, { "epoch": 2.5790080738177625, "grad_norm": 0.0, "learning_rate": 1.6734426995189003e-05, "loss": 0.4316, "step": 1118 }, { "epoch": 2.5813148788927336, "grad_norm": 0.0, "learning_rate": 1.672828078337053e-05, "loss": 0.473, "step": 1119 }, { "epoch": 2.5836216839677046, "grad_norm": 0.0, "learning_rate": 1.6722129924043184e-05, "loss": 0.392, "step": 1120 }, { "epoch": 2.5859284890426757, "grad_norm": 0.0, "learning_rate": 1.6715974421455615e-05, "loss": 0.3433, "step": 1121 }, { "epoch": 2.588235294117647, "grad_norm": 0.0, "learning_rate": 1.67098142798597e-05, "loss": 0.6067, "step": 1122 }, { "epoch": 2.5905420991926182, "grad_norm": 0.0, "learning_rate": 1.6703649503510514e-05, "loss": 0.3693, "step": 1123 }, { "epoch": 2.5928489042675893, "grad_norm": 0.0, "learning_rate": 1.6697480096666313e-05, "loss": 0.6032, "step": 1124 }, { "epoch": 2.595155709342561, "grad_norm": 0.0, "learning_rate": 1.6691306063588583e-05, "loss": 0.3967, "step": 1125 }, { "epoch": 2.597462514417532, "grad_norm": 0.0, "learning_rate": 1.6685127408541986e-05, "loss": 0.483, "step": 1126 }, { "epoch": 2.599769319492503, "grad_norm": 0.0, "learning_rate": 1.6678944135794375e-05, "loss": 0.4309, "step": 1127 }, { "epoch": 2.602076124567474, "grad_norm": 0.0, "learning_rate": 1.667275624961681e-05, "loss": 0.4103, "step": 1128 }, { "epoch": 2.604382929642445, "grad_norm": 0.0, "learning_rate": 1.6666563754283517e-05, "loss": 0.4266, "step": 1129 }, { "epoch": 2.6066897347174165, "grad_norm": 0.0, "learning_rate": 1.6660366654071917e-05, "loss": 0.5928, "step": 1130 }, { "epoch": 2.6089965397923875, "grad_norm": 0.0, "learning_rate": 1.6654164953262614e-05, "loss": 0.504, "step": 1131 }, { "epoch": 2.6113033448673586, "grad_norm": 0.0, "learning_rate": 1.6647958656139377e-05, "loss": 0.4914, "step": 1132 }, { "epoch": 2.61361014994233, "grad_norm": 0.0, "learning_rate": 1.6641747766989173e-05, "loss": 0.688, "step": 1133 }, { "epoch": 2.615916955017301, "grad_norm": 0.0, "learning_rate": 1.6635532290102114e-05, "loss": 0.5327, "step": 1134 }, { "epoch": 2.618223760092272, "grad_norm": 0.0, "learning_rate": 1.6629312229771497e-05, "loss": 0.3177, "step": 1135 }, { "epoch": 2.6205305651672433, "grad_norm": 0.0, "learning_rate": 1.6623087590293786e-05, "loss": 0.5426, "step": 1136 }, { "epoch": 2.6228373702422143, "grad_norm": 0.0, "learning_rate": 1.6616858375968596e-05, "loss": 0.3675, "step": 1137 }, { "epoch": 2.625144175317186, "grad_norm": 0.0, "learning_rate": 1.6610624591098716e-05, "loss": 0.4293, "step": 1138 }, { "epoch": 2.627450980392157, "grad_norm": 0.0, "learning_rate": 1.6604386239990077e-05, "loss": 0.4405, "step": 1139 }, { "epoch": 2.629757785467128, "grad_norm": 0.0, "learning_rate": 1.6598143326951784e-05, "loss": 0.4093, "step": 1140 }, { "epoch": 2.6320645905420994, "grad_norm": 0.0, "learning_rate": 1.6591895856296075e-05, "loss": 0.486, "step": 1141 }, { "epoch": 2.6343713956170705, "grad_norm": 0.0, "learning_rate": 1.6585643832338342e-05, "loss": 0.4883, "step": 1142 }, { "epoch": 2.6366782006920415, "grad_norm": 0.0, "learning_rate": 1.657938725939713e-05, "loss": 0.343, "step": 1143 }, { "epoch": 2.6389850057670126, "grad_norm": 0.0, "learning_rate": 1.6573126141794108e-05, "loss": 0.4574, "step": 1144 }, { "epoch": 2.6412918108419836, "grad_norm": 0.0, "learning_rate": 1.6566860483854106e-05, "loss": 0.6207, "step": 1145 }, { "epoch": 2.643598615916955, "grad_norm": 0.0, "learning_rate": 1.6560590289905074e-05, "loss": 0.4351, "step": 1146 }, { "epoch": 2.645905420991926, "grad_norm": 0.0, "learning_rate": 1.6554315564278102e-05, "loss": 0.5095, "step": 1147 }, { "epoch": 2.648212226066897, "grad_norm": 0.0, "learning_rate": 1.654803631130741e-05, "loss": 0.3427, "step": 1148 }, { "epoch": 2.6505190311418687, "grad_norm": 0.0, "learning_rate": 1.6541752535330345e-05, "loss": 0.494, "step": 1149 }, { "epoch": 2.6528258362168398, "grad_norm": 0.0, "learning_rate": 1.6535464240687376e-05, "loss": 0.3953, "step": 1150 }, { "epoch": 2.655132641291811, "grad_norm": 0.0, "learning_rate": 1.6529171431722097e-05, "loss": 0.351, "step": 1151 }, { "epoch": 2.657439446366782, "grad_norm": 0.0, "learning_rate": 1.6522874112781213e-05, "loss": 0.5334, "step": 1152 }, { "epoch": 2.659746251441753, "grad_norm": 0.0, "learning_rate": 1.6516572288214555e-05, "loss": 0.4919, "step": 1153 }, { "epoch": 2.6620530565167244, "grad_norm": 0.0, "learning_rate": 1.6510265962375054e-05, "loss": 0.4449, "step": 1154 }, { "epoch": 2.6643598615916955, "grad_norm": 0.0, "learning_rate": 1.6503955139618765e-05, "loss": 0.4589, "step": 1155 }, { "epoch": 2.6666666666666665, "grad_norm": 0.0, "learning_rate": 1.6497639824304833e-05, "loss": 0.4384, "step": 1156 }, { "epoch": 2.668973471741638, "grad_norm": 0.0, "learning_rate": 1.649132002079552e-05, "loss": 0.6397, "step": 1157 }, { "epoch": 2.671280276816609, "grad_norm": 0.0, "learning_rate": 1.6484995733456178e-05, "loss": 0.4139, "step": 1158 }, { "epoch": 2.67358708189158, "grad_norm": 0.0, "learning_rate": 1.6478666966655266e-05, "loss": 0.5623, "step": 1159 }, { "epoch": 2.675893886966551, "grad_norm": 0.0, "learning_rate": 1.6472333724764326e-05, "loss": 0.6482, "step": 1160 }, { "epoch": 2.6782006920415222, "grad_norm": 0.0, "learning_rate": 1.6465996012157996e-05, "loss": 0.3822, "step": 1161 }, { "epoch": 2.6805074971164937, "grad_norm": 0.0, "learning_rate": 1.645965383321401e-05, "loss": 0.4891, "step": 1162 }, { "epoch": 2.6828143021914648, "grad_norm": 0.0, "learning_rate": 1.6453307192313176e-05, "loss": 0.5097, "step": 1163 }, { "epoch": 2.685121107266436, "grad_norm": 0.0, "learning_rate": 1.6446956093839385e-05, "loss": 0.3205, "step": 1164 }, { "epoch": 2.6874279123414073, "grad_norm": 0.0, "learning_rate": 1.6440600542179613e-05, "loss": 0.456, "step": 1165 }, { "epoch": 2.6897347174163784, "grad_norm": 0.0, "learning_rate": 1.6434240541723908e-05, "loss": 0.4557, "step": 1166 }, { "epoch": 2.6920415224913494, "grad_norm": 0.0, "learning_rate": 1.6427876096865394e-05, "loss": 0.5876, "step": 1167 }, { "epoch": 2.6943483275663205, "grad_norm": 0.0, "learning_rate": 1.6421507212000262e-05, "loss": 0.4488, "step": 1168 }, { "epoch": 2.696655132641292, "grad_norm": 0.0, "learning_rate": 1.641513389152777e-05, "loss": 0.6073, "step": 1169 }, { "epoch": 2.698961937716263, "grad_norm": 0.0, "learning_rate": 1.6408756139850243e-05, "loss": 0.482, "step": 1170 }, { "epoch": 2.701268742791234, "grad_norm": 0.0, "learning_rate": 1.640237396137306e-05, "loss": 0.6577, "step": 1171 }, { "epoch": 2.7035755478662056, "grad_norm": 0.0, "learning_rate": 1.6395987360504667e-05, "loss": 0.5384, "step": 1172 }, { "epoch": 2.7058823529411766, "grad_norm": 0.0, "learning_rate": 1.638959634165656e-05, "loss": 0.5178, "step": 1173 }, { "epoch": 2.7081891580161477, "grad_norm": 0.0, "learning_rate": 1.6383200909243285e-05, "loss": 0.5686, "step": 1174 }, { "epoch": 2.7104959630911187, "grad_norm": 0.0, "learning_rate": 1.6376801067682433e-05, "loss": 0.4074, "step": 1175 }, { "epoch": 2.71280276816609, "grad_norm": 0.0, "learning_rate": 1.637039682139466e-05, "loss": 0.7056, "step": 1176 }, { "epoch": 2.7151095732410613, "grad_norm": 0.0, "learning_rate": 1.6363988174803638e-05, "loss": 0.4399, "step": 1177 }, { "epoch": 2.7174163783160323, "grad_norm": 0.0, "learning_rate": 1.6357575132336093e-05, "loss": 0.5103, "step": 1178 }, { "epoch": 2.7197231833910034, "grad_norm": 0.0, "learning_rate": 1.635115769842179e-05, "loss": 0.5517, "step": 1179 }, { "epoch": 2.722029988465975, "grad_norm": 0.0, "learning_rate": 1.6344735877493518e-05, "loss": 0.5515, "step": 1180 }, { "epoch": 2.724336793540946, "grad_norm": 0.0, "learning_rate": 1.63383096739871e-05, "loss": 0.3965, "step": 1181 }, { "epoch": 2.726643598615917, "grad_norm": 0.0, "learning_rate": 1.6331879092341402e-05, "loss": 0.5723, "step": 1182 }, { "epoch": 2.728950403690888, "grad_norm": 0.0, "learning_rate": 1.6325444136998277e-05, "loss": 0.3967, "step": 1183 }, { "epoch": 2.731257208765859, "grad_norm": 0.0, "learning_rate": 1.6319004812402637e-05, "loss": 0.3694, "step": 1184 }, { "epoch": 2.7335640138408306, "grad_norm": 0.0, "learning_rate": 1.631256112300239e-05, "loss": 0.5395, "step": 1185 }, { "epoch": 2.7358708189158016, "grad_norm": 0.0, "learning_rate": 1.630611307324847e-05, "loss": 0.6903, "step": 1186 }, { "epoch": 2.7381776239907727, "grad_norm": 0.0, "learning_rate": 1.6299660667594814e-05, "loss": 0.5268, "step": 1187 }, { "epoch": 2.740484429065744, "grad_norm": 0.0, "learning_rate": 1.6293203910498375e-05, "loss": 0.584, "step": 1188 }, { "epoch": 2.7427912341407152, "grad_norm": 0.0, "learning_rate": 1.628674280641911e-05, "loss": 0.4556, "step": 1189 }, { "epoch": 2.7450980392156863, "grad_norm": 0.0, "learning_rate": 1.6280277359819973e-05, "loss": 0.5736, "step": 1190 }, { "epoch": 2.7474048442906573, "grad_norm": 0.0, "learning_rate": 1.6273807575166927e-05, "loss": 0.3524, "step": 1191 }, { "epoch": 2.7497116493656284, "grad_norm": 0.0, "learning_rate": 1.626733345692892e-05, "loss": 0.6067, "step": 1192 }, { "epoch": 2.7520184544406, "grad_norm": 0.0, "learning_rate": 1.6260855009577912e-05, "loss": 0.4256, "step": 1193 }, { "epoch": 2.754325259515571, "grad_norm": 0.0, "learning_rate": 1.625437223758883e-05, "loss": 0.496, "step": 1194 }, { "epoch": 2.756632064590542, "grad_norm": 0.0, "learning_rate": 1.6247885145439602e-05, "loss": 0.3951, "step": 1195 }, { "epoch": 2.7589388696655135, "grad_norm": 0.0, "learning_rate": 1.624139373761114e-05, "loss": 0.2879, "step": 1196 }, { "epoch": 2.7612456747404845, "grad_norm": 0.0, "learning_rate": 1.6234898018587336e-05, "loss": 0.3298, "step": 1197 }, { "epoch": 2.7635524798154556, "grad_norm": 0.0, "learning_rate": 1.6228397992855053e-05, "loss": 0.4574, "step": 1198 }, { "epoch": 2.7658592848904267, "grad_norm": 0.0, "learning_rate": 1.6221893664904142e-05, "loss": 0.6728, "step": 1199 }, { "epoch": 2.7681660899653977, "grad_norm": 0.0, "learning_rate": 1.621538503922741e-05, "loss": 0.426, "step": 1200 }, { "epoch": 2.770472895040369, "grad_norm": 0.0, "learning_rate": 1.6208872120320647e-05, "loss": 0.5025, "step": 1201 }, { "epoch": 2.7727797001153403, "grad_norm": 0.0, "learning_rate": 1.6202354912682602e-05, "loss": 0.4905, "step": 1202 }, { "epoch": 2.7750865051903113, "grad_norm": 0.0, "learning_rate": 1.6195833420814983e-05, "loss": 0.4269, "step": 1203 }, { "epoch": 2.777393310265283, "grad_norm": 0.0, "learning_rate": 1.6189307649222463e-05, "loss": 0.3785, "step": 1204 }, { "epoch": 2.779700115340254, "grad_norm": 0.0, "learning_rate": 1.618277760241267e-05, "loss": 0.436, "step": 1205 }, { "epoch": 2.782006920415225, "grad_norm": 0.0, "learning_rate": 1.617624328489618e-05, "loss": 0.4231, "step": 1206 }, { "epoch": 2.784313725490196, "grad_norm": 0.0, "learning_rate": 1.6169704701186528e-05, "loss": 0.3647, "step": 1207 }, { "epoch": 2.786620530565167, "grad_norm": 0.0, "learning_rate": 1.616316185580019e-05, "loss": 0.4274, "step": 1208 }, { "epoch": 2.7889273356401385, "grad_norm": 0.0, "learning_rate": 1.6156614753256583e-05, "loss": 0.5199, "step": 1209 }, { "epoch": 2.7912341407151096, "grad_norm": 0.0, "learning_rate": 1.6150063398078074e-05, "loss": 0.4447, "step": 1210 }, { "epoch": 2.7935409457900806, "grad_norm": 0.0, "learning_rate": 1.6143507794789962e-05, "loss": 0.5074, "step": 1211 }, { "epoch": 2.795847750865052, "grad_norm": 0.0, "learning_rate": 1.6136947947920477e-05, "loss": 0.4375, "step": 1212 }, { "epoch": 2.798154555940023, "grad_norm": 0.0, "learning_rate": 1.6130383862000783e-05, "loss": 0.479, "step": 1213 }, { "epoch": 2.800461361014994, "grad_norm": 0.0, "learning_rate": 1.6123815541564973e-05, "loss": 0.5022, "step": 1214 }, { "epoch": 2.8027681660899653, "grad_norm": 0.0, "learning_rate": 1.6117242991150064e-05, "loss": 0.3574, "step": 1215 }, { "epoch": 2.8050749711649363, "grad_norm": 0.0, "learning_rate": 1.6110666215296e-05, "loss": 0.424, "step": 1216 }, { "epoch": 2.807381776239908, "grad_norm": 0.0, "learning_rate": 1.6104085218545633e-05, "loss": 0.537, "step": 1217 }, { "epoch": 2.809688581314879, "grad_norm": 0.0, "learning_rate": 1.609750000544474e-05, "loss": 0.482, "step": 1218 }, { "epoch": 2.81199538638985, "grad_norm": 0.0, "learning_rate": 1.6090910580542006e-05, "loss": 0.4754, "step": 1219 }, { "epoch": 2.8143021914648214, "grad_norm": 0.0, "learning_rate": 1.6084316948389027e-05, "loss": 0.4355, "step": 1220 }, { "epoch": 2.8166089965397925, "grad_norm": 0.0, "learning_rate": 1.6077719113540303e-05, "loss": 0.4877, "step": 1221 }, { "epoch": 2.8189158016147635, "grad_norm": 0.0, "learning_rate": 1.6071117080553236e-05, "loss": 0.3583, "step": 1222 }, { "epoch": 2.8212226066897346, "grad_norm": 0.0, "learning_rate": 1.6064510853988137e-05, "loss": 0.5303, "step": 1223 }, { "epoch": 2.8235294117647056, "grad_norm": 0.0, "learning_rate": 1.60579004384082e-05, "loss": 0.6129, "step": 1224 }, { "epoch": 2.825836216839677, "grad_norm": 0.0, "learning_rate": 1.6051285838379525e-05, "loss": 0.4577, "step": 1225 }, { "epoch": 2.828143021914648, "grad_norm": 0.0, "learning_rate": 1.6044667058471093e-05, "loss": 0.3249, "step": 1226 }, { "epoch": 2.830449826989619, "grad_norm": 0.0, "learning_rate": 1.6038044103254775e-05, "loss": 0.5886, "step": 1227 }, { "epoch": 2.8327566320645907, "grad_norm": 0.0, "learning_rate": 1.603141697730533e-05, "loss": 0.363, "step": 1228 }, { "epoch": 2.8350634371395618, "grad_norm": 0.0, "learning_rate": 1.6024785685200396e-05, "loss": 0.4216, "step": 1229 }, { "epoch": 2.837370242214533, "grad_norm": 0.0, "learning_rate": 1.6018150231520486e-05, "loss": 0.4745, "step": 1230 }, { "epoch": 2.839677047289504, "grad_norm": 0.0, "learning_rate": 1.6011510620848985e-05, "loss": 0.5405, "step": 1231 }, { "epoch": 2.841983852364475, "grad_norm": 0.0, "learning_rate": 1.600486685777216e-05, "loss": 0.5265, "step": 1232 }, { "epoch": 2.8442906574394464, "grad_norm": 0.0, "learning_rate": 1.599821894687914e-05, "loss": 0.5119, "step": 1233 }, { "epoch": 2.8465974625144175, "grad_norm": 0.0, "learning_rate": 1.5991566892761913e-05, "loss": 0.5269, "step": 1234 }, { "epoch": 2.848904267589389, "grad_norm": 0.0, "learning_rate": 1.5984910700015337e-05, "loss": 0.3895, "step": 1235 }, { "epoch": 2.85121107266436, "grad_norm": 0.0, "learning_rate": 1.5978250373237132e-05, "loss": 0.4819, "step": 1236 }, { "epoch": 2.853517877739331, "grad_norm": 0.0, "learning_rate": 1.5971585917027864e-05, "loss": 0.5676, "step": 1237 }, { "epoch": 2.855824682814302, "grad_norm": 0.0, "learning_rate": 1.5964917335990953e-05, "loss": 0.5602, "step": 1238 }, { "epoch": 2.858131487889273, "grad_norm": 0.0, "learning_rate": 1.5958244634732673e-05, "loss": 0.5511, "step": 1239 }, { "epoch": 2.8604382929642447, "grad_norm": 0.0, "learning_rate": 1.5951567817862147e-05, "loss": 0.5623, "step": 1240 }, { "epoch": 2.8627450980392157, "grad_norm": 0.0, "learning_rate": 1.5944886889991326e-05, "loss": 0.588, "step": 1241 }, { "epoch": 2.865051903114187, "grad_norm": 0.0, "learning_rate": 1.5938201855735017e-05, "loss": 0.3131, "step": 1242 }, { "epoch": 2.8673587081891583, "grad_norm": 0.0, "learning_rate": 1.5931512719710855e-05, "loss": 0.6199, "step": 1243 }, { "epoch": 2.8696655132641293, "grad_norm": 0.0, "learning_rate": 1.592481948653931e-05, "loss": 0.5341, "step": 1244 }, { "epoch": 2.8719723183391004, "grad_norm": 0.0, "learning_rate": 1.591812216084368e-05, "loss": 0.5037, "step": 1245 }, { "epoch": 2.8742791234140714, "grad_norm": 0.0, "learning_rate": 1.5911420747250094e-05, "loss": 0.301, "step": 1246 }, { "epoch": 2.8765859284890425, "grad_norm": 0.0, "learning_rate": 1.5904715250387498e-05, "loss": 0.3529, "step": 1247 }, { "epoch": 2.878892733564014, "grad_norm": 0.0, "learning_rate": 1.5898005674887673e-05, "loss": 0.3962, "step": 1248 }, { "epoch": 2.881199538638985, "grad_norm": 0.0, "learning_rate": 1.58912920253852e-05, "loss": 0.3829, "step": 1249 }, { "epoch": 2.883506343713956, "grad_norm": 0.0, "learning_rate": 1.5884574306517482e-05, "loss": 0.454, "step": 1250 }, { "epoch": 2.8858131487889276, "grad_norm": 0.0, "learning_rate": 1.5877852522924733e-05, "loss": 0.5881, "step": 1251 }, { "epoch": 2.8881199538638986, "grad_norm": 0.0, "learning_rate": 1.5871126679249977e-05, "loss": 0.2544, "step": 1252 }, { "epoch": 2.8904267589388697, "grad_norm": 0.0, "learning_rate": 1.586439678013903e-05, "loss": 0.288, "step": 1253 }, { "epoch": 2.8927335640138407, "grad_norm": 0.0, "learning_rate": 1.585766283024053e-05, "loss": 0.5505, "step": 1254 }, { "epoch": 2.895040369088812, "grad_norm": 0.0, "learning_rate": 1.5850924834205897e-05, "loss": 0.5685, "step": 1255 }, { "epoch": 2.8973471741637833, "grad_norm": 0.0, "learning_rate": 1.5844182796689348e-05, "loss": 0.6052, "step": 1256 }, { "epoch": 2.8996539792387543, "grad_norm": 0.0, "learning_rate": 1.5837436722347902e-05, "loss": 0.4579, "step": 1257 }, { "epoch": 2.9019607843137254, "grad_norm": 0.0, "learning_rate": 1.5830686615841348e-05, "loss": 0.4537, "step": 1258 }, { "epoch": 2.904267589388697, "grad_norm": 0.0, "learning_rate": 1.582393248183228e-05, "loss": 0.3697, "step": 1259 }, { "epoch": 2.906574394463668, "grad_norm": 0.0, "learning_rate": 1.581717432498606e-05, "loss": 0.5533, "step": 1260 }, { "epoch": 2.908881199538639, "grad_norm": 0.0, "learning_rate": 1.5810412149970832e-05, "loss": 0.4279, "step": 1261 }, { "epoch": 2.91118800461361, "grad_norm": 0.0, "learning_rate": 1.5803645961457522e-05, "loss": 0.4076, "step": 1262 }, { "epoch": 2.913494809688581, "grad_norm": 0.0, "learning_rate": 1.5796875764119826e-05, "loss": 0.4123, "step": 1263 }, { "epoch": 2.9158016147635526, "grad_norm": 0.0, "learning_rate": 1.5790101562634194e-05, "loss": 0.4087, "step": 1264 }, { "epoch": 2.9181084198385236, "grad_norm": 0.0, "learning_rate": 1.5783323361679865e-05, "loss": 0.5587, "step": 1265 }, { "epoch": 2.9204152249134947, "grad_norm": 0.0, "learning_rate": 1.577654116593883e-05, "loss": 0.5905, "step": 1266 }, { "epoch": 2.922722029988466, "grad_norm": 0.0, "learning_rate": 1.576975498009583e-05, "loss": 0.3051, "step": 1267 }, { "epoch": 2.9250288350634372, "grad_norm": 0.0, "learning_rate": 1.576296480883838e-05, "loss": 0.4562, "step": 1268 }, { "epoch": 2.9273356401384083, "grad_norm": 0.0, "learning_rate": 1.575617065685674e-05, "loss": 0.4578, "step": 1269 }, { "epoch": 2.9296424452133794, "grad_norm": 0.0, "learning_rate": 1.5749372528843908e-05, "loss": 0.5221, "step": 1270 }, { "epoch": 2.9319492502883504, "grad_norm": 0.0, "learning_rate": 1.574257042949565e-05, "loss": 0.554, "step": 1271 }, { "epoch": 2.934256055363322, "grad_norm": 0.0, "learning_rate": 1.573576436351046e-05, "loss": 0.522, "step": 1272 }, { "epoch": 2.936562860438293, "grad_norm": 0.0, "learning_rate": 1.572895433558958e-05, "loss": 0.5842, "step": 1273 }, { "epoch": 2.938869665513264, "grad_norm": 0.0, "learning_rate": 1.5722140350436984e-05, "loss": 0.4128, "step": 1274 }, { "epoch": 2.9411764705882355, "grad_norm": 0.0, "learning_rate": 1.5715322412759374e-05, "loss": 0.3207, "step": 1275 }, { "epoch": 2.9434832756632066, "grad_norm": 0.0, "learning_rate": 1.57085005272662e-05, "loss": 0.4646, "step": 1276 }, { "epoch": 2.9457900807381776, "grad_norm": 0.0, "learning_rate": 1.570167469866962e-05, "loss": 0.6126, "step": 1277 }, { "epoch": 2.9480968858131487, "grad_norm": 0.0, "learning_rate": 1.569484493168452e-05, "loss": 0.5577, "step": 1278 }, { "epoch": 2.9504036908881197, "grad_norm": 0.0, "learning_rate": 1.568801123102852e-05, "loss": 0.5127, "step": 1279 }, { "epoch": 2.952710495963091, "grad_norm": 0.0, "learning_rate": 1.568117360142194e-05, "loss": 0.389, "step": 1280 }, { "epoch": 2.9550173010380623, "grad_norm": 0.0, "learning_rate": 1.567433204758782e-05, "loss": 0.4067, "step": 1281 }, { "epoch": 2.9573241061130333, "grad_norm": 0.0, "learning_rate": 1.5667486574251916e-05, "loss": 0.4942, "step": 1282 }, { "epoch": 2.959630911188005, "grad_norm": 0.0, "learning_rate": 1.566063718614268e-05, "loss": 0.5117, "step": 1283 }, { "epoch": 2.961937716262976, "grad_norm": 0.0, "learning_rate": 1.5653783887991282e-05, "loss": 0.4091, "step": 1284 }, { "epoch": 2.964244521337947, "grad_norm": 0.0, "learning_rate": 1.5646926684531586e-05, "loss": 0.5213, "step": 1285 }, { "epoch": 2.966551326412918, "grad_norm": 0.0, "learning_rate": 1.5640065580500146e-05, "loss": 0.6082, "step": 1286 }, { "epoch": 2.968858131487889, "grad_norm": 0.0, "learning_rate": 1.563320058063622e-05, "loss": 0.5679, "step": 1287 }, { "epoch": 2.9711649365628605, "grad_norm": 0.0, "learning_rate": 1.562633168968176e-05, "loss": 0.5482, "step": 1288 }, { "epoch": 2.9734717416378316, "grad_norm": 0.0, "learning_rate": 1.5619458912381397e-05, "loss": 0.4218, "step": 1289 }, { "epoch": 2.9757785467128026, "grad_norm": 0.0, "learning_rate": 1.5612582253482444e-05, "loss": 0.4031, "step": 1290 }, { "epoch": 2.978085351787774, "grad_norm": 0.0, "learning_rate": 1.5605701717734908e-05, "loss": 0.46, "step": 1291 }, { "epoch": 2.980392156862745, "grad_norm": 0.0, "learning_rate": 1.5598817309891466e-05, "loss": 0.3907, "step": 1292 }, { "epoch": 2.982698961937716, "grad_norm": 0.0, "learning_rate": 1.5591929034707468e-05, "loss": 0.6269, "step": 1293 }, { "epoch": 2.9850057670126873, "grad_norm": 0.0, "learning_rate": 1.558503689694094e-05, "loss": 0.3448, "step": 1294 }, { "epoch": 2.9873125720876583, "grad_norm": 0.0, "learning_rate": 1.5578140901352576e-05, "loss": 0.3264, "step": 1295 }, { "epoch": 2.98961937716263, "grad_norm": 0.0, "learning_rate": 1.5571241052705724e-05, "loss": 0.4899, "step": 1296 }, { "epoch": 2.991926182237601, "grad_norm": 0.0, "learning_rate": 1.5564337355766412e-05, "loss": 0.438, "step": 1297 }, { "epoch": 2.994232987312572, "grad_norm": 0.0, "learning_rate": 1.555742981530331e-05, "loss": 0.3405, "step": 1298 }, { "epoch": 2.9965397923875434, "grad_norm": 0.0, "learning_rate": 1.5550518436087753e-05, "loss": 0.4214, "step": 1299 }, { "epoch": 2.9988465974625145, "grad_norm": 0.0, "learning_rate": 1.5543603222893718e-05, "loss": 0.5982, "step": 1300 }, { "epoch": 3.0011534025374855, "grad_norm": 0.0, "learning_rate": 1.5536684180497838e-05, "loss": 0.2961, "step": 1301 }, { "epoch": 3.0034602076124566, "grad_norm": 0.0, "learning_rate": 1.5529761313679396e-05, "loss": 0.469, "step": 1302 }, { "epoch": 3.005767012687428, "grad_norm": 0.0, "learning_rate": 1.55228346272203e-05, "loss": 0.3034, "step": 1303 }, { "epoch": 3.008073817762399, "grad_norm": 0.0, "learning_rate": 1.5515904125905118e-05, "loss": 0.2598, "step": 1304 }, { "epoch": 3.01038062283737, "grad_norm": 0.0, "learning_rate": 1.5508969814521026e-05, "loss": 0.403, "step": 1305 }, { "epoch": 3.0126874279123412, "grad_norm": 0.0, "learning_rate": 1.5502031697857858e-05, "loss": 0.2381, "step": 1306 }, { "epoch": 3.0149942329873127, "grad_norm": 0.0, "learning_rate": 1.5495089780708062e-05, "loss": 0.37, "step": 1307 }, { "epoch": 3.017301038062284, "grad_norm": 0.0, "learning_rate": 1.548814406786671e-05, "loss": 0.2217, "step": 1308 }, { "epoch": 3.019607843137255, "grad_norm": 0.0, "learning_rate": 1.5481194564131512e-05, "loss": 0.3474, "step": 1309 }, { "epoch": 3.021914648212226, "grad_norm": 0.0, "learning_rate": 1.5474241274302777e-05, "loss": 0.3048, "step": 1310 }, { "epoch": 3.0242214532871974, "grad_norm": 0.0, "learning_rate": 1.5467284203183437e-05, "loss": 0.2127, "step": 1311 }, { "epoch": 3.0265282583621684, "grad_norm": 0.0, "learning_rate": 1.5460323355579035e-05, "loss": 0.2924, "step": 1312 }, { "epoch": 3.0288350634371395, "grad_norm": 0.0, "learning_rate": 1.5453358736297727e-05, "loss": 0.324, "step": 1313 }, { "epoch": 3.0311418685121105, "grad_norm": 0.0, "learning_rate": 1.5446390350150272e-05, "loss": 0.3157, "step": 1314 }, { "epoch": 3.033448673587082, "grad_norm": 0.0, "learning_rate": 1.5439418201950025e-05, "loss": 0.4296, "step": 1315 }, { "epoch": 3.035755478662053, "grad_norm": 0.0, "learning_rate": 1.543244229651295e-05, "loss": 0.1885, "step": 1316 }, { "epoch": 3.038062283737024, "grad_norm": 0.0, "learning_rate": 1.5425462638657597e-05, "loss": 0.2345, "step": 1317 }, { "epoch": 3.040369088811995, "grad_norm": 0.0, "learning_rate": 1.5418479233205112e-05, "loss": 0.2252, "step": 1318 }, { "epoch": 3.0426758938869667, "grad_norm": 0.0, "learning_rate": 1.541149208497923e-05, "loss": 0.2805, "step": 1319 }, { "epoch": 3.0449826989619377, "grad_norm": 0.0, "learning_rate": 1.5404501198806267e-05, "loss": 0.1897, "step": 1320 }, { "epoch": 3.047289504036909, "grad_norm": 0.0, "learning_rate": 1.539750657951513e-05, "loss": 0.2589, "step": 1321 }, { "epoch": 3.04959630911188, "grad_norm": 0.0, "learning_rate": 1.53905082319373e-05, "loss": 0.2638, "step": 1322 }, { "epoch": 3.0519031141868513, "grad_norm": 0.0, "learning_rate": 1.5383506160906826e-05, "loss": 0.3877, "step": 1323 }, { "epoch": 3.0542099192618224, "grad_norm": 0.0, "learning_rate": 1.5376500371260335e-05, "loss": 0.3145, "step": 1324 }, { "epoch": 3.0565167243367934, "grad_norm": 0.0, "learning_rate": 1.5369490867837037e-05, "loss": 0.3886, "step": 1325 }, { "epoch": 3.0588235294117645, "grad_norm": 0.0, "learning_rate": 1.5362477655478677e-05, "loss": 0.4357, "step": 1326 }, { "epoch": 3.061130334486736, "grad_norm": 0.0, "learning_rate": 1.5355460739029585e-05, "loss": 0.3708, "step": 1327 }, { "epoch": 3.063437139561707, "grad_norm": 0.0, "learning_rate": 1.5348440123336647e-05, "loss": 0.2363, "step": 1328 }, { "epoch": 3.065743944636678, "grad_norm": 0.0, "learning_rate": 1.534141581324929e-05, "loss": 0.2681, "step": 1329 }, { "epoch": 3.0680507497116496, "grad_norm": 0.0, "learning_rate": 1.5334387813619508e-05, "loss": 0.2168, "step": 1330 }, { "epoch": 3.0703575547866206, "grad_norm": 0.0, "learning_rate": 1.532735612930184e-05, "loss": 0.2744, "step": 1331 }, { "epoch": 3.0726643598615917, "grad_norm": 0.0, "learning_rate": 1.5320320765153367e-05, "loss": 0.3737, "step": 1332 }, { "epoch": 3.0749711649365628, "grad_norm": 0.0, "learning_rate": 1.5313281726033714e-05, "loss": 0.2949, "step": 1333 }, { "epoch": 3.077277970011534, "grad_norm": 0.0, "learning_rate": 1.5306239016805045e-05, "loss": 0.2605, "step": 1334 }, { "epoch": 3.0795847750865053, "grad_norm": 0.0, "learning_rate": 1.529919264233205e-05, "loss": 0.3751, "step": 1335 }, { "epoch": 3.0818915801614764, "grad_norm": 0.0, "learning_rate": 1.529214260748197e-05, "loss": 0.2882, "step": 1336 }, { "epoch": 3.0841983852364474, "grad_norm": 0.0, "learning_rate": 1.5285088917124555e-05, "loss": 0.3046, "step": 1337 }, { "epoch": 3.086505190311419, "grad_norm": 0.0, "learning_rate": 1.527803157613209e-05, "loss": 0.2279, "step": 1338 }, { "epoch": 3.08881199538639, "grad_norm": 0.0, "learning_rate": 1.5270970589379387e-05, "loss": 0.1535, "step": 1339 }, { "epoch": 3.091118800461361, "grad_norm": 0.0, "learning_rate": 1.5263905961743758e-05, "loss": 0.5102, "step": 1340 }, { "epoch": 3.093425605536332, "grad_norm": 0.0, "learning_rate": 1.5256837698105047e-05, "loss": 0.241, "step": 1341 }, { "epoch": 3.0957324106113036, "grad_norm": 0.0, "learning_rate": 1.5249765803345602e-05, "loss": 0.2284, "step": 1342 }, { "epoch": 3.0980392156862746, "grad_norm": 0.0, "learning_rate": 1.5242690282350281e-05, "loss": 0.3061, "step": 1343 }, { "epoch": 3.1003460207612457, "grad_norm": 0.0, "learning_rate": 1.5235611140006446e-05, "loss": 0.3612, "step": 1344 }, { "epoch": 3.1026528258362167, "grad_norm": 0.0, "learning_rate": 1.5228528381203962e-05, "loss": 0.3075, "step": 1345 }, { "epoch": 3.104959630911188, "grad_norm": 0.0, "learning_rate": 1.5221442010835187e-05, "loss": 0.4081, "step": 1346 }, { "epoch": 3.1072664359861593, "grad_norm": 0.0, "learning_rate": 1.5214352033794981e-05, "loss": 0.1777, "step": 1347 }, { "epoch": 3.1095732410611303, "grad_norm": 0.0, "learning_rate": 1.5207258454980694e-05, "loss": 0.2036, "step": 1348 }, { "epoch": 3.1118800461361014, "grad_norm": 0.0, "learning_rate": 1.5200161279292154e-05, "loss": 0.3655, "step": 1349 }, { "epoch": 3.114186851211073, "grad_norm": 0.0, "learning_rate": 1.5193060511631692e-05, "loss": 0.2986, "step": 1350 }, { "epoch": 3.116493656286044, "grad_norm": 0.0, "learning_rate": 1.51859561569041e-05, "loss": 0.2411, "step": 1351 }, { "epoch": 3.118800461361015, "grad_norm": 0.0, "learning_rate": 1.517884822001666e-05, "loss": 0.1674, "step": 1352 }, { "epoch": 3.121107266435986, "grad_norm": 0.0, "learning_rate": 1.5171736705879127e-05, "loss": 0.3046, "step": 1353 }, { "epoch": 3.1234140715109575, "grad_norm": 0.0, "learning_rate": 1.5164621619403725e-05, "loss": 0.2204, "step": 1354 }, { "epoch": 3.1257208765859286, "grad_norm": 0.0, "learning_rate": 1.5157502965505144e-05, "loss": 0.2174, "step": 1355 }, { "epoch": 3.1280276816608996, "grad_norm": 0.0, "learning_rate": 1.5150380749100545e-05, "loss": 0.2485, "step": 1356 }, { "epoch": 3.1303344867358707, "grad_norm": 0.0, "learning_rate": 1.5143254975109538e-05, "loss": 0.2246, "step": 1357 }, { "epoch": 3.132641291810842, "grad_norm": 0.0, "learning_rate": 1.51361256484542e-05, "loss": 0.2828, "step": 1358 }, { "epoch": 3.134948096885813, "grad_norm": 0.0, "learning_rate": 1.5128992774059063e-05, "loss": 0.3656, "step": 1359 }, { "epoch": 3.1372549019607843, "grad_norm": 0.0, "learning_rate": 1.5121856356851101e-05, "loss": 0.3268, "step": 1360 }, { "epoch": 3.1395617070357553, "grad_norm": 0.0, "learning_rate": 1.5114716401759741e-05, "loss": 0.4582, "step": 1361 }, { "epoch": 3.141868512110727, "grad_norm": 0.0, "learning_rate": 1.5107572913716859e-05, "loss": 0.3062, "step": 1362 }, { "epoch": 3.144175317185698, "grad_norm": 0.0, "learning_rate": 1.5100425897656754e-05, "loss": 0.2255, "step": 1363 }, { "epoch": 3.146482122260669, "grad_norm": 0.0, "learning_rate": 1.5093275358516182e-05, "loss": 0.3411, "step": 1364 }, { "epoch": 3.14878892733564, "grad_norm": 0.0, "learning_rate": 1.5086121301234318e-05, "loss": 0.2545, "step": 1365 }, { "epoch": 3.1510957324106115, "grad_norm": 0.0, "learning_rate": 1.5078963730752775e-05, "loss": 0.2674, "step": 1366 }, { "epoch": 3.1534025374855825, "grad_norm": 0.0, "learning_rate": 1.5071802652015592e-05, "loss": 0.1963, "step": 1367 }, { "epoch": 3.1557093425605536, "grad_norm": 0.0, "learning_rate": 1.5064638069969228e-05, "loss": 0.2392, "step": 1368 }, { "epoch": 3.1580161476355246, "grad_norm": 0.0, "learning_rate": 1.5057469989562568e-05, "loss": 0.4015, "step": 1369 }, { "epoch": 3.160322952710496, "grad_norm": 0.0, "learning_rate": 1.5050298415746903e-05, "loss": 0.2642, "step": 1370 }, { "epoch": 3.162629757785467, "grad_norm": 0.0, "learning_rate": 1.5043123353475944e-05, "loss": 0.2594, "step": 1371 }, { "epoch": 3.1649365628604382, "grad_norm": 0.0, "learning_rate": 1.503594480770581e-05, "loss": 0.2049, "step": 1372 }, { "epoch": 3.1672433679354093, "grad_norm": 0.0, "learning_rate": 1.5028762783395035e-05, "loss": 0.1665, "step": 1373 }, { "epoch": 3.169550173010381, "grad_norm": 0.0, "learning_rate": 1.5021577285504538e-05, "loss": 0.3518, "step": 1374 }, { "epoch": 3.171856978085352, "grad_norm": 0.0, "learning_rate": 1.5014388318997655e-05, "loss": 0.2681, "step": 1375 }, { "epoch": 3.174163783160323, "grad_norm": 0.0, "learning_rate": 1.5007195888840102e-05, "loss": 0.268, "step": 1376 }, { "epoch": 3.176470588235294, "grad_norm": 0.0, "learning_rate": 1.5000000000000002e-05, "loss": 0.2367, "step": 1377 }, { "epoch": 3.1787773933102654, "grad_norm": 0.0, "learning_rate": 1.4992800657447858e-05, "loss": 0.181, "step": 1378 }, { "epoch": 3.1810841983852365, "grad_norm": 0.0, "learning_rate": 1.498559786615656e-05, "loss": 0.4208, "step": 1379 }, { "epoch": 3.1833910034602075, "grad_norm": 0.0, "learning_rate": 1.4978391631101383e-05, "loss": 0.2387, "step": 1380 }, { "epoch": 3.1856978085351786, "grad_norm": 0.0, "learning_rate": 1.4971181957259982e-05, "loss": 0.2334, "step": 1381 }, { "epoch": 3.18800461361015, "grad_norm": 0.0, "learning_rate": 1.496396884961238e-05, "loss": 0.3558, "step": 1382 }, { "epoch": 3.190311418685121, "grad_norm": 0.0, "learning_rate": 1.4956752313140978e-05, "loss": 0.2694, "step": 1383 }, { "epoch": 3.192618223760092, "grad_norm": 0.0, "learning_rate": 1.4949532352830543e-05, "loss": 0.2966, "step": 1384 }, { "epoch": 3.1949250288350632, "grad_norm": 0.0, "learning_rate": 1.494230897366821e-05, "loss": 0.3562, "step": 1385 }, { "epoch": 3.1972318339100347, "grad_norm": 0.0, "learning_rate": 1.493508218064347e-05, "loss": 0.2147, "step": 1386 }, { "epoch": 3.199538638985006, "grad_norm": 0.0, "learning_rate": 1.4927851978748177e-05, "loss": 0.3268, "step": 1387 }, { "epoch": 3.201845444059977, "grad_norm": 0.0, "learning_rate": 1.492061837297654e-05, "loss": 0.3238, "step": 1388 }, { "epoch": 3.204152249134948, "grad_norm": 0.0, "learning_rate": 1.4913381368325115e-05, "loss": 0.483, "step": 1389 }, { "epoch": 3.2064590542099194, "grad_norm": 0.0, "learning_rate": 1.4906140969792808e-05, "loss": 0.4238, "step": 1390 }, { "epoch": 3.2087658592848904, "grad_norm": 0.0, "learning_rate": 1.4898897182380872e-05, "loss": 0.2129, "step": 1391 }, { "epoch": 3.2110726643598615, "grad_norm": 0.0, "learning_rate": 1.4891650011092896e-05, "loss": 0.3535, "step": 1392 }, { "epoch": 3.213379469434833, "grad_norm": 0.0, "learning_rate": 1.4884399460934806e-05, "loss": 0.3541, "step": 1393 }, { "epoch": 3.215686274509804, "grad_norm": 0.0, "learning_rate": 1.487714553691487e-05, "loss": 0.3834, "step": 1394 }, { "epoch": 3.217993079584775, "grad_norm": 0.0, "learning_rate": 1.4869888244043674e-05, "loss": 0.2178, "step": 1395 }, { "epoch": 3.220299884659746, "grad_norm": 0.0, "learning_rate": 1.4862627587334144e-05, "loss": 0.2678, "step": 1396 }, { "epoch": 3.222606689734717, "grad_norm": 0.0, "learning_rate": 1.4855363571801523e-05, "loss": 0.2013, "step": 1397 }, { "epoch": 3.2249134948096887, "grad_norm": 0.0, "learning_rate": 1.4848096202463373e-05, "loss": 0.2318, "step": 1398 }, { "epoch": 3.2272202998846597, "grad_norm": 0.0, "learning_rate": 1.4840825484339574e-05, "loss": 0.2473, "step": 1399 }, { "epoch": 3.229527104959631, "grad_norm": 0.0, "learning_rate": 1.483355142245232e-05, "loss": 0.3816, "step": 1400 }, { "epoch": 3.2318339100346023, "grad_norm": 0.0, "learning_rate": 1.482627402182611e-05, "loss": 0.2547, "step": 1401 }, { "epoch": 3.2341407151095733, "grad_norm": 0.0, "learning_rate": 1.481899328748776e-05, "loss": 0.2829, "step": 1402 }, { "epoch": 3.2364475201845444, "grad_norm": 0.0, "learning_rate": 1.481170922446638e-05, "loss": 0.307, "step": 1403 }, { "epoch": 3.2387543252595155, "grad_norm": 0.0, "learning_rate": 1.4804421837793379e-05, "loss": 0.3478, "step": 1404 }, { "epoch": 3.2410611303344865, "grad_norm": 0.0, "learning_rate": 1.4797131132502464e-05, "loss": 0.2051, "step": 1405 }, { "epoch": 3.243367935409458, "grad_norm": 0.0, "learning_rate": 1.4789837113629637e-05, "loss": 0.2191, "step": 1406 }, { "epoch": 3.245674740484429, "grad_norm": 0.0, "learning_rate": 1.4782539786213184e-05, "loss": 0.2577, "step": 1407 }, { "epoch": 3.2479815455594, "grad_norm": 0.0, "learning_rate": 1.477523915529368e-05, "loss": 0.2612, "step": 1408 }, { "epoch": 3.2502883506343716, "grad_norm": 0.0, "learning_rate": 1.4767935225913976e-05, "loss": 0.1385, "step": 1409 }, { "epoch": 3.2525951557093427, "grad_norm": 0.0, "learning_rate": 1.4760628003119213e-05, "loss": 0.3205, "step": 1410 }, { "epoch": 3.2549019607843137, "grad_norm": 0.0, "learning_rate": 1.4753317491956798e-05, "loss": 0.3668, "step": 1411 }, { "epoch": 3.2572087658592848, "grad_norm": 0.0, "learning_rate": 1.4746003697476406e-05, "loss": 0.1318, "step": 1412 }, { "epoch": 3.259515570934256, "grad_norm": 0.0, "learning_rate": 1.4738686624729987e-05, "loss": 0.4485, "step": 1413 }, { "epoch": 3.2618223760092273, "grad_norm": 0.0, "learning_rate": 1.473136627877176e-05, "loss": 0.4717, "step": 1414 }, { "epoch": 3.2641291810841984, "grad_norm": 0.0, "learning_rate": 1.4724042664658185e-05, "loss": 0.1885, "step": 1415 }, { "epoch": 3.2664359861591694, "grad_norm": 0.0, "learning_rate": 1.4716715787448007e-05, "loss": 0.2858, "step": 1416 }, { "epoch": 3.268742791234141, "grad_norm": 0.0, "learning_rate": 1.4709385652202204e-05, "loss": 0.3106, "step": 1417 }, { "epoch": 3.271049596309112, "grad_norm": 0.0, "learning_rate": 1.470205226398401e-05, "loss": 0.3519, "step": 1418 }, { "epoch": 3.273356401384083, "grad_norm": 0.0, "learning_rate": 1.469471562785891e-05, "loss": 0.3678, "step": 1419 }, { "epoch": 3.275663206459054, "grad_norm": 0.0, "learning_rate": 1.4687375748894628e-05, "loss": 0.35, "step": 1420 }, { "epoch": 3.2779700115340256, "grad_norm": 0.0, "learning_rate": 1.468003263216113e-05, "loss": 0.1518, "step": 1421 }, { "epoch": 3.2802768166089966, "grad_norm": 0.0, "learning_rate": 1.4672686282730622e-05, "loss": 0.3924, "step": 1422 }, { "epoch": 3.2825836216839677, "grad_norm": 0.0, "learning_rate": 1.4665336705677533e-05, "loss": 0.3525, "step": 1423 }, { "epoch": 3.2848904267589387, "grad_norm": 0.0, "learning_rate": 1.4657983906078533e-05, "loss": 0.1596, "step": 1424 }, { "epoch": 3.28719723183391, "grad_norm": 0.0, "learning_rate": 1.4650627889012507e-05, "loss": 0.2008, "step": 1425 }, { "epoch": 3.2895040369088813, "grad_norm": 0.0, "learning_rate": 1.4643268659560571e-05, "loss": 0.2452, "step": 1426 }, { "epoch": 3.2918108419838523, "grad_norm": 0.0, "learning_rate": 1.4635906222806058e-05, "loss": 0.3666, "step": 1427 }, { "epoch": 3.2941176470588234, "grad_norm": 0.0, "learning_rate": 1.4628540583834511e-05, "loss": 0.3948, "step": 1428 }, { "epoch": 3.296424452133795, "grad_norm": 0.0, "learning_rate": 1.4621171747733698e-05, "loss": 0.1876, "step": 1429 }, { "epoch": 3.298731257208766, "grad_norm": 0.0, "learning_rate": 1.4613799719593577e-05, "loss": 0.222, "step": 1430 }, { "epoch": 3.301038062283737, "grad_norm": 0.0, "learning_rate": 1.4606424504506325e-05, "loss": 0.2961, "step": 1431 }, { "epoch": 3.303344867358708, "grad_norm": 0.0, "learning_rate": 1.4599046107566314e-05, "loss": 0.2402, "step": 1432 }, { "epoch": 3.3056516724336795, "grad_norm": 0.0, "learning_rate": 1.4591664533870118e-05, "loss": 0.4048, "step": 1433 }, { "epoch": 3.3079584775086506, "grad_norm": 0.0, "learning_rate": 1.45842797885165e-05, "loss": 0.3493, "step": 1434 }, { "epoch": 3.3102652825836216, "grad_norm": 0.0, "learning_rate": 1.4576891876606421e-05, "loss": 0.2753, "step": 1435 }, { "epoch": 3.3125720876585927, "grad_norm": 0.0, "learning_rate": 1.4569500803243021e-05, "loss": 0.1955, "step": 1436 }, { "epoch": 3.314878892733564, "grad_norm": 0.0, "learning_rate": 1.4562106573531632e-05, "loss": 0.3036, "step": 1437 }, { "epoch": 3.3171856978085352, "grad_norm": 0.0, "learning_rate": 1.4554709192579757e-05, "loss": 0.3426, "step": 1438 }, { "epoch": 3.3194925028835063, "grad_norm": 0.0, "learning_rate": 1.4547308665497082e-05, "loss": 0.2458, "step": 1439 }, { "epoch": 3.3217993079584773, "grad_norm": 0.0, "learning_rate": 1.4539904997395468e-05, "loss": 0.2772, "step": 1440 }, { "epoch": 3.324106113033449, "grad_norm": 0.0, "learning_rate": 1.4532498193388941e-05, "loss": 0.3181, "step": 1441 }, { "epoch": 3.32641291810842, "grad_norm": 0.0, "learning_rate": 1.4525088258593695e-05, "loss": 0.3993, "step": 1442 }, { "epoch": 3.328719723183391, "grad_norm": 0.0, "learning_rate": 1.4517675198128086e-05, "loss": 0.3119, "step": 1443 }, { "epoch": 3.331026528258362, "grad_norm": 0.0, "learning_rate": 1.4510259017112624e-05, "loss": 0.4084, "step": 1444 }, { "epoch": 3.3333333333333335, "grad_norm": 0.0, "learning_rate": 1.4502839720669988e-05, "loss": 0.2698, "step": 1445 }, { "epoch": 3.3356401384083045, "grad_norm": 0.0, "learning_rate": 1.4495417313924996e-05, "loss": 0.2571, "step": 1446 }, { "epoch": 3.3379469434832756, "grad_norm": 0.0, "learning_rate": 1.4487991802004625e-05, "loss": 0.3403, "step": 1447 }, { "epoch": 3.3402537485582466, "grad_norm": 0.0, "learning_rate": 1.4480563190037981e-05, "loss": 0.2974, "step": 1448 }, { "epoch": 3.342560553633218, "grad_norm": 0.0, "learning_rate": 1.4473131483156326e-05, "loss": 0.3071, "step": 1449 }, { "epoch": 3.344867358708189, "grad_norm": 0.0, "learning_rate": 1.446569668649306e-05, "loss": 0.3214, "step": 1450 }, { "epoch": 3.3471741637831602, "grad_norm": 0.0, "learning_rate": 1.4458258805183704e-05, "loss": 0.2755, "step": 1451 }, { "epoch": 3.3494809688581313, "grad_norm": 0.0, "learning_rate": 1.4450817844365924e-05, "loss": 0.1851, "step": 1452 }, { "epoch": 3.351787773933103, "grad_norm": 0.0, "learning_rate": 1.4443373809179508e-05, "loss": 0.3576, "step": 1453 }, { "epoch": 3.354094579008074, "grad_norm": 0.0, "learning_rate": 1.4435926704766364e-05, "loss": 0.2239, "step": 1454 }, { "epoch": 3.356401384083045, "grad_norm": 0.0, "learning_rate": 1.4428476536270517e-05, "loss": 0.2896, "step": 1455 }, { "epoch": 3.3587081891580164, "grad_norm": 0.0, "learning_rate": 1.4421023308838124e-05, "loss": 0.2987, "step": 1456 }, { "epoch": 3.3610149942329874, "grad_norm": 0.0, "learning_rate": 1.4413567027617442e-05, "loss": 0.2778, "step": 1457 }, { "epoch": 3.3633217993079585, "grad_norm": 0.0, "learning_rate": 1.4406107697758838e-05, "loss": 0.2483, "step": 1458 }, { "epoch": 3.3656286043829295, "grad_norm": 0.0, "learning_rate": 1.4398645324414792e-05, "loss": 0.3586, "step": 1459 }, { "epoch": 3.3679354094579006, "grad_norm": 0.0, "learning_rate": 1.4391179912739881e-05, "loss": 0.353, "step": 1460 }, { "epoch": 3.370242214532872, "grad_norm": 0.0, "learning_rate": 1.4383711467890776e-05, "loss": 0.3174, "step": 1461 }, { "epoch": 3.372549019607843, "grad_norm": 0.0, "learning_rate": 1.4376239995026254e-05, "loss": 0.2725, "step": 1462 }, { "epoch": 3.374855824682814, "grad_norm": 0.0, "learning_rate": 1.4368765499307177e-05, "loss": 0.3365, "step": 1463 }, { "epoch": 3.3771626297577857, "grad_norm": 0.0, "learning_rate": 1.4361287985896495e-05, "loss": 0.2724, "step": 1464 }, { "epoch": 3.3794694348327567, "grad_norm": 0.0, "learning_rate": 1.4353807459959243e-05, "loss": 0.3259, "step": 1465 }, { "epoch": 3.381776239907728, "grad_norm": 0.0, "learning_rate": 1.4346323926662541e-05, "loss": 0.3477, "step": 1466 }, { "epoch": 3.384083044982699, "grad_norm": 0.0, "learning_rate": 1.4338837391175582e-05, "loss": 0.3697, "step": 1467 }, { "epoch": 3.38638985005767, "grad_norm": 0.0, "learning_rate": 1.4331347858669631e-05, "loss": 0.3652, "step": 1468 }, { "epoch": 3.3886966551326414, "grad_norm": 0.0, "learning_rate": 1.4323855334318026e-05, "loss": 0.3108, "step": 1469 }, { "epoch": 3.3910034602076125, "grad_norm": 0.0, "learning_rate": 1.4316359823296174e-05, "loss": 0.2951, "step": 1470 }, { "epoch": 3.3933102652825835, "grad_norm": 0.0, "learning_rate": 1.430886133078154e-05, "loss": 0.2915, "step": 1471 }, { "epoch": 3.395617070357555, "grad_norm": 0.0, "learning_rate": 1.4301359861953652e-05, "loss": 0.3814, "step": 1472 }, { "epoch": 3.397923875432526, "grad_norm": 0.0, "learning_rate": 1.4293855421994094e-05, "loss": 0.3606, "step": 1473 }, { "epoch": 3.400230680507497, "grad_norm": 0.0, "learning_rate": 1.4286348016086496e-05, "loss": 0.2738, "step": 1474 }, { "epoch": 3.402537485582468, "grad_norm": 0.0, "learning_rate": 1.4278837649416543e-05, "loss": 0.2622, "step": 1475 }, { "epoch": 3.404844290657439, "grad_norm": 0.0, "learning_rate": 1.4271324327171969e-05, "loss": 0.2941, "step": 1476 }, { "epoch": 3.4071510957324107, "grad_norm": 0.0, "learning_rate": 1.4263808054542541e-05, "loss": 0.3735, "step": 1477 }, { "epoch": 3.4094579008073818, "grad_norm": 0.0, "learning_rate": 1.4256288836720065e-05, "loss": 0.3645, "step": 1478 }, { "epoch": 3.411764705882353, "grad_norm": 0.0, "learning_rate": 1.4248766678898386e-05, "loss": 0.3484, "step": 1479 }, { "epoch": 3.4140715109573243, "grad_norm": 0.0, "learning_rate": 1.4241241586273377e-05, "loss": 0.2413, "step": 1480 }, { "epoch": 3.4163783160322954, "grad_norm": 0.0, "learning_rate": 1.4233713564042937e-05, "loss": 0.1764, "step": 1481 }, { "epoch": 3.4186851211072664, "grad_norm": 0.0, "learning_rate": 1.4226182617406996e-05, "loss": 0.2618, "step": 1482 }, { "epoch": 3.4209919261822375, "grad_norm": 0.0, "learning_rate": 1.4218648751567492e-05, "loss": 0.1661, "step": 1483 }, { "epoch": 3.423298731257209, "grad_norm": 0.0, "learning_rate": 1.4211111971728388e-05, "loss": 0.3404, "step": 1484 }, { "epoch": 3.42560553633218, "grad_norm": 0.0, "learning_rate": 1.4203572283095657e-05, "loss": 0.2877, "step": 1485 }, { "epoch": 3.427912341407151, "grad_norm": 0.0, "learning_rate": 1.419602969087728e-05, "loss": 0.2449, "step": 1486 }, { "epoch": 3.430219146482122, "grad_norm": 0.0, "learning_rate": 1.418848420028325e-05, "loss": 0.391, "step": 1487 }, { "epoch": 3.4325259515570936, "grad_norm": 0.0, "learning_rate": 1.4180935816525554e-05, "loss": 0.2508, "step": 1488 }, { "epoch": 3.4348327566320647, "grad_norm": 0.0, "learning_rate": 1.417338454481818e-05, "loss": 0.3421, "step": 1489 }, { "epoch": 3.4371395617070357, "grad_norm": 0.0, "learning_rate": 1.4165830390377115e-05, "loss": 0.2835, "step": 1490 }, { "epoch": 3.4394463667820068, "grad_norm": 0.0, "learning_rate": 1.415827335842033e-05, "loss": 0.3754, "step": 1491 }, { "epoch": 3.4417531718569783, "grad_norm": 0.0, "learning_rate": 1.4150713454167788e-05, "loss": 0.4026, "step": 1492 }, { "epoch": 3.4440599769319493, "grad_norm": 0.0, "learning_rate": 1.414315068284144e-05, "loss": 0.3019, "step": 1493 }, { "epoch": 3.4463667820069204, "grad_norm": 0.0, "learning_rate": 1.4135585049665207e-05, "loss": 0.1687, "step": 1494 }, { "epoch": 3.4486735870818914, "grad_norm": 0.0, "learning_rate": 1.4128016559864998e-05, "loss": 0.2277, "step": 1495 }, { "epoch": 3.450980392156863, "grad_norm": 0.0, "learning_rate": 1.4120445218668687e-05, "loss": 0.2716, "step": 1496 }, { "epoch": 3.453287197231834, "grad_norm": 0.0, "learning_rate": 1.4112871031306118e-05, "loss": 0.3664, "step": 1497 }, { "epoch": 3.455594002306805, "grad_norm": 0.0, "learning_rate": 1.4105294003009107e-05, "loss": 0.2151, "step": 1498 }, { "epoch": 3.457900807381776, "grad_norm": 0.0, "learning_rate": 1.4097714139011428e-05, "loss": 0.3097, "step": 1499 }, { "epoch": 3.4602076124567476, "grad_norm": 0.0, "learning_rate": 1.4090131444548814e-05, "loss": 0.3317, "step": 1500 }, { "epoch": 3.4625144175317186, "grad_norm": 0.0, "learning_rate": 1.4082545924858955e-05, "loss": 0.2498, "step": 1501 }, { "epoch": 3.4648212226066897, "grad_norm": 0.0, "learning_rate": 1.4074957585181488e-05, "loss": 0.3038, "step": 1502 }, { "epoch": 3.4671280276816607, "grad_norm": 0.0, "learning_rate": 1.4067366430758004e-05, "loss": 0.358, "step": 1503 }, { "epoch": 3.4694348327566322, "grad_norm": 0.0, "learning_rate": 1.4059772466832033e-05, "loss": 0.3243, "step": 1504 }, { "epoch": 3.4717416378316033, "grad_norm": 0.0, "learning_rate": 1.4052175698649054e-05, "loss": 0.2212, "step": 1505 }, { "epoch": 3.4740484429065743, "grad_norm": 0.0, "learning_rate": 1.4044576131456466e-05, "loss": 0.2785, "step": 1506 }, { "epoch": 3.4763552479815454, "grad_norm": 0.0, "learning_rate": 1.4036973770503623e-05, "loss": 0.4104, "step": 1507 }, { "epoch": 3.478662053056517, "grad_norm": 0.0, "learning_rate": 1.4029368621041795e-05, "loss": 0.5832, "step": 1508 }, { "epoch": 3.480968858131488, "grad_norm": 0.0, "learning_rate": 1.4021760688324175e-05, "loss": 0.3468, "step": 1509 }, { "epoch": 3.483275663206459, "grad_norm": 0.0, "learning_rate": 1.4014149977605893e-05, "loss": 0.3597, "step": 1510 }, { "epoch": 3.48558246828143, "grad_norm": 0.0, "learning_rate": 1.4006536494143987e-05, "loss": 0.263, "step": 1511 }, { "epoch": 3.4878892733564015, "grad_norm": 0.0, "learning_rate": 1.3998920243197408e-05, "loss": 0.2407, "step": 1512 }, { "epoch": 3.4901960784313726, "grad_norm": 0.0, "learning_rate": 1.3991301230027032e-05, "loss": 0.3208, "step": 1513 }, { "epoch": 3.4925028835063436, "grad_norm": 0.0, "learning_rate": 1.3983679459895635e-05, "loss": 0.3936, "step": 1514 }, { "epoch": 3.4948096885813147, "grad_norm": 0.0, "learning_rate": 1.3976054938067885e-05, "loss": 0.3356, "step": 1515 }, { "epoch": 3.497116493656286, "grad_norm": 0.0, "learning_rate": 1.3968427669810372e-05, "loss": 0.4391, "step": 1516 }, { "epoch": 3.4994232987312572, "grad_norm": 0.0, "learning_rate": 1.396079766039157e-05, "loss": 0.3209, "step": 1517 }, { "epoch": 3.5017301038062283, "grad_norm": 0.0, "learning_rate": 1.3953164915081852e-05, "loss": 0.1835, "step": 1518 }, { "epoch": 3.5040369088812, "grad_norm": 0.0, "learning_rate": 1.3945529439153478e-05, "loss": 0.393, "step": 1519 }, { "epoch": 3.506343713956171, "grad_norm": 0.0, "learning_rate": 1.3937891237880599e-05, "loss": 0.2571, "step": 1520 }, { "epoch": 3.508650519031142, "grad_norm": 0.0, "learning_rate": 1.3930250316539237e-05, "loss": 0.3312, "step": 1521 }, { "epoch": 3.510957324106113, "grad_norm": 0.0, "learning_rate": 1.3922606680407307e-05, "loss": 0.2452, "step": 1522 }, { "epoch": 3.513264129181084, "grad_norm": 0.0, "learning_rate": 1.3914960334764589e-05, "loss": 0.2028, "step": 1523 }, { "epoch": 3.5155709342560555, "grad_norm": 0.0, "learning_rate": 1.3907311284892737e-05, "loss": 0.4266, "step": 1524 }, { "epoch": 3.5178777393310265, "grad_norm": 0.0, "learning_rate": 1.389965953607528e-05, "loss": 0.2556, "step": 1525 }, { "epoch": 3.5201845444059976, "grad_norm": 0.0, "learning_rate": 1.38920050935976e-05, "loss": 0.3679, "step": 1526 }, { "epoch": 3.522491349480969, "grad_norm": 0.0, "learning_rate": 1.3884347962746949e-05, "loss": 0.3175, "step": 1527 }, { "epoch": 3.52479815455594, "grad_norm": 0.0, "learning_rate": 1.3876688148812428e-05, "loss": 0.4343, "step": 1528 }, { "epoch": 3.527104959630911, "grad_norm": 0.0, "learning_rate": 1.3869025657084996e-05, "loss": 0.3324, "step": 1529 }, { "epoch": 3.5294117647058822, "grad_norm": 0.0, "learning_rate": 1.3861360492857464e-05, "loss": 0.3387, "step": 1530 }, { "epoch": 3.5317185697808533, "grad_norm": 0.0, "learning_rate": 1.3853692661424485e-05, "loss": 0.3322, "step": 1531 }, { "epoch": 3.534025374855825, "grad_norm": 0.0, "learning_rate": 1.3846022168082553e-05, "loss": 0.3218, "step": 1532 }, { "epoch": 3.536332179930796, "grad_norm": 0.0, "learning_rate": 1.3838349018130007e-05, "loss": 0.3545, "step": 1533 }, { "epoch": 3.538638985005767, "grad_norm": 0.0, "learning_rate": 1.383067321686701e-05, "loss": 0.396, "step": 1534 }, { "epoch": 3.5409457900807384, "grad_norm": 0.0, "learning_rate": 1.382299476959557e-05, "loss": 0.3169, "step": 1535 }, { "epoch": 3.5432525951557095, "grad_norm": 0.0, "learning_rate": 1.3815313681619515e-05, "loss": 0.4068, "step": 1536 }, { "epoch": 3.5455594002306805, "grad_norm": 0.0, "learning_rate": 1.3807629958244498e-05, "loss": 0.3084, "step": 1537 }, { "epoch": 3.5478662053056516, "grad_norm": 0.0, "learning_rate": 1.3799943604777993e-05, "loss": 0.3517, "step": 1538 }, { "epoch": 3.5501730103806226, "grad_norm": 0.0, "learning_rate": 1.3792254626529286e-05, "loss": 0.2759, "step": 1539 }, { "epoch": 3.552479815455594, "grad_norm": 0.0, "learning_rate": 1.3784563028809485e-05, "loss": 0.2451, "step": 1540 }, { "epoch": 3.554786620530565, "grad_norm": 0.0, "learning_rate": 1.3776868816931501e-05, "loss": 0.3406, "step": 1541 }, { "epoch": 3.557093425605536, "grad_norm": 0.0, "learning_rate": 1.3769171996210053e-05, "loss": 0.296, "step": 1542 }, { "epoch": 3.5594002306805077, "grad_norm": 0.0, "learning_rate": 1.3761472571961664e-05, "loss": 0.3114, "step": 1543 }, { "epoch": 3.5617070357554788, "grad_norm": 0.0, "learning_rate": 1.375377054950465e-05, "loss": 0.2946, "step": 1544 }, { "epoch": 3.56401384083045, "grad_norm": 0.0, "learning_rate": 1.3746065934159123e-05, "loss": 0.2878, "step": 1545 }, { "epoch": 3.566320645905421, "grad_norm": 0.0, "learning_rate": 1.3738358731246988e-05, "loss": 0.3163, "step": 1546 }, { "epoch": 3.568627450980392, "grad_norm": 0.0, "learning_rate": 1.373064894609194e-05, "loss": 0.3226, "step": 1547 }, { "epoch": 3.5709342560553634, "grad_norm": 0.0, "learning_rate": 1.3722936584019453e-05, "loss": 0.2587, "step": 1548 }, { "epoch": 3.5732410611303345, "grad_norm": 0.0, "learning_rate": 1.371522165035678e-05, "loss": 0.2326, "step": 1549 }, { "epoch": 3.5755478662053055, "grad_norm": 0.0, "learning_rate": 1.370750415043296e-05, "loss": 0.2866, "step": 1550 }, { "epoch": 3.577854671280277, "grad_norm": 0.0, "learning_rate": 1.3699784089578791e-05, "loss": 0.3296, "step": 1551 }, { "epoch": 3.580161476355248, "grad_norm": 0.0, "learning_rate": 1.3692061473126845e-05, "loss": 0.2021, "step": 1552 }, { "epoch": 3.582468281430219, "grad_norm": 0.0, "learning_rate": 1.3684336306411467e-05, "loss": 0.1763, "step": 1553 }, { "epoch": 3.58477508650519, "grad_norm": 0.0, "learning_rate": 1.3676608594768754e-05, "loss": 0.3425, "step": 1554 }, { "epoch": 3.587081891580161, "grad_norm": 0.0, "learning_rate": 1.3668878343536562e-05, "loss": 0.2775, "step": 1555 }, { "epoch": 3.5893886966551327, "grad_norm": 0.0, "learning_rate": 1.366114555805451e-05, "loss": 0.2184, "step": 1556 }, { "epoch": 3.5916955017301038, "grad_norm": 0.0, "learning_rate": 1.3653410243663953e-05, "loss": 0.3801, "step": 1557 }, { "epoch": 3.594002306805075, "grad_norm": 0.0, "learning_rate": 1.3645672405708003e-05, "loss": 0.3479, "step": 1558 }, { "epoch": 3.5963091118800463, "grad_norm": 0.0, "learning_rate": 1.3637932049531517e-05, "loss": 0.332, "step": 1559 }, { "epoch": 3.5986159169550174, "grad_norm": 0.0, "learning_rate": 1.3630189180481083e-05, "loss": 0.3743, "step": 1560 }, { "epoch": 3.6009227220299884, "grad_norm": 0.0, "learning_rate": 1.3622443803905028e-05, "loss": 0.2975, "step": 1561 }, { "epoch": 3.6032295271049595, "grad_norm": 0.0, "learning_rate": 1.361469592515342e-05, "loss": 0.4326, "step": 1562 }, { "epoch": 3.6055363321799305, "grad_norm": 0.0, "learning_rate": 1.3606945549578039e-05, "loss": 0.2289, "step": 1563 }, { "epoch": 3.607843137254902, "grad_norm": 0.0, "learning_rate": 1.3599192682532398e-05, "loss": 0.2092, "step": 1564 }, { "epoch": 3.610149942329873, "grad_norm": 0.0, "learning_rate": 1.3591437329371738e-05, "loss": 0.2271, "step": 1565 }, { "epoch": 3.612456747404844, "grad_norm": 0.0, "learning_rate": 1.3583679495453e-05, "loss": 0.2342, "step": 1566 }, { "epoch": 3.6147635524798156, "grad_norm": 0.0, "learning_rate": 1.3575919186134862e-05, "loss": 0.2539, "step": 1567 }, { "epoch": 3.6170703575547867, "grad_norm": 0.0, "learning_rate": 1.3568156406777693e-05, "loss": 0.4775, "step": 1568 }, { "epoch": 3.6193771626297577, "grad_norm": 0.0, "learning_rate": 1.356039116274357e-05, "loss": 0.348, "step": 1569 }, { "epoch": 3.621683967704729, "grad_norm": 0.0, "learning_rate": 1.3552623459396279e-05, "loss": 0.3697, "step": 1570 }, { "epoch": 3.6239907727797003, "grad_norm": 0.0, "learning_rate": 1.3544853302101302e-05, "loss": 0.3499, "step": 1571 }, { "epoch": 3.6262975778546713, "grad_norm": 0.0, "learning_rate": 1.3537080696225815e-05, "loss": 0.3591, "step": 1572 }, { "epoch": 3.6286043829296424, "grad_norm": 0.0, "learning_rate": 1.3529305647138689e-05, "loss": 0.3687, "step": 1573 }, { "epoch": 3.630911188004614, "grad_norm": 0.0, "learning_rate": 1.3521528160210479e-05, "loss": 0.3569, "step": 1574 }, { "epoch": 3.633217993079585, "grad_norm": 0.0, "learning_rate": 1.3513748240813429e-05, "loss": 0.1778, "step": 1575 }, { "epoch": 3.635524798154556, "grad_norm": 0.0, "learning_rate": 1.3505965894321453e-05, "loss": 0.5023, "step": 1576 }, { "epoch": 3.637831603229527, "grad_norm": 0.0, "learning_rate": 1.349818112611015e-05, "loss": 0.3319, "step": 1577 }, { "epoch": 3.640138408304498, "grad_norm": 0.0, "learning_rate": 1.3490393941556787e-05, "loss": 0.3686, "step": 1578 }, { "epoch": 3.6424452133794696, "grad_norm": 0.0, "learning_rate": 1.348260434604031e-05, "loss": 0.3395, "step": 1579 }, { "epoch": 3.6447520184544406, "grad_norm": 0.0, "learning_rate": 1.3474812344941315e-05, "loss": 0.2645, "step": 1580 }, { "epoch": 3.6470588235294117, "grad_norm": 0.0, "learning_rate": 1.3467017943642074e-05, "loss": 0.2106, "step": 1581 }, { "epoch": 3.649365628604383, "grad_norm": 0.0, "learning_rate": 1.3459221147526504e-05, "loss": 0.3926, "step": 1582 }, { "epoch": 3.6516724336793542, "grad_norm": 0.0, "learning_rate": 1.3451421961980189e-05, "loss": 0.3171, "step": 1583 }, { "epoch": 3.6539792387543253, "grad_norm": 0.0, "learning_rate": 1.3443620392390352e-05, "loss": 0.3794, "step": 1584 }, { "epoch": 3.6562860438292963, "grad_norm": 0.0, "learning_rate": 1.3435816444145871e-05, "loss": 0.3959, "step": 1585 }, { "epoch": 3.6585928489042674, "grad_norm": 0.0, "learning_rate": 1.3428010122637265e-05, "loss": 0.293, "step": 1586 }, { "epoch": 3.660899653979239, "grad_norm": 0.0, "learning_rate": 1.342020143325669e-05, "loss": 0.2619, "step": 1587 }, { "epoch": 3.66320645905421, "grad_norm": 0.0, "learning_rate": 1.3412390381397938e-05, "loss": 0.2685, "step": 1588 }, { "epoch": 3.665513264129181, "grad_norm": 0.0, "learning_rate": 1.340457697245643e-05, "loss": 0.2823, "step": 1589 }, { "epoch": 3.6678200692041525, "grad_norm": 0.0, "learning_rate": 1.3396761211829229e-05, "loss": 0.2209, "step": 1590 }, { "epoch": 3.6701268742791235, "grad_norm": 0.0, "learning_rate": 1.3388943104915004e-05, "loss": 0.2299, "step": 1591 }, { "epoch": 3.6724336793540946, "grad_norm": 0.0, "learning_rate": 1.3381122657114059e-05, "loss": 0.3839, "step": 1592 }, { "epoch": 3.6747404844290656, "grad_norm": 0.0, "learning_rate": 1.3373299873828303e-05, "loss": 0.2482, "step": 1593 }, { "epoch": 3.6770472895040367, "grad_norm": 0.0, "learning_rate": 1.3365474760461265e-05, "loss": 0.285, "step": 1594 }, { "epoch": 3.679354094579008, "grad_norm": 0.0, "learning_rate": 1.3357647322418086e-05, "loss": 0.323, "step": 1595 }, { "epoch": 3.6816608996539792, "grad_norm": 0.0, "learning_rate": 1.3349817565105507e-05, "loss": 0.3931, "step": 1596 }, { "epoch": 3.6839677047289503, "grad_norm": 0.0, "learning_rate": 1.3341985493931877e-05, "loss": 0.3993, "step": 1597 }, { "epoch": 3.686274509803922, "grad_norm": 0.0, "learning_rate": 1.3334151114307136e-05, "loss": 0.3926, "step": 1598 }, { "epoch": 3.688581314878893, "grad_norm": 0.0, "learning_rate": 1.3326314431642821e-05, "loss": 0.1268, "step": 1599 }, { "epoch": 3.690888119953864, "grad_norm": 0.0, "learning_rate": 1.3318475451352066e-05, "loss": 0.334, "step": 1600 }, { "epoch": 3.693194925028835, "grad_norm": 0.0, "learning_rate": 1.3310634178849583e-05, "loss": 0.3, "step": 1601 }, { "epoch": 3.695501730103806, "grad_norm": 0.0, "learning_rate": 1.3302790619551673e-05, "loss": 0.1854, "step": 1602 }, { "epoch": 3.6978085351787775, "grad_norm": 0.0, "learning_rate": 1.3294944778876215e-05, "loss": 0.2424, "step": 1603 }, { "epoch": 3.7001153402537486, "grad_norm": 0.0, "learning_rate": 1.3287096662242665e-05, "loss": 0.2985, "step": 1604 }, { "epoch": 3.7024221453287196, "grad_norm": 0.0, "learning_rate": 1.3279246275072046e-05, "loss": 0.2453, "step": 1605 }, { "epoch": 3.704728950403691, "grad_norm": 0.0, "learning_rate": 1.3271393622786957e-05, "loss": 0.3666, "step": 1606 }, { "epoch": 3.707035755478662, "grad_norm": 0.0, "learning_rate": 1.3263538710811559e-05, "loss": 0.3422, "step": 1607 }, { "epoch": 3.709342560553633, "grad_norm": 0.0, "learning_rate": 1.3255681544571568e-05, "loss": 0.2995, "step": 1608 }, { "epoch": 3.7116493656286043, "grad_norm": 0.0, "learning_rate": 1.3247822129494265e-05, "loss": 0.2882, "step": 1609 }, { "epoch": 3.7139561707035753, "grad_norm": 0.0, "learning_rate": 1.3239960471008484e-05, "loss": 0.3378, "step": 1610 }, { "epoch": 3.716262975778547, "grad_norm": 0.0, "learning_rate": 1.3232096574544602e-05, "loss": 0.261, "step": 1611 }, { "epoch": 3.718569780853518, "grad_norm": 0.0, "learning_rate": 1.3224230445534544e-05, "loss": 0.243, "step": 1612 }, { "epoch": 3.720876585928489, "grad_norm": 0.0, "learning_rate": 1.3216362089411785e-05, "loss": 0.2325, "step": 1613 }, { "epoch": 3.7231833910034604, "grad_norm": 0.0, "learning_rate": 1.320849151161133e-05, "loss": 0.2039, "step": 1614 }, { "epoch": 3.7254901960784315, "grad_norm": 0.0, "learning_rate": 1.3200618717569716e-05, "loss": 0.2406, "step": 1615 }, { "epoch": 3.7277970011534025, "grad_norm": 0.0, "learning_rate": 1.3192743712725022e-05, "loss": 0.319, "step": 1616 }, { "epoch": 3.7301038062283736, "grad_norm": 0.0, "learning_rate": 1.3184866502516846e-05, "loss": 0.283, "step": 1617 }, { "epoch": 3.7324106113033446, "grad_norm": 0.0, "learning_rate": 1.317698709238631e-05, "loss": 0.2611, "step": 1618 }, { "epoch": 3.734717416378316, "grad_norm": 0.0, "learning_rate": 1.3169105487776057e-05, "loss": 0.1818, "step": 1619 }, { "epoch": 3.737024221453287, "grad_norm": 0.0, "learning_rate": 1.3161221694130247e-05, "loss": 0.1769, "step": 1620 }, { "epoch": 3.739331026528258, "grad_norm": 0.0, "learning_rate": 1.3153335716894543e-05, "loss": 0.2327, "step": 1621 }, { "epoch": 3.7416378316032297, "grad_norm": 0.0, "learning_rate": 1.3145447561516138e-05, "loss": 0.2544, "step": 1622 }, { "epoch": 3.7439446366782008, "grad_norm": 0.0, "learning_rate": 1.3137557233443707e-05, "loss": 0.3829, "step": 1623 }, { "epoch": 3.746251441753172, "grad_norm": 0.0, "learning_rate": 1.3129664738127431e-05, "loss": 0.3288, "step": 1624 }, { "epoch": 3.748558246828143, "grad_norm": 0.0, "learning_rate": 1.3121770081018998e-05, "loss": 0.1589, "step": 1625 }, { "epoch": 3.750865051903114, "grad_norm": 0.0, "learning_rate": 1.3113873267571577e-05, "loss": 0.2206, "step": 1626 }, { "epoch": 3.7531718569780854, "grad_norm": 0.0, "learning_rate": 1.3105974303239838e-05, "loss": 0.433, "step": 1627 }, { "epoch": 3.7554786620530565, "grad_norm": 0.0, "learning_rate": 1.3098073193479929e-05, "loss": 0.3526, "step": 1628 }, { "epoch": 3.7577854671280275, "grad_norm": 0.0, "learning_rate": 1.3090169943749475e-05, "loss": 0.204, "step": 1629 }, { "epoch": 3.760092272202999, "grad_norm": 0.0, "learning_rate": 1.3082264559507593e-05, "loss": 0.2303, "step": 1630 }, { "epoch": 3.76239907727797, "grad_norm": 0.0, "learning_rate": 1.3074357046214865e-05, "loss": 0.3435, "step": 1631 }, { "epoch": 3.764705882352941, "grad_norm": 0.0, "learning_rate": 1.3066447409333345e-05, "loss": 0.2011, "step": 1632 }, { "epoch": 3.767012687427912, "grad_norm": 0.0, "learning_rate": 1.3058535654326554e-05, "loss": 0.4013, "step": 1633 }, { "epoch": 3.7693194925028837, "grad_norm": 0.0, "learning_rate": 1.305062178665948e-05, "loss": 0.2058, "step": 1634 }, { "epoch": 3.7716262975778547, "grad_norm": 0.0, "learning_rate": 1.3042705811798565e-05, "loss": 0.3435, "step": 1635 }, { "epoch": 3.7739331026528258, "grad_norm": 0.0, "learning_rate": 1.3034787735211708e-05, "loss": 0.4149, "step": 1636 }, { "epoch": 3.7762399077277973, "grad_norm": 0.0, "learning_rate": 1.3026867562368262e-05, "loss": 0.162, "step": 1637 }, { "epoch": 3.7785467128027683, "grad_norm": 0.0, "learning_rate": 1.3018945298739022e-05, "loss": 0.2817, "step": 1638 }, { "epoch": 3.7808535178777394, "grad_norm": 0.0, "learning_rate": 1.3011020949796236e-05, "loss": 0.4222, "step": 1639 }, { "epoch": 3.7831603229527104, "grad_norm": 0.0, "learning_rate": 1.3003094521013586e-05, "loss": 0.3925, "step": 1640 }, { "epoch": 3.7854671280276815, "grad_norm": 0.0, "learning_rate": 1.2995166017866194e-05, "loss": 0.3864, "step": 1641 }, { "epoch": 3.787773933102653, "grad_norm": 0.0, "learning_rate": 1.2987235445830612e-05, "loss": 0.5532, "step": 1642 }, { "epoch": 3.790080738177624, "grad_norm": 0.0, "learning_rate": 1.297930281038482e-05, "loss": 0.3028, "step": 1643 }, { "epoch": 3.792387543252595, "grad_norm": 0.0, "learning_rate": 1.2971368117008232e-05, "loss": 0.2385, "step": 1644 }, { "epoch": 3.7946943483275666, "grad_norm": 0.0, "learning_rate": 1.2963431371181672e-05, "loss": 0.3265, "step": 1645 }, { "epoch": 3.7970011534025376, "grad_norm": 0.0, "learning_rate": 1.295549257838739e-05, "loss": 0.3159, "step": 1646 }, { "epoch": 3.7993079584775087, "grad_norm": 0.0, "learning_rate": 1.2947551744109044e-05, "loss": 0.2964, "step": 1647 }, { "epoch": 3.8016147635524797, "grad_norm": 0.0, "learning_rate": 1.2939608873831708e-05, "loss": 0.253, "step": 1648 }, { "epoch": 3.803921568627451, "grad_norm": 0.0, "learning_rate": 1.2931663973041855e-05, "loss": 0.2996, "step": 1649 }, { "epoch": 3.8062283737024223, "grad_norm": 0.0, "learning_rate": 1.2923717047227368e-05, "loss": 0.323, "step": 1650 }, { "epoch": 3.8085351787773933, "grad_norm": 0.0, "learning_rate": 1.2915768101877526e-05, "loss": 0.3638, "step": 1651 }, { "epoch": 3.8108419838523644, "grad_norm": 0.0, "learning_rate": 1.2907817142483002e-05, "loss": 0.2885, "step": 1652 }, { "epoch": 3.813148788927336, "grad_norm": 0.0, "learning_rate": 1.2899864174535863e-05, "loss": 0.2691, "step": 1653 }, { "epoch": 3.815455594002307, "grad_norm": 0.0, "learning_rate": 1.2891909203529558e-05, "loss": 0.2833, "step": 1654 }, { "epoch": 3.817762399077278, "grad_norm": 0.0, "learning_rate": 1.2883952234958921e-05, "loss": 0.3069, "step": 1655 }, { "epoch": 3.820069204152249, "grad_norm": 0.0, "learning_rate": 1.2875993274320173e-05, "loss": 0.4663, "step": 1656 }, { "epoch": 3.82237600922722, "grad_norm": 0.0, "learning_rate": 1.2868032327110904e-05, "loss": 0.2144, "step": 1657 }, { "epoch": 3.8246828143021916, "grad_norm": 0.0, "learning_rate": 1.2860069398830075e-05, "loss": 0.3179, "step": 1658 }, { "epoch": 3.8269896193771626, "grad_norm": 0.0, "learning_rate": 1.2852104494978024e-05, "loss": 0.2847, "step": 1659 }, { "epoch": 3.8292964244521337, "grad_norm": 0.0, "learning_rate": 1.284413762105644e-05, "loss": 0.2572, "step": 1660 }, { "epoch": 3.831603229527105, "grad_norm": 0.0, "learning_rate": 1.2836168782568385e-05, "loss": 0.4057, "step": 1661 }, { "epoch": 3.8339100346020762, "grad_norm": 0.0, "learning_rate": 1.2828197985018276e-05, "loss": 0.3066, "step": 1662 }, { "epoch": 3.8362168396770473, "grad_norm": 0.0, "learning_rate": 1.2820225233911877e-05, "loss": 0.1876, "step": 1663 }, { "epoch": 3.8385236447520183, "grad_norm": 0.0, "learning_rate": 1.2812250534756307e-05, "loss": 0.3396, "step": 1664 }, { "epoch": 3.8408304498269894, "grad_norm": 0.0, "learning_rate": 1.2804273893060028e-05, "loss": 0.3979, "step": 1665 }, { "epoch": 3.843137254901961, "grad_norm": 0.0, "learning_rate": 1.2796295314332847e-05, "loss": 0.273, "step": 1666 }, { "epoch": 3.845444059976932, "grad_norm": 0.0, "learning_rate": 1.2788314804085904e-05, "loss": 0.1965, "step": 1667 }, { "epoch": 3.847750865051903, "grad_norm": 0.0, "learning_rate": 1.2780332367831678e-05, "loss": 0.2138, "step": 1668 }, { "epoch": 3.8500576701268745, "grad_norm": 0.0, "learning_rate": 1.2772348011083973e-05, "loss": 0.3155, "step": 1669 }, { "epoch": 3.8523644752018456, "grad_norm": 0.0, "learning_rate": 1.2764361739357925e-05, "loss": 0.251, "step": 1670 }, { "epoch": 3.8546712802768166, "grad_norm": 0.0, "learning_rate": 1.2756373558169992e-05, "loss": 0.3779, "step": 1671 }, { "epoch": 3.8569780853517877, "grad_norm": 0.0, "learning_rate": 1.2748383473037948e-05, "loss": 0.3273, "step": 1672 }, { "epoch": 3.8592848904267587, "grad_norm": 0.0, "learning_rate": 1.2740391489480885e-05, "loss": 0.3446, "step": 1673 }, { "epoch": 3.86159169550173, "grad_norm": 0.0, "learning_rate": 1.2732397613019203e-05, "loss": 0.3339, "step": 1674 }, { "epoch": 3.8638985005767013, "grad_norm": 0.0, "learning_rate": 1.272440184917461e-05, "loss": 0.401, "step": 1675 }, { "epoch": 3.8662053056516723, "grad_norm": 0.0, "learning_rate": 1.2716404203470121e-05, "loss": 0.1701, "step": 1676 }, { "epoch": 3.868512110726644, "grad_norm": 0.0, "learning_rate": 1.2708404681430054e-05, "loss": 0.3131, "step": 1677 }, { "epoch": 3.870818915801615, "grad_norm": 0.0, "learning_rate": 1.270040328858001e-05, "loss": 0.3271, "step": 1678 }, { "epoch": 3.873125720876586, "grad_norm": 0.0, "learning_rate": 1.2692400030446895e-05, "loss": 0.2178, "step": 1679 }, { "epoch": 3.875432525951557, "grad_norm": 0.0, "learning_rate": 1.2684394912558898e-05, "loss": 0.1925, "step": 1680 }, { "epoch": 3.877739331026528, "grad_norm": 0.0, "learning_rate": 1.267638794044549e-05, "loss": 0.3196, "step": 1681 }, { "epoch": 3.8800461361014995, "grad_norm": 0.0, "learning_rate": 1.266837911963743e-05, "loss": 0.2587, "step": 1682 }, { "epoch": 3.8823529411764706, "grad_norm": 0.0, "learning_rate": 1.2660368455666752e-05, "loss": 0.2855, "step": 1683 }, { "epoch": 3.8846597462514416, "grad_norm": 0.0, "learning_rate": 1.265235595406676e-05, "loss": 0.3071, "step": 1684 }, { "epoch": 3.886966551326413, "grad_norm": 0.0, "learning_rate": 1.2644341620372025e-05, "loss": 0.1994, "step": 1685 }, { "epoch": 3.889273356401384, "grad_norm": 0.0, "learning_rate": 1.2636325460118388e-05, "loss": 0.4437, "step": 1686 }, { "epoch": 3.891580161476355, "grad_norm": 0.0, "learning_rate": 1.2628307478842955e-05, "loss": 0.2628, "step": 1687 }, { "epoch": 3.8938869665513263, "grad_norm": 0.0, "learning_rate": 1.2620287682084082e-05, "loss": 0.4151, "step": 1688 }, { "epoch": 3.8961937716262973, "grad_norm": 0.0, "learning_rate": 1.2612266075381385e-05, "loss": 0.3714, "step": 1689 }, { "epoch": 3.898500576701269, "grad_norm": 0.0, "learning_rate": 1.2604242664275728e-05, "loss": 0.2252, "step": 1690 }, { "epoch": 3.90080738177624, "grad_norm": 0.0, "learning_rate": 1.2596217454309216e-05, "loss": 0.2019, "step": 1691 }, { "epoch": 3.903114186851211, "grad_norm": 0.0, "learning_rate": 1.2588190451025209e-05, "loss": 0.2288, "step": 1692 }, { "epoch": 3.9054209919261824, "grad_norm": 0.0, "learning_rate": 1.2580161659968294e-05, "loss": 0.3035, "step": 1693 }, { "epoch": 3.9077277970011535, "grad_norm": 0.0, "learning_rate": 1.25721310866843e-05, "loss": 0.2037, "step": 1694 }, { "epoch": 3.9100346020761245, "grad_norm": 0.0, "learning_rate": 1.2564098736720286e-05, "loss": 0.2536, "step": 1695 }, { "epoch": 3.9123414071510956, "grad_norm": 0.0, "learning_rate": 1.2556064615624532e-05, "loss": 0.347, "step": 1696 }, { "epoch": 3.9146482122260666, "grad_norm": 0.0, "learning_rate": 1.2548028728946548e-05, "loss": 0.1558, "step": 1697 }, { "epoch": 3.916955017301038, "grad_norm": 0.0, "learning_rate": 1.2539991082237062e-05, "loss": 0.28, "step": 1698 }, { "epoch": 3.919261822376009, "grad_norm": 0.0, "learning_rate": 1.253195168104802e-05, "loss": 0.336, "step": 1699 }, { "epoch": 3.9215686274509802, "grad_norm": 0.0, "learning_rate": 1.2523910530932572e-05, "loss": 0.3436, "step": 1700 }, { "epoch": 3.9238754325259517, "grad_norm": 0.0, "learning_rate": 1.2515867637445088e-05, "loss": 0.2987, "step": 1701 }, { "epoch": 3.9261822376009228, "grad_norm": 0.0, "learning_rate": 1.2507823006141128e-05, "loss": 0.4079, "step": 1702 }, { "epoch": 3.928489042675894, "grad_norm": 0.0, "learning_rate": 1.2499776642577465e-05, "loss": 0.2848, "step": 1703 }, { "epoch": 3.930795847750865, "grad_norm": 0.0, "learning_rate": 1.2491728552312066e-05, "loss": 0.2593, "step": 1704 }, { "epoch": 3.9331026528258364, "grad_norm": 0.0, "learning_rate": 1.2483678740904081e-05, "loss": 0.3382, "step": 1705 }, { "epoch": 3.9354094579008074, "grad_norm": 0.0, "learning_rate": 1.2475627213913861e-05, "loss": 0.2571, "step": 1706 }, { "epoch": 3.9377162629757785, "grad_norm": 0.0, "learning_rate": 1.2467573976902936e-05, "loss": 0.2838, "step": 1707 }, { "epoch": 3.94002306805075, "grad_norm": 0.0, "learning_rate": 1.2459519035434023e-05, "loss": 0.2974, "step": 1708 }, { "epoch": 3.942329873125721, "grad_norm": 0.0, "learning_rate": 1.2451462395071002e-05, "loss": 0.2798, "step": 1709 }, { "epoch": 3.944636678200692, "grad_norm": 0.0, "learning_rate": 1.2443404061378941e-05, "loss": 0.3008, "step": 1710 }, { "epoch": 3.946943483275663, "grad_norm": 0.0, "learning_rate": 1.2435344039924076e-05, "loss": 0.5102, "step": 1711 }, { "epoch": 3.949250288350634, "grad_norm": 0.0, "learning_rate": 1.24272823362738e-05, "loss": 0.3114, "step": 1712 }, { "epoch": 3.9515570934256057, "grad_norm": 0.0, "learning_rate": 1.2419218955996677e-05, "loss": 0.3737, "step": 1713 }, { "epoch": 3.9538638985005767, "grad_norm": 0.0, "learning_rate": 1.241115390466243e-05, "loss": 0.3145, "step": 1714 }, { "epoch": 3.956170703575548, "grad_norm": 0.0, "learning_rate": 1.240308718784192e-05, "loss": 0.2322, "step": 1715 }, { "epoch": 3.9584775086505193, "grad_norm": 0.0, "learning_rate": 1.239501881110718e-05, "loss": 0.3182, "step": 1716 }, { "epoch": 3.9607843137254903, "grad_norm": 0.0, "learning_rate": 1.238694878003138e-05, "loss": 0.3454, "step": 1717 }, { "epoch": 3.9630911188004614, "grad_norm": 0.0, "learning_rate": 1.2378877100188827e-05, "loss": 0.2624, "step": 1718 }, { "epoch": 3.9653979238754324, "grad_norm": 0.0, "learning_rate": 1.2370803777154976e-05, "loss": 0.2159, "step": 1719 }, { "epoch": 3.9677047289504035, "grad_norm": 0.0, "learning_rate": 1.2362728816506418e-05, "loss": 0.308, "step": 1720 }, { "epoch": 3.970011534025375, "grad_norm": 0.0, "learning_rate": 1.2354652223820858e-05, "loss": 0.2822, "step": 1721 }, { "epoch": 3.972318339100346, "grad_norm": 0.0, "learning_rate": 1.2346574004677154e-05, "loss": 0.3138, "step": 1722 }, { "epoch": 3.974625144175317, "grad_norm": 0.0, "learning_rate": 1.2338494164655267e-05, "loss": 0.367, "step": 1723 }, { "epoch": 3.9769319492502886, "grad_norm": 0.0, "learning_rate": 1.233041270933629e-05, "loss": 0.2529, "step": 1724 }, { "epoch": 3.9792387543252596, "grad_norm": 0.0, "learning_rate": 1.2322329644302426e-05, "loss": 0.2852, "step": 1725 }, { "epoch": 3.9815455594002307, "grad_norm": 0.0, "learning_rate": 1.2314244975136989e-05, "loss": 0.2602, "step": 1726 }, { "epoch": 3.9838523644752017, "grad_norm": 0.0, "learning_rate": 1.2306158707424402e-05, "loss": 0.3142, "step": 1727 }, { "epoch": 3.986159169550173, "grad_norm": 0.0, "learning_rate": 1.2298070846750197e-05, "loss": 0.2278, "step": 1728 }, { "epoch": 3.9884659746251443, "grad_norm": 0.0, "learning_rate": 1.2289981398700996e-05, "loss": 0.4661, "step": 1729 }, { "epoch": 3.9907727797001153, "grad_norm": 0.0, "learning_rate": 1.228189036886453e-05, "loss": 0.2752, "step": 1730 }, { "epoch": 3.9930795847750864, "grad_norm": 0.0, "learning_rate": 1.2273797762829615e-05, "loss": 0.2883, "step": 1731 }, { "epoch": 3.995386389850058, "grad_norm": 0.0, "learning_rate": 1.2265703586186158e-05, "loss": 0.2496, "step": 1732 }, { "epoch": 3.997693194925029, "grad_norm": 0.0, "learning_rate": 1.2257607844525145e-05, "loss": 0.2548, "step": 1733 }, { "epoch": 4.0, "grad_norm": 0.0, "learning_rate": 1.2249510543438652e-05, "loss": 0.425, "step": 1734 }, { "epoch": 4.002306805074971, "grad_norm": 0.0, "learning_rate": 1.2241411688519826e-05, "loss": 0.1496, "step": 1735 }, { "epoch": 4.004613610149942, "grad_norm": 0.0, "learning_rate": 1.2233311285362895e-05, "loss": 0.186, "step": 1736 }, { "epoch": 4.006920415224913, "grad_norm": 0.0, "learning_rate": 1.2225209339563144e-05, "loss": 0.2156, "step": 1737 }, { "epoch": 4.009227220299885, "grad_norm": 0.0, "learning_rate": 1.2217105856716938e-05, "loss": 0.3354, "step": 1738 }, { "epoch": 4.011534025374856, "grad_norm": 0.0, "learning_rate": 1.220900084242169e-05, "loss": 0.1923, "step": 1739 }, { "epoch": 4.013840830449827, "grad_norm": 0.0, "learning_rate": 1.2200894302275878e-05, "loss": 0.2135, "step": 1740 }, { "epoch": 4.016147635524798, "grad_norm": 0.0, "learning_rate": 1.2192786241879033e-05, "loss": 0.1802, "step": 1741 }, { "epoch": 4.018454440599769, "grad_norm": 0.0, "learning_rate": 1.2184676666831741e-05, "loss": 0.1866, "step": 1742 }, { "epoch": 4.02076124567474, "grad_norm": 0.0, "learning_rate": 1.2176565582735624e-05, "loss": 0.1554, "step": 1743 }, { "epoch": 4.023068050749711, "grad_norm": 0.0, "learning_rate": 1.2168452995193354e-05, "loss": 0.2091, "step": 1744 }, { "epoch": 4.0253748558246825, "grad_norm": 0.0, "learning_rate": 1.216033890980864e-05, "loss": 0.2339, "step": 1745 }, { "epoch": 4.027681660899654, "grad_norm": 0.0, "learning_rate": 1.2152223332186222e-05, "loss": 0.1835, "step": 1746 }, { "epoch": 4.0299884659746255, "grad_norm": 0.0, "learning_rate": 1.2144106267931877e-05, "loss": 0.1862, "step": 1747 }, { "epoch": 4.0322952710495965, "grad_norm": 0.0, "learning_rate": 1.2135987722652403e-05, "loss": 0.1711, "step": 1748 }, { "epoch": 4.034602076124568, "grad_norm": 0.0, "learning_rate": 1.2127867701955622e-05, "loss": 0.153, "step": 1749 }, { "epoch": 4.036908881199539, "grad_norm": 0.0, "learning_rate": 1.2119746211450382e-05, "loss": 0.1654, "step": 1750 }, { "epoch": 4.03921568627451, "grad_norm": 0.0, "learning_rate": 1.2111623256746539e-05, "loss": 0.2226, "step": 1751 }, { "epoch": 4.041522491349481, "grad_norm": 0.0, "learning_rate": 1.210349884345496e-05, "loss": 0.25, "step": 1752 }, { "epoch": 4.043829296424452, "grad_norm": 0.0, "learning_rate": 1.2095372977187521e-05, "loss": 0.1054, "step": 1753 }, { "epoch": 4.046136101499424, "grad_norm": 0.0, "learning_rate": 1.2087245663557108e-05, "loss": 0.1807, "step": 1754 }, { "epoch": 4.048442906574395, "grad_norm": 0.0, "learning_rate": 1.2079116908177592e-05, "loss": 0.1275, "step": 1755 }, { "epoch": 4.050749711649366, "grad_norm": 0.0, "learning_rate": 1.2070986716663864e-05, "loss": 0.1872, "step": 1756 }, { "epoch": 4.053056516724337, "grad_norm": 0.0, "learning_rate": 1.2062855094631777e-05, "loss": 0.1397, "step": 1757 }, { "epoch": 4.055363321799308, "grad_norm": 0.0, "learning_rate": 1.2054722047698192e-05, "loss": 0.1035, "step": 1758 }, { "epoch": 4.057670126874279, "grad_norm": 0.0, "learning_rate": 1.2046587581480953e-05, "loss": 0.1749, "step": 1759 }, { "epoch": 4.05997693194925, "grad_norm": 0.0, "learning_rate": 1.2038451701598879e-05, "loss": 0.1698, "step": 1760 }, { "epoch": 4.062283737024221, "grad_norm": 0.0, "learning_rate": 1.2030314413671763e-05, "loss": 0.1502, "step": 1761 }, { "epoch": 4.064590542099193, "grad_norm": 0.0, "learning_rate": 1.2022175723320382e-05, "loss": 0.2349, "step": 1762 }, { "epoch": 4.066897347174164, "grad_norm": 0.0, "learning_rate": 1.2014035636166468e-05, "loss": 0.1458, "step": 1763 }, { "epoch": 4.069204152249135, "grad_norm": 0.0, "learning_rate": 1.200589415783273e-05, "loss": 0.2033, "step": 1764 }, { "epoch": 4.071510957324106, "grad_norm": 0.0, "learning_rate": 1.1997751293942828e-05, "loss": 0.1418, "step": 1765 }, { "epoch": 4.073817762399077, "grad_norm": 0.0, "learning_rate": 1.1989607050121383e-05, "loss": 0.2387, "step": 1766 }, { "epoch": 4.076124567474048, "grad_norm": 0.0, "learning_rate": 1.1981461431993978e-05, "loss": 0.1648, "step": 1767 }, { "epoch": 4.078431372549019, "grad_norm": 0.0, "learning_rate": 1.1973314445187125e-05, "loss": 0.1348, "step": 1768 }, { "epoch": 4.08073817762399, "grad_norm": 0.0, "learning_rate": 1.1965166095328302e-05, "loss": 0.2018, "step": 1769 }, { "epoch": 4.083044982698962, "grad_norm": 0.0, "learning_rate": 1.1957016388045917e-05, "loss": 0.2664, "step": 1770 }, { "epoch": 4.085351787773933, "grad_norm": 0.0, "learning_rate": 1.1948865328969317e-05, "loss": 0.2344, "step": 1771 }, { "epoch": 4.087658592848904, "grad_norm": 0.0, "learning_rate": 1.1940712923728784e-05, "loss": 0.1984, "step": 1772 }, { "epoch": 4.0899653979238755, "grad_norm": 0.0, "learning_rate": 1.1932559177955533e-05, "loss": 0.1934, "step": 1773 }, { "epoch": 4.0922722029988465, "grad_norm": 0.0, "learning_rate": 1.1924404097281702e-05, "loss": 0.1533, "step": 1774 }, { "epoch": 4.094579008073818, "grad_norm": 0.0, "learning_rate": 1.1916247687340348e-05, "loss": 0.1125, "step": 1775 }, { "epoch": 4.096885813148789, "grad_norm": 0.0, "learning_rate": 1.190808995376545e-05, "loss": 0.247, "step": 1776 }, { "epoch": 4.09919261822376, "grad_norm": 0.0, "learning_rate": 1.1899930902191904e-05, "loss": 0.3058, "step": 1777 }, { "epoch": 4.101499423298732, "grad_norm": 0.0, "learning_rate": 1.1891770538255506e-05, "loss": 0.1869, "step": 1778 }, { "epoch": 4.103806228373703, "grad_norm": 0.0, "learning_rate": 1.188360886759297e-05, "loss": 0.2139, "step": 1779 }, { "epoch": 4.106113033448674, "grad_norm": 0.0, "learning_rate": 1.1875445895841911e-05, "loss": 0.2488, "step": 1780 }, { "epoch": 4.108419838523645, "grad_norm": 0.0, "learning_rate": 1.1867281628640833e-05, "loss": 0.1644, "step": 1781 }, { "epoch": 4.110726643598616, "grad_norm": 0.0, "learning_rate": 1.1859116071629148e-05, "loss": 0.2598, "step": 1782 }, { "epoch": 4.113033448673587, "grad_norm": 0.0, "learning_rate": 1.1850949230447146e-05, "loss": 0.106, "step": 1783 }, { "epoch": 4.115340253748558, "grad_norm": 0.0, "learning_rate": 1.1842781110736016e-05, "loss": 0.1477, "step": 1784 }, { "epoch": 4.117647058823529, "grad_norm": 0.0, "learning_rate": 1.1834611718137825e-05, "loss": 0.2039, "step": 1785 }, { "epoch": 4.119953863898501, "grad_norm": 0.0, "learning_rate": 1.1826441058295514e-05, "loss": 0.1712, "step": 1786 }, { "epoch": 4.122260668973472, "grad_norm": 0.0, "learning_rate": 1.181826913685291e-05, "loss": 0.1901, "step": 1787 }, { "epoch": 4.124567474048443, "grad_norm": 0.0, "learning_rate": 1.18100959594547e-05, "loss": 0.1155, "step": 1788 }, { "epoch": 4.126874279123414, "grad_norm": 0.0, "learning_rate": 1.1801921531746446e-05, "loss": 0.2571, "step": 1789 }, { "epoch": 4.129181084198385, "grad_norm": 0.0, "learning_rate": 1.1793745859374575e-05, "loss": 0.2007, "step": 1790 }, { "epoch": 4.131487889273356, "grad_norm": 0.0, "learning_rate": 1.1785568947986368e-05, "loss": 0.1472, "step": 1791 }, { "epoch": 4.133794694348327, "grad_norm": 0.0, "learning_rate": 1.1777390803229964e-05, "loss": 0.2193, "step": 1792 }, { "epoch": 4.136101499423299, "grad_norm": 0.0, "learning_rate": 1.1769211430754357e-05, "loss": 0.1127, "step": 1793 }, { "epoch": 4.13840830449827, "grad_norm": 0.0, "learning_rate": 1.1761030836209384e-05, "loss": 0.357, "step": 1794 }, { "epoch": 4.140715109573241, "grad_norm": 0.0, "learning_rate": 1.1752849025245727e-05, "loss": 0.1703, "step": 1795 }, { "epoch": 4.143021914648212, "grad_norm": 0.0, "learning_rate": 1.1744666003514916e-05, "loss": 0.2676, "step": 1796 }, { "epoch": 4.145328719723183, "grad_norm": 0.0, "learning_rate": 1.1736481776669307e-05, "loss": 0.1177, "step": 1797 }, { "epoch": 4.1476355247981544, "grad_norm": 0.0, "learning_rate": 1.172829635036209e-05, "loss": 0.2378, "step": 1798 }, { "epoch": 4.1499423298731255, "grad_norm": 0.0, "learning_rate": 1.1720109730247291e-05, "loss": 0.1675, "step": 1799 }, { "epoch": 4.1522491349480966, "grad_norm": 0.0, "learning_rate": 1.1711921921979754e-05, "loss": 0.1615, "step": 1800 }, { "epoch": 4.154555940023068, "grad_norm": 0.0, "learning_rate": 1.1703732931215141e-05, "loss": 0.2301, "step": 1801 }, { "epoch": 4.1568627450980395, "grad_norm": 0.0, "learning_rate": 1.1695542763609944e-05, "loss": 0.1164, "step": 1802 }, { "epoch": 4.159169550173011, "grad_norm": 0.0, "learning_rate": 1.1687351424821448e-05, "loss": 0.2795, "step": 1803 }, { "epoch": 4.161476355247982, "grad_norm": 0.0, "learning_rate": 1.1679158920507773e-05, "loss": 0.2225, "step": 1804 }, { "epoch": 4.163783160322953, "grad_norm": 0.0, "learning_rate": 1.1670965256327818e-05, "loss": 0.1073, "step": 1805 }, { "epoch": 4.166089965397924, "grad_norm": 0.0, "learning_rate": 1.1662770437941293e-05, "loss": 0.1782, "step": 1806 }, { "epoch": 4.168396770472895, "grad_norm": 0.0, "learning_rate": 1.1654574471008712e-05, "loss": 0.2662, "step": 1807 }, { "epoch": 4.170703575547866, "grad_norm": 0.0, "learning_rate": 1.1646377361191379e-05, "loss": 0.2599, "step": 1808 }, { "epoch": 4.173010380622838, "grad_norm": 0.0, "learning_rate": 1.1638179114151378e-05, "loss": 0.199, "step": 1809 }, { "epoch": 4.175317185697809, "grad_norm": 0.0, "learning_rate": 1.1629979735551592e-05, "loss": 0.2143, "step": 1810 }, { "epoch": 4.17762399077278, "grad_norm": 0.0, "learning_rate": 1.1621779231055677e-05, "loss": 0.1818, "step": 1811 }, { "epoch": 4.179930795847751, "grad_norm": 0.0, "learning_rate": 1.1613577606328068e-05, "loss": 0.1285, "step": 1812 }, { "epoch": 4.182237600922722, "grad_norm": 0.0, "learning_rate": 1.1605374867033978e-05, "loss": 0.1335, "step": 1813 }, { "epoch": 4.184544405997693, "grad_norm": 0.0, "learning_rate": 1.1597171018839384e-05, "loss": 0.1636, "step": 1814 }, { "epoch": 4.186851211072664, "grad_norm": 0.0, "learning_rate": 1.1588966067411033e-05, "loss": 0.1519, "step": 1815 }, { "epoch": 4.189158016147635, "grad_norm": 0.0, "learning_rate": 1.1580760018416434e-05, "loss": 0.1565, "step": 1816 }, { "epoch": 4.191464821222607, "grad_norm": 0.0, "learning_rate": 1.1572552877523855e-05, "loss": 0.1605, "step": 1817 }, { "epoch": 4.193771626297578, "grad_norm": 0.0, "learning_rate": 1.156434465040231e-05, "loss": 0.1823, "step": 1818 }, { "epoch": 4.196078431372549, "grad_norm": 0.0, "learning_rate": 1.1556135342721575e-05, "loss": 0.1897, "step": 1819 }, { "epoch": 4.19838523644752, "grad_norm": 0.0, "learning_rate": 1.1547924960152162e-05, "loss": 0.1674, "step": 1820 }, { "epoch": 4.200692041522491, "grad_norm": 0.0, "learning_rate": 1.1539713508365336e-05, "loss": 0.1819, "step": 1821 }, { "epoch": 4.202998846597462, "grad_norm": 0.0, "learning_rate": 1.1531500993033094e-05, "loss": 0.2221, "step": 1822 }, { "epoch": 4.205305651672433, "grad_norm": 0.0, "learning_rate": 1.1523287419828165e-05, "loss": 0.2248, "step": 1823 }, { "epoch": 4.2076124567474045, "grad_norm": 0.0, "learning_rate": 1.1515072794424013e-05, "loss": 0.1347, "step": 1824 }, { "epoch": 4.209919261822376, "grad_norm": 0.0, "learning_rate": 1.1506857122494832e-05, "loss": 0.1759, "step": 1825 }, { "epoch": 4.2122260668973475, "grad_norm": 0.0, "learning_rate": 1.1498640409715532e-05, "loss": 0.1484, "step": 1826 }, { "epoch": 4.2145328719723185, "grad_norm": 0.0, "learning_rate": 1.1490422661761744e-05, "loss": 0.235, "step": 1827 }, { "epoch": 4.21683967704729, "grad_norm": 0.0, "learning_rate": 1.148220388430982e-05, "loss": 0.2536, "step": 1828 }, { "epoch": 4.219146482122261, "grad_norm": 0.0, "learning_rate": 1.1473984083036813e-05, "loss": 0.1628, "step": 1829 }, { "epoch": 4.221453287197232, "grad_norm": 0.0, "learning_rate": 1.146576326362049e-05, "loss": 0.2838, "step": 1830 }, { "epoch": 4.223760092272203, "grad_norm": 0.0, "learning_rate": 1.1457541431739321e-05, "loss": 0.1258, "step": 1831 }, { "epoch": 4.226066897347174, "grad_norm": 0.0, "learning_rate": 1.1449318593072468e-05, "loss": 0.2008, "step": 1832 }, { "epoch": 4.228373702422146, "grad_norm": 0.0, "learning_rate": 1.1441094753299802e-05, "loss": 0.1424, "step": 1833 }, { "epoch": 4.230680507497117, "grad_norm": 0.0, "learning_rate": 1.1432869918101877e-05, "loss": 0.2017, "step": 1834 }, { "epoch": 4.232987312572088, "grad_norm": 0.0, "learning_rate": 1.142464409315993e-05, "loss": 0.1637, "step": 1835 }, { "epoch": 4.235294117647059, "grad_norm": 0.0, "learning_rate": 1.1416417284155892e-05, "loss": 0.2572, "step": 1836 }, { "epoch": 4.23760092272203, "grad_norm": 0.0, "learning_rate": 1.1408189496772369e-05, "loss": 0.123, "step": 1837 }, { "epoch": 4.239907727797001, "grad_norm": 0.0, "learning_rate": 1.1399960736692637e-05, "loss": 0.1639, "step": 1838 }, { "epoch": 4.242214532871972, "grad_norm": 0.0, "learning_rate": 1.1391731009600655e-05, "loss": 0.1802, "step": 1839 }, { "epoch": 4.244521337946943, "grad_norm": 0.0, "learning_rate": 1.1383500321181045e-05, "loss": 0.1952, "step": 1840 }, { "epoch": 4.246828143021915, "grad_norm": 0.0, "learning_rate": 1.1375268677119089e-05, "loss": 0.1144, "step": 1841 }, { "epoch": 4.249134948096886, "grad_norm": 0.0, "learning_rate": 1.1367036083100735e-05, "loss": 0.1814, "step": 1842 }, { "epoch": 4.251441753171857, "grad_norm": 0.0, "learning_rate": 1.1358802544812584e-05, "loss": 0.1483, "step": 1843 }, { "epoch": 4.253748558246828, "grad_norm": 0.0, "learning_rate": 1.135056806794189e-05, "loss": 0.171, "step": 1844 }, { "epoch": 4.256055363321799, "grad_norm": 0.0, "learning_rate": 1.1342332658176556e-05, "loss": 0.2132, "step": 1845 }, { "epoch": 4.25836216839677, "grad_norm": 0.0, "learning_rate": 1.1334096321205129e-05, "loss": 0.2036, "step": 1846 }, { "epoch": 4.260668973471741, "grad_norm": 0.0, "learning_rate": 1.1325859062716795e-05, "loss": 0.1242, "step": 1847 }, { "epoch": 4.262975778546712, "grad_norm": 0.0, "learning_rate": 1.1317620888401379e-05, "loss": 0.1525, "step": 1848 }, { "epoch": 4.265282583621684, "grad_norm": 0.0, "learning_rate": 1.1309381803949333e-05, "loss": 0.1445, "step": 1849 }, { "epoch": 4.267589388696655, "grad_norm": 0.0, "learning_rate": 1.1301141815051751e-05, "loss": 0.1457, "step": 1850 }, { "epoch": 4.269896193771626, "grad_norm": 0.0, "learning_rate": 1.1292900927400334e-05, "loss": 0.1906, "step": 1851 }, { "epoch": 4.2722029988465975, "grad_norm": 0.0, "learning_rate": 1.1284659146687416e-05, "loss": 0.1612, "step": 1852 }, { "epoch": 4.2745098039215685, "grad_norm": 0.0, "learning_rate": 1.127641647860595e-05, "loss": 0.162, "step": 1853 }, { "epoch": 4.27681660899654, "grad_norm": 0.0, "learning_rate": 1.1268172928849486e-05, "loss": 0.1714, "step": 1854 }, { "epoch": 4.279123414071511, "grad_norm": 0.0, "learning_rate": 1.1259928503112199e-05, "loss": 0.2213, "step": 1855 }, { "epoch": 4.281430219146483, "grad_norm": 0.0, "learning_rate": 1.1251683207088862e-05, "loss": 0.2243, "step": 1856 }, { "epoch": 4.283737024221454, "grad_norm": 0.0, "learning_rate": 1.1243437046474854e-05, "loss": 0.3105, "step": 1857 }, { "epoch": 4.286043829296425, "grad_norm": 0.0, "learning_rate": 1.1235190026966142e-05, "loss": 0.1343, "step": 1858 }, { "epoch": 4.288350634371396, "grad_norm": 0.0, "learning_rate": 1.1226942154259302e-05, "loss": 0.1594, "step": 1859 }, { "epoch": 4.290657439446367, "grad_norm": 0.0, "learning_rate": 1.1218693434051475e-05, "loss": 0.1459, "step": 1860 }, { "epoch": 4.292964244521338, "grad_norm": 0.0, "learning_rate": 1.1210443872040416e-05, "loss": 0.2067, "step": 1861 }, { "epoch": 4.295271049596309, "grad_norm": 0.0, "learning_rate": 1.120219347392444e-05, "loss": 0.2437, "step": 1862 }, { "epoch": 4.29757785467128, "grad_norm": 0.0, "learning_rate": 1.1193942245402443e-05, "loss": 0.1786, "step": 1863 }, { "epoch": 4.299884659746251, "grad_norm": 0.0, "learning_rate": 1.1185690192173908e-05, "loss": 0.2092, "step": 1864 }, { "epoch": 4.302191464821223, "grad_norm": 0.0, "learning_rate": 1.1177437319938874e-05, "loss": 0.2423, "step": 1865 }, { "epoch": 4.304498269896194, "grad_norm": 0.0, "learning_rate": 1.1169183634397948e-05, "loss": 0.2105, "step": 1866 }, { "epoch": 4.306805074971165, "grad_norm": 0.0, "learning_rate": 1.1160929141252303e-05, "loss": 0.26, "step": 1867 }, { "epoch": 4.309111880046136, "grad_norm": 0.0, "learning_rate": 1.1152673846203668e-05, "loss": 0.2862, "step": 1868 }, { "epoch": 4.311418685121107, "grad_norm": 0.0, "learning_rate": 1.114441775495432e-05, "loss": 0.1387, "step": 1869 }, { "epoch": 4.313725490196078, "grad_norm": 0.0, "learning_rate": 1.1136160873207098e-05, "loss": 0.1655, "step": 1870 }, { "epoch": 4.316032295271049, "grad_norm": 0.0, "learning_rate": 1.1127903206665379e-05, "loss": 0.1731, "step": 1871 }, { "epoch": 4.318339100346021, "grad_norm": 0.0, "learning_rate": 1.1119644761033079e-05, "loss": 0.1818, "step": 1872 }, { "epoch": 4.320645905420992, "grad_norm": 0.0, "learning_rate": 1.1111385542014662e-05, "loss": 0.2117, "step": 1873 }, { "epoch": 4.322952710495963, "grad_norm": 0.0, "learning_rate": 1.110312555531512e-05, "loss": 0.1936, "step": 1874 }, { "epoch": 4.325259515570934, "grad_norm": 0.0, "learning_rate": 1.1094864806639971e-05, "loss": 0.1514, "step": 1875 }, { "epoch": 4.327566320645905, "grad_norm": 0.0, "learning_rate": 1.1086603301695268e-05, "loss": 0.2077, "step": 1876 }, { "epoch": 4.3298731257208765, "grad_norm": 0.0, "learning_rate": 1.1078341046187588e-05, "loss": 0.2069, "step": 1877 }, { "epoch": 4.3321799307958475, "grad_norm": 0.0, "learning_rate": 1.1070078045824014e-05, "loss": 0.2248, "step": 1878 }, { "epoch": 4.334486735870819, "grad_norm": 0.0, "learning_rate": 1.1061814306312153e-05, "loss": 0.1682, "step": 1879 }, { "epoch": 4.33679354094579, "grad_norm": 0.0, "learning_rate": 1.1053549833360117e-05, "loss": 0.1939, "step": 1880 }, { "epoch": 4.339100346020762, "grad_norm": 0.0, "learning_rate": 1.1045284632676535e-05, "loss": 0.134, "step": 1881 }, { "epoch": 4.341407151095733, "grad_norm": 0.0, "learning_rate": 1.1037018709970528e-05, "loss": 0.2189, "step": 1882 }, { "epoch": 4.343713956170704, "grad_norm": 0.0, "learning_rate": 1.102875207095172e-05, "loss": 0.2714, "step": 1883 }, { "epoch": 4.346020761245675, "grad_norm": 0.0, "learning_rate": 1.1020484721330227e-05, "loss": 0.1596, "step": 1884 }, { "epoch": 4.348327566320646, "grad_norm": 0.0, "learning_rate": 1.101221666681666e-05, "loss": 0.2115, "step": 1885 }, { "epoch": 4.350634371395617, "grad_norm": 0.0, "learning_rate": 1.1003947913122112e-05, "loss": 0.2636, "step": 1886 }, { "epoch": 4.352941176470588, "grad_norm": 0.0, "learning_rate": 1.0995678465958168e-05, "loss": 0.2805, "step": 1887 }, { "epoch": 4.35524798154556, "grad_norm": 0.0, "learning_rate": 1.0987408331036879e-05, "loss": 0.1807, "step": 1888 }, { "epoch": 4.357554786620531, "grad_norm": 0.0, "learning_rate": 1.0979137514070783e-05, "loss": 0.1868, "step": 1889 }, { "epoch": 4.359861591695502, "grad_norm": 0.0, "learning_rate": 1.0970866020772884e-05, "loss": 0.1891, "step": 1890 }, { "epoch": 4.362168396770473, "grad_norm": 0.0, "learning_rate": 1.0962593856856649e-05, "loss": 0.1667, "step": 1891 }, { "epoch": 4.364475201845444, "grad_norm": 0.0, "learning_rate": 1.0954321028036013e-05, "loss": 0.1216, "step": 1892 }, { "epoch": 4.366782006920415, "grad_norm": 0.0, "learning_rate": 1.0946047540025373e-05, "loss": 0.0782, "step": 1893 }, { "epoch": 4.369088811995386, "grad_norm": 0.0, "learning_rate": 1.0937773398539578e-05, "loss": 0.1923, "step": 1894 }, { "epoch": 4.371395617070357, "grad_norm": 0.0, "learning_rate": 1.0929498609293925e-05, "loss": 0.0838, "step": 1895 }, { "epoch": 4.373702422145329, "grad_norm": 0.0, "learning_rate": 1.0921223178004163e-05, "loss": 0.1566, "step": 1896 }, { "epoch": 4.3760092272203, "grad_norm": 0.0, "learning_rate": 1.0912947110386484e-05, "loss": 0.1836, "step": 1897 }, { "epoch": 4.378316032295271, "grad_norm": 0.0, "learning_rate": 1.0904670412157522e-05, "loss": 0.2738, "step": 1898 }, { "epoch": 4.380622837370242, "grad_norm": 0.0, "learning_rate": 1.0896393089034336e-05, "loss": 0.1616, "step": 1899 }, { "epoch": 4.382929642445213, "grad_norm": 0.0, "learning_rate": 1.088811514673443e-05, "loss": 0.1479, "step": 1900 }, { "epoch": 4.385236447520184, "grad_norm": 0.0, "learning_rate": 1.0879836590975732e-05, "loss": 0.203, "step": 1901 }, { "epoch": 4.387543252595155, "grad_norm": 0.0, "learning_rate": 1.0871557427476585e-05, "loss": 0.2499, "step": 1902 }, { "epoch": 4.3898500576701265, "grad_norm": 0.0, "learning_rate": 1.0863277661955757e-05, "loss": 0.1802, "step": 1903 }, { "epoch": 4.392156862745098, "grad_norm": 0.0, "learning_rate": 1.0854997300132444e-05, "loss": 0.2076, "step": 1904 }, { "epoch": 4.3944636678200695, "grad_norm": 0.0, "learning_rate": 1.0846716347726233e-05, "loss": 0.2188, "step": 1905 }, { "epoch": 4.3967704728950405, "grad_norm": 0.0, "learning_rate": 1.0838434810457132e-05, "loss": 0.3145, "step": 1906 }, { "epoch": 4.399077277970012, "grad_norm": 0.0, "learning_rate": 1.0830152694045553e-05, "loss": 0.1724, "step": 1907 }, { "epoch": 4.401384083044983, "grad_norm": 0.0, "learning_rate": 1.0821870004212305e-05, "loss": 0.2174, "step": 1908 }, { "epoch": 4.403690888119954, "grad_norm": 0.0, "learning_rate": 1.0813586746678584e-05, "loss": 0.1869, "step": 1909 }, { "epoch": 4.405997693194925, "grad_norm": 0.0, "learning_rate": 1.0805302927165996e-05, "loss": 0.1213, "step": 1910 }, { "epoch": 4.408304498269896, "grad_norm": 0.0, "learning_rate": 1.0797018551396527e-05, "loss": 0.1203, "step": 1911 }, { "epoch": 4.410611303344868, "grad_norm": 0.0, "learning_rate": 1.078873362509254e-05, "loss": 0.1391, "step": 1912 }, { "epoch": 4.412918108419839, "grad_norm": 0.0, "learning_rate": 1.0780448153976792e-05, "loss": 0.1427, "step": 1913 }, { "epoch": 4.41522491349481, "grad_norm": 0.0, "learning_rate": 1.0772162143772407e-05, "loss": 0.1715, "step": 1914 }, { "epoch": 4.417531718569781, "grad_norm": 0.0, "learning_rate": 1.076387560020288e-05, "loss": 0.262, "step": 1915 }, { "epoch": 4.419838523644752, "grad_norm": 0.0, "learning_rate": 1.0755588528992082e-05, "loss": 0.1749, "step": 1916 }, { "epoch": 4.422145328719723, "grad_norm": 0.0, "learning_rate": 1.0747300935864245e-05, "loss": 0.1609, "step": 1917 }, { "epoch": 4.424452133794694, "grad_norm": 0.0, "learning_rate": 1.0739012826543955e-05, "loss": 0.1175, "step": 1918 }, { "epoch": 4.426758938869666, "grad_norm": 0.0, "learning_rate": 1.073072420675617e-05, "loss": 0.2036, "step": 1919 }, { "epoch": 4.429065743944637, "grad_norm": 0.0, "learning_rate": 1.0722435082226186e-05, "loss": 0.1697, "step": 1920 }, { "epoch": 4.431372549019608, "grad_norm": 0.0, "learning_rate": 1.071414545867965e-05, "loss": 0.1323, "step": 1921 }, { "epoch": 4.433679354094579, "grad_norm": 0.0, "learning_rate": 1.0705855341842564e-05, "loss": 0.2009, "step": 1922 }, { "epoch": 4.43598615916955, "grad_norm": 0.0, "learning_rate": 1.0697564737441254e-05, "loss": 0.2231, "step": 1923 }, { "epoch": 4.438292964244521, "grad_norm": 0.0, "learning_rate": 1.0689273651202398e-05, "loss": 0.184, "step": 1924 }, { "epoch": 4.440599769319492, "grad_norm": 0.0, "learning_rate": 1.0680982088853003e-05, "loss": 0.2049, "step": 1925 }, { "epoch": 4.442906574394463, "grad_norm": 0.0, "learning_rate": 1.0672690056120398e-05, "loss": 0.2426, "step": 1926 }, { "epoch": 4.445213379469434, "grad_norm": 0.0, "learning_rate": 1.0664397558732245e-05, "loss": 0.1243, "step": 1927 }, { "epoch": 4.447520184544406, "grad_norm": 0.0, "learning_rate": 1.0656104602416519e-05, "loss": 0.2568, "step": 1928 }, { "epoch": 4.449826989619377, "grad_norm": 0.0, "learning_rate": 1.0647811192901518e-05, "loss": 0.1157, "step": 1929 }, { "epoch": 4.4521337946943484, "grad_norm": 0.0, "learning_rate": 1.0639517335915857e-05, "loss": 0.2157, "step": 1930 }, { "epoch": 4.4544405997693195, "grad_norm": 0.0, "learning_rate": 1.063122303718845e-05, "loss": 0.1577, "step": 1931 }, { "epoch": 4.4567474048442905, "grad_norm": 0.0, "learning_rate": 1.0622928302448523e-05, "loss": 0.1248, "step": 1932 }, { "epoch": 4.459054209919262, "grad_norm": 0.0, "learning_rate": 1.0614633137425599e-05, "loss": 0.236, "step": 1933 }, { "epoch": 4.461361014994233, "grad_norm": 0.0, "learning_rate": 1.06063375478495e-05, "loss": 0.138, "step": 1934 }, { "epoch": 4.463667820069205, "grad_norm": 0.0, "learning_rate": 1.0598041539450344e-05, "loss": 0.1874, "step": 1935 }, { "epoch": 4.465974625144176, "grad_norm": 0.0, "learning_rate": 1.0589745117958533e-05, "loss": 0.15, "step": 1936 }, { "epoch": 4.468281430219147, "grad_norm": 0.0, "learning_rate": 1.0581448289104759e-05, "loss": 0.3162, "step": 1937 }, { "epoch": 4.470588235294118, "grad_norm": 0.0, "learning_rate": 1.0573151058619994e-05, "loss": 0.2448, "step": 1938 }, { "epoch": 4.472895040369089, "grad_norm": 0.0, "learning_rate": 1.0564853432235486e-05, "loss": 0.1195, "step": 1939 }, { "epoch": 4.47520184544406, "grad_norm": 0.0, "learning_rate": 1.0556555415682757e-05, "loss": 0.2061, "step": 1940 }, { "epoch": 4.477508650519031, "grad_norm": 0.0, "learning_rate": 1.0548257014693602e-05, "loss": 0.2662, "step": 1941 }, { "epoch": 4.479815455594002, "grad_norm": 0.0, "learning_rate": 1.0539958235000075e-05, "loss": 0.1564, "step": 1942 }, { "epoch": 4.482122260668973, "grad_norm": 0.0, "learning_rate": 1.0531659082334495e-05, "loss": 0.2876, "step": 1943 }, { "epoch": 4.484429065743945, "grad_norm": 0.0, "learning_rate": 1.0523359562429441e-05, "loss": 0.2131, "step": 1944 }, { "epoch": 4.486735870818916, "grad_norm": 0.0, "learning_rate": 1.051505968101774e-05, "loss": 0.1728, "step": 1945 }, { "epoch": 4.489042675893887, "grad_norm": 0.0, "learning_rate": 1.0506759443832474e-05, "loss": 0.1007, "step": 1946 }, { "epoch": 4.491349480968858, "grad_norm": 0.0, "learning_rate": 1.0498458856606972e-05, "loss": 0.2064, "step": 1947 }, { "epoch": 4.493656286043829, "grad_norm": 0.0, "learning_rate": 1.04901579250748e-05, "loss": 0.2708, "step": 1948 }, { "epoch": 4.4959630911188, "grad_norm": 0.0, "learning_rate": 1.0481856654969758e-05, "loss": 0.2141, "step": 1949 }, { "epoch": 4.498269896193771, "grad_norm": 0.0, "learning_rate": 1.0473555052025893e-05, "loss": 0.1775, "step": 1950 }, { "epoch": 4.500576701268743, "grad_norm": 0.0, "learning_rate": 1.046525312197747e-05, "loss": 0.2152, "step": 1951 }, { "epoch": 4.502883506343714, "grad_norm": 0.0, "learning_rate": 1.0456950870558982e-05, "loss": 0.2013, "step": 1952 }, { "epoch": 4.505190311418685, "grad_norm": 0.0, "learning_rate": 1.044864830350515e-05, "loss": 0.1709, "step": 1953 }, { "epoch": 4.507497116493656, "grad_norm": 0.0, "learning_rate": 1.044034542655091e-05, "loss": 0.2135, "step": 1954 }, { "epoch": 4.509803921568627, "grad_norm": 0.0, "learning_rate": 1.0432042245431406e-05, "loss": 0.1422, "step": 1955 }, { "epoch": 4.5121107266435985, "grad_norm": 0.0, "learning_rate": 1.0423738765882006e-05, "loss": 0.1892, "step": 1956 }, { "epoch": 4.5144175317185695, "grad_norm": 0.0, "learning_rate": 1.0415434993638269e-05, "loss": 0.239, "step": 1957 }, { "epoch": 4.516724336793541, "grad_norm": 0.0, "learning_rate": 1.040713093443596e-05, "loss": 0.2019, "step": 1958 }, { "epoch": 4.519031141868512, "grad_norm": 0.0, "learning_rate": 1.039882659401105e-05, "loss": 0.1672, "step": 1959 }, { "epoch": 4.521337946943484, "grad_norm": 0.0, "learning_rate": 1.0390521978099697e-05, "loss": 0.144, "step": 1960 }, { "epoch": 4.523644752018455, "grad_norm": 0.0, "learning_rate": 1.0382217092438256e-05, "loss": 0.139, "step": 1961 }, { "epoch": 4.525951557093426, "grad_norm": 0.0, "learning_rate": 1.037391194276326e-05, "loss": 0.2045, "step": 1962 }, { "epoch": 4.528258362168397, "grad_norm": 0.0, "learning_rate": 1.0365606534811423e-05, "loss": 0.1251, "step": 1963 }, { "epoch": 4.530565167243368, "grad_norm": 0.0, "learning_rate": 1.0357300874319651e-05, "loss": 0.2713, "step": 1964 }, { "epoch": 4.532871972318339, "grad_norm": 0.0, "learning_rate": 1.0348994967025012e-05, "loss": 0.2667, "step": 1965 }, { "epoch": 4.53517877739331, "grad_norm": 0.0, "learning_rate": 1.0340688818664746e-05, "loss": 0.1185, "step": 1966 }, { "epoch": 4.537485582468282, "grad_norm": 0.0, "learning_rate": 1.0332382434976267e-05, "loss": 0.2311, "step": 1967 }, { "epoch": 4.539792387543253, "grad_norm": 0.0, "learning_rate": 1.0324075821697146e-05, "loss": 0.2394, "step": 1968 }, { "epoch": 4.542099192618224, "grad_norm": 0.0, "learning_rate": 1.031576898456511e-05, "loss": 0.1736, "step": 1969 }, { "epoch": 4.544405997693195, "grad_norm": 0.0, "learning_rate": 1.0307461929318045e-05, "loss": 0.1617, "step": 1970 }, { "epoch": 4.546712802768166, "grad_norm": 0.0, "learning_rate": 1.0299154661693987e-05, "loss": 0.1496, "step": 1971 }, { "epoch": 4.549019607843137, "grad_norm": 0.0, "learning_rate": 1.0290847187431115e-05, "loss": 0.1768, "step": 1972 }, { "epoch": 4.551326412918108, "grad_norm": 0.0, "learning_rate": 1.0282539512267758e-05, "loss": 0.18, "step": 1973 }, { "epoch": 4.553633217993079, "grad_norm": 0.0, "learning_rate": 1.0274231641942378e-05, "loss": 0.1832, "step": 1974 }, { "epoch": 4.555940023068051, "grad_norm": 0.0, "learning_rate": 1.0265923582193574e-05, "loss": 0.1633, "step": 1975 }, { "epoch": 4.558246828143022, "grad_norm": 0.0, "learning_rate": 1.0257615338760073e-05, "loss": 0.247, "step": 1976 }, { "epoch": 4.560553633217993, "grad_norm": 0.0, "learning_rate": 1.0249306917380731e-05, "loss": 0.0854, "step": 1977 }, { "epoch": 4.562860438292964, "grad_norm": 0.0, "learning_rate": 1.024099832379453e-05, "loss": 0.2056, "step": 1978 }, { "epoch": 4.565167243367935, "grad_norm": 0.0, "learning_rate": 1.0232689563740563e-05, "loss": 0.2498, "step": 1979 }, { "epoch": 4.567474048442906, "grad_norm": 0.0, "learning_rate": 1.0224380642958052e-05, "loss": 0.1238, "step": 1980 }, { "epoch": 4.569780853517877, "grad_norm": 0.0, "learning_rate": 1.0216071567186312e-05, "loss": 0.2554, "step": 1981 }, { "epoch": 4.572087658592849, "grad_norm": 0.0, "learning_rate": 1.0207762342164778e-05, "loss": 0.2142, "step": 1982 }, { "epoch": 4.57439446366782, "grad_norm": 0.0, "learning_rate": 1.0199452973632982e-05, "loss": 0.1608, "step": 1983 }, { "epoch": 4.5767012687427915, "grad_norm": 0.0, "learning_rate": 1.0191143467330558e-05, "loss": 0.1009, "step": 1984 }, { "epoch": 4.5790080738177625, "grad_norm": 0.0, "learning_rate": 1.0182833828997238e-05, "loss": 0.0881, "step": 1985 }, { "epoch": 4.581314878892734, "grad_norm": 0.0, "learning_rate": 1.0174524064372837e-05, "loss": 0.3117, "step": 1986 }, { "epoch": 4.583621683967705, "grad_norm": 0.0, "learning_rate": 1.0166214179197265e-05, "loss": 0.1858, "step": 1987 }, { "epoch": 4.585928489042676, "grad_norm": 0.0, "learning_rate": 1.0157904179210507e-05, "loss": 0.1414, "step": 1988 }, { "epoch": 4.588235294117647, "grad_norm": 0.0, "learning_rate": 1.0149594070152638e-05, "loss": 0.2033, "step": 1989 }, { "epoch": 4.590542099192618, "grad_norm": 0.0, "learning_rate": 1.01412838577638e-05, "loss": 0.186, "step": 1990 }, { "epoch": 4.59284890426759, "grad_norm": 0.0, "learning_rate": 1.013297354778421e-05, "loss": 0.212, "step": 1991 }, { "epoch": 4.595155709342561, "grad_norm": 0.0, "learning_rate": 1.0124663145954152e-05, "loss": 0.153, "step": 1992 }, { "epoch": 4.597462514417532, "grad_norm": 0.0, "learning_rate": 1.0116352658013973e-05, "loss": 0.1393, "step": 1993 }, { "epoch": 4.599769319492503, "grad_norm": 0.0, "learning_rate": 1.0108042089704078e-05, "loss": 0.2088, "step": 1994 }, { "epoch": 4.602076124567474, "grad_norm": 0.0, "learning_rate": 1.0099731446764927e-05, "loss": 0.2114, "step": 1995 }, { "epoch": 4.604382929642445, "grad_norm": 0.0, "learning_rate": 1.0091420734937038e-05, "loss": 0.1025, "step": 1996 }, { "epoch": 4.606689734717416, "grad_norm": 0.0, "learning_rate": 1.0083109959960974e-05, "loss": 0.2248, "step": 1997 }, { "epoch": 4.608996539792388, "grad_norm": 0.0, "learning_rate": 1.007479912757733e-05, "loss": 0.2169, "step": 1998 }, { "epoch": 4.611303344867359, "grad_norm": 0.0, "learning_rate": 1.0066488243526761e-05, "loss": 0.1651, "step": 1999 }, { "epoch": 4.61361014994233, "grad_norm": 0.0, "learning_rate": 1.005817731354994e-05, "loss": 0.104, "step": 2000 }, { "epoch": 4.615916955017301, "grad_norm": 0.0, "learning_rate": 1.0049866343387582e-05, "loss": 0.2001, "step": 2001 }, { "epoch": 4.618223760092272, "grad_norm": 0.0, "learning_rate": 1.0041555338780427e-05, "loss": 0.1955, "step": 2002 }, { "epoch": 4.620530565167243, "grad_norm": 0.0, "learning_rate": 1.0033244305469233e-05, "loss": 0.1821, "step": 2003 }, { "epoch": 4.622837370242214, "grad_norm": 0.0, "learning_rate": 1.0024933249194792e-05, "loss": 0.2507, "step": 2004 }, { "epoch": 4.625144175317185, "grad_norm": 0.0, "learning_rate": 1.0016622175697898e-05, "loss": 0.2247, "step": 2005 }, { "epoch": 4.627450980392156, "grad_norm": 0.0, "learning_rate": 1.000831109071936e-05, "loss": 0.2912, "step": 2006 }, { "epoch": 4.629757785467128, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.3337, "step": 2007 }, { "epoch": 4.632064590542099, "grad_norm": 0.0, "learning_rate": 9.99168890928064e-06, "loss": 0.1868, "step": 2008 }, { "epoch": 4.6343713956170705, "grad_norm": 0.0, "learning_rate": 9.983377824302107e-06, "loss": 0.154, "step": 2009 }, { "epoch": 4.6366782006920415, "grad_norm": 0.0, "learning_rate": 9.97506675080521e-06, "loss": 0.2047, "step": 2010 }, { "epoch": 4.638985005767013, "grad_norm": 0.0, "learning_rate": 9.966755694530768e-06, "loss": 0.2307, "step": 2011 }, { "epoch": 4.641291810841984, "grad_norm": 0.0, "learning_rate": 9.958444661219578e-06, "loss": 0.1706, "step": 2012 }, { "epoch": 4.643598615916955, "grad_norm": 0.0, "learning_rate": 9.950133656612421e-06, "loss": 0.1953, "step": 2013 }, { "epoch": 4.645905420991927, "grad_norm": 0.0, "learning_rate": 9.941822686450061e-06, "loss": 0.1385, "step": 2014 }, { "epoch": 4.648212226066898, "grad_norm": 0.0, "learning_rate": 9.933511756473244e-06, "loss": 0.1783, "step": 2015 }, { "epoch": 4.650519031141869, "grad_norm": 0.0, "learning_rate": 9.925200872422671e-06, "loss": 0.2209, "step": 2016 }, { "epoch": 4.65282583621684, "grad_norm": 0.0, "learning_rate": 9.916890040039031e-06, "loss": 0.1452, "step": 2017 }, { "epoch": 4.655132641291811, "grad_norm": 0.0, "learning_rate": 9.908579265062967e-06, "loss": 0.2519, "step": 2018 }, { "epoch": 4.657439446366782, "grad_norm": 0.0, "learning_rate": 9.900268553235077e-06, "loss": 0.2668, "step": 2019 }, { "epoch": 4.659746251441753, "grad_norm": 0.0, "learning_rate": 9.891957910295926e-06, "loss": 0.2084, "step": 2020 }, { "epoch": 4.662053056516724, "grad_norm": 0.0, "learning_rate": 9.883647341986032e-06, "loss": 0.1009, "step": 2021 }, { "epoch": 4.664359861591695, "grad_norm": 0.0, "learning_rate": 9.87533685404585e-06, "loss": 0.1283, "step": 2022 }, { "epoch": 4.666666666666667, "grad_norm": 0.0, "learning_rate": 9.867026452215791e-06, "loss": 0.2079, "step": 2023 }, { "epoch": 4.668973471741638, "grad_norm": 0.0, "learning_rate": 9.858716142236205e-06, "loss": 0.2246, "step": 2024 }, { "epoch": 4.671280276816609, "grad_norm": 0.0, "learning_rate": 9.850405929847367e-06, "loss": 0.2779, "step": 2025 }, { "epoch": 4.67358708189158, "grad_norm": 0.0, "learning_rate": 9.842095820789495e-06, "loss": 0.1413, "step": 2026 }, { "epoch": 4.675893886966551, "grad_norm": 0.0, "learning_rate": 9.833785820802739e-06, "loss": 0.1801, "step": 2027 }, { "epoch": 4.678200692041522, "grad_norm": 0.0, "learning_rate": 9.825475935627165e-06, "loss": 0.2998, "step": 2028 }, { "epoch": 4.680507497116493, "grad_norm": 0.0, "learning_rate": 9.817166171002766e-06, "loss": 0.1455, "step": 2029 }, { "epoch": 4.682814302191465, "grad_norm": 0.0, "learning_rate": 9.808856532669442e-06, "loss": 0.1643, "step": 2030 }, { "epoch": 4.685121107266436, "grad_norm": 0.0, "learning_rate": 9.800547026367022e-06, "loss": 0.2727, "step": 2031 }, { "epoch": 4.687427912341407, "grad_norm": 0.0, "learning_rate": 9.792237657835225e-06, "loss": 0.1746, "step": 2032 }, { "epoch": 4.689734717416378, "grad_norm": 0.0, "learning_rate": 9.783928432813688e-06, "loss": 0.1468, "step": 2033 }, { "epoch": 4.692041522491349, "grad_norm": 0.0, "learning_rate": 9.775619357041952e-06, "loss": 0.1795, "step": 2034 }, { "epoch": 4.6943483275663205, "grad_norm": 0.0, "learning_rate": 9.767310436259438e-06, "loss": 0.1787, "step": 2035 }, { "epoch": 4.6966551326412915, "grad_norm": 0.0, "learning_rate": 9.759001676205472e-06, "loss": 0.1027, "step": 2036 }, { "epoch": 4.698961937716263, "grad_norm": 0.0, "learning_rate": 9.750693082619274e-06, "loss": 0.1319, "step": 2037 }, { "epoch": 4.7012687427912345, "grad_norm": 0.0, "learning_rate": 9.74238466123993e-06, "loss": 0.2192, "step": 2038 }, { "epoch": 4.703575547866206, "grad_norm": 0.0, "learning_rate": 9.734076417806428e-06, "loss": 0.1089, "step": 2039 }, { "epoch": 4.705882352941177, "grad_norm": 0.0, "learning_rate": 9.725768358057625e-06, "loss": 0.2057, "step": 2040 }, { "epoch": 4.708189158016148, "grad_norm": 0.0, "learning_rate": 9.717460487732246e-06, "loss": 0.1373, "step": 2041 }, { "epoch": 4.710495963091119, "grad_norm": 0.0, "learning_rate": 9.709152812568886e-06, "loss": 0.2446, "step": 2042 }, { "epoch": 4.71280276816609, "grad_norm": 0.0, "learning_rate": 9.700845338306018e-06, "loss": 0.1814, "step": 2043 }, { "epoch": 4.715109573241061, "grad_norm": 0.0, "learning_rate": 9.692538070681957e-06, "loss": 0.2648, "step": 2044 }, { "epoch": 4.717416378316033, "grad_norm": 0.0, "learning_rate": 9.684231015434891e-06, "loss": 0.1349, "step": 2045 }, { "epoch": 4.719723183391004, "grad_norm": 0.0, "learning_rate": 9.675924178302857e-06, "loss": 0.2206, "step": 2046 }, { "epoch": 4.722029988465975, "grad_norm": 0.0, "learning_rate": 9.667617565023734e-06, "loss": 0.2443, "step": 2047 }, { "epoch": 4.724336793540946, "grad_norm": 0.0, "learning_rate": 9.659311181335255e-06, "loss": 0.2422, "step": 2048 }, { "epoch": 4.726643598615917, "grad_norm": 0.0, "learning_rate": 9.651005032974994e-06, "loss": 0.1193, "step": 2049 }, { "epoch": 4.728950403690888, "grad_norm": 0.0, "learning_rate": 9.642699125680352e-06, "loss": 0.1496, "step": 2050 }, { "epoch": 4.731257208765859, "grad_norm": 0.0, "learning_rate": 9.634393465188577e-06, "loss": 0.1183, "step": 2051 }, { "epoch": 4.73356401384083, "grad_norm": 0.0, "learning_rate": 9.626088057236745e-06, "loss": 0.2147, "step": 2052 }, { "epoch": 4.735870818915801, "grad_norm": 0.0, "learning_rate": 9.617782907561748e-06, "loss": 0.1133, "step": 2053 }, { "epoch": 4.738177623990773, "grad_norm": 0.0, "learning_rate": 9.609478021900303e-06, "loss": 0.0983, "step": 2054 }, { "epoch": 4.740484429065744, "grad_norm": 0.0, "learning_rate": 9.601173405988955e-06, "loss": 0.1283, "step": 2055 }, { "epoch": 4.742791234140715, "grad_norm": 0.0, "learning_rate": 9.592869065564043e-06, "loss": 0.2352, "step": 2056 }, { "epoch": 4.745098039215686, "grad_norm": 0.0, "learning_rate": 9.584565006361735e-06, "loss": 0.1008, "step": 2057 }, { "epoch": 4.747404844290657, "grad_norm": 0.0, "learning_rate": 9.576261234117998e-06, "loss": 0.1621, "step": 2058 }, { "epoch": 4.749711649365628, "grad_norm": 0.0, "learning_rate": 9.567957754568596e-06, "loss": 0.1246, "step": 2059 }, { "epoch": 4.7520184544405994, "grad_norm": 0.0, "learning_rate": 9.559654573449093e-06, "loss": 0.2057, "step": 2060 }, { "epoch": 4.754325259515571, "grad_norm": 0.0, "learning_rate": 9.551351696494854e-06, "loss": 0.2435, "step": 2061 }, { "epoch": 4.756632064590542, "grad_norm": 0.0, "learning_rate": 9.543049129441021e-06, "loss": 0.2536, "step": 2062 }, { "epoch": 4.7589388696655135, "grad_norm": 0.0, "learning_rate": 9.534746878022533e-06, "loss": 0.1903, "step": 2063 }, { "epoch": 4.7612456747404845, "grad_norm": 0.0, "learning_rate": 9.526444947974112e-06, "loss": 0.2132, "step": 2064 }, { "epoch": 4.763552479815456, "grad_norm": 0.0, "learning_rate": 9.518143345030247e-06, "loss": 0.175, "step": 2065 }, { "epoch": 4.765859284890427, "grad_norm": 0.0, "learning_rate": 9.509842074925204e-06, "loss": 0.1628, "step": 2066 }, { "epoch": 4.768166089965398, "grad_norm": 0.0, "learning_rate": 9.501541143393028e-06, "loss": 0.1632, "step": 2067 }, { "epoch": 4.770472895040369, "grad_norm": 0.0, "learning_rate": 9.493240556167527e-06, "loss": 0.1342, "step": 2068 }, { "epoch": 4.77277970011534, "grad_norm": 0.0, "learning_rate": 9.484940318982261e-06, "loss": 0.1732, "step": 2069 }, { "epoch": 4.775086505190312, "grad_norm": 0.0, "learning_rate": 9.476640437570562e-06, "loss": 0.248, "step": 2070 }, { "epoch": 4.777393310265283, "grad_norm": 0.0, "learning_rate": 9.468340917665508e-06, "loss": 0.1878, "step": 2071 }, { "epoch": 4.779700115340254, "grad_norm": 0.0, "learning_rate": 9.460041764999929e-06, "loss": 0.144, "step": 2072 }, { "epoch": 4.782006920415225, "grad_norm": 0.0, "learning_rate": 9.4517429853064e-06, "loss": 0.1364, "step": 2073 }, { "epoch": 4.784313725490196, "grad_norm": 0.0, "learning_rate": 9.443444584317244e-06, "loss": 0.1836, "step": 2074 }, { "epoch": 4.786620530565167, "grad_norm": 0.0, "learning_rate": 9.435146567764516e-06, "loss": 0.2614, "step": 2075 }, { "epoch": 4.788927335640138, "grad_norm": 0.0, "learning_rate": 9.426848941380007e-06, "loss": 0.1042, "step": 2076 }, { "epoch": 4.79123414071511, "grad_norm": 0.0, "learning_rate": 9.418551710895243e-06, "loss": 0.229, "step": 2077 }, { "epoch": 4.793540945790081, "grad_norm": 0.0, "learning_rate": 9.410254882041469e-06, "loss": 0.1581, "step": 2078 }, { "epoch": 4.795847750865052, "grad_norm": 0.0, "learning_rate": 9.401958460549658e-06, "loss": 0.2721, "step": 2079 }, { "epoch": 4.798154555940023, "grad_norm": 0.0, "learning_rate": 9.393662452150504e-06, "loss": 0.1733, "step": 2080 }, { "epoch": 4.800461361014994, "grad_norm": 0.0, "learning_rate": 9.385366862574405e-06, "loss": 0.098, "step": 2081 }, { "epoch": 4.802768166089965, "grad_norm": 0.0, "learning_rate": 9.377071697551479e-06, "loss": 0.2411, "step": 2082 }, { "epoch": 4.805074971164936, "grad_norm": 0.0, "learning_rate": 9.368776962811552e-06, "loss": 0.3038, "step": 2083 }, { "epoch": 4.807381776239907, "grad_norm": 0.0, "learning_rate": 9.360482664084144e-06, "loss": 0.2478, "step": 2084 }, { "epoch": 4.809688581314878, "grad_norm": 0.0, "learning_rate": 9.352188807098482e-06, "loss": 0.2386, "step": 2085 }, { "epoch": 4.81199538638985, "grad_norm": 0.0, "learning_rate": 9.343895397583486e-06, "loss": 0.1666, "step": 2086 }, { "epoch": 4.814302191464821, "grad_norm": 0.0, "learning_rate": 9.33560244126776e-06, "loss": 0.1843, "step": 2087 }, { "epoch": 4.8166089965397925, "grad_norm": 0.0, "learning_rate": 9.327309943879604e-06, "loss": 0.1863, "step": 2088 }, { "epoch": 4.8189158016147635, "grad_norm": 0.0, "learning_rate": 9.319017911147e-06, "loss": 0.1974, "step": 2089 }, { "epoch": 4.821222606689735, "grad_norm": 0.0, "learning_rate": 9.310726348797603e-06, "loss": 0.2245, "step": 2090 }, { "epoch": 4.823529411764706, "grad_norm": 0.0, "learning_rate": 9.302435262558748e-06, "loss": 0.1434, "step": 2091 }, { "epoch": 4.825836216839677, "grad_norm": 0.0, "learning_rate": 9.294144658157443e-06, "loss": 0.3041, "step": 2092 }, { "epoch": 4.828143021914649, "grad_norm": 0.0, "learning_rate": 9.285854541320352e-06, "loss": 0.2306, "step": 2093 }, { "epoch": 4.83044982698962, "grad_norm": 0.0, "learning_rate": 9.277564917773816e-06, "loss": 0.1256, "step": 2094 }, { "epoch": 4.832756632064591, "grad_norm": 0.0, "learning_rate": 9.269275793243832e-06, "loss": 0.2251, "step": 2095 }, { "epoch": 4.835063437139562, "grad_norm": 0.0, "learning_rate": 9.260987173456047e-06, "loss": 0.1819, "step": 2096 }, { "epoch": 4.837370242214533, "grad_norm": 0.0, "learning_rate": 9.252699064135759e-06, "loss": 0.1368, "step": 2097 }, { "epoch": 4.839677047289504, "grad_norm": 0.0, "learning_rate": 9.244411471007923e-06, "loss": 0.2171, "step": 2098 }, { "epoch": 4.841983852364475, "grad_norm": 0.0, "learning_rate": 9.236124399797122e-06, "loss": 0.2526, "step": 2099 }, { "epoch": 4.844290657439446, "grad_norm": 0.0, "learning_rate": 9.227837856227594e-06, "loss": 0.2098, "step": 2100 }, { "epoch": 4.846597462514418, "grad_norm": 0.0, "learning_rate": 9.219551846023211e-06, "loss": 0.297, "step": 2101 }, { "epoch": 4.848904267589389, "grad_norm": 0.0, "learning_rate": 9.211266374907463e-06, "loss": 0.1249, "step": 2102 }, { "epoch": 4.85121107266436, "grad_norm": 0.0, "learning_rate": 9.202981448603477e-06, "loss": 0.1494, "step": 2103 }, { "epoch": 4.853517877739331, "grad_norm": 0.0, "learning_rate": 9.194697072834009e-06, "loss": 0.1136, "step": 2104 }, { "epoch": 4.855824682814302, "grad_norm": 0.0, "learning_rate": 9.18641325332142e-06, "loss": 0.1963, "step": 2105 }, { "epoch": 4.858131487889273, "grad_norm": 0.0, "learning_rate": 9.178129995787698e-06, "loss": 0.2581, "step": 2106 }, { "epoch": 4.860438292964244, "grad_norm": 0.0, "learning_rate": 9.169847305954448e-06, "loss": 0.1883, "step": 2107 }, { "epoch": 4.862745098039216, "grad_norm": 0.0, "learning_rate": 9.16156518954287e-06, "loss": 0.1476, "step": 2108 }, { "epoch": 4.865051903114187, "grad_norm": 0.0, "learning_rate": 9.153283652273768e-06, "loss": 0.2835, "step": 2109 }, { "epoch": 4.867358708189158, "grad_norm": 0.0, "learning_rate": 9.145002699867556e-06, "loss": 0.1969, "step": 2110 }, { "epoch": 4.869665513264129, "grad_norm": 0.0, "learning_rate": 9.136722338044244e-06, "loss": 0.1918, "step": 2111 }, { "epoch": 4.8719723183391, "grad_norm": 0.0, "learning_rate": 9.128442572523418e-06, "loss": 0.2292, "step": 2112 }, { "epoch": 4.874279123414071, "grad_norm": 0.0, "learning_rate": 9.120163409024272e-06, "loss": 0.1756, "step": 2113 }, { "epoch": 4.8765859284890425, "grad_norm": 0.0, "learning_rate": 9.111884853265573e-06, "loss": 0.2545, "step": 2114 }, { "epoch": 4.8788927335640135, "grad_norm": 0.0, "learning_rate": 9.103606910965666e-06, "loss": 0.1474, "step": 2115 }, { "epoch": 4.881199538638985, "grad_norm": 0.0, "learning_rate": 9.09532958784248e-06, "loss": 0.263, "step": 2116 }, { "epoch": 4.8835063437139565, "grad_norm": 0.0, "learning_rate": 9.087052889613519e-06, "loss": 0.1743, "step": 2117 }, { "epoch": 4.885813148788928, "grad_norm": 0.0, "learning_rate": 9.078776821995839e-06, "loss": 0.135, "step": 2118 }, { "epoch": 4.888119953863899, "grad_norm": 0.0, "learning_rate": 9.07050139070608e-06, "loss": 0.1813, "step": 2119 }, { "epoch": 4.89042675893887, "grad_norm": 0.0, "learning_rate": 9.062226601460429e-06, "loss": 0.1624, "step": 2120 }, { "epoch": 4.892733564013841, "grad_norm": 0.0, "learning_rate": 9.05395245997463e-06, "loss": 0.1605, "step": 2121 }, { "epoch": 4.895040369088812, "grad_norm": 0.0, "learning_rate": 9.045678971963988e-06, "loss": 0.2081, "step": 2122 }, { "epoch": 4.897347174163783, "grad_norm": 0.0, "learning_rate": 9.037406143143356e-06, "loss": 0.1657, "step": 2123 }, { "epoch": 4.899653979238755, "grad_norm": 0.0, "learning_rate": 9.02913397922712e-06, "loss": 0.2662, "step": 2124 }, { "epoch": 4.901960784313726, "grad_norm": 0.0, "learning_rate": 9.020862485929219e-06, "loss": 0.2152, "step": 2125 }, { "epoch": 4.904267589388697, "grad_norm": 0.0, "learning_rate": 9.012591668963123e-06, "loss": 0.1353, "step": 2126 }, { "epoch": 4.906574394463668, "grad_norm": 0.0, "learning_rate": 9.004321534041836e-06, "loss": 0.1237, "step": 2127 }, { "epoch": 4.908881199538639, "grad_norm": 0.0, "learning_rate": 8.996052086877888e-06, "loss": 0.1727, "step": 2128 }, { "epoch": 4.91118800461361, "grad_norm": 0.0, "learning_rate": 8.987783333183345e-06, "loss": 0.2387, "step": 2129 }, { "epoch": 4.913494809688581, "grad_norm": 0.0, "learning_rate": 8.979515278669776e-06, "loss": 0.1998, "step": 2130 }, { "epoch": 4.915801614763552, "grad_norm": 0.0, "learning_rate": 8.971247929048283e-06, "loss": 0.2162, "step": 2131 }, { "epoch": 4.918108419838523, "grad_norm": 0.0, "learning_rate": 8.962981290029475e-06, "loss": 0.2075, "step": 2132 }, { "epoch": 4.920415224913495, "grad_norm": 0.0, "learning_rate": 8.954715367323468e-06, "loss": 0.1986, "step": 2133 }, { "epoch": 4.922722029988466, "grad_norm": 0.0, "learning_rate": 8.946450166639883e-06, "loss": 0.1778, "step": 2134 }, { "epoch": 4.925028835063437, "grad_norm": 0.0, "learning_rate": 8.938185693687853e-06, "loss": 0.1975, "step": 2135 }, { "epoch": 4.927335640138408, "grad_norm": 0.0, "learning_rate": 8.92992195417599e-06, "loss": 0.2094, "step": 2136 }, { "epoch": 4.929642445213379, "grad_norm": 0.0, "learning_rate": 8.921658953812416e-06, "loss": 0.1985, "step": 2137 }, { "epoch": 4.93194925028835, "grad_norm": 0.0, "learning_rate": 8.913396698304733e-06, "loss": 0.1639, "step": 2138 }, { "epoch": 4.9342560553633215, "grad_norm": 0.0, "learning_rate": 8.905135193360032e-06, "loss": 0.1222, "step": 2139 }, { "epoch": 4.936562860438293, "grad_norm": 0.0, "learning_rate": 8.896874444684882e-06, "loss": 0.1553, "step": 2140 }, { "epoch": 4.9388696655132645, "grad_norm": 0.0, "learning_rate": 8.888614457985343e-06, "loss": 0.2064, "step": 2141 }, { "epoch": 4.9411764705882355, "grad_norm": 0.0, "learning_rate": 8.880355238966923e-06, "loss": 0.2544, "step": 2142 }, { "epoch": 4.9434832756632066, "grad_norm": 0.0, "learning_rate": 8.872096793334624e-06, "loss": 0.1528, "step": 2143 }, { "epoch": 4.945790080738178, "grad_norm": 0.0, "learning_rate": 8.863839126792905e-06, "loss": 0.2023, "step": 2144 }, { "epoch": 4.948096885813149, "grad_norm": 0.0, "learning_rate": 8.855582245045682e-06, "loss": 0.12, "step": 2145 }, { "epoch": 4.95040369088812, "grad_norm": 0.0, "learning_rate": 8.847326153796335e-06, "loss": 0.1589, "step": 2146 }, { "epoch": 4.952710495963091, "grad_norm": 0.0, "learning_rate": 8.839070858747697e-06, "loss": 0.2069, "step": 2147 }, { "epoch": 4.955017301038062, "grad_norm": 0.0, "learning_rate": 8.830816365602053e-06, "loss": 0.1201, "step": 2148 }, { "epoch": 4.957324106113034, "grad_norm": 0.0, "learning_rate": 8.822562680061127e-06, "loss": 0.2688, "step": 2149 }, { "epoch": 4.959630911188005, "grad_norm": 0.0, "learning_rate": 8.814309807826092e-06, "loss": 0.188, "step": 2150 }, { "epoch": 4.961937716262976, "grad_norm": 0.0, "learning_rate": 8.806057754597559e-06, "loss": 0.178, "step": 2151 }, { "epoch": 4.964244521337947, "grad_norm": 0.0, "learning_rate": 8.797806526075566e-06, "loss": 0.2219, "step": 2152 }, { "epoch": 4.966551326412918, "grad_norm": 0.0, "learning_rate": 8.789556127959586e-06, "loss": 0.1495, "step": 2153 }, { "epoch": 4.968858131487889, "grad_norm": 0.0, "learning_rate": 8.781306565948528e-06, "loss": 0.1692, "step": 2154 }, { "epoch": 4.97116493656286, "grad_norm": 0.0, "learning_rate": 8.773057845740702e-06, "loss": 0.1442, "step": 2155 }, { "epoch": 4.973471741637832, "grad_norm": 0.0, "learning_rate": 8.76480997303386e-06, "loss": 0.1999, "step": 2156 }, { "epoch": 4.975778546712803, "grad_norm": 0.0, "learning_rate": 8.756562953525151e-06, "loss": 0.1109, "step": 2157 }, { "epoch": 4.978085351787774, "grad_norm": 0.0, "learning_rate": 8.74831679291114e-06, "loss": 0.1679, "step": 2158 }, { "epoch": 4.980392156862745, "grad_norm": 0.0, "learning_rate": 8.740071496887803e-06, "loss": 0.2638, "step": 2159 }, { "epoch": 4.982698961937716, "grad_norm": 0.0, "learning_rate": 8.731827071150519e-06, "loss": 0.1411, "step": 2160 }, { "epoch": 4.985005767012687, "grad_norm": 0.0, "learning_rate": 8.723583521394054e-06, "loss": 0.1695, "step": 2161 }, { "epoch": 4.987312572087658, "grad_norm": 0.0, "learning_rate": 8.715340853312586e-06, "loss": 0.1538, "step": 2162 }, { "epoch": 4.989619377162629, "grad_norm": 0.0, "learning_rate": 8.70709907259967e-06, "loss": 0.1721, "step": 2163 }, { "epoch": 4.9919261822376, "grad_norm": 0.0, "learning_rate": 8.698858184948254e-06, "loss": 0.1933, "step": 2164 }, { "epoch": 4.994232987312572, "grad_norm": 0.0, "learning_rate": 8.690618196050667e-06, "loss": 0.1941, "step": 2165 }, { "epoch": 4.996539792387543, "grad_norm": 0.0, "learning_rate": 8.682379111598626e-06, "loss": 0.2005, "step": 2166 }, { "epoch": 4.9988465974625145, "grad_norm": 0.0, "learning_rate": 8.674140937283208e-06, "loss": 0.2144, "step": 2167 }, { "epoch": 5.0011534025374855, "grad_norm": 0.0, "learning_rate": 8.665903678794873e-06, "loss": 0.1495, "step": 2168 }, { "epoch": 5.003460207612457, "grad_norm": 0.0, "learning_rate": 8.657667341823449e-06, "loss": 0.1516, "step": 2169 }, { "epoch": 5.005767012687428, "grad_norm": 0.0, "learning_rate": 8.649431932058111e-06, "loss": 0.1195, "step": 2170 }, { "epoch": 5.008073817762399, "grad_norm": 0.0, "learning_rate": 8.641197455187418e-06, "loss": 0.0936, "step": 2171 }, { "epoch": 5.010380622837371, "grad_norm": 0.0, "learning_rate": 8.632963916899268e-06, "loss": 0.1459, "step": 2172 }, { "epoch": 5.012687427912342, "grad_norm": 0.0, "learning_rate": 8.624731322880913e-06, "loss": 0.1414, "step": 2173 }, { "epoch": 5.014994232987313, "grad_norm": 0.0, "learning_rate": 8.616499678818958e-06, "loss": 0.1322, "step": 2174 }, { "epoch": 5.017301038062284, "grad_norm": 0.0, "learning_rate": 8.60826899039935e-06, "loss": 0.124, "step": 2175 }, { "epoch": 5.019607843137255, "grad_norm": 0.0, "learning_rate": 8.600039263307367e-06, "loss": 0.1516, "step": 2176 }, { "epoch": 5.021914648212226, "grad_norm": 0.0, "learning_rate": 8.591810503227634e-06, "loss": 0.1479, "step": 2177 }, { "epoch": 5.024221453287197, "grad_norm": 0.0, "learning_rate": 8.583582715844113e-06, "loss": 0.0904, "step": 2178 }, { "epoch": 5.026528258362168, "grad_norm": 0.0, "learning_rate": 8.575355906840073e-06, "loss": 0.1361, "step": 2179 }, { "epoch": 5.02883506343714, "grad_norm": 0.0, "learning_rate": 8.567130081898127e-06, "loss": 0.0867, "step": 2180 }, { "epoch": 5.031141868512111, "grad_norm": 0.0, "learning_rate": 8.558905246700202e-06, "loss": 0.0681, "step": 2181 }, { "epoch": 5.033448673587082, "grad_norm": 0.0, "learning_rate": 8.550681406927534e-06, "loss": 0.1284, "step": 2182 }, { "epoch": 5.035755478662053, "grad_norm": 0.0, "learning_rate": 8.542458568260682e-06, "loss": 0.1254, "step": 2183 }, { "epoch": 5.038062283737024, "grad_norm": 0.0, "learning_rate": 8.534236736379515e-06, "loss": 0.1262, "step": 2184 }, { "epoch": 5.040369088811995, "grad_norm": 0.0, "learning_rate": 8.52601591696319e-06, "loss": 0.1241, "step": 2185 }, { "epoch": 5.042675893886966, "grad_norm": 0.0, "learning_rate": 8.517796115690183e-06, "loss": 0.08, "step": 2186 }, { "epoch": 5.044982698961937, "grad_norm": 0.0, "learning_rate": 8.509577338238255e-06, "loss": 0.1439, "step": 2187 }, { "epoch": 5.047289504036909, "grad_norm": 0.0, "learning_rate": 8.501359590284472e-06, "loss": 0.1756, "step": 2188 }, { "epoch": 5.04959630911188, "grad_norm": 0.0, "learning_rate": 8.49314287750517e-06, "loss": 0.1842, "step": 2189 }, { "epoch": 5.051903114186851, "grad_norm": 0.0, "learning_rate": 8.484927205575985e-06, "loss": 0.1437, "step": 2190 }, { "epoch": 5.054209919261822, "grad_norm": 0.0, "learning_rate": 8.476712580171838e-06, "loss": 0.0606, "step": 2191 }, { "epoch": 5.0565167243367934, "grad_norm": 0.0, "learning_rate": 8.46849900696691e-06, "loss": 0.121, "step": 2192 }, { "epoch": 5.0588235294117645, "grad_norm": 0.0, "learning_rate": 8.460286491634664e-06, "loss": 0.1937, "step": 2193 }, { "epoch": 5.0611303344867355, "grad_norm": 0.0, "learning_rate": 8.45207503984784e-06, "loss": 0.0853, "step": 2194 }, { "epoch": 5.063437139561707, "grad_norm": 0.0, "learning_rate": 8.443864657278428e-06, "loss": 0.0855, "step": 2195 }, { "epoch": 5.0657439446366785, "grad_norm": 0.0, "learning_rate": 8.43565534959769e-06, "loss": 0.1329, "step": 2196 }, { "epoch": 5.06805074971165, "grad_norm": 0.0, "learning_rate": 8.427447122476148e-06, "loss": 0.1022, "step": 2197 }, { "epoch": 5.070357554786621, "grad_norm": 0.0, "learning_rate": 8.419239981583567e-06, "loss": 0.179, "step": 2198 }, { "epoch": 5.072664359861592, "grad_norm": 0.0, "learning_rate": 8.411033932588969e-06, "loss": 0.1191, "step": 2199 }, { "epoch": 5.074971164936563, "grad_norm": 0.0, "learning_rate": 8.40282898116062e-06, "loss": 0.0543, "step": 2200 }, { "epoch": 5.077277970011534, "grad_norm": 0.0, "learning_rate": 8.394625132966025e-06, "loss": 0.1193, "step": 2201 }, { "epoch": 5.079584775086505, "grad_norm": 0.0, "learning_rate": 8.386422393671934e-06, "loss": 0.1245, "step": 2202 }, { "epoch": 5.081891580161477, "grad_norm": 0.0, "learning_rate": 8.378220768944328e-06, "loss": 0.1485, "step": 2203 }, { "epoch": 5.084198385236448, "grad_norm": 0.0, "learning_rate": 8.370020264448413e-06, "loss": 0.0965, "step": 2204 }, { "epoch": 5.086505190311419, "grad_norm": 0.0, "learning_rate": 8.361820885848623e-06, "loss": 0.0803, "step": 2205 }, { "epoch": 5.08881199538639, "grad_norm": 0.0, "learning_rate": 8.353622638808628e-06, "loss": 0.0971, "step": 2206 }, { "epoch": 5.091118800461361, "grad_norm": 0.0, "learning_rate": 8.34542552899129e-06, "loss": 0.1029, "step": 2207 }, { "epoch": 5.093425605536332, "grad_norm": 0.0, "learning_rate": 8.337229562058707e-06, "loss": 0.1029, "step": 2208 }, { "epoch": 5.095732410611303, "grad_norm": 0.0, "learning_rate": 8.329034743672187e-06, "loss": 0.1178, "step": 2209 }, { "epoch": 5.098039215686274, "grad_norm": 0.0, "learning_rate": 8.32084107949223e-06, "loss": 0.1704, "step": 2210 }, { "epoch": 5.100346020761246, "grad_norm": 0.0, "learning_rate": 8.312648575178552e-06, "loss": 0.1339, "step": 2211 }, { "epoch": 5.102652825836217, "grad_norm": 0.0, "learning_rate": 8.304457236390062e-06, "loss": 0.1364, "step": 2212 }, { "epoch": 5.104959630911188, "grad_norm": 0.0, "learning_rate": 8.296267068784862e-06, "loss": 0.1252, "step": 2213 }, { "epoch": 5.107266435986159, "grad_norm": 0.0, "learning_rate": 8.28807807802025e-06, "loss": 0.099, "step": 2214 }, { "epoch": 5.10957324106113, "grad_norm": 0.0, "learning_rate": 8.279890269752715e-06, "loss": 0.1126, "step": 2215 }, { "epoch": 5.111880046136101, "grad_norm": 0.0, "learning_rate": 8.271703649637911e-06, "loss": 0.1939, "step": 2216 }, { "epoch": 5.114186851211072, "grad_norm": 0.0, "learning_rate": 8.263518223330698e-06, "loss": 0.0793, "step": 2217 }, { "epoch": 5.1164936562860435, "grad_norm": 0.0, "learning_rate": 8.25533399648509e-06, "loss": 0.1413, "step": 2218 }, { "epoch": 5.118800461361015, "grad_norm": 0.0, "learning_rate": 8.247150974754275e-06, "loss": 0.1041, "step": 2219 }, { "epoch": 5.1211072664359865, "grad_norm": 0.0, "learning_rate": 8.238969163790617e-06, "loss": 0.102, "step": 2220 }, { "epoch": 5.1234140715109575, "grad_norm": 0.0, "learning_rate": 8.230788569245648e-06, "loss": 0.1206, "step": 2221 }, { "epoch": 5.125720876585929, "grad_norm": 0.0, "learning_rate": 8.222609196770037e-06, "loss": 0.0987, "step": 2222 }, { "epoch": 5.1280276816609, "grad_norm": 0.0, "learning_rate": 8.214431052013636e-06, "loss": 0.0742, "step": 2223 }, { "epoch": 5.130334486735871, "grad_norm": 0.0, "learning_rate": 8.206254140625425e-06, "loss": 0.1438, "step": 2224 }, { "epoch": 5.132641291810842, "grad_norm": 0.0, "learning_rate": 8.198078468253556e-06, "loss": 0.1136, "step": 2225 }, { "epoch": 5.134948096885813, "grad_norm": 0.0, "learning_rate": 8.189904040545302e-06, "loss": 0.1128, "step": 2226 }, { "epoch": 5.137254901960785, "grad_norm": 0.0, "learning_rate": 8.181730863147094e-06, "loss": 0.1252, "step": 2227 }, { "epoch": 5.139561707035756, "grad_norm": 0.0, "learning_rate": 8.173558941704487e-06, "loss": 0.1318, "step": 2228 }, { "epoch": 5.141868512110727, "grad_norm": 0.0, "learning_rate": 8.165388281862177e-06, "loss": 0.1683, "step": 2229 }, { "epoch": 5.144175317185698, "grad_norm": 0.0, "learning_rate": 8.157218889263984e-06, "loss": 0.0917, "step": 2230 }, { "epoch": 5.146482122260669, "grad_norm": 0.0, "learning_rate": 8.149050769552856e-06, "loss": 0.1561, "step": 2231 }, { "epoch": 5.14878892733564, "grad_norm": 0.0, "learning_rate": 8.140883928370855e-06, "loss": 0.1362, "step": 2232 }, { "epoch": 5.151095732410611, "grad_norm": 0.0, "learning_rate": 8.132718371359168e-06, "loss": 0.1303, "step": 2233 }, { "epoch": 5.153402537485582, "grad_norm": 0.0, "learning_rate": 8.124554104158094e-06, "loss": 0.096, "step": 2234 }, { "epoch": 5.155709342560554, "grad_norm": 0.0, "learning_rate": 8.116391132407033e-06, "loss": 0.1079, "step": 2235 }, { "epoch": 5.158016147635525, "grad_norm": 0.0, "learning_rate": 8.108229461744496e-06, "loss": 0.0833, "step": 2236 }, { "epoch": 5.160322952710496, "grad_norm": 0.0, "learning_rate": 8.100069097808103e-06, "loss": 0.1309, "step": 2237 }, { "epoch": 5.162629757785467, "grad_norm": 0.0, "learning_rate": 8.091910046234552e-06, "loss": 0.1182, "step": 2238 }, { "epoch": 5.164936562860438, "grad_norm": 0.0, "learning_rate": 8.083752312659653e-06, "loss": 0.1312, "step": 2239 }, { "epoch": 5.167243367935409, "grad_norm": 0.0, "learning_rate": 8.075595902718302e-06, "loss": 0.1302, "step": 2240 }, { "epoch": 5.16955017301038, "grad_norm": 0.0, "learning_rate": 8.06744082204447e-06, "loss": 0.17, "step": 2241 }, { "epoch": 5.171856978085351, "grad_norm": 0.0, "learning_rate": 8.059287076271216e-06, "loss": 0.1281, "step": 2242 }, { "epoch": 5.174163783160323, "grad_norm": 0.0, "learning_rate": 8.051134671030686e-06, "loss": 0.0902, "step": 2243 }, { "epoch": 5.176470588235294, "grad_norm": 0.0, "learning_rate": 8.042983611954087e-06, "loss": 0.1183, "step": 2244 }, { "epoch": 5.178777393310265, "grad_norm": 0.0, "learning_rate": 8.034833904671698e-06, "loss": 0.079, "step": 2245 }, { "epoch": 5.1810841983852365, "grad_norm": 0.0, "learning_rate": 8.026685554812877e-06, "loss": 0.1056, "step": 2246 }, { "epoch": 5.1833910034602075, "grad_norm": 0.0, "learning_rate": 8.018538568006027e-06, "loss": 0.1229, "step": 2247 }, { "epoch": 5.185697808535179, "grad_norm": 0.0, "learning_rate": 8.010392949878616e-06, "loss": 0.0956, "step": 2248 }, { "epoch": 5.18800461361015, "grad_norm": 0.0, "learning_rate": 8.002248706057177e-06, "loss": 0.101, "step": 2249 }, { "epoch": 5.190311418685121, "grad_norm": 0.0, "learning_rate": 7.994105842167274e-06, "loss": 0.1429, "step": 2250 }, { "epoch": 5.192618223760093, "grad_norm": 0.0, "learning_rate": 7.985964363833532e-06, "loss": 0.2251, "step": 2251 }, { "epoch": 5.194925028835064, "grad_norm": 0.0, "learning_rate": 7.977824276679623e-06, "loss": 0.1238, "step": 2252 }, { "epoch": 5.197231833910035, "grad_norm": 0.0, "learning_rate": 7.96968558632824e-06, "loss": 0.1255, "step": 2253 }, { "epoch": 5.199538638985006, "grad_norm": 0.0, "learning_rate": 7.961548298401125e-06, "loss": 0.0941, "step": 2254 }, { "epoch": 5.201845444059977, "grad_norm": 0.0, "learning_rate": 7.953412418519052e-06, "loss": 0.0794, "step": 2255 }, { "epoch": 5.204152249134948, "grad_norm": 0.0, "learning_rate": 7.945277952301811e-06, "loss": 0.1098, "step": 2256 }, { "epoch": 5.206459054209919, "grad_norm": 0.0, "learning_rate": 7.937144905368226e-06, "loss": 0.1163, "step": 2257 }, { "epoch": 5.20876585928489, "grad_norm": 0.0, "learning_rate": 7.929013283336141e-06, "loss": 0.119, "step": 2258 }, { "epoch": 5.211072664359862, "grad_norm": 0.0, "learning_rate": 7.92088309182241e-06, "loss": 0.094, "step": 2259 }, { "epoch": 5.213379469434833, "grad_norm": 0.0, "learning_rate": 7.912754336442897e-06, "loss": 0.1079, "step": 2260 }, { "epoch": 5.215686274509804, "grad_norm": 0.0, "learning_rate": 7.904627022812484e-06, "loss": 0.1326, "step": 2261 }, { "epoch": 5.217993079584775, "grad_norm": 0.0, "learning_rate": 7.896501156545044e-06, "loss": 0.151, "step": 2262 }, { "epoch": 5.220299884659746, "grad_norm": 0.0, "learning_rate": 7.888376743253462e-06, "loss": 0.1152, "step": 2263 }, { "epoch": 5.222606689734717, "grad_norm": 0.0, "learning_rate": 7.88025378854962e-06, "loss": 0.1926, "step": 2264 }, { "epoch": 5.224913494809688, "grad_norm": 0.0, "learning_rate": 7.872132298044382e-06, "loss": 0.1437, "step": 2265 }, { "epoch": 5.22722029988466, "grad_norm": 0.0, "learning_rate": 7.864012277347602e-06, "loss": 0.0927, "step": 2266 }, { "epoch": 5.229527104959631, "grad_norm": 0.0, "learning_rate": 7.855893732068124e-06, "loss": 0.0738, "step": 2267 }, { "epoch": 5.231833910034602, "grad_norm": 0.0, "learning_rate": 7.847776667813782e-06, "loss": 0.055, "step": 2268 }, { "epoch": 5.234140715109573, "grad_norm": 0.0, "learning_rate": 7.839661090191362e-06, "loss": 0.0677, "step": 2269 }, { "epoch": 5.236447520184544, "grad_norm": 0.0, "learning_rate": 7.831547004806647e-06, "loss": 0.1054, "step": 2270 }, { "epoch": 5.2387543252595155, "grad_norm": 0.0, "learning_rate": 7.823434417264378e-06, "loss": 0.1636, "step": 2271 }, { "epoch": 5.2410611303344865, "grad_norm": 0.0, "learning_rate": 7.815323333168262e-06, "loss": 0.099, "step": 2272 }, { "epoch": 5.243367935409458, "grad_norm": 0.0, "learning_rate": 7.807213758120965e-06, "loss": 0.0882, "step": 2273 }, { "epoch": 5.245674740484429, "grad_norm": 0.0, "learning_rate": 7.799105697724127e-06, "loss": 0.1198, "step": 2274 }, { "epoch": 5.2479815455594006, "grad_norm": 0.0, "learning_rate": 7.790999157578314e-06, "loss": 0.0729, "step": 2275 }, { "epoch": 5.250288350634372, "grad_norm": 0.0, "learning_rate": 7.782894143283065e-06, "loss": 0.1131, "step": 2276 }, { "epoch": 5.252595155709343, "grad_norm": 0.0, "learning_rate": 7.774790660436857e-06, "loss": 0.0498, "step": 2277 }, { "epoch": 5.254901960784314, "grad_norm": 0.0, "learning_rate": 7.766688714637109e-06, "loss": 0.1083, "step": 2278 }, { "epoch": 5.257208765859285, "grad_norm": 0.0, "learning_rate": 7.758588311480174e-06, "loss": 0.0884, "step": 2279 }, { "epoch": 5.259515570934256, "grad_norm": 0.0, "learning_rate": 7.750489456561351e-06, "loss": 0.1061, "step": 2280 }, { "epoch": 5.261822376009227, "grad_norm": 0.0, "learning_rate": 7.742392155474858e-06, "loss": 0.1311, "step": 2281 }, { "epoch": 5.264129181084199, "grad_norm": 0.0, "learning_rate": 7.734296413813847e-06, "loss": 0.0854, "step": 2282 }, { "epoch": 5.26643598615917, "grad_norm": 0.0, "learning_rate": 7.726202237170387e-06, "loss": 0.1119, "step": 2283 }, { "epoch": 5.268742791234141, "grad_norm": 0.0, "learning_rate": 7.718109631135472e-06, "loss": 0.156, "step": 2284 }, { "epoch": 5.271049596309112, "grad_norm": 0.0, "learning_rate": 7.710018601299004e-06, "loss": 0.0985, "step": 2285 }, { "epoch": 5.273356401384083, "grad_norm": 0.0, "learning_rate": 7.701929153249808e-06, "loss": 0.1588, "step": 2286 }, { "epoch": 5.275663206459054, "grad_norm": 0.0, "learning_rate": 7.6938412925756e-06, "loss": 0.1022, "step": 2287 }, { "epoch": 5.277970011534025, "grad_norm": 0.0, "learning_rate": 7.685755024863013e-06, "loss": 0.1446, "step": 2288 }, { "epoch": 5.280276816608996, "grad_norm": 0.0, "learning_rate": 7.677670355697577e-06, "loss": 0.1797, "step": 2289 }, { "epoch": 5.282583621683968, "grad_norm": 0.0, "learning_rate": 7.669587290663711e-06, "loss": 0.1068, "step": 2290 }, { "epoch": 5.284890426758939, "grad_norm": 0.0, "learning_rate": 7.661505835344733e-06, "loss": 0.1554, "step": 2291 }, { "epoch": 5.28719723183391, "grad_norm": 0.0, "learning_rate": 7.653425995322852e-06, "loss": 0.151, "step": 2292 }, { "epoch": 5.289504036908881, "grad_norm": 0.0, "learning_rate": 7.645347776179144e-06, "loss": 0.1269, "step": 2293 }, { "epoch": 5.291810841983852, "grad_norm": 0.0, "learning_rate": 7.637271183493587e-06, "loss": 0.1839, "step": 2294 }, { "epoch": 5.294117647058823, "grad_norm": 0.0, "learning_rate": 7.629196222845027e-06, "loss": 0.1341, "step": 2295 }, { "epoch": 5.296424452133794, "grad_norm": 0.0, "learning_rate": 7.621122899811177e-06, "loss": 0.0932, "step": 2296 }, { "epoch": 5.2987312572087655, "grad_norm": 0.0, "learning_rate": 7.613051219968624e-06, "loss": 0.1643, "step": 2297 }, { "epoch": 5.301038062283737, "grad_norm": 0.0, "learning_rate": 7.6049811888928235e-06, "loss": 0.1563, "step": 2298 }, { "epoch": 5.3033448673587085, "grad_norm": 0.0, "learning_rate": 7.596912812158083e-06, "loss": 0.162, "step": 2299 }, { "epoch": 5.3056516724336795, "grad_norm": 0.0, "learning_rate": 7.588846095337574e-06, "loss": 0.1715, "step": 2300 }, { "epoch": 5.307958477508651, "grad_norm": 0.0, "learning_rate": 7.580781044003324e-06, "loss": 0.143, "step": 2301 }, { "epoch": 5.310265282583622, "grad_norm": 0.0, "learning_rate": 7.5727176637262034e-06, "loss": 0.1004, "step": 2302 }, { "epoch": 5.312572087658593, "grad_norm": 0.0, "learning_rate": 7.564655960075927e-06, "loss": 0.1638, "step": 2303 }, { "epoch": 5.314878892733564, "grad_norm": 0.0, "learning_rate": 7.556595938621058e-06, "loss": 0.1063, "step": 2304 }, { "epoch": 5.317185697808535, "grad_norm": 0.0, "learning_rate": 7.5485376049290014e-06, "loss": 0.0884, "step": 2305 }, { "epoch": 5.319492502883507, "grad_norm": 0.0, "learning_rate": 7.540480964565981e-06, "loss": 0.1348, "step": 2306 }, { "epoch": 5.321799307958478, "grad_norm": 0.0, "learning_rate": 7.532426023097063e-06, "loss": 0.0931, "step": 2307 }, { "epoch": 5.324106113033449, "grad_norm": 0.0, "learning_rate": 7.524372786086143e-06, "loss": 0.1618, "step": 2308 }, { "epoch": 5.32641291810842, "grad_norm": 0.0, "learning_rate": 7.516321259095921e-06, "loss": 0.1083, "step": 2309 }, { "epoch": 5.328719723183391, "grad_norm": 0.0, "learning_rate": 7.508271447687936e-06, "loss": 0.1479, "step": 2310 }, { "epoch": 5.331026528258362, "grad_norm": 0.0, "learning_rate": 7.500223357422537e-06, "loss": 0.1094, "step": 2311 }, { "epoch": 5.333333333333333, "grad_norm": 0.0, "learning_rate": 7.492176993858873e-06, "loss": 0.1584, "step": 2312 }, { "epoch": 5.335640138408304, "grad_norm": 0.0, "learning_rate": 7.484132362554915e-06, "loss": 0.1474, "step": 2313 }, { "epoch": 5.337946943483276, "grad_norm": 0.0, "learning_rate": 7.476089469067432e-06, "loss": 0.1665, "step": 2314 }, { "epoch": 5.340253748558247, "grad_norm": 0.0, "learning_rate": 7.468048318951983e-06, "loss": 0.1302, "step": 2315 }, { "epoch": 5.342560553633218, "grad_norm": 0.0, "learning_rate": 7.4600089177629384e-06, "loss": 0.1754, "step": 2316 }, { "epoch": 5.344867358708189, "grad_norm": 0.0, "learning_rate": 7.451971271053455e-06, "loss": 0.1228, "step": 2317 }, { "epoch": 5.34717416378316, "grad_norm": 0.0, "learning_rate": 7.4439353843754715e-06, "loss": 0.1626, "step": 2318 }, { "epoch": 5.349480968858131, "grad_norm": 0.0, "learning_rate": 7.435901263279717e-06, "loss": 0.0944, "step": 2319 }, { "epoch": 5.351787773933102, "grad_norm": 0.0, "learning_rate": 7.4278689133157034e-06, "loss": 0.1013, "step": 2320 }, { "epoch": 5.354094579008073, "grad_norm": 0.0, "learning_rate": 7.419838340031709e-06, "loss": 0.1058, "step": 2321 }, { "epoch": 5.356401384083045, "grad_norm": 0.0, "learning_rate": 7.411809548974792e-06, "loss": 0.1163, "step": 2322 }, { "epoch": 5.358708189158016, "grad_norm": 0.0, "learning_rate": 7.403782545690787e-06, "loss": 0.1901, "step": 2323 }, { "epoch": 5.361014994232987, "grad_norm": 0.0, "learning_rate": 7.395757335724276e-06, "loss": 0.1381, "step": 2324 }, { "epoch": 5.3633217993079585, "grad_norm": 0.0, "learning_rate": 7.387733924618617e-06, "loss": 0.1357, "step": 2325 }, { "epoch": 5.3656286043829295, "grad_norm": 0.0, "learning_rate": 7.3797123179159225e-06, "loss": 0.0372, "step": 2326 }, { "epoch": 5.367935409457901, "grad_norm": 0.0, "learning_rate": 7.371692521157048e-06, "loss": 0.06, "step": 2327 }, { "epoch": 5.370242214532872, "grad_norm": 0.0, "learning_rate": 7.3636745398816135e-06, "loss": 0.1012, "step": 2328 }, { "epoch": 5.372549019607844, "grad_norm": 0.0, "learning_rate": 7.355658379627981e-06, "loss": 0.1568, "step": 2329 }, { "epoch": 5.374855824682815, "grad_norm": 0.0, "learning_rate": 7.347644045933244e-06, "loss": 0.1249, "step": 2330 }, { "epoch": 5.377162629757786, "grad_norm": 0.0, "learning_rate": 7.33963154433325e-06, "loss": 0.1521, "step": 2331 }, { "epoch": 5.379469434832757, "grad_norm": 0.0, "learning_rate": 7.331620880362571e-06, "loss": 0.0785, "step": 2332 }, { "epoch": 5.381776239907728, "grad_norm": 0.0, "learning_rate": 7.323612059554514e-06, "loss": 0.1795, "step": 2333 }, { "epoch": 5.384083044982699, "grad_norm": 0.0, "learning_rate": 7.315605087441107e-06, "loss": 0.1333, "step": 2334 }, { "epoch": 5.38638985005767, "grad_norm": 0.0, "learning_rate": 7.307599969553111e-06, "loss": 0.1032, "step": 2335 }, { "epoch": 5.388696655132641, "grad_norm": 0.0, "learning_rate": 7.299596711419994e-06, "loss": 0.1417, "step": 2336 }, { "epoch": 5.391003460207612, "grad_norm": 0.0, "learning_rate": 7.291595318569951e-06, "loss": 0.173, "step": 2337 }, { "epoch": 5.393310265282584, "grad_norm": 0.0, "learning_rate": 7.2835957965298805e-06, "loss": 0.0894, "step": 2338 }, { "epoch": 5.395617070357555, "grad_norm": 0.0, "learning_rate": 7.2755981508253935e-06, "loss": 0.1691, "step": 2339 }, { "epoch": 5.397923875432526, "grad_norm": 0.0, "learning_rate": 7.267602386980801e-06, "loss": 0.1301, "step": 2340 }, { "epoch": 5.400230680507497, "grad_norm": 0.0, "learning_rate": 7.259608510519121e-06, "loss": 0.0999, "step": 2341 }, { "epoch": 5.402537485582468, "grad_norm": 0.0, "learning_rate": 7.2516165269620534e-06, "loss": 0.1334, "step": 2342 }, { "epoch": 5.404844290657439, "grad_norm": 0.0, "learning_rate": 7.243626441830009e-06, "loss": 0.2163, "step": 2343 }, { "epoch": 5.40715109573241, "grad_norm": 0.0, "learning_rate": 7.235638260642075e-06, "loss": 0.129, "step": 2344 }, { "epoch": 5.409457900807382, "grad_norm": 0.0, "learning_rate": 7.227651988916032e-06, "loss": 0.149, "step": 2345 }, { "epoch": 5.411764705882353, "grad_norm": 0.0, "learning_rate": 7.219667632168326e-06, "loss": 0.1241, "step": 2346 }, { "epoch": 5.414071510957324, "grad_norm": 0.0, "learning_rate": 7.2116851959140965e-06, "loss": 0.1807, "step": 2347 }, { "epoch": 5.416378316032295, "grad_norm": 0.0, "learning_rate": 7.203704685667156e-06, "loss": 0.0846, "step": 2348 }, { "epoch": 5.418685121107266, "grad_norm": 0.0, "learning_rate": 7.1957261069399745e-06, "loss": 0.1518, "step": 2349 }, { "epoch": 5.4209919261822375, "grad_norm": 0.0, "learning_rate": 7.187749465243694e-06, "loss": 0.1478, "step": 2350 }, { "epoch": 5.4232987312572085, "grad_norm": 0.0, "learning_rate": 7.179774766088127e-06, "loss": 0.1471, "step": 2351 }, { "epoch": 5.42560553633218, "grad_norm": 0.0, "learning_rate": 7.171802014981726e-06, "loss": 0.1405, "step": 2352 }, { "epoch": 5.4279123414071515, "grad_norm": 0.0, "learning_rate": 7.163831217431615e-06, "loss": 0.1103, "step": 2353 }, { "epoch": 5.430219146482123, "grad_norm": 0.0, "learning_rate": 7.1558623789435634e-06, "loss": 0.0927, "step": 2354 }, { "epoch": 5.432525951557094, "grad_norm": 0.0, "learning_rate": 7.14789550502198e-06, "loss": 0.1815, "step": 2355 }, { "epoch": 5.434832756632065, "grad_norm": 0.0, "learning_rate": 7.139930601169926e-06, "loss": 0.1077, "step": 2356 }, { "epoch": 5.437139561707036, "grad_norm": 0.0, "learning_rate": 7.131967672889101e-06, "loss": 0.144, "step": 2357 }, { "epoch": 5.439446366782007, "grad_norm": 0.0, "learning_rate": 7.124006725679828e-06, "loss": 0.1472, "step": 2358 }, { "epoch": 5.441753171856978, "grad_norm": 0.0, "learning_rate": 7.116047765041078e-06, "loss": 0.1114, "step": 2359 }, { "epoch": 5.444059976931949, "grad_norm": 0.0, "learning_rate": 7.108090796470446e-06, "loss": 0.1308, "step": 2360 }, { "epoch": 5.446366782006921, "grad_norm": 0.0, "learning_rate": 7.100135825464138e-06, "loss": 0.1317, "step": 2361 }, { "epoch": 5.448673587081892, "grad_norm": 0.0, "learning_rate": 7.092182857516998e-06, "loss": 0.1821, "step": 2362 }, { "epoch": 5.450980392156863, "grad_norm": 0.0, "learning_rate": 7.084231898122478e-06, "loss": 0.1191, "step": 2363 }, { "epoch": 5.453287197231834, "grad_norm": 0.0, "learning_rate": 7.076282952772634e-06, "loss": 0.1682, "step": 2364 }, { "epoch": 5.455594002306805, "grad_norm": 0.0, "learning_rate": 7.0683360269581465e-06, "loss": 0.1238, "step": 2365 }, { "epoch": 5.457900807381776, "grad_norm": 0.0, "learning_rate": 7.060391126168297e-06, "loss": 0.1263, "step": 2366 }, { "epoch": 5.460207612456747, "grad_norm": 0.0, "learning_rate": 7.052448255890958e-06, "loss": 0.12, "step": 2367 }, { "epoch": 5.462514417531718, "grad_norm": 0.0, "learning_rate": 7.044507421612613e-06, "loss": 0.094, "step": 2368 }, { "epoch": 5.46482122260669, "grad_norm": 0.0, "learning_rate": 7.036568628818332e-06, "loss": 0.1063, "step": 2369 }, { "epoch": 5.467128027681661, "grad_norm": 0.0, "learning_rate": 7.028631882991771e-06, "loss": 0.158, "step": 2370 }, { "epoch": 5.469434832756632, "grad_norm": 0.0, "learning_rate": 7.02069718961518e-06, "loss": 0.1242, "step": 2371 }, { "epoch": 5.471741637831603, "grad_norm": 0.0, "learning_rate": 7.012764554169393e-06, "loss": 0.1032, "step": 2372 }, { "epoch": 5.474048442906574, "grad_norm": 0.0, "learning_rate": 7.004833982133808e-06, "loss": 0.1063, "step": 2373 }, { "epoch": 5.476355247981545, "grad_norm": 0.0, "learning_rate": 6.996905478986415e-06, "loss": 0.145, "step": 2374 }, { "epoch": 5.478662053056516, "grad_norm": 0.0, "learning_rate": 6.988979050203769e-06, "loss": 0.0888, "step": 2375 }, { "epoch": 5.4809688581314875, "grad_norm": 0.0, "learning_rate": 6.981054701260981e-06, "loss": 0.1449, "step": 2376 }, { "epoch": 5.483275663206459, "grad_norm": 0.0, "learning_rate": 6.973132437631743e-06, "loss": 0.193, "step": 2377 }, { "epoch": 5.4855824682814305, "grad_norm": 0.0, "learning_rate": 6.9652122647882966e-06, "loss": 0.2291, "step": 2378 }, { "epoch": 5.4878892733564015, "grad_norm": 0.0, "learning_rate": 6.957294188201438e-06, "loss": 0.1084, "step": 2379 }, { "epoch": 5.490196078431373, "grad_norm": 0.0, "learning_rate": 6.949378213340522e-06, "loss": 0.1257, "step": 2380 }, { "epoch": 5.492502883506344, "grad_norm": 0.0, "learning_rate": 6.94146434567345e-06, "loss": 0.1278, "step": 2381 }, { "epoch": 5.494809688581315, "grad_norm": 0.0, "learning_rate": 6.933552590666659e-06, "loss": 0.0772, "step": 2382 }, { "epoch": 5.497116493656286, "grad_norm": 0.0, "learning_rate": 6.9256429537851365e-06, "loss": 0.1118, "step": 2383 }, { "epoch": 5.499423298731257, "grad_norm": 0.0, "learning_rate": 6.917735440492407e-06, "loss": 0.1075, "step": 2384 }, { "epoch": 5.501730103806229, "grad_norm": 0.0, "learning_rate": 6.909830056250527e-06, "loss": 0.143, "step": 2385 }, { "epoch": 5.5040369088812, "grad_norm": 0.0, "learning_rate": 6.9019268065200765e-06, "loss": 0.0998, "step": 2386 }, { "epoch": 5.506343713956171, "grad_norm": 0.0, "learning_rate": 6.8940256967601625e-06, "loss": 0.1115, "step": 2387 }, { "epoch": 5.508650519031142, "grad_norm": 0.0, "learning_rate": 6.886126732428424e-06, "loss": 0.1694, "step": 2388 }, { "epoch": 5.510957324106113, "grad_norm": 0.0, "learning_rate": 6.878229918981003e-06, "loss": 0.0988, "step": 2389 }, { "epoch": 5.513264129181084, "grad_norm": 0.0, "learning_rate": 6.870335261872569e-06, "loss": 0.2149, "step": 2390 }, { "epoch": 5.515570934256055, "grad_norm": 0.0, "learning_rate": 6.862442766556297e-06, "loss": 0.1279, "step": 2391 }, { "epoch": 5.517877739331027, "grad_norm": 0.0, "learning_rate": 6.854552438483866e-06, "loss": 0.1479, "step": 2392 }, { "epoch": 5.520184544405998, "grad_norm": 0.0, "learning_rate": 6.846664283105455e-06, "loss": 0.0917, "step": 2393 }, { "epoch": 5.522491349480969, "grad_norm": 0.0, "learning_rate": 6.83877830586976e-06, "loss": 0.117, "step": 2394 }, { "epoch": 5.52479815455594, "grad_norm": 0.0, "learning_rate": 6.830894512223947e-06, "loss": 0.1405, "step": 2395 }, { "epoch": 5.527104959630911, "grad_norm": 0.0, "learning_rate": 6.823012907613691e-06, "loss": 0.1338, "step": 2396 }, { "epoch": 5.529411764705882, "grad_norm": 0.0, "learning_rate": 6.815133497483157e-06, "loss": 0.0751, "step": 2397 }, { "epoch": 5.531718569780853, "grad_norm": 0.0, "learning_rate": 6.807256287274981e-06, "loss": 0.1442, "step": 2398 }, { "epoch": 5.534025374855824, "grad_norm": 0.0, "learning_rate": 6.799381282430284e-06, "loss": 0.0848, "step": 2399 }, { "epoch": 5.536332179930795, "grad_norm": 0.0, "learning_rate": 6.791508488388675e-06, "loss": 0.1538, "step": 2400 }, { "epoch": 5.538638985005767, "grad_norm": 0.0, "learning_rate": 6.783637910588216e-06, "loss": 0.1288, "step": 2401 }, { "epoch": 5.540945790080738, "grad_norm": 0.0, "learning_rate": 6.775769554465455e-06, "loss": 0.0907, "step": 2402 }, { "epoch": 5.5432525951557095, "grad_norm": 0.0, "learning_rate": 6.767903425455402e-06, "loss": 0.1576, "step": 2403 }, { "epoch": 5.5455594002306805, "grad_norm": 0.0, "learning_rate": 6.76003952899152e-06, "loss": 0.142, "step": 2404 }, { "epoch": 5.5478662053056516, "grad_norm": 0.0, "learning_rate": 6.752177870505736e-06, "loss": 0.1113, "step": 2405 }, { "epoch": 5.550173010380623, "grad_norm": 0.0, "learning_rate": 6.744318455428436e-06, "loss": 0.0818, "step": 2406 }, { "epoch": 5.552479815455594, "grad_norm": 0.0, "learning_rate": 6.736461289188445e-06, "loss": 0.1264, "step": 2407 }, { "epoch": 5.554786620530566, "grad_norm": 0.0, "learning_rate": 6.728606377213045e-06, "loss": 0.0775, "step": 2408 }, { "epoch": 5.557093425605537, "grad_norm": 0.0, "learning_rate": 6.720753724927957e-06, "loss": 0.1695, "step": 2409 }, { "epoch": 5.559400230680508, "grad_norm": 0.0, "learning_rate": 6.712903337757339e-06, "loss": 0.0953, "step": 2410 }, { "epoch": 5.561707035755479, "grad_norm": 0.0, "learning_rate": 6.705055221123788e-06, "loss": 0.1091, "step": 2411 }, { "epoch": 5.56401384083045, "grad_norm": 0.0, "learning_rate": 6.697209380448333e-06, "loss": 0.1006, "step": 2412 }, { "epoch": 5.566320645905421, "grad_norm": 0.0, "learning_rate": 6.689365821150421e-06, "loss": 0.1257, "step": 2413 }, { "epoch": 5.568627450980392, "grad_norm": 0.0, "learning_rate": 6.681524548647936e-06, "loss": 0.1336, "step": 2414 }, { "epoch": 5.570934256055363, "grad_norm": 0.0, "learning_rate": 6.673685568357182e-06, "loss": 0.111, "step": 2415 }, { "epoch": 5.573241061130334, "grad_norm": 0.0, "learning_rate": 6.665848885692867e-06, "loss": 0.1131, "step": 2416 }, { "epoch": 5.575547866205306, "grad_norm": 0.0, "learning_rate": 6.6580145060681255e-06, "loss": 0.152, "step": 2417 }, { "epoch": 5.577854671280277, "grad_norm": 0.0, "learning_rate": 6.650182434894496e-06, "loss": 0.1047, "step": 2418 }, { "epoch": 5.580161476355248, "grad_norm": 0.0, "learning_rate": 6.642352677581917e-06, "loss": 0.1094, "step": 2419 }, { "epoch": 5.582468281430219, "grad_norm": 0.0, "learning_rate": 6.634525239538736e-06, "loss": 0.1242, "step": 2420 }, { "epoch": 5.58477508650519, "grad_norm": 0.0, "learning_rate": 6.6267001261717015e-06, "loss": 0.1919, "step": 2421 }, { "epoch": 5.587081891580161, "grad_norm": 0.0, "learning_rate": 6.618877342885945e-06, "loss": 0.1693, "step": 2422 }, { "epoch": 5.589388696655132, "grad_norm": 0.0, "learning_rate": 6.611056895084997e-06, "loss": 0.2294, "step": 2423 }, { "epoch": 5.591695501730104, "grad_norm": 0.0, "learning_rate": 6.603238788170771e-06, "loss": 0.1217, "step": 2424 }, { "epoch": 5.594002306805075, "grad_norm": 0.0, "learning_rate": 6.595423027543572e-06, "loss": 0.1884, "step": 2425 }, { "epoch": 5.596309111880046, "grad_norm": 0.0, "learning_rate": 6.587609618602065e-06, "loss": 0.1357, "step": 2426 }, { "epoch": 5.598615916955017, "grad_norm": 0.0, "learning_rate": 6.579798566743314e-06, "loss": 0.1135, "step": 2427 }, { "epoch": 5.600922722029988, "grad_norm": 0.0, "learning_rate": 6.571989877362738e-06, "loss": 0.1454, "step": 2428 }, { "epoch": 5.6032295271049595, "grad_norm": 0.0, "learning_rate": 6.5641835558541314e-06, "loss": 0.0846, "step": 2429 }, { "epoch": 5.6055363321799305, "grad_norm": 0.0, "learning_rate": 6.5563796076096484e-06, "loss": 0.1248, "step": 2430 }, { "epoch": 5.607843137254902, "grad_norm": 0.0, "learning_rate": 6.548578038019815e-06, "loss": 0.1146, "step": 2431 }, { "epoch": 5.610149942329873, "grad_norm": 0.0, "learning_rate": 6.540778852473497e-06, "loss": 0.1195, "step": 2432 }, { "epoch": 5.612456747404845, "grad_norm": 0.0, "learning_rate": 6.532982056357928e-06, "loss": 0.1321, "step": 2433 }, { "epoch": 5.614763552479816, "grad_norm": 0.0, "learning_rate": 6.525187655058687e-06, "loss": 0.1762, "step": 2434 }, { "epoch": 5.617070357554787, "grad_norm": 0.0, "learning_rate": 6.517395653959694e-06, "loss": 0.1277, "step": 2435 }, { "epoch": 5.619377162629758, "grad_norm": 0.0, "learning_rate": 6.5096060584432134e-06, "loss": 0.0624, "step": 2436 }, { "epoch": 5.621683967704729, "grad_norm": 0.0, "learning_rate": 6.501818873889856e-06, "loss": 0.092, "step": 2437 }, { "epoch": 5.6239907727797, "grad_norm": 0.0, "learning_rate": 6.494034105678551e-06, "loss": 0.1774, "step": 2438 }, { "epoch": 5.626297577854672, "grad_norm": 0.0, "learning_rate": 6.486251759186573e-06, "loss": 0.074, "step": 2439 }, { "epoch": 5.628604382929643, "grad_norm": 0.0, "learning_rate": 6.478471839789522e-06, "loss": 0.1198, "step": 2440 }, { "epoch": 5.630911188004614, "grad_norm": 0.0, "learning_rate": 6.4706943528613135e-06, "loss": 0.1509, "step": 2441 }, { "epoch": 5.633217993079585, "grad_norm": 0.0, "learning_rate": 6.462919303774186e-06, "loss": 0.1143, "step": 2442 }, { "epoch": 5.635524798154556, "grad_norm": 0.0, "learning_rate": 6.455146697898703e-06, "loss": 0.079, "step": 2443 }, { "epoch": 5.637831603229527, "grad_norm": 0.0, "learning_rate": 6.447376540603725e-06, "loss": 0.1958, "step": 2444 }, { "epoch": 5.640138408304498, "grad_norm": 0.0, "learning_rate": 6.439608837256432e-06, "loss": 0.1881, "step": 2445 }, { "epoch": 5.642445213379469, "grad_norm": 0.0, "learning_rate": 6.4318435932223115e-06, "loss": 0.1026, "step": 2446 }, { "epoch": 5.64475201845444, "grad_norm": 0.0, "learning_rate": 6.424080813865139e-06, "loss": 0.0794, "step": 2447 }, { "epoch": 5.647058823529412, "grad_norm": 0.0, "learning_rate": 6.4163205045469975e-06, "loss": 0.0691, "step": 2448 }, { "epoch": 5.649365628604383, "grad_norm": 0.0, "learning_rate": 6.408562670628267e-06, "loss": 0.1581, "step": 2449 }, { "epoch": 5.651672433679354, "grad_norm": 0.0, "learning_rate": 6.400807317467604e-06, "loss": 0.0775, "step": 2450 }, { "epoch": 5.653979238754325, "grad_norm": 0.0, "learning_rate": 6.393054450421963e-06, "loss": 0.1166, "step": 2451 }, { "epoch": 5.656286043829296, "grad_norm": 0.0, "learning_rate": 6.3853040748465855e-06, "loss": 0.1114, "step": 2452 }, { "epoch": 5.658592848904267, "grad_norm": 0.0, "learning_rate": 6.377556196094974e-06, "loss": 0.0991, "step": 2453 }, { "epoch": 5.660899653979238, "grad_norm": 0.0, "learning_rate": 6.36981081951892e-06, "loss": 0.1253, "step": 2454 }, { "epoch": 5.66320645905421, "grad_norm": 0.0, "learning_rate": 6.362067950468489e-06, "loss": 0.1908, "step": 2455 }, { "epoch": 5.665513264129181, "grad_norm": 0.0, "learning_rate": 6.3543275942920004e-06, "loss": 0.1327, "step": 2456 }, { "epoch": 5.6678200692041525, "grad_norm": 0.0, "learning_rate": 6.34658975633605e-06, "loss": 0.1737, "step": 2457 }, { "epoch": 5.6701268742791235, "grad_norm": 0.0, "learning_rate": 6.338854441945495e-06, "loss": 0.0993, "step": 2458 }, { "epoch": 5.672433679354095, "grad_norm": 0.0, "learning_rate": 6.331121656463441e-06, "loss": 0.1192, "step": 2459 }, { "epoch": 5.674740484429066, "grad_norm": 0.0, "learning_rate": 6.32339140523125e-06, "loss": 0.1421, "step": 2460 }, { "epoch": 5.677047289504037, "grad_norm": 0.0, "learning_rate": 6.3156636935885344e-06, "loss": 0.0705, "step": 2461 }, { "epoch": 5.679354094579008, "grad_norm": 0.0, "learning_rate": 6.3079385268731575e-06, "loss": 0.1405, "step": 2462 }, { "epoch": 5.681660899653979, "grad_norm": 0.0, "learning_rate": 6.300215910421212e-06, "loss": 0.1079, "step": 2463 }, { "epoch": 5.683967704728951, "grad_norm": 0.0, "learning_rate": 6.292495849567042e-06, "loss": 0.0928, "step": 2464 }, { "epoch": 5.686274509803922, "grad_norm": 0.0, "learning_rate": 6.284778349643221e-06, "loss": 0.1646, "step": 2465 }, { "epoch": 5.688581314878893, "grad_norm": 0.0, "learning_rate": 6.277063415980549e-06, "loss": 0.1286, "step": 2466 }, { "epoch": 5.690888119953864, "grad_norm": 0.0, "learning_rate": 6.269351053908061e-06, "loss": 0.1082, "step": 2467 }, { "epoch": 5.693194925028835, "grad_norm": 0.0, "learning_rate": 6.2616412687530145e-06, "loss": 0.147, "step": 2468 }, { "epoch": 5.695501730103806, "grad_norm": 0.0, "learning_rate": 6.25393406584088e-06, "loss": 0.1253, "step": 2469 }, { "epoch": 5.697808535178777, "grad_norm": 0.0, "learning_rate": 6.246229450495354e-06, "loss": 0.1705, "step": 2470 }, { "epoch": 5.700115340253749, "grad_norm": 0.0, "learning_rate": 6.238527428038339e-06, "loss": 0.1485, "step": 2471 }, { "epoch": 5.70242214532872, "grad_norm": 0.0, "learning_rate": 6.230828003789949e-06, "loss": 0.132, "step": 2472 }, { "epoch": 5.704728950403691, "grad_norm": 0.0, "learning_rate": 6.2231311830684995e-06, "loss": 0.1727, "step": 2473 }, { "epoch": 5.707035755478662, "grad_norm": 0.0, "learning_rate": 6.215436971190518e-06, "loss": 0.1436, "step": 2474 }, { "epoch": 5.709342560553633, "grad_norm": 0.0, "learning_rate": 6.207745373470717e-06, "loss": 0.1542, "step": 2475 }, { "epoch": 5.711649365628604, "grad_norm": 0.0, "learning_rate": 6.200056395222012e-06, "loss": 0.1702, "step": 2476 }, { "epoch": 5.713956170703575, "grad_norm": 0.0, "learning_rate": 6.192370041755505e-06, "loss": 0.0644, "step": 2477 }, { "epoch": 5.716262975778546, "grad_norm": 0.0, "learning_rate": 6.184686318380488e-06, "loss": 0.2113, "step": 2478 }, { "epoch": 5.718569780853517, "grad_norm": 0.0, "learning_rate": 6.177005230404431e-06, "loss": 0.1338, "step": 2479 }, { "epoch": 5.720876585928489, "grad_norm": 0.0, "learning_rate": 6.169326783132994e-06, "loss": 0.1048, "step": 2480 }, { "epoch": 5.72318339100346, "grad_norm": 0.0, "learning_rate": 6.1616509818699975e-06, "loss": 0.1198, "step": 2481 }, { "epoch": 5.7254901960784315, "grad_norm": 0.0, "learning_rate": 6.153977831917451e-06, "loss": 0.159, "step": 2482 }, { "epoch": 5.7277970011534025, "grad_norm": 0.0, "learning_rate": 6.146307338575519e-06, "loss": 0.1333, "step": 2483 }, { "epoch": 5.730103806228374, "grad_norm": 0.0, "learning_rate": 6.138639507142539e-06, "loss": 0.1079, "step": 2484 }, { "epoch": 5.732410611303345, "grad_norm": 0.0, "learning_rate": 6.1309743429150045e-06, "loss": 0.1, "step": 2485 }, { "epoch": 5.734717416378316, "grad_norm": 0.0, "learning_rate": 6.1233118511875765e-06, "loss": 0.1193, "step": 2486 }, { "epoch": 5.737024221453288, "grad_norm": 0.0, "learning_rate": 6.115652037253054e-06, "loss": 0.0936, "step": 2487 }, { "epoch": 5.739331026528259, "grad_norm": 0.0, "learning_rate": 6.107994906402401e-06, "loss": 0.1722, "step": 2488 }, { "epoch": 5.74163783160323, "grad_norm": 0.0, "learning_rate": 6.1003404639247234e-06, "loss": 0.1396, "step": 2489 }, { "epoch": 5.743944636678201, "grad_norm": 0.0, "learning_rate": 6.092688715107265e-06, "loss": 0.129, "step": 2490 }, { "epoch": 5.746251441753172, "grad_norm": 0.0, "learning_rate": 6.085039665235413e-06, "loss": 0.1404, "step": 2491 }, { "epoch": 5.748558246828143, "grad_norm": 0.0, "learning_rate": 6.077393319592697e-06, "loss": 0.0678, "step": 2492 }, { "epoch": 5.750865051903114, "grad_norm": 0.0, "learning_rate": 6.069749683460765e-06, "loss": 0.2005, "step": 2493 }, { "epoch": 5.753171856978085, "grad_norm": 0.0, "learning_rate": 6.062108762119403e-06, "loss": 0.1355, "step": 2494 }, { "epoch": 5.755478662053056, "grad_norm": 0.0, "learning_rate": 6.054470560846524e-06, "loss": 0.1392, "step": 2495 }, { "epoch": 5.757785467128028, "grad_norm": 0.0, "learning_rate": 6.046835084918152e-06, "loss": 0.0526, "step": 2496 }, { "epoch": 5.760092272202999, "grad_norm": 0.0, "learning_rate": 6.039202339608432e-06, "loss": 0.124, "step": 2497 }, { "epoch": 5.76239907727797, "grad_norm": 0.0, "learning_rate": 6.031572330189635e-06, "loss": 0.0895, "step": 2498 }, { "epoch": 5.764705882352941, "grad_norm": 0.0, "learning_rate": 6.023945061932119e-06, "loss": 0.135, "step": 2499 }, { "epoch": 5.767012687427912, "grad_norm": 0.0, "learning_rate": 6.016320540104369e-06, "loss": 0.1245, "step": 2500 }, { "epoch": 5.769319492502883, "grad_norm": 0.0, "learning_rate": 6.008698769972967e-06, "loss": 0.0985, "step": 2501 }, { "epoch": 5.771626297577855, "grad_norm": 0.0, "learning_rate": 6.001079756802592e-06, "loss": 0.0832, "step": 2502 }, { "epoch": 5.773933102652826, "grad_norm": 0.0, "learning_rate": 5.993463505856015e-06, "loss": 0.1478, "step": 2503 }, { "epoch": 5.776239907727797, "grad_norm": 0.0, "learning_rate": 5.9858500223941066e-06, "loss": 0.1114, "step": 2504 }, { "epoch": 5.778546712802768, "grad_norm": 0.0, "learning_rate": 5.978239311675826e-06, "loss": 0.1913, "step": 2505 }, { "epoch": 5.780853517877739, "grad_norm": 0.0, "learning_rate": 5.970631378958208e-06, "loss": 0.1255, "step": 2506 }, { "epoch": 5.78316032295271, "grad_norm": 0.0, "learning_rate": 5.963026229496378e-06, "loss": 0.1287, "step": 2507 }, { "epoch": 5.7854671280276815, "grad_norm": 0.0, "learning_rate": 5.955423868543537e-06, "loss": 0.1836, "step": 2508 }, { "epoch": 5.7877739331026525, "grad_norm": 0.0, "learning_rate": 5.94782430135095e-06, "loss": 0.1376, "step": 2509 }, { "epoch": 5.790080738177624, "grad_norm": 0.0, "learning_rate": 5.940227533167966e-06, "loss": 0.1178, "step": 2510 }, { "epoch": 5.7923875432525955, "grad_norm": 0.0, "learning_rate": 5.932633569242e-06, "loss": 0.161, "step": 2511 }, { "epoch": 5.794694348327567, "grad_norm": 0.0, "learning_rate": 5.925042414818514e-06, "loss": 0.1216, "step": 2512 }, { "epoch": 5.797001153402538, "grad_norm": 0.0, "learning_rate": 5.917454075141049e-06, "loss": 0.0988, "step": 2513 }, { "epoch": 5.799307958477509, "grad_norm": 0.0, "learning_rate": 5.909868555451191e-06, "loss": 0.1342, "step": 2514 }, { "epoch": 5.80161476355248, "grad_norm": 0.0, "learning_rate": 5.902285860988576e-06, "loss": 0.1284, "step": 2515 }, { "epoch": 5.803921568627451, "grad_norm": 0.0, "learning_rate": 5.8947059969908945e-06, "loss": 0.102, "step": 2516 }, { "epoch": 5.806228373702422, "grad_norm": 0.0, "learning_rate": 5.887128968693887e-06, "loss": 0.1243, "step": 2517 }, { "epoch": 5.808535178777394, "grad_norm": 0.0, "learning_rate": 5.879554781331317e-06, "loss": 0.1588, "step": 2518 }, { "epoch": 5.810841983852365, "grad_norm": 0.0, "learning_rate": 5.871983440135005e-06, "loss": 0.1552, "step": 2519 }, { "epoch": 5.813148788927336, "grad_norm": 0.0, "learning_rate": 5.864414950334796e-06, "loss": 0.1465, "step": 2520 }, { "epoch": 5.815455594002307, "grad_norm": 0.0, "learning_rate": 5.8568493171585625e-06, "loss": 0.1216, "step": 2521 }, { "epoch": 5.817762399077278, "grad_norm": 0.0, "learning_rate": 5.849286545832211e-06, "loss": 0.1127, "step": 2522 }, { "epoch": 5.820069204152249, "grad_norm": 0.0, "learning_rate": 5.8417266415796745e-06, "loss": 0.1564, "step": 2523 }, { "epoch": 5.82237600922722, "grad_norm": 0.0, "learning_rate": 5.83416960962289e-06, "loss": 0.1462, "step": 2524 }, { "epoch": 5.824682814302191, "grad_norm": 0.0, "learning_rate": 5.8266154551818225e-06, "loss": 0.1546, "step": 2525 }, { "epoch": 5.826989619377162, "grad_norm": 0.0, "learning_rate": 5.819064183474451e-06, "loss": 0.138, "step": 2526 }, { "epoch": 5.829296424452134, "grad_norm": 0.0, "learning_rate": 5.811515799716754e-06, "loss": 0.1563, "step": 2527 }, { "epoch": 5.831603229527105, "grad_norm": 0.0, "learning_rate": 5.80397030912272e-06, "loss": 0.096, "step": 2528 }, { "epoch": 5.833910034602076, "grad_norm": 0.0, "learning_rate": 5.796427716904347e-06, "loss": 0.1868, "step": 2529 }, { "epoch": 5.836216839677047, "grad_norm": 0.0, "learning_rate": 5.7888880282716155e-06, "loss": 0.1338, "step": 2530 }, { "epoch": 5.838523644752018, "grad_norm": 0.0, "learning_rate": 5.78135124843251e-06, "loss": 0.1738, "step": 2531 }, { "epoch": 5.840830449826989, "grad_norm": 0.0, "learning_rate": 5.773817382593008e-06, "loss": 0.1124, "step": 2532 }, { "epoch": 5.8431372549019605, "grad_norm": 0.0, "learning_rate": 5.766286435957063e-06, "loss": 0.1062, "step": 2533 }, { "epoch": 5.845444059976932, "grad_norm": 0.0, "learning_rate": 5.758758413726626e-06, "loss": 0.1081, "step": 2534 }, { "epoch": 5.8477508650519034, "grad_norm": 0.0, "learning_rate": 5.751233321101617e-06, "loss": 0.0829, "step": 2535 }, { "epoch": 5.8500576701268745, "grad_norm": 0.0, "learning_rate": 5.743711163279941e-06, "loss": 0.1177, "step": 2536 }, { "epoch": 5.8523644752018456, "grad_norm": 0.0, "learning_rate": 5.736191945457463e-06, "loss": 0.0796, "step": 2537 }, { "epoch": 5.854671280276817, "grad_norm": 0.0, "learning_rate": 5.728675672828037e-06, "loss": 0.1223, "step": 2538 }, { "epoch": 5.856978085351788, "grad_norm": 0.0, "learning_rate": 5.72116235058346e-06, "loss": 0.1488, "step": 2539 }, { "epoch": 5.859284890426759, "grad_norm": 0.0, "learning_rate": 5.713651983913506e-06, "loss": 0.1481, "step": 2540 }, { "epoch": 5.86159169550173, "grad_norm": 0.0, "learning_rate": 5.706144578005908e-06, "loss": 0.1741, "step": 2541 }, { "epoch": 5.863898500576701, "grad_norm": 0.0, "learning_rate": 5.698640138046349e-06, "loss": 0.0745, "step": 2542 }, { "epoch": 5.866205305651673, "grad_norm": 0.0, "learning_rate": 5.69113866921846e-06, "loss": 0.1262, "step": 2543 }, { "epoch": 5.868512110726644, "grad_norm": 0.0, "learning_rate": 5.683640176703824e-06, "loss": 0.1171, "step": 2544 }, { "epoch": 5.870818915801615, "grad_norm": 0.0, "learning_rate": 5.6761446656819745e-06, "loss": 0.0939, "step": 2545 }, { "epoch": 5.873125720876586, "grad_norm": 0.0, "learning_rate": 5.668652141330373e-06, "loss": 0.1156, "step": 2546 }, { "epoch": 5.875432525951557, "grad_norm": 0.0, "learning_rate": 5.66116260882442e-06, "loss": 0.1162, "step": 2547 }, { "epoch": 5.877739331026528, "grad_norm": 0.0, "learning_rate": 5.653676073337462e-06, "loss": 0.1024, "step": 2548 }, { "epoch": 5.880046136101499, "grad_norm": 0.0, "learning_rate": 5.646192540040758e-06, "loss": 0.0845, "step": 2549 }, { "epoch": 5.882352941176471, "grad_norm": 0.0, "learning_rate": 5.638712014103507e-06, "loss": 0.1197, "step": 2550 }, { "epoch": 5.884659746251442, "grad_norm": 0.0, "learning_rate": 5.631234500692828e-06, "loss": 0.0753, "step": 2551 }, { "epoch": 5.886966551326413, "grad_norm": 0.0, "learning_rate": 5.623760004973749e-06, "loss": 0.1028, "step": 2552 }, { "epoch": 5.889273356401384, "grad_norm": 0.0, "learning_rate": 5.616288532109225e-06, "loss": 0.0449, "step": 2553 }, { "epoch": 5.891580161476355, "grad_norm": 0.0, "learning_rate": 5.608820087260125e-06, "loss": 0.1727, "step": 2554 }, { "epoch": 5.893886966551326, "grad_norm": 0.0, "learning_rate": 5.6013546755852086e-06, "loss": 0.1287, "step": 2555 }, { "epoch": 5.896193771626297, "grad_norm": 0.0, "learning_rate": 5.5938923022411615e-06, "loss": 0.1475, "step": 2556 }, { "epoch": 5.898500576701268, "grad_norm": 0.0, "learning_rate": 5.586432972382561e-06, "loss": 0.114, "step": 2557 }, { "epoch": 5.900807381776239, "grad_norm": 0.0, "learning_rate": 5.578976691161877e-06, "loss": 0.1175, "step": 2558 }, { "epoch": 5.903114186851211, "grad_norm": 0.0, "learning_rate": 5.571523463729487e-06, "loss": 0.1512, "step": 2559 }, { "epoch": 5.905420991926182, "grad_norm": 0.0, "learning_rate": 5.564073295233645e-06, "loss": 0.1316, "step": 2560 }, { "epoch": 5.9077277970011535, "grad_norm": 0.0, "learning_rate": 5.556626190820497e-06, "loss": 0.0906, "step": 2561 }, { "epoch": 5.9100346020761245, "grad_norm": 0.0, "learning_rate": 5.549182155634076e-06, "loss": 0.0759, "step": 2562 }, { "epoch": 5.912341407151096, "grad_norm": 0.0, "learning_rate": 5.541741194816299e-06, "loss": 0.1043, "step": 2563 }, { "epoch": 5.914648212226067, "grad_norm": 0.0, "learning_rate": 5.5343033135069434e-06, "loss": 0.0509, "step": 2564 }, { "epoch": 5.916955017301038, "grad_norm": 0.0, "learning_rate": 5.526868516843673e-06, "loss": 0.076, "step": 2565 }, { "epoch": 5.91926182237601, "grad_norm": 0.0, "learning_rate": 5.519436809962024e-06, "loss": 0.1838, "step": 2566 }, { "epoch": 5.921568627450981, "grad_norm": 0.0, "learning_rate": 5.512008197995379e-06, "loss": 0.2291, "step": 2567 }, { "epoch": 5.923875432525952, "grad_norm": 0.0, "learning_rate": 5.504582686075002e-06, "loss": 0.1515, "step": 2568 }, { "epoch": 5.926182237600923, "grad_norm": 0.0, "learning_rate": 5.4971602793300134e-06, "loss": 0.1934, "step": 2569 }, { "epoch": 5.928489042675894, "grad_norm": 0.0, "learning_rate": 5.4897409828873745e-06, "loss": 0.1412, "step": 2570 }, { "epoch": 5.930795847750865, "grad_norm": 0.0, "learning_rate": 5.4823248018719184e-06, "loss": 0.085, "step": 2571 }, { "epoch": 5.933102652825836, "grad_norm": 0.0, "learning_rate": 5.47491174140631e-06, "loss": 0.092, "step": 2572 }, { "epoch": 5.935409457900807, "grad_norm": 0.0, "learning_rate": 5.467501806611062e-06, "loss": 0.1537, "step": 2573 }, { "epoch": 5.937716262975779, "grad_norm": 0.0, "learning_rate": 5.460095002604533e-06, "loss": 0.1386, "step": 2574 }, { "epoch": 5.94002306805075, "grad_norm": 0.0, "learning_rate": 5.452691334502922e-06, "loss": 0.1335, "step": 2575 }, { "epoch": 5.942329873125721, "grad_norm": 0.0, "learning_rate": 5.445290807420247e-06, "loss": 0.1451, "step": 2576 }, { "epoch": 5.944636678200692, "grad_norm": 0.0, "learning_rate": 5.43789342646837e-06, "loss": 0.123, "step": 2577 }, { "epoch": 5.946943483275663, "grad_norm": 0.0, "learning_rate": 5.430499196756977e-06, "loss": 0.1452, "step": 2578 }, { "epoch": 5.949250288350634, "grad_norm": 0.0, "learning_rate": 5.423108123393581e-06, "loss": 0.0929, "step": 2579 }, { "epoch": 5.951557093425605, "grad_norm": 0.0, "learning_rate": 5.415720211483499e-06, "loss": 0.1697, "step": 2580 }, { "epoch": 5.953863898500577, "grad_norm": 0.0, "learning_rate": 5.4083354661298816e-06, "loss": 0.1125, "step": 2581 }, { "epoch": 5.956170703575548, "grad_norm": 0.0, "learning_rate": 5.4009538924336864e-06, "loss": 0.0718, "step": 2582 }, { "epoch": 5.958477508650519, "grad_norm": 0.0, "learning_rate": 5.393575495493679e-06, "loss": 0.0888, "step": 2583 }, { "epoch": 5.96078431372549, "grad_norm": 0.0, "learning_rate": 5.386200280406426e-06, "loss": 0.1225, "step": 2584 }, { "epoch": 5.963091118800461, "grad_norm": 0.0, "learning_rate": 5.3788282522663085e-06, "loss": 0.1321, "step": 2585 }, { "epoch": 5.965397923875432, "grad_norm": 0.0, "learning_rate": 5.37145941616549e-06, "loss": 0.1045, "step": 2586 }, { "epoch": 5.9677047289504035, "grad_norm": 0.0, "learning_rate": 5.364093777193944e-06, "loss": 0.1374, "step": 2587 }, { "epoch": 5.9700115340253745, "grad_norm": 0.0, "learning_rate": 5.356731340439432e-06, "loss": 0.093, "step": 2588 }, { "epoch": 5.972318339100346, "grad_norm": 0.0, "learning_rate": 5.349372110987496e-06, "loss": 0.1599, "step": 2589 }, { "epoch": 5.9746251441753175, "grad_norm": 0.0, "learning_rate": 5.342016093921469e-06, "loss": 0.0876, "step": 2590 }, { "epoch": 5.976931949250289, "grad_norm": 0.0, "learning_rate": 5.33466329432247e-06, "loss": 0.1862, "step": 2591 }, { "epoch": 5.97923875432526, "grad_norm": 0.0, "learning_rate": 5.32731371726938e-06, "loss": 0.0925, "step": 2592 }, { "epoch": 5.981545559400231, "grad_norm": 0.0, "learning_rate": 5.319967367838868e-06, "loss": 0.1385, "step": 2593 }, { "epoch": 5.983852364475202, "grad_norm": 0.0, "learning_rate": 5.312624251105374e-06, "loss": 0.1227, "step": 2594 }, { "epoch": 5.986159169550173, "grad_norm": 0.0, "learning_rate": 5.305284372141095e-06, "loss": 0.1392, "step": 2595 }, { "epoch": 5.988465974625144, "grad_norm": 0.0, "learning_rate": 5.297947736015994e-06, "loss": 0.1249, "step": 2596 }, { "epoch": 5.990772779700116, "grad_norm": 0.0, "learning_rate": 5.290614347797802e-06, "loss": 0.2109, "step": 2597 }, { "epoch": 5.993079584775087, "grad_norm": 0.0, "learning_rate": 5.283284212551997e-06, "loss": 0.0994, "step": 2598 }, { "epoch": 5.995386389850058, "grad_norm": 0.0, "learning_rate": 5.275957335341815e-06, "loss": 0.0851, "step": 2599 }, { "epoch": 5.997693194925029, "grad_norm": 0.0, "learning_rate": 5.268633721228247e-06, "loss": 0.0957, "step": 2600 }, { "epoch": 6.0, "grad_norm": 0.0, "learning_rate": 5.2613133752700145e-06, "loss": 0.1511, "step": 2601 }, { "epoch": 6.002306805074971, "grad_norm": 0.0, "learning_rate": 5.253996302523596e-06, "loss": 0.1146, "step": 2602 }, { "epoch": 6.004613610149942, "grad_norm": 0.0, "learning_rate": 5.246682508043206e-06, "loss": 0.1119, "step": 2603 }, { "epoch": 6.006920415224913, "grad_norm": 0.0, "learning_rate": 5.239371996880786e-06, "loss": 0.0804, "step": 2604 }, { "epoch": 6.009227220299885, "grad_norm": 0.0, "learning_rate": 5.232064774086022e-06, "loss": 0.0861, "step": 2605 }, { "epoch": 6.011534025374856, "grad_norm": 0.0, "learning_rate": 5.224760844706324e-06, "loss": 0.0499, "step": 2606 }, { "epoch": 6.013840830449827, "grad_norm": 0.0, "learning_rate": 5.217460213786822e-06, "loss": 0.0922, "step": 2607 }, { "epoch": 6.016147635524798, "grad_norm": 0.0, "learning_rate": 5.210162886370367e-06, "loss": 0.0639, "step": 2608 }, { "epoch": 6.018454440599769, "grad_norm": 0.0, "learning_rate": 5.202868867497542e-06, "loss": 0.0369, "step": 2609 }, { "epoch": 6.02076124567474, "grad_norm": 0.0, "learning_rate": 5.195578162206627e-06, "loss": 0.0579, "step": 2610 }, { "epoch": 6.023068050749711, "grad_norm": 0.0, "learning_rate": 5.188290775533624e-06, "loss": 0.065, "step": 2611 }, { "epoch": 6.0253748558246825, "grad_norm": 0.0, "learning_rate": 5.181006712512245e-06, "loss": 0.0972, "step": 2612 }, { "epoch": 6.027681660899654, "grad_norm": 0.0, "learning_rate": 5.1737259781738934e-06, "loss": 0.0671, "step": 2613 }, { "epoch": 6.0299884659746255, "grad_norm": 0.0, "learning_rate": 5.1664485775476844e-06, "loss": 0.0759, "step": 2614 }, { "epoch": 6.0322952710495965, "grad_norm": 0.0, "learning_rate": 5.159174515660432e-06, "loss": 0.1878, "step": 2615 }, { "epoch": 6.034602076124568, "grad_norm": 0.0, "learning_rate": 5.151903797536631e-06, "loss": 0.0727, "step": 2616 }, { "epoch": 6.036908881199539, "grad_norm": 0.0, "learning_rate": 5.144636428198477e-06, "loss": 0.0616, "step": 2617 }, { "epoch": 6.03921568627451, "grad_norm": 0.0, "learning_rate": 5.137372412665857e-06, "loss": 0.0811, "step": 2618 }, { "epoch": 6.041522491349481, "grad_norm": 0.0, "learning_rate": 5.130111755956327e-06, "loss": 0.0916, "step": 2619 }, { "epoch": 6.043829296424452, "grad_norm": 0.0, "learning_rate": 5.122854463085136e-06, "loss": 0.1091, "step": 2620 }, { "epoch": 6.046136101499424, "grad_norm": 0.0, "learning_rate": 5.115600539065197e-06, "loss": 0.1055, "step": 2621 }, { "epoch": 6.048442906574395, "grad_norm": 0.0, "learning_rate": 5.108349988907111e-06, "loss": 0.0608, "step": 2622 }, { "epoch": 6.050749711649366, "grad_norm": 0.0, "learning_rate": 5.101102817619132e-06, "loss": 0.0828, "step": 2623 }, { "epoch": 6.053056516724337, "grad_norm": 0.0, "learning_rate": 5.093859030207192e-06, "loss": 0.0786, "step": 2624 }, { "epoch": 6.055363321799308, "grad_norm": 0.0, "learning_rate": 5.086618631674888e-06, "loss": 0.0395, "step": 2625 }, { "epoch": 6.057670126874279, "grad_norm": 0.0, "learning_rate": 5.079381627023461e-06, "loss": 0.1019, "step": 2626 }, { "epoch": 6.05997693194925, "grad_norm": 0.0, "learning_rate": 5.072148021251822e-06, "loss": 0.0662, "step": 2627 }, { "epoch": 6.062283737024221, "grad_norm": 0.0, "learning_rate": 5.064917819356532e-06, "loss": 0.0503, "step": 2628 }, { "epoch": 6.064590542099193, "grad_norm": 0.0, "learning_rate": 5.057691026331792e-06, "loss": 0.0785, "step": 2629 }, { "epoch": 6.066897347174164, "grad_norm": 0.0, "learning_rate": 5.05046764716946e-06, "loss": 0.1151, "step": 2630 }, { "epoch": 6.069204152249135, "grad_norm": 0.0, "learning_rate": 5.043247686859024e-06, "loss": 0.0487, "step": 2631 }, { "epoch": 6.071510957324106, "grad_norm": 0.0, "learning_rate": 5.036031150387624e-06, "loss": 0.0943, "step": 2632 }, { "epoch": 6.073817762399077, "grad_norm": 0.0, "learning_rate": 5.0288180427400205e-06, "loss": 0.0708, "step": 2633 }, { "epoch": 6.076124567474048, "grad_norm": 0.0, "learning_rate": 5.021608368898621e-06, "loss": 0.1047, "step": 2634 }, { "epoch": 6.078431372549019, "grad_norm": 0.0, "learning_rate": 5.014402133843443e-06, "loss": 0.0962, "step": 2635 }, { "epoch": 6.08073817762399, "grad_norm": 0.0, "learning_rate": 5.007199342552145e-06, "loss": 0.053, "step": 2636 }, { "epoch": 6.083044982698962, "grad_norm": 0.0, "learning_rate": 5.000000000000003e-06, "loss": 0.0772, "step": 2637 }, { "epoch": 6.085351787773933, "grad_norm": 0.0, "learning_rate": 4.9928041111599e-06, "loss": 0.094, "step": 2638 }, { "epoch": 6.087658592848904, "grad_norm": 0.0, "learning_rate": 4.985611681002347e-06, "loss": 0.072, "step": 2639 }, { "epoch": 6.0899653979238755, "grad_norm": 0.0, "learning_rate": 4.978422714495465e-06, "loss": 0.0748, "step": 2640 }, { "epoch": 6.0922722029988465, "grad_norm": 0.0, "learning_rate": 4.971237216604967e-06, "loss": 0.085, "step": 2641 }, { "epoch": 6.094579008073818, "grad_norm": 0.0, "learning_rate": 4.964055192294187e-06, "loss": 0.0766, "step": 2642 }, { "epoch": 6.096885813148789, "grad_norm": 0.0, "learning_rate": 4.956876646524059e-06, "loss": 0.0768, "step": 2643 }, { "epoch": 6.09919261822376, "grad_norm": 0.0, "learning_rate": 4.949701584253103e-06, "loss": 0.1084, "step": 2644 }, { "epoch": 6.101499423298732, "grad_norm": 0.0, "learning_rate": 4.942530010437435e-06, "loss": 0.1116, "step": 2645 }, { "epoch": 6.103806228373703, "grad_norm": 0.0, "learning_rate": 4.935361930030774e-06, "loss": 0.0626, "step": 2646 }, { "epoch": 6.106113033448674, "grad_norm": 0.0, "learning_rate": 4.92819734798441e-06, "loss": 0.054, "step": 2647 }, { "epoch": 6.108419838523645, "grad_norm": 0.0, "learning_rate": 4.921036269247225e-06, "loss": 0.0926, "step": 2648 }, { "epoch": 6.110726643598616, "grad_norm": 0.0, "learning_rate": 4.9138786987656865e-06, "loss": 0.0991, "step": 2649 }, { "epoch": 6.113033448673587, "grad_norm": 0.0, "learning_rate": 4.906724641483822e-06, "loss": 0.065, "step": 2650 }, { "epoch": 6.115340253748558, "grad_norm": 0.0, "learning_rate": 4.899574102343247e-06, "loss": 0.0881, "step": 2651 }, { "epoch": 6.117647058823529, "grad_norm": 0.0, "learning_rate": 4.892427086283147e-06, "loss": 0.0662, "step": 2652 }, { "epoch": 6.119953863898501, "grad_norm": 0.0, "learning_rate": 4.885283598240259e-06, "loss": 0.0465, "step": 2653 }, { "epoch": 6.122260668973472, "grad_norm": 0.0, "learning_rate": 4.878143643148899e-06, "loss": 0.0947, "step": 2654 }, { "epoch": 6.124567474048443, "grad_norm": 0.0, "learning_rate": 4.87100722594094e-06, "loss": 0.0735, "step": 2655 }, { "epoch": 6.126874279123414, "grad_norm": 0.0, "learning_rate": 4.863874351545803e-06, "loss": 0.1198, "step": 2656 }, { "epoch": 6.129181084198385, "grad_norm": 0.0, "learning_rate": 4.856745024890466e-06, "loss": 0.0644, "step": 2657 }, { "epoch": 6.131487889273356, "grad_norm": 0.0, "learning_rate": 4.849619250899458e-06, "loss": 0.0615, "step": 2658 }, { "epoch": 6.133794694348327, "grad_norm": 0.0, "learning_rate": 4.8424970344948585e-06, "loss": 0.1107, "step": 2659 }, { "epoch": 6.136101499423299, "grad_norm": 0.0, "learning_rate": 4.8353783805962776e-06, "loss": 0.0549, "step": 2660 }, { "epoch": 6.13840830449827, "grad_norm": 0.0, "learning_rate": 4.8282632941208725e-06, "loss": 0.1303, "step": 2661 }, { "epoch": 6.140715109573241, "grad_norm": 0.0, "learning_rate": 4.821151779983343e-06, "loss": 0.0795, "step": 2662 }, { "epoch": 6.143021914648212, "grad_norm": 0.0, "learning_rate": 4.814043843095903e-06, "loss": 0.0781, "step": 2663 }, { "epoch": 6.145328719723183, "grad_norm": 0.0, "learning_rate": 4.806939488368308e-06, "loss": 0.0848, "step": 2664 }, { "epoch": 6.1476355247981544, "grad_norm": 0.0, "learning_rate": 4.799838720707847e-06, "loss": 0.0651, "step": 2665 }, { "epoch": 6.1499423298731255, "grad_norm": 0.0, "learning_rate": 4.792741545019307e-06, "loss": 0.0796, "step": 2666 }, { "epoch": 6.1522491349480966, "grad_norm": 0.0, "learning_rate": 4.78564796620502e-06, "loss": 0.1282, "step": 2667 }, { "epoch": 6.154555940023068, "grad_norm": 0.0, "learning_rate": 4.7785579891648185e-06, "loss": 0.1346, "step": 2668 }, { "epoch": 6.1568627450980395, "grad_norm": 0.0, "learning_rate": 4.771471618796043e-06, "loss": 0.1288, "step": 2669 }, { "epoch": 6.159169550173011, "grad_norm": 0.0, "learning_rate": 4.764388859993556e-06, "loss": 0.1429, "step": 2670 }, { "epoch": 6.161476355247982, "grad_norm": 0.0, "learning_rate": 4.757309717649723e-06, "loss": 0.0846, "step": 2671 }, { "epoch": 6.163783160322953, "grad_norm": 0.0, "learning_rate": 4.7502341966544e-06, "loss": 0.0925, "step": 2672 }, { "epoch": 6.166089965397924, "grad_norm": 0.0, "learning_rate": 4.743162301894952e-06, "loss": 0.082, "step": 2673 }, { "epoch": 6.168396770472895, "grad_norm": 0.0, "learning_rate": 4.736094038256244e-06, "loss": 0.1284, "step": 2674 }, { "epoch": 6.170703575547866, "grad_norm": 0.0, "learning_rate": 4.729029410620615e-06, "loss": 0.0698, "step": 2675 }, { "epoch": 6.173010380622838, "grad_norm": 0.0, "learning_rate": 4.7219684238679066e-06, "loss": 0.1155, "step": 2676 }, { "epoch": 6.175317185697809, "grad_norm": 0.0, "learning_rate": 4.714911082875446e-06, "loss": 0.0439, "step": 2677 }, { "epoch": 6.17762399077278, "grad_norm": 0.0, "learning_rate": 4.707857392518032e-06, "loss": 0.076, "step": 2678 }, { "epoch": 6.179930795847751, "grad_norm": 0.0, "learning_rate": 4.700807357667953e-06, "loss": 0.0895, "step": 2679 }, { "epoch": 6.182237600922722, "grad_norm": 0.0, "learning_rate": 4.693760983194959e-06, "loss": 0.038, "step": 2680 }, { "epoch": 6.184544405997693, "grad_norm": 0.0, "learning_rate": 4.686718273966291e-06, "loss": 0.0556, "step": 2681 }, { "epoch": 6.186851211072664, "grad_norm": 0.0, "learning_rate": 4.679679234846636e-06, "loss": 0.0768, "step": 2682 }, { "epoch": 6.189158016147635, "grad_norm": 0.0, "learning_rate": 4.6726438706981644e-06, "loss": 0.0554, "step": 2683 }, { "epoch": 6.191464821222607, "grad_norm": 0.0, "learning_rate": 4.665612186380495e-06, "loss": 0.0765, "step": 2684 }, { "epoch": 6.193771626297578, "grad_norm": 0.0, "learning_rate": 4.658584186750713e-06, "loss": 0.0688, "step": 2685 }, { "epoch": 6.196078431372549, "grad_norm": 0.0, "learning_rate": 4.65155987666336e-06, "loss": 0.0694, "step": 2686 }, { "epoch": 6.19838523644752, "grad_norm": 0.0, "learning_rate": 4.644539260970417e-06, "loss": 0.0388, "step": 2687 }, { "epoch": 6.200692041522491, "grad_norm": 0.0, "learning_rate": 4.637522344521323e-06, "loss": 0.0528, "step": 2688 }, { "epoch": 6.202998846597462, "grad_norm": 0.0, "learning_rate": 4.630509132162967e-06, "loss": 0.0554, "step": 2689 }, { "epoch": 6.205305651672433, "grad_norm": 0.0, "learning_rate": 4.623499628739663e-06, "loss": 0.0824, "step": 2690 }, { "epoch": 6.2076124567474045, "grad_norm": 0.0, "learning_rate": 4.616493839093179e-06, "loss": 0.1451, "step": 2691 }, { "epoch": 6.209919261822376, "grad_norm": 0.0, "learning_rate": 4.609491768062705e-06, "loss": 0.1045, "step": 2692 }, { "epoch": 6.2122260668973475, "grad_norm": 0.0, "learning_rate": 4.6024934204848745e-06, "loss": 0.1097, "step": 2693 }, { "epoch": 6.2145328719723185, "grad_norm": 0.0, "learning_rate": 4.595498801193736e-06, "loss": 0.1139, "step": 2694 }, { "epoch": 6.21683967704729, "grad_norm": 0.0, "learning_rate": 4.588507915020778e-06, "loss": 0.0265, "step": 2695 }, { "epoch": 6.219146482122261, "grad_norm": 0.0, "learning_rate": 4.581520766794893e-06, "loss": 0.0559, "step": 2696 }, { "epoch": 6.221453287197232, "grad_norm": 0.0, "learning_rate": 4.5745373613424075e-06, "loss": 0.0957, "step": 2697 }, { "epoch": 6.223760092272203, "grad_norm": 0.0, "learning_rate": 4.567557703487051e-06, "loss": 0.0644, "step": 2698 }, { "epoch": 6.226066897347174, "grad_norm": 0.0, "learning_rate": 4.560581798049977e-06, "loss": 0.0998, "step": 2699 }, { "epoch": 6.228373702422146, "grad_norm": 0.0, "learning_rate": 4.5536096498497295e-06, "loss": 0.0674, "step": 2700 }, { "epoch": 6.230680507497117, "grad_norm": 0.0, "learning_rate": 4.546641263702271e-06, "loss": 0.0434, "step": 2701 }, { "epoch": 6.232987312572088, "grad_norm": 0.0, "learning_rate": 4.539676644420966e-06, "loss": 0.0628, "step": 2702 }, { "epoch": 6.235294117647059, "grad_norm": 0.0, "learning_rate": 4.532715796816565e-06, "loss": 0.0968, "step": 2703 }, { "epoch": 6.23760092272203, "grad_norm": 0.0, "learning_rate": 4.525758725697226e-06, "loss": 0.0926, "step": 2704 }, { "epoch": 6.239907727797001, "grad_norm": 0.0, "learning_rate": 4.518805435868492e-06, "loss": 0.0519, "step": 2705 }, { "epoch": 6.242214532871972, "grad_norm": 0.0, "learning_rate": 4.511855932133289e-06, "loss": 0.0633, "step": 2706 }, { "epoch": 6.244521337946943, "grad_norm": 0.0, "learning_rate": 4.504910219291941e-06, "loss": 0.1143, "step": 2707 }, { "epoch": 6.246828143021915, "grad_norm": 0.0, "learning_rate": 4.497968302142146e-06, "loss": 0.0863, "step": 2708 }, { "epoch": 6.249134948096886, "grad_norm": 0.0, "learning_rate": 4.491030185478976e-06, "loss": 0.093, "step": 2709 }, { "epoch": 6.251441753171857, "grad_norm": 0.0, "learning_rate": 4.484095874094885e-06, "loss": 0.0933, "step": 2710 }, { "epoch": 6.253748558246828, "grad_norm": 0.0, "learning_rate": 4.4771653727797e-06, "loss": 0.0907, "step": 2711 }, { "epoch": 6.256055363321799, "grad_norm": 0.0, "learning_rate": 4.470238686320606e-06, "loss": 0.0515, "step": 2712 }, { "epoch": 6.25836216839677, "grad_norm": 0.0, "learning_rate": 4.46331581950216e-06, "loss": 0.0852, "step": 2713 }, { "epoch": 6.260668973471741, "grad_norm": 0.0, "learning_rate": 4.4563967771062856e-06, "loss": 0.0479, "step": 2714 }, { "epoch": 6.262975778546712, "grad_norm": 0.0, "learning_rate": 4.449481563912252e-06, "loss": 0.0899, "step": 2715 }, { "epoch": 6.265282583621684, "grad_norm": 0.0, "learning_rate": 4.442570184696694e-06, "loss": 0.0724, "step": 2716 }, { "epoch": 6.267589388696655, "grad_norm": 0.0, "learning_rate": 4.435662644233594e-06, "loss": 0.0957, "step": 2717 }, { "epoch": 6.269896193771626, "grad_norm": 0.0, "learning_rate": 4.428758947294278e-06, "loss": 0.0864, "step": 2718 }, { "epoch": 6.2722029988465975, "grad_norm": 0.0, "learning_rate": 4.4218590986474276e-06, "loss": 0.0556, "step": 2719 }, { "epoch": 6.2745098039215685, "grad_norm": 0.0, "learning_rate": 4.4149631030590625e-06, "loss": 0.0482, "step": 2720 }, { "epoch": 6.27681660899654, "grad_norm": 0.0, "learning_rate": 4.408070965292534e-06, "loss": 0.0958, "step": 2721 }, { "epoch": 6.279123414071511, "grad_norm": 0.0, "learning_rate": 4.4011826901085346e-06, "loss": 0.0468, "step": 2722 }, { "epoch": 6.281430219146483, "grad_norm": 0.0, "learning_rate": 4.394298282265095e-06, "loss": 0.0719, "step": 2723 }, { "epoch": 6.283737024221454, "grad_norm": 0.0, "learning_rate": 4.387417746517557e-06, "loss": 0.1054, "step": 2724 }, { "epoch": 6.286043829296425, "grad_norm": 0.0, "learning_rate": 4.380541087618606e-06, "loss": 0.1578, "step": 2725 }, { "epoch": 6.288350634371396, "grad_norm": 0.0, "learning_rate": 4.373668310318243e-06, "loss": 0.0914, "step": 2726 }, { "epoch": 6.290657439446367, "grad_norm": 0.0, "learning_rate": 4.3667994193637794e-06, "loss": 0.0572, "step": 2727 }, { "epoch": 6.292964244521338, "grad_norm": 0.0, "learning_rate": 4.359934419499859e-06, "loss": 0.1185, "step": 2728 }, { "epoch": 6.295271049596309, "grad_norm": 0.0, "learning_rate": 4.353073315468417e-06, "loss": 0.0588, "step": 2729 }, { "epoch": 6.29757785467128, "grad_norm": 0.0, "learning_rate": 4.34621611200872e-06, "loss": 0.1343, "step": 2730 }, { "epoch": 6.299884659746251, "grad_norm": 0.0, "learning_rate": 4.339362813857321e-06, "loss": 0.0782, "step": 2731 }, { "epoch": 6.302191464821223, "grad_norm": 0.0, "learning_rate": 4.3325134257480905e-06, "loss": 0.0931, "step": 2732 }, { "epoch": 6.304498269896194, "grad_norm": 0.0, "learning_rate": 4.325667952412184e-06, "loss": 0.0548, "step": 2733 }, { "epoch": 6.306805074971165, "grad_norm": 0.0, "learning_rate": 4.318826398578063e-06, "loss": 0.0935, "step": 2734 }, { "epoch": 6.309111880046136, "grad_norm": 0.0, "learning_rate": 4.311988768971484e-06, "loss": 0.0824, "step": 2735 }, { "epoch": 6.311418685121107, "grad_norm": 0.0, "learning_rate": 4.305155068315481e-06, "loss": 0.0388, "step": 2736 }, { "epoch": 6.313725490196078, "grad_norm": 0.0, "learning_rate": 4.298325301330383e-06, "loss": 0.101, "step": 2737 }, { "epoch": 6.316032295271049, "grad_norm": 0.0, "learning_rate": 4.2914994727338e-06, "loss": 0.0764, "step": 2738 }, { "epoch": 6.318339100346021, "grad_norm": 0.0, "learning_rate": 4.284677587240625e-06, "loss": 0.0703, "step": 2739 }, { "epoch": 6.320645905420992, "grad_norm": 0.0, "learning_rate": 4.277859649563021e-06, "loss": 0.0704, "step": 2740 }, { "epoch": 6.322952710495963, "grad_norm": 0.0, "learning_rate": 4.27104566441042e-06, "loss": 0.0462, "step": 2741 }, { "epoch": 6.325259515570934, "grad_norm": 0.0, "learning_rate": 4.264235636489542e-06, "loss": 0.0791, "step": 2742 }, { "epoch": 6.327566320645905, "grad_norm": 0.0, "learning_rate": 4.257429570504353e-06, "loss": 0.1206, "step": 2743 }, { "epoch": 6.3298731257208765, "grad_norm": 0.0, "learning_rate": 4.250627471156094e-06, "loss": 0.0771, "step": 2744 }, { "epoch": 6.3321799307958475, "grad_norm": 0.0, "learning_rate": 4.2438293431432665e-06, "loss": 0.1001, "step": 2745 }, { "epoch": 6.334486735870819, "grad_norm": 0.0, "learning_rate": 4.237035191161621e-06, "loss": 0.0646, "step": 2746 }, { "epoch": 6.33679354094579, "grad_norm": 0.0, "learning_rate": 4.23024501990417e-06, "loss": 0.0403, "step": 2747 }, { "epoch": 6.339100346020762, "grad_norm": 0.0, "learning_rate": 4.223458834061175e-06, "loss": 0.1073, "step": 2748 }, { "epoch": 6.341407151095733, "grad_norm": 0.0, "learning_rate": 4.216676638320135e-06, "loss": 0.084, "step": 2749 }, { "epoch": 6.343713956170704, "grad_norm": 0.0, "learning_rate": 4.209898437365805e-06, "loss": 0.0622, "step": 2750 }, { "epoch": 6.346020761245675, "grad_norm": 0.0, "learning_rate": 4.203124235880179e-06, "loss": 0.0858, "step": 2751 }, { "epoch": 6.348327566320646, "grad_norm": 0.0, "learning_rate": 4.196354038542476e-06, "loss": 0.1237, "step": 2752 }, { "epoch": 6.350634371395617, "grad_norm": 0.0, "learning_rate": 4.189587850029169e-06, "loss": 0.082, "step": 2753 }, { "epoch": 6.352941176470588, "grad_norm": 0.0, "learning_rate": 4.182825675013945e-06, "loss": 0.0733, "step": 2754 }, { "epoch": 6.35524798154556, "grad_norm": 0.0, "learning_rate": 4.176067518167723e-06, "loss": 0.0226, "step": 2755 }, { "epoch": 6.357554786620531, "grad_norm": 0.0, "learning_rate": 4.169313384158653e-06, "loss": 0.0708, "step": 2756 }, { "epoch": 6.359861591695502, "grad_norm": 0.0, "learning_rate": 4.162563277652104e-06, "loss": 0.0794, "step": 2757 }, { "epoch": 6.362168396770473, "grad_norm": 0.0, "learning_rate": 4.1558172033106535e-06, "loss": 0.1407, "step": 2758 }, { "epoch": 6.364475201845444, "grad_norm": 0.0, "learning_rate": 4.1490751657941055e-06, "loss": 0.1244, "step": 2759 }, { "epoch": 6.366782006920415, "grad_norm": 0.0, "learning_rate": 4.142337169759472e-06, "loss": 0.0783, "step": 2760 }, { "epoch": 6.369088811995386, "grad_norm": 0.0, "learning_rate": 4.135603219860971e-06, "loss": 0.0665, "step": 2761 }, { "epoch": 6.371395617070357, "grad_norm": 0.0, "learning_rate": 4.128873320750027e-06, "loss": 0.0866, "step": 2762 }, { "epoch": 6.373702422145329, "grad_norm": 0.0, "learning_rate": 4.12214747707527e-06, "loss": 0.0985, "step": 2763 }, { "epoch": 6.3760092272203, "grad_norm": 0.0, "learning_rate": 4.1154256934825195e-06, "loss": 0.0969, "step": 2764 }, { "epoch": 6.378316032295271, "grad_norm": 0.0, "learning_rate": 4.108707974614804e-06, "loss": 0.0619, "step": 2765 }, { "epoch": 6.380622837370242, "grad_norm": 0.0, "learning_rate": 4.101994325112332e-06, "loss": 0.0491, "step": 2766 }, { "epoch": 6.382929642445213, "grad_norm": 0.0, "learning_rate": 4.095284749612504e-06, "loss": 0.0789, "step": 2767 }, { "epoch": 6.385236447520184, "grad_norm": 0.0, "learning_rate": 4.0885792527499094e-06, "loss": 0.0918, "step": 2768 }, { "epoch": 6.387543252595155, "grad_norm": 0.0, "learning_rate": 4.0818778391563255e-06, "loss": 0.0719, "step": 2769 }, { "epoch": 6.3898500576701265, "grad_norm": 0.0, "learning_rate": 4.075180513460695e-06, "loss": 0.0826, "step": 2770 }, { "epoch": 6.392156862745098, "grad_norm": 0.0, "learning_rate": 4.068487280289146e-06, "loss": 0.1064, "step": 2771 }, { "epoch": 6.3944636678200695, "grad_norm": 0.0, "learning_rate": 4.061798144264986e-06, "loss": 0.0847, "step": 2772 }, { "epoch": 6.3967704728950405, "grad_norm": 0.0, "learning_rate": 4.055113110008675e-06, "loss": 0.1167, "step": 2773 }, { "epoch": 6.399077277970012, "grad_norm": 0.0, "learning_rate": 4.048432182137855e-06, "loss": 0.1066, "step": 2774 }, { "epoch": 6.401384083044983, "grad_norm": 0.0, "learning_rate": 4.041755365267323e-06, "loss": 0.1408, "step": 2775 }, { "epoch": 6.403690888119954, "grad_norm": 0.0, "learning_rate": 4.0350826640090475e-06, "loss": 0.0874, "step": 2776 }, { "epoch": 6.405997693194925, "grad_norm": 0.0, "learning_rate": 4.028414082972141e-06, "loss": 0.0697, "step": 2777 }, { "epoch": 6.408304498269896, "grad_norm": 0.0, "learning_rate": 4.021749626762869e-06, "loss": 0.1271, "step": 2778 }, { "epoch": 6.410611303344868, "grad_norm": 0.0, "learning_rate": 4.015089299984666e-06, "loss": 0.1097, "step": 2779 }, { "epoch": 6.412918108419839, "grad_norm": 0.0, "learning_rate": 4.00843310723809e-06, "loss": 0.0493, "step": 2780 }, { "epoch": 6.41522491349481, "grad_norm": 0.0, "learning_rate": 4.001781053120863e-06, "loss": 0.0921, "step": 2781 }, { "epoch": 6.417531718569781, "grad_norm": 0.0, "learning_rate": 3.995133142227843e-06, "loss": 0.0508, "step": 2782 }, { "epoch": 6.419838523644752, "grad_norm": 0.0, "learning_rate": 3.988489379151016e-06, "loss": 0.0959, "step": 2783 }, { "epoch": 6.422145328719723, "grad_norm": 0.0, "learning_rate": 3.981849768479516e-06, "loss": 0.0906, "step": 2784 }, { "epoch": 6.424452133794694, "grad_norm": 0.0, "learning_rate": 3.975214314799607e-06, "loss": 0.0485, "step": 2785 }, { "epoch": 6.426758938869666, "grad_norm": 0.0, "learning_rate": 3.9685830226946695e-06, "loss": 0.0884, "step": 2786 }, { "epoch": 6.429065743944637, "grad_norm": 0.0, "learning_rate": 3.961955896745224e-06, "loss": 0.0757, "step": 2787 }, { "epoch": 6.431372549019608, "grad_norm": 0.0, "learning_rate": 3.95533294152891e-06, "loss": 0.119, "step": 2788 }, { "epoch": 6.433679354094579, "grad_norm": 0.0, "learning_rate": 3.9487141616204804e-06, "loss": 0.112, "step": 2789 }, { "epoch": 6.43598615916955, "grad_norm": 0.0, "learning_rate": 3.942099561591802e-06, "loss": 0.1185, "step": 2790 }, { "epoch": 6.438292964244521, "grad_norm": 0.0, "learning_rate": 3.9354891460118695e-06, "loss": 0.0595, "step": 2791 }, { "epoch": 6.440599769319492, "grad_norm": 0.0, "learning_rate": 3.928882919446767e-06, "loss": 0.0925, "step": 2792 }, { "epoch": 6.442906574394463, "grad_norm": 0.0, "learning_rate": 3.922280886459701e-06, "loss": 0.0893, "step": 2793 }, { "epoch": 6.445213379469434, "grad_norm": 0.0, "learning_rate": 3.915683051610979e-06, "loss": 0.0476, "step": 2794 }, { "epoch": 6.447520184544406, "grad_norm": 0.0, "learning_rate": 3.909089419457997e-06, "loss": 0.0597, "step": 2795 }, { "epoch": 6.449826989619377, "grad_norm": 0.0, "learning_rate": 3.902499994555261e-06, "loss": 0.0745, "step": 2796 }, { "epoch": 6.4521337946943484, "grad_norm": 0.0, "learning_rate": 3.89591478145437e-06, "loss": 0.0743, "step": 2797 }, { "epoch": 6.4544405997693195, "grad_norm": 0.0, "learning_rate": 3.889333784704003e-06, "loss": 0.1137, "step": 2798 }, { "epoch": 6.4567474048442905, "grad_norm": 0.0, "learning_rate": 3.882757008849936e-06, "loss": 0.0642, "step": 2799 }, { "epoch": 6.459054209919262, "grad_norm": 0.0, "learning_rate": 3.876184458435031e-06, "loss": 0.0735, "step": 2800 }, { "epoch": 6.461361014994233, "grad_norm": 0.0, "learning_rate": 3.8696161379992225e-06, "loss": 0.0654, "step": 2801 }, { "epoch": 6.463667820069205, "grad_norm": 0.0, "learning_rate": 3.8630520520795275e-06, "loss": 0.0759, "step": 2802 }, { "epoch": 6.465974625144176, "grad_norm": 0.0, "learning_rate": 3.856492205210043e-06, "loss": 0.0688, "step": 2803 }, { "epoch": 6.468281430219147, "grad_norm": 0.0, "learning_rate": 3.849936601921928e-06, "loss": 0.0633, "step": 2804 }, { "epoch": 6.470588235294118, "grad_norm": 0.0, "learning_rate": 3.8433852467434175e-06, "loss": 0.103, "step": 2805 }, { "epoch": 6.472895040369089, "grad_norm": 0.0, "learning_rate": 3.836838144199816e-06, "loss": 0.1367, "step": 2806 }, { "epoch": 6.47520184544406, "grad_norm": 0.0, "learning_rate": 3.830295298813475e-06, "loss": 0.0702, "step": 2807 }, { "epoch": 6.477508650519031, "grad_norm": 0.0, "learning_rate": 3.823756715103822e-06, "loss": 0.1265, "step": 2808 }, { "epoch": 6.479815455594002, "grad_norm": 0.0, "learning_rate": 3.8172223975873355e-06, "loss": 0.118, "step": 2809 }, { "epoch": 6.482122260668973, "grad_norm": 0.0, "learning_rate": 3.8106923507775396e-06, "loss": 0.0743, "step": 2810 }, { "epoch": 6.484429065743945, "grad_norm": 0.0, "learning_rate": 3.804166579185018e-06, "loss": 0.0862, "step": 2811 }, { "epoch": 6.486735870818916, "grad_norm": 0.0, "learning_rate": 3.797645087317401e-06, "loss": 0.0808, "step": 2812 }, { "epoch": 6.489042675893887, "grad_norm": 0.0, "learning_rate": 3.7911278796793518e-06, "loss": 0.0719, "step": 2813 }, { "epoch": 6.491349480968858, "grad_norm": 0.0, "learning_rate": 3.78461496077259e-06, "loss": 0.0828, "step": 2814 }, { "epoch": 6.493656286043829, "grad_norm": 0.0, "learning_rate": 3.7781063350958592e-06, "loss": 0.0877, "step": 2815 }, { "epoch": 6.4959630911188, "grad_norm": 0.0, "learning_rate": 3.771602007144948e-06, "loss": 0.0477, "step": 2816 }, { "epoch": 6.498269896193771, "grad_norm": 0.0, "learning_rate": 3.7651019814126656e-06, "loss": 0.0818, "step": 2817 }, { "epoch": 6.500576701268743, "grad_norm": 0.0, "learning_rate": 3.758606262388859e-06, "loss": 0.0762, "step": 2818 }, { "epoch": 6.502883506343714, "grad_norm": 0.0, "learning_rate": 3.7521148545604003e-06, "loss": 0.101, "step": 2819 }, { "epoch": 6.505190311418685, "grad_norm": 0.0, "learning_rate": 3.7456277624111725e-06, "loss": 0.0609, "step": 2820 }, { "epoch": 6.507497116493656, "grad_norm": 0.0, "learning_rate": 3.739144990422089e-06, "loss": 0.0868, "step": 2821 }, { "epoch": 6.509803921568627, "grad_norm": 0.0, "learning_rate": 3.7326665430710798e-06, "loss": 0.043, "step": 2822 }, { "epoch": 6.5121107266435985, "grad_norm": 0.0, "learning_rate": 3.726192424833075e-06, "loss": 0.081, "step": 2823 }, { "epoch": 6.5144175317185695, "grad_norm": 0.0, "learning_rate": 3.719722640180029e-06, "loss": 0.05, "step": 2824 }, { "epoch": 6.516724336793541, "grad_norm": 0.0, "learning_rate": 3.7132571935808924e-06, "loss": 0.075, "step": 2825 }, { "epoch": 6.519031141868512, "grad_norm": 0.0, "learning_rate": 3.7067960895016277e-06, "loss": 0.046, "step": 2826 }, { "epoch": 6.521337946943484, "grad_norm": 0.0, "learning_rate": 3.7003393324051874e-06, "loss": 0.0445, "step": 2827 }, { "epoch": 6.523644752018455, "grad_norm": 0.0, "learning_rate": 3.6938869267515343e-06, "loss": 0.0892, "step": 2828 }, { "epoch": 6.525951557093426, "grad_norm": 0.0, "learning_rate": 3.687438876997612e-06, "loss": 0.0905, "step": 2829 }, { "epoch": 6.528258362168397, "grad_norm": 0.0, "learning_rate": 3.680995187597365e-06, "loss": 0.0718, "step": 2830 }, { "epoch": 6.530565167243368, "grad_norm": 0.0, "learning_rate": 3.6745558630017254e-06, "loss": 0.0897, "step": 2831 }, { "epoch": 6.532871972318339, "grad_norm": 0.0, "learning_rate": 3.6681209076586035e-06, "loss": 0.1317, "step": 2832 }, { "epoch": 6.53517877739331, "grad_norm": 0.0, "learning_rate": 3.661690326012897e-06, "loss": 0.1293, "step": 2833 }, { "epoch": 6.537485582468282, "grad_norm": 0.0, "learning_rate": 3.6552641225064843e-06, "loss": 0.1249, "step": 2834 }, { "epoch": 6.539792387543253, "grad_norm": 0.0, "learning_rate": 3.6488423015782128e-06, "loss": 0.0769, "step": 2835 }, { "epoch": 6.542099192618224, "grad_norm": 0.0, "learning_rate": 3.6424248676639075e-06, "loss": 0.0704, "step": 2836 }, { "epoch": 6.544405997693195, "grad_norm": 0.0, "learning_rate": 3.636011825196365e-06, "loss": 0.0532, "step": 2837 }, { "epoch": 6.546712802768166, "grad_norm": 0.0, "learning_rate": 3.6296031786053455e-06, "loss": 0.1, "step": 2838 }, { "epoch": 6.549019607843137, "grad_norm": 0.0, "learning_rate": 3.6231989323175665e-06, "loss": 0.0657, "step": 2839 }, { "epoch": 6.551326412918108, "grad_norm": 0.0, "learning_rate": 3.6167990907567207e-06, "loss": 0.0864, "step": 2840 }, { "epoch": 6.553633217993079, "grad_norm": 0.0, "learning_rate": 3.610403658343443e-06, "loss": 0.0742, "step": 2841 }, { "epoch": 6.555940023068051, "grad_norm": 0.0, "learning_rate": 3.6040126394953334e-06, "loss": 0.079, "step": 2842 }, { "epoch": 6.558246828143022, "grad_norm": 0.0, "learning_rate": 3.5976260386269423e-06, "loss": 0.0699, "step": 2843 }, { "epoch": 6.560553633217993, "grad_norm": 0.0, "learning_rate": 3.591243860149759e-06, "loss": 0.0793, "step": 2844 }, { "epoch": 6.562860438292964, "grad_norm": 0.0, "learning_rate": 3.5848661084722302e-06, "loss": 0.0901, "step": 2845 }, { "epoch": 6.565167243367935, "grad_norm": 0.0, "learning_rate": 3.57849278799974e-06, "loss": 0.1013, "step": 2846 }, { "epoch": 6.567474048442906, "grad_norm": 0.0, "learning_rate": 3.5721239031346067e-06, "loss": 0.0519, "step": 2847 }, { "epoch": 6.569780853517877, "grad_norm": 0.0, "learning_rate": 3.565759458276091e-06, "loss": 0.0601, "step": 2848 }, { "epoch": 6.572087658592849, "grad_norm": 0.0, "learning_rate": 3.5593994578203893e-06, "loss": 0.0346, "step": 2849 }, { "epoch": 6.57439446366782, "grad_norm": 0.0, "learning_rate": 3.5530439061606202e-06, "loss": 0.0856, "step": 2850 }, { "epoch": 6.5767012687427915, "grad_norm": 0.0, "learning_rate": 3.546692807686829e-06, "loss": 0.0592, "step": 2851 }, { "epoch": 6.5790080738177625, "grad_norm": 0.0, "learning_rate": 3.540346166785994e-06, "loss": 0.0783, "step": 2852 }, { "epoch": 6.581314878892734, "grad_norm": 0.0, "learning_rate": 3.534003987842005e-06, "loss": 0.0626, "step": 2853 }, { "epoch": 6.583621683967705, "grad_norm": 0.0, "learning_rate": 3.527666275235677e-06, "loss": 0.0913, "step": 2854 }, { "epoch": 6.585928489042676, "grad_norm": 0.0, "learning_rate": 3.5213330333447347e-06, "loss": 0.114, "step": 2855 }, { "epoch": 6.588235294117647, "grad_norm": 0.0, "learning_rate": 3.5150042665438233e-06, "loss": 0.0742, "step": 2856 }, { "epoch": 6.590542099192618, "grad_norm": 0.0, "learning_rate": 3.5086799792044812e-06, "loss": 0.1008, "step": 2857 }, { "epoch": 6.59284890426759, "grad_norm": 0.0, "learning_rate": 3.5023601756951665e-06, "loss": 0.0827, "step": 2858 }, { "epoch": 6.595155709342561, "grad_norm": 0.0, "learning_rate": 3.496044860381238e-06, "loss": 0.0735, "step": 2859 }, { "epoch": 6.597462514417532, "grad_norm": 0.0, "learning_rate": 3.4897340376249455e-06, "loss": 0.0927, "step": 2860 }, { "epoch": 6.599769319492503, "grad_norm": 0.0, "learning_rate": 3.483427711785449e-06, "loss": 0.1152, "step": 2861 }, { "epoch": 6.602076124567474, "grad_norm": 0.0, "learning_rate": 3.4771258872187917e-06, "loss": 0.1049, "step": 2862 }, { "epoch": 6.604382929642445, "grad_norm": 0.0, "learning_rate": 3.4708285682779074e-06, "loss": 0.17, "step": 2863 }, { "epoch": 6.606689734717416, "grad_norm": 0.0, "learning_rate": 3.464535759312625e-06, "loss": 0.0628, "step": 2864 }, { "epoch": 6.608996539792388, "grad_norm": 0.0, "learning_rate": 3.4582474646696575e-06, "loss": 0.099, "step": 2865 }, { "epoch": 6.611303344867359, "grad_norm": 0.0, "learning_rate": 3.451963688692591e-06, "loss": 0.1256, "step": 2866 }, { "epoch": 6.61361014994233, "grad_norm": 0.0, "learning_rate": 3.4456844357218977e-06, "loss": 0.085, "step": 2867 }, { "epoch": 6.615916955017301, "grad_norm": 0.0, "learning_rate": 3.4394097100949286e-06, "loss": 0.1082, "step": 2868 }, { "epoch": 6.618223760092272, "grad_norm": 0.0, "learning_rate": 3.433139516145896e-06, "loss": 0.094, "step": 2869 }, { "epoch": 6.620530565167243, "grad_norm": 0.0, "learning_rate": 3.4268738582058913e-06, "loss": 0.0579, "step": 2870 }, { "epoch": 6.622837370242214, "grad_norm": 0.0, "learning_rate": 3.4206127406028744e-06, "loss": 0.069, "step": 2871 }, { "epoch": 6.625144175317185, "grad_norm": 0.0, "learning_rate": 3.414356167661658e-06, "loss": 0.0987, "step": 2872 }, { "epoch": 6.627450980392156, "grad_norm": 0.0, "learning_rate": 3.4081041437039288e-06, "loss": 0.1038, "step": 2873 }, { "epoch": 6.629757785467128, "grad_norm": 0.0, "learning_rate": 3.401856673048217e-06, "loss": 0.0504, "step": 2874 }, { "epoch": 6.632064590542099, "grad_norm": 0.0, "learning_rate": 3.3956137600099248e-06, "loss": 0.0484, "step": 2875 }, { "epoch": 6.6343713956170705, "grad_norm": 0.0, "learning_rate": 3.3893754089012886e-06, "loss": 0.0852, "step": 2876 }, { "epoch": 6.6366782006920415, "grad_norm": 0.0, "learning_rate": 3.3831416240314085e-06, "loss": 0.0883, "step": 2877 }, { "epoch": 6.638985005767013, "grad_norm": 0.0, "learning_rate": 3.3769124097062178e-06, "loss": 0.1297, "step": 2878 }, { "epoch": 6.641291810841984, "grad_norm": 0.0, "learning_rate": 3.3706877702285033e-06, "loss": 0.0628, "step": 2879 }, { "epoch": 6.643598615916955, "grad_norm": 0.0, "learning_rate": 3.3644677098978894e-06, "loss": 0.0988, "step": 2880 }, { "epoch": 6.645905420991927, "grad_norm": 0.0, "learning_rate": 3.35825223301083e-06, "loss": 0.0751, "step": 2881 }, { "epoch": 6.648212226066898, "grad_norm": 0.0, "learning_rate": 3.3520413438606215e-06, "loss": 0.0957, "step": 2882 }, { "epoch": 6.650519031141869, "grad_norm": 0.0, "learning_rate": 3.3458350467373914e-06, "loss": 0.0607, "step": 2883 }, { "epoch": 6.65282583621684, "grad_norm": 0.0, "learning_rate": 3.339633345928085e-06, "loss": 0.1428, "step": 2884 }, { "epoch": 6.655132641291811, "grad_norm": 0.0, "learning_rate": 3.333436245716488e-06, "loss": 0.0921, "step": 2885 }, { "epoch": 6.657439446366782, "grad_norm": 0.0, "learning_rate": 3.3272437503831945e-06, "loss": 0.0353, "step": 2886 }, { "epoch": 6.659746251441753, "grad_norm": 0.0, "learning_rate": 3.3210558642056277e-06, "loss": 0.0859, "step": 2887 }, { "epoch": 6.662053056516724, "grad_norm": 0.0, "learning_rate": 3.3148725914580183e-06, "loss": 0.0779, "step": 2888 }, { "epoch": 6.664359861591695, "grad_norm": 0.0, "learning_rate": 3.308693936411421e-06, "loss": 0.0946, "step": 2889 }, { "epoch": 6.666666666666667, "grad_norm": 0.0, "learning_rate": 3.3025199033336887e-06, "loss": 0.089, "step": 2890 }, { "epoch": 6.668973471741638, "grad_norm": 0.0, "learning_rate": 3.29635049648949e-06, "loss": 0.0835, "step": 2891 }, { "epoch": 6.671280276816609, "grad_norm": 0.0, "learning_rate": 3.290185720140301e-06, "loss": 0.1067, "step": 2892 }, { "epoch": 6.67358708189158, "grad_norm": 0.0, "learning_rate": 3.284025578544385e-06, "loss": 0.11, "step": 2893 }, { "epoch": 6.675893886966551, "grad_norm": 0.0, "learning_rate": 3.2778700759568194e-06, "loss": 0.0726, "step": 2894 }, { "epoch": 6.678200692041522, "grad_norm": 0.0, "learning_rate": 3.2717192166294685e-06, "loss": 0.0831, "step": 2895 }, { "epoch": 6.680507497116493, "grad_norm": 0.0, "learning_rate": 3.265573004810997e-06, "loss": 0.089, "step": 2896 }, { "epoch": 6.682814302191465, "grad_norm": 0.0, "learning_rate": 3.2594314447468457e-06, "loss": 0.0518, "step": 2897 }, { "epoch": 6.685121107266436, "grad_norm": 0.0, "learning_rate": 3.2532945406792573e-06, "loss": 0.0802, "step": 2898 }, { "epoch": 6.687427912341407, "grad_norm": 0.0, "learning_rate": 3.2471622968472494e-06, "loss": 0.1096, "step": 2899 }, { "epoch": 6.689734717416378, "grad_norm": 0.0, "learning_rate": 3.2410347174866188e-06, "loss": 0.0426, "step": 2900 }, { "epoch": 6.692041522491349, "grad_norm": 0.0, "learning_rate": 3.234911806829948e-06, "loss": 0.0535, "step": 2901 }, { "epoch": 6.6943483275663205, "grad_norm": 0.0, "learning_rate": 3.228793569106594e-06, "loss": 0.0568, "step": 2902 }, { "epoch": 6.6966551326412915, "grad_norm": 0.0, "learning_rate": 3.222680008542678e-06, "loss": 0.0826, "step": 2903 }, { "epoch": 6.698961937716263, "grad_norm": 0.0, "learning_rate": 3.216571129361097e-06, "loss": 0.0656, "step": 2904 }, { "epoch": 6.7012687427912345, "grad_norm": 0.0, "learning_rate": 3.2104669357815167e-06, "loss": 0.054, "step": 2905 }, { "epoch": 6.703575547866206, "grad_norm": 0.0, "learning_rate": 3.2043674320203565e-06, "loss": 0.0608, "step": 2906 }, { "epoch": 6.705882352941177, "grad_norm": 0.0, "learning_rate": 3.1982726222908046e-06, "loss": 0.0925, "step": 2907 }, { "epoch": 6.708189158016148, "grad_norm": 0.0, "learning_rate": 3.1921825108028093e-06, "loss": 0.0519, "step": 2908 }, { "epoch": 6.710495963091119, "grad_norm": 0.0, "learning_rate": 3.1860971017630605e-06, "loss": 0.1546, "step": 2909 }, { "epoch": 6.71280276816609, "grad_norm": 0.0, "learning_rate": 3.1800163993750166e-06, "loss": 0.1249, "step": 2910 }, { "epoch": 6.715109573241061, "grad_norm": 0.0, "learning_rate": 3.1739404078388713e-06, "loss": 0.0674, "step": 2911 }, { "epoch": 6.717416378316033, "grad_norm": 0.0, "learning_rate": 3.1678691313515688e-06, "loss": 0.0841, "step": 2912 }, { "epoch": 6.719723183391004, "grad_norm": 0.0, "learning_rate": 3.161802574106799e-06, "loss": 0.0628, "step": 2913 }, { "epoch": 6.722029988465975, "grad_norm": 0.0, "learning_rate": 3.1557407402949937e-06, "loss": 0.0346, "step": 2914 }, { "epoch": 6.724336793540946, "grad_norm": 0.0, "learning_rate": 3.149683634103312e-06, "loss": 0.0637, "step": 2915 }, { "epoch": 6.726643598615917, "grad_norm": 0.0, "learning_rate": 3.143631259715658e-06, "loss": 0.0544, "step": 2916 }, { "epoch": 6.728950403690888, "grad_norm": 0.0, "learning_rate": 3.1375836213126653e-06, "loss": 0.0388, "step": 2917 }, { "epoch": 6.731257208765859, "grad_norm": 0.0, "learning_rate": 3.13154072307169e-06, "loss": 0.0701, "step": 2918 }, { "epoch": 6.73356401384083, "grad_norm": 0.0, "learning_rate": 3.1255025691668184e-06, "loss": 0.0979, "step": 2919 }, { "epoch": 6.735870818915801, "grad_norm": 0.0, "learning_rate": 3.1194691637688645e-06, "loss": 0.0714, "step": 2920 }, { "epoch": 6.738177623990773, "grad_norm": 0.0, "learning_rate": 3.1134405110453512e-06, "loss": 0.0434, "step": 2921 }, { "epoch": 6.740484429065744, "grad_norm": 0.0, "learning_rate": 3.10741661516053e-06, "loss": 0.1162, "step": 2922 }, { "epoch": 6.742791234140715, "grad_norm": 0.0, "learning_rate": 3.101397480275359e-06, "loss": 0.0849, "step": 2923 }, { "epoch": 6.745098039215686, "grad_norm": 0.0, "learning_rate": 3.0953831105475064e-06, "loss": 0.0431, "step": 2924 }, { "epoch": 6.747404844290657, "grad_norm": 0.0, "learning_rate": 3.089373510131354e-06, "loss": 0.0894, "step": 2925 }, { "epoch": 6.749711649365628, "grad_norm": 0.0, "learning_rate": 3.083368683177993e-06, "loss": 0.1019, "step": 2926 }, { "epoch": 6.7520184544405994, "grad_norm": 0.0, "learning_rate": 3.077368633835205e-06, "loss": 0.0574, "step": 2927 }, { "epoch": 6.754325259515571, "grad_norm": 0.0, "learning_rate": 3.071373366247482e-06, "loss": 0.1271, "step": 2928 }, { "epoch": 6.756632064590542, "grad_norm": 0.0, "learning_rate": 3.065382884556012e-06, "loss": 0.0919, "step": 2929 }, { "epoch": 6.7589388696655135, "grad_norm": 0.0, "learning_rate": 3.0593971928986688e-06, "loss": 0.0945, "step": 2930 }, { "epoch": 6.7612456747404845, "grad_norm": 0.0, "learning_rate": 3.0534162954100264e-06, "loss": 0.1158, "step": 2931 }, { "epoch": 6.763552479815456, "grad_norm": 0.0, "learning_rate": 3.0474401962213483e-06, "loss": 0.11, "step": 2932 }, { "epoch": 6.765859284890427, "grad_norm": 0.0, "learning_rate": 3.0414688994605724e-06, "loss": 0.0829, "step": 2933 }, { "epoch": 6.768166089965398, "grad_norm": 0.0, "learning_rate": 3.0355024092523334e-06, "loss": 0.0507, "step": 2934 }, { "epoch": 6.770472895040369, "grad_norm": 0.0, "learning_rate": 3.0295407297179326e-06, "loss": 0.1086, "step": 2935 }, { "epoch": 6.77277970011534, "grad_norm": 0.0, "learning_rate": 3.0235838649753615e-06, "loss": 0.057, "step": 2936 }, { "epoch": 6.775086505190312, "grad_norm": 0.0, "learning_rate": 3.017631819139273e-06, "loss": 0.0772, "step": 2937 }, { "epoch": 6.777393310265283, "grad_norm": 0.0, "learning_rate": 3.0116845963209996e-06, "loss": 0.1133, "step": 2938 }, { "epoch": 6.779700115340254, "grad_norm": 0.0, "learning_rate": 3.005742200628545e-06, "loss": 0.055, "step": 2939 }, { "epoch": 6.782006920415225, "grad_norm": 0.0, "learning_rate": 2.999804636166567e-06, "loss": 0.0833, "step": 2940 }, { "epoch": 6.784313725490196, "grad_norm": 0.0, "learning_rate": 2.9938719070363954e-06, "loss": 0.0847, "step": 2941 }, { "epoch": 6.786620530565167, "grad_norm": 0.0, "learning_rate": 2.987944017336023e-06, "loss": 0.075, "step": 2942 }, { "epoch": 6.788927335640138, "grad_norm": 0.0, "learning_rate": 2.9820209711600858e-06, "loss": 0.0884, "step": 2943 }, { "epoch": 6.79123414071511, "grad_norm": 0.0, "learning_rate": 2.9761027725998883e-06, "loss": 0.0939, "step": 2944 }, { "epoch": 6.793540945790081, "grad_norm": 0.0, "learning_rate": 2.970189425743383e-06, "loss": 0.0841, "step": 2945 }, { "epoch": 6.795847750865052, "grad_norm": 0.0, "learning_rate": 2.9642809346751677e-06, "loss": 0.095, "step": 2946 }, { "epoch": 6.798154555940023, "grad_norm": 0.0, "learning_rate": 2.958377303476483e-06, "loss": 0.0691, "step": 2947 }, { "epoch": 6.800461361014994, "grad_norm": 0.0, "learning_rate": 2.952478536225224e-06, "loss": 0.0933, "step": 2948 }, { "epoch": 6.802768166089965, "grad_norm": 0.0, "learning_rate": 2.9465846369959126e-06, "loss": 0.1027, "step": 2949 }, { "epoch": 6.805074971164936, "grad_norm": 0.0, "learning_rate": 2.9406956098597208e-06, "loss": 0.1112, "step": 2950 }, { "epoch": 6.807381776239907, "grad_norm": 0.0, "learning_rate": 2.934811458884449e-06, "loss": 0.083, "step": 2951 }, { "epoch": 6.809688581314878, "grad_norm": 0.0, "learning_rate": 2.9289321881345257e-06, "loss": 0.106, "step": 2952 }, { "epoch": 6.81199538638985, "grad_norm": 0.0, "learning_rate": 2.9230578016710154e-06, "loss": 0.0947, "step": 2953 }, { "epoch": 6.814302191464821, "grad_norm": 0.0, "learning_rate": 2.917188303551608e-06, "loss": 0.077, "step": 2954 }, { "epoch": 6.8166089965397925, "grad_norm": 0.0, "learning_rate": 2.91132369783061e-06, "loss": 0.0524, "step": 2955 }, { "epoch": 6.8189158016147635, "grad_norm": 0.0, "learning_rate": 2.905463988558955e-06, "loss": 0.1328, "step": 2956 }, { "epoch": 6.821222606689735, "grad_norm": 0.0, "learning_rate": 2.8996091797841976e-06, "loss": 0.0388, "step": 2957 }, { "epoch": 6.823529411764706, "grad_norm": 0.0, "learning_rate": 2.893759275550494e-06, "loss": 0.0761, "step": 2958 }, { "epoch": 6.825836216839677, "grad_norm": 0.0, "learning_rate": 2.8879142798986293e-06, "loss": 0.0588, "step": 2959 }, { "epoch": 6.828143021914649, "grad_norm": 0.0, "learning_rate": 2.882074196865986e-06, "loss": 0.0752, "step": 2960 }, { "epoch": 6.83044982698962, "grad_norm": 0.0, "learning_rate": 2.876239030486554e-06, "loss": 0.098, "step": 2961 }, { "epoch": 6.832756632064591, "grad_norm": 0.0, "learning_rate": 2.8704087847909333e-06, "loss": 0.0587, "step": 2962 }, { "epoch": 6.835063437139562, "grad_norm": 0.0, "learning_rate": 2.8645834638063253e-06, "loss": 0.0935, "step": 2963 }, { "epoch": 6.837370242214533, "grad_norm": 0.0, "learning_rate": 2.8587630715565185e-06, "loss": 0.0797, "step": 2964 }, { "epoch": 6.839677047289504, "grad_norm": 0.0, "learning_rate": 2.8529476120619102e-06, "loss": 0.1073, "step": 2965 }, { "epoch": 6.841983852364475, "grad_norm": 0.0, "learning_rate": 2.8471370893394866e-06, "loss": 0.074, "step": 2966 }, { "epoch": 6.844290657439446, "grad_norm": 0.0, "learning_rate": 2.8413315074028157e-06, "loss": 0.0967, "step": 2967 }, { "epoch": 6.846597462514418, "grad_norm": 0.0, "learning_rate": 2.8355308702620633e-06, "loss": 0.04, "step": 2968 }, { "epoch": 6.848904267589389, "grad_norm": 0.0, "learning_rate": 2.829735181923978e-06, "loss": 0.0665, "step": 2969 }, { "epoch": 6.85121107266436, "grad_norm": 0.0, "learning_rate": 2.823944446391881e-06, "loss": 0.0812, "step": 2970 }, { "epoch": 6.853517877739331, "grad_norm": 0.0, "learning_rate": 2.818158667665686e-06, "loss": 0.0836, "step": 2971 }, { "epoch": 6.855824682814302, "grad_norm": 0.0, "learning_rate": 2.8123778497418687e-06, "loss": 0.073, "step": 2972 }, { "epoch": 6.858131487889273, "grad_norm": 0.0, "learning_rate": 2.8066019966134907e-06, "loss": 0.0928, "step": 2973 }, { "epoch": 6.860438292964244, "grad_norm": 0.0, "learning_rate": 2.800831112270175e-06, "loss": 0.0918, "step": 2974 }, { "epoch": 6.862745098039216, "grad_norm": 0.0, "learning_rate": 2.795065200698116e-06, "loss": 0.1027, "step": 2975 }, { "epoch": 6.865051903114187, "grad_norm": 0.0, "learning_rate": 2.7893042658800793e-06, "loss": 0.0605, "step": 2976 }, { "epoch": 6.867358708189158, "grad_norm": 0.0, "learning_rate": 2.783548311795379e-06, "loss": 0.0793, "step": 2977 }, { "epoch": 6.869665513264129, "grad_norm": 0.0, "learning_rate": 2.777797342419901e-06, "loss": 0.0954, "step": 2978 }, { "epoch": 6.8719723183391, "grad_norm": 0.0, "learning_rate": 2.7720513617260857e-06, "loss": 0.082, "step": 2979 }, { "epoch": 6.874279123414071, "grad_norm": 0.0, "learning_rate": 2.76631037368292e-06, "loss": 0.0648, "step": 2980 }, { "epoch": 6.8765859284890425, "grad_norm": 0.0, "learning_rate": 2.7605743822559504e-06, "loss": 0.1019, "step": 2981 }, { "epoch": 6.8788927335640135, "grad_norm": 0.0, "learning_rate": 2.7548433914072736e-06, "loss": 0.0661, "step": 2982 }, { "epoch": 6.881199538638985, "grad_norm": 0.0, "learning_rate": 2.7491174050955237e-06, "loss": 0.1199, "step": 2983 }, { "epoch": 6.8835063437139565, "grad_norm": 0.0, "learning_rate": 2.7433964272758805e-06, "loss": 0.0877, "step": 2984 }, { "epoch": 6.885813148788928, "grad_norm": 0.0, "learning_rate": 2.7376804619000706e-06, "loss": 0.1212, "step": 2985 }, { "epoch": 6.888119953863899, "grad_norm": 0.0, "learning_rate": 2.7319695129163493e-06, "loss": 0.0545, "step": 2986 }, { "epoch": 6.89042675893887, "grad_norm": 0.0, "learning_rate": 2.726263584269513e-06, "loss": 0.0761, "step": 2987 }, { "epoch": 6.892733564013841, "grad_norm": 0.0, "learning_rate": 2.720562679900892e-06, "loss": 0.063, "step": 2988 }, { "epoch": 6.895040369088812, "grad_norm": 0.0, "learning_rate": 2.714866803748337e-06, "loss": 0.0811, "step": 2989 }, { "epoch": 6.897347174163783, "grad_norm": 0.0, "learning_rate": 2.709175959746233e-06, "loss": 0.0705, "step": 2990 }, { "epoch": 6.899653979238755, "grad_norm": 0.0, "learning_rate": 2.703490151825492e-06, "loss": 0.045, "step": 2991 }, { "epoch": 6.901960784313726, "grad_norm": 0.0, "learning_rate": 2.6978093839135365e-06, "loss": 0.0717, "step": 2992 }, { "epoch": 6.904267589388697, "grad_norm": 0.0, "learning_rate": 2.6921336599343153e-06, "loss": 0.0887, "step": 2993 }, { "epoch": 6.906574394463668, "grad_norm": 0.0, "learning_rate": 2.6864629838082957e-06, "loss": 0.1213, "step": 2994 }, { "epoch": 6.908881199538639, "grad_norm": 0.0, "learning_rate": 2.6807973594524508e-06, "loss": 0.1169, "step": 2995 }, { "epoch": 6.91118800461361, "grad_norm": 0.0, "learning_rate": 2.675136790780265e-06, "loss": 0.1588, "step": 2996 }, { "epoch": 6.913494809688581, "grad_norm": 0.0, "learning_rate": 2.669481281701739e-06, "loss": 0.1299, "step": 2997 }, { "epoch": 6.915801614763552, "grad_norm": 0.0, "learning_rate": 2.6638308361233677e-06, "loss": 0.1275, "step": 2998 }, { "epoch": 6.918108419838523, "grad_norm": 0.0, "learning_rate": 2.6581854579481546e-06, "loss": 0.1012, "step": 2999 }, { "epoch": 6.920415224913495, "grad_norm": 0.0, "learning_rate": 2.652545151075606e-06, "loss": 0.0788, "step": 3000 }, { "epoch": 6.922722029988466, "grad_norm": 0.0, "learning_rate": 2.6469099194017144e-06, "loss": 0.0714, "step": 3001 }, { "epoch": 6.925028835063437, "grad_norm": 0.0, "learning_rate": 2.641279766818977e-06, "loss": 0.0648, "step": 3002 }, { "epoch": 6.927335640138408, "grad_norm": 0.0, "learning_rate": 2.635654697216382e-06, "loss": 0.1825, "step": 3003 }, { "epoch": 6.929642445213379, "grad_norm": 0.0, "learning_rate": 2.630034714479397e-06, "loss": 0.1082, "step": 3004 }, { "epoch": 6.93194925028835, "grad_norm": 0.0, "learning_rate": 2.624419822489985e-06, "loss": 0.1228, "step": 3005 }, { "epoch": 6.9342560553633215, "grad_norm": 0.0, "learning_rate": 2.6188100251265947e-06, "loss": 0.061, "step": 3006 }, { "epoch": 6.936562860438293, "grad_norm": 0.0, "learning_rate": 2.6132053262641467e-06, "loss": 0.0675, "step": 3007 }, { "epoch": 6.9388696655132645, "grad_norm": 0.0, "learning_rate": 2.607605729774041e-06, "loss": 0.1114, "step": 3008 }, { "epoch": 6.9411764705882355, "grad_norm": 0.0, "learning_rate": 2.6020112395241627e-06, "loss": 0.0916, "step": 3009 }, { "epoch": 6.9434832756632066, "grad_norm": 0.0, "learning_rate": 2.596421859378858e-06, "loss": 0.0606, "step": 3010 }, { "epoch": 6.945790080738178, "grad_norm": 0.0, "learning_rate": 2.5908375931989517e-06, "loss": 0.0426, "step": 3011 }, { "epoch": 6.948096885813149, "grad_norm": 0.0, "learning_rate": 2.5852584448417327e-06, "loss": 0.0635, "step": 3012 }, { "epoch": 6.95040369088812, "grad_norm": 0.0, "learning_rate": 2.5796844181609583e-06, "loss": 0.0724, "step": 3013 }, { "epoch": 6.952710495963091, "grad_norm": 0.0, "learning_rate": 2.57411551700684e-06, "loss": 0.1065, "step": 3014 }, { "epoch": 6.955017301038062, "grad_norm": 0.0, "learning_rate": 2.5685517452260566e-06, "loss": 0.0856, "step": 3015 }, { "epoch": 6.957324106113034, "grad_norm": 0.0, "learning_rate": 2.562993106661744e-06, "loss": 0.0739, "step": 3016 }, { "epoch": 6.959630911188005, "grad_norm": 0.0, "learning_rate": 2.5574396051534835e-06, "loss": 0.096, "step": 3017 }, { "epoch": 6.961937716262976, "grad_norm": 0.0, "learning_rate": 2.55189124453732e-06, "loss": 0.0694, "step": 3018 }, { "epoch": 6.964244521337947, "grad_norm": 0.0, "learning_rate": 2.5463480286457367e-06, "loss": 0.0831, "step": 3019 }, { "epoch": 6.966551326412918, "grad_norm": 0.0, "learning_rate": 2.540809961307672e-06, "loss": 0.1211, "step": 3020 }, { "epoch": 6.968858131487889, "grad_norm": 0.0, "learning_rate": 2.5352770463484986e-06, "loss": 0.0619, "step": 3021 }, { "epoch": 6.97116493656286, "grad_norm": 0.0, "learning_rate": 2.529749287590042e-06, "loss": 0.0844, "step": 3022 }, { "epoch": 6.973471741637832, "grad_norm": 0.0, "learning_rate": 2.524226688850554e-06, "loss": 0.0669, "step": 3023 }, { "epoch": 6.975778546712803, "grad_norm": 0.0, "learning_rate": 2.51870925394473e-06, "loss": 0.0934, "step": 3024 }, { "epoch": 6.978085351787774, "grad_norm": 0.0, "learning_rate": 2.513196986683699e-06, "loss": 0.1004, "step": 3025 }, { "epoch": 6.980392156862745, "grad_norm": 0.0, "learning_rate": 2.5076898908750127e-06, "loss": 0.0442, "step": 3026 }, { "epoch": 6.982698961937716, "grad_norm": 0.0, "learning_rate": 2.502187970322657e-06, "loss": 0.0459, "step": 3027 }, { "epoch": 6.985005767012687, "grad_norm": 0.0, "learning_rate": 2.4966912288270473e-06, "loss": 0.0687, "step": 3028 }, { "epoch": 6.987312572087658, "grad_norm": 0.0, "learning_rate": 2.4911996701850083e-06, "loss": 0.0595, "step": 3029 }, { "epoch": 6.989619377162629, "grad_norm": 0.0, "learning_rate": 2.485713298189798e-06, "loss": 0.0909, "step": 3030 }, { "epoch": 6.9919261822376, "grad_norm": 0.0, "learning_rate": 2.4802321166310815e-06, "loss": 0.0885, "step": 3031 }, { "epoch": 6.994232987312572, "grad_norm": 0.0, "learning_rate": 2.4747561292949496e-06, "loss": 0.0457, "step": 3032 }, { "epoch": 6.996539792387543, "grad_norm": 0.0, "learning_rate": 2.469285339963892e-06, "loss": 0.0921, "step": 3033 }, { "epoch": 6.9988465974625145, "grad_norm": 0.0, "learning_rate": 2.4638197524168208e-06, "loss": 0.0941, "step": 3034 }, { "epoch": 7.0011534025374855, "grad_norm": 0.0, "learning_rate": 2.458359370429043e-06, "loss": 0.0697, "step": 3035 }, { "epoch": 7.003460207612457, "grad_norm": 0.0, "learning_rate": 2.45290419777228e-06, "loss": 0.0689, "step": 3036 }, { "epoch": 7.005767012687428, "grad_norm": 0.0, "learning_rate": 2.447454238214654e-06, "loss": 0.0591, "step": 3037 }, { "epoch": 7.008073817762399, "grad_norm": 0.0, "learning_rate": 2.4420094955206753e-06, "loss": 0.0372, "step": 3038 }, { "epoch": 7.010380622837371, "grad_norm": 0.0, "learning_rate": 2.436569973451264e-06, "loss": 0.0707, "step": 3039 }, { "epoch": 7.012687427912342, "grad_norm": 0.0, "learning_rate": 2.4311356757637305e-06, "loss": 0.0448, "step": 3040 }, { "epoch": 7.014994232987313, "grad_norm": 0.0, "learning_rate": 2.4257066062117675e-06, "loss": 0.0428, "step": 3041 }, { "epoch": 7.017301038062284, "grad_norm": 0.0, "learning_rate": 2.420282768545469e-06, "loss": 0.0336, "step": 3042 }, { "epoch": 7.019607843137255, "grad_norm": 0.0, "learning_rate": 2.4148641665113116e-06, "loss": 0.0656, "step": 3043 }, { "epoch": 7.021914648212226, "grad_norm": 0.0, "learning_rate": 2.409450803852149e-06, "loss": 0.0699, "step": 3044 }, { "epoch": 7.024221453287197, "grad_norm": 0.0, "learning_rate": 2.4040426843072206e-06, "loss": 0.047, "step": 3045 }, { "epoch": 7.026528258362168, "grad_norm": 0.0, "learning_rate": 2.3986398116121468e-06, "loss": 0.0534, "step": 3046 }, { "epoch": 7.02883506343714, "grad_norm": 0.0, "learning_rate": 2.3932421894989167e-06, "loss": 0.0303, "step": 3047 }, { "epoch": 7.031141868512111, "grad_norm": 0.0, "learning_rate": 2.387849821695899e-06, "loss": 0.0537, "step": 3048 }, { "epoch": 7.033448673587082, "grad_norm": 0.0, "learning_rate": 2.3824627119278344e-06, "loss": 0.0612, "step": 3049 }, { "epoch": 7.035755478662053, "grad_norm": 0.0, "learning_rate": 2.3770808639158216e-06, "loss": 0.0809, "step": 3050 }, { "epoch": 7.038062283737024, "grad_norm": 0.0, "learning_rate": 2.371704281377335e-06, "loss": 0.0446, "step": 3051 }, { "epoch": 7.040369088811995, "grad_norm": 0.0, "learning_rate": 2.366332968026207e-06, "loss": 0.0575, "step": 3052 }, { "epoch": 7.042675893886966, "grad_norm": 0.0, "learning_rate": 2.3609669275726353e-06, "loss": 0.0539, "step": 3053 }, { "epoch": 7.044982698961937, "grad_norm": 0.0, "learning_rate": 2.3556061637231653e-06, "loss": 0.0575, "step": 3054 }, { "epoch": 7.047289504036909, "grad_norm": 0.0, "learning_rate": 2.3502506801807102e-06, "loss": 0.0848, "step": 3055 }, { "epoch": 7.04959630911188, "grad_norm": 0.0, "learning_rate": 2.3449004806445263e-06, "loss": 0.0552, "step": 3056 }, { "epoch": 7.051903114186851, "grad_norm": 0.0, "learning_rate": 2.339555568810221e-06, "loss": 0.0552, "step": 3057 }, { "epoch": 7.054209919261822, "grad_norm": 0.0, "learning_rate": 2.3342159483697535e-06, "loss": 0.0601, "step": 3058 }, { "epoch": 7.0565167243367934, "grad_norm": 0.0, "learning_rate": 2.328881623011431e-06, "loss": 0.0285, "step": 3059 }, { "epoch": 7.0588235294117645, "grad_norm": 0.0, "learning_rate": 2.323552596419889e-06, "loss": 0.0759, "step": 3060 }, { "epoch": 7.0611303344867355, "grad_norm": 0.0, "learning_rate": 2.318228872276118e-06, "loss": 0.0305, "step": 3061 }, { "epoch": 7.063437139561707, "grad_norm": 0.0, "learning_rate": 2.3129104542574433e-06, "loss": 0.028, "step": 3062 }, { "epoch": 7.0657439446366785, "grad_norm": 0.0, "learning_rate": 2.3075973460375134e-06, "loss": 0.0483, "step": 3063 }, { "epoch": 7.06805074971165, "grad_norm": 0.0, "learning_rate": 2.3022895512863207e-06, "loss": 0.0405, "step": 3064 }, { "epoch": 7.070357554786621, "grad_norm": 0.0, "learning_rate": 2.296987073670189e-06, "loss": 0.0514, "step": 3065 }, { "epoch": 7.072664359861592, "grad_norm": 0.0, "learning_rate": 2.291689916851758e-06, "loss": 0.085, "step": 3066 }, { "epoch": 7.074971164936563, "grad_norm": 0.0, "learning_rate": 2.2863980844900036e-06, "loss": 0.0593, "step": 3067 }, { "epoch": 7.077277970011534, "grad_norm": 0.0, "learning_rate": 2.2811115802402174e-06, "loss": 0.0387, "step": 3068 }, { "epoch": 7.079584775086505, "grad_norm": 0.0, "learning_rate": 2.275830407754006e-06, "loss": 0.0554, "step": 3069 }, { "epoch": 7.081891580161477, "grad_norm": 0.0, "learning_rate": 2.2705545706793065e-06, "loss": 0.0357, "step": 3070 }, { "epoch": 7.084198385236448, "grad_norm": 0.0, "learning_rate": 2.265284072660362e-06, "loss": 0.0633, "step": 3071 }, { "epoch": 7.086505190311419, "grad_norm": 0.0, "learning_rate": 2.2600189173377263e-06, "loss": 0.0452, "step": 3072 }, { "epoch": 7.08881199538639, "grad_norm": 0.0, "learning_rate": 2.254759108348267e-06, "loss": 0.0699, "step": 3073 }, { "epoch": 7.091118800461361, "grad_norm": 0.0, "learning_rate": 2.2495046493251603e-06, "loss": 0.0648, "step": 3074 }, { "epoch": 7.093425605536332, "grad_norm": 0.0, "learning_rate": 2.2442555438978774e-06, "loss": 0.0547, "step": 3075 }, { "epoch": 7.095732410611303, "grad_norm": 0.0, "learning_rate": 2.239011795692203e-06, "loss": 0.0244, "step": 3076 }, { "epoch": 7.098039215686274, "grad_norm": 0.0, "learning_rate": 2.2337734083302164e-06, "loss": 0.0641, "step": 3077 }, { "epoch": 7.100346020761246, "grad_norm": 0.0, "learning_rate": 2.2285403854302912e-06, "loss": 0.0633, "step": 3078 }, { "epoch": 7.102652825836217, "grad_norm": 0.0, "learning_rate": 2.2233127306071013e-06, "loss": 0.0708, "step": 3079 }, { "epoch": 7.104959630911188, "grad_norm": 0.0, "learning_rate": 2.2180904474716057e-06, "loss": 0.0416, "step": 3080 }, { "epoch": 7.107266435986159, "grad_norm": 0.0, "learning_rate": 2.2128735396310606e-06, "loss": 0.0312, "step": 3081 }, { "epoch": 7.10957324106113, "grad_norm": 0.0, "learning_rate": 2.207662010689002e-06, "loss": 0.0809, "step": 3082 }, { "epoch": 7.111880046136101, "grad_norm": 0.0, "learning_rate": 2.202455864245259e-06, "loss": 0.1034, "step": 3083 }, { "epoch": 7.114186851211072, "grad_norm": 0.0, "learning_rate": 2.1972551038959313e-06, "loss": 0.0825, "step": 3084 }, { "epoch": 7.1164936562860435, "grad_norm": 0.0, "learning_rate": 2.192059733233408e-06, "loss": 0.062, "step": 3085 }, { "epoch": 7.118800461361015, "grad_norm": 0.0, "learning_rate": 2.1868697558463547e-06, "loss": 0.0636, "step": 3086 }, { "epoch": 7.1211072664359865, "grad_norm": 0.0, "learning_rate": 2.1816851753197023e-06, "loss": 0.06, "step": 3087 }, { "epoch": 7.1234140715109575, "grad_norm": 0.0, "learning_rate": 2.1765059952346655e-06, "loss": 0.1039, "step": 3088 }, { "epoch": 7.125720876585929, "grad_norm": 0.0, "learning_rate": 2.1713322191687234e-06, "loss": 0.1144, "step": 3089 }, { "epoch": 7.1280276816609, "grad_norm": 0.0, "learning_rate": 2.1661638506956208e-06, "loss": 0.0737, "step": 3090 }, { "epoch": 7.130334486735871, "grad_norm": 0.0, "learning_rate": 2.161000893385371e-06, "loss": 0.0432, "step": 3091 }, { "epoch": 7.132641291810842, "grad_norm": 0.0, "learning_rate": 2.155843350804243e-06, "loss": 0.05, "step": 3092 }, { "epoch": 7.134948096885813, "grad_norm": 0.0, "learning_rate": 2.1506912265147772e-06, "loss": 0.0861, "step": 3093 }, { "epoch": 7.137254901960785, "grad_norm": 0.0, "learning_rate": 2.1455445240757575e-06, "loss": 0.065, "step": 3094 }, { "epoch": 7.139561707035756, "grad_norm": 0.0, "learning_rate": 2.140403247042232e-06, "loss": 0.0602, "step": 3095 }, { "epoch": 7.141868512110727, "grad_norm": 0.0, "learning_rate": 2.1352673989655026e-06, "loss": 0.0737, "step": 3096 }, { "epoch": 7.144175317185698, "grad_norm": 0.0, "learning_rate": 2.130136983393112e-06, "loss": 0.0363, "step": 3097 }, { "epoch": 7.146482122260669, "grad_norm": 0.0, "learning_rate": 2.125012003868856e-06, "loss": 0.0576, "step": 3098 }, { "epoch": 7.14878892733564, "grad_norm": 0.0, "learning_rate": 2.119892463932781e-06, "loss": 0.0297, "step": 3099 }, { "epoch": 7.151095732410611, "grad_norm": 0.0, "learning_rate": 2.1147783671211643e-06, "loss": 0.078, "step": 3100 }, { "epoch": 7.153402537485582, "grad_norm": 0.0, "learning_rate": 2.1096697169665312e-06, "loss": 0.092, "step": 3101 }, { "epoch": 7.155709342560554, "grad_norm": 0.0, "learning_rate": 2.104566516997647e-06, "loss": 0.0604, "step": 3102 }, { "epoch": 7.158016147635525, "grad_norm": 0.0, "learning_rate": 2.0994687707395012e-06, "loss": 0.0211, "step": 3103 }, { "epoch": 7.160322952710496, "grad_norm": 0.0, "learning_rate": 2.0943764817133296e-06, "loss": 0.0622, "step": 3104 }, { "epoch": 7.162629757785467, "grad_norm": 0.0, "learning_rate": 2.08928965343659e-06, "loss": 0.0595, "step": 3105 }, { "epoch": 7.164936562860438, "grad_norm": 0.0, "learning_rate": 2.084208289422968e-06, "loss": 0.0621, "step": 3106 }, { "epoch": 7.167243367935409, "grad_norm": 0.0, "learning_rate": 2.0791323931823783e-06, "loss": 0.0265, "step": 3107 }, { "epoch": 7.16955017301038, "grad_norm": 0.0, "learning_rate": 2.0740619682209607e-06, "loss": 0.0178, "step": 3108 }, { "epoch": 7.171856978085351, "grad_norm": 0.0, "learning_rate": 2.068997018041069e-06, "loss": 0.0421, "step": 3109 }, { "epoch": 7.174163783160323, "grad_norm": 0.0, "learning_rate": 2.0639375461412803e-06, "loss": 0.0269, "step": 3110 }, { "epoch": 7.176470588235294, "grad_norm": 0.0, "learning_rate": 2.05888355601639e-06, "loss": 0.038, "step": 3111 }, { "epoch": 7.178777393310265, "grad_norm": 0.0, "learning_rate": 2.053835051157397e-06, "loss": 0.0608, "step": 3112 }, { "epoch": 7.1810841983852365, "grad_norm": 0.0, "learning_rate": 2.048792035051521e-06, "loss": 0.0335, "step": 3113 }, { "epoch": 7.1833910034602075, "grad_norm": 0.0, "learning_rate": 2.043754511182191e-06, "loss": 0.057, "step": 3114 }, { "epoch": 7.185697808535179, "grad_norm": 0.0, "learning_rate": 2.0387224830290308e-06, "loss": 0.0477, "step": 3115 }, { "epoch": 7.18800461361015, "grad_norm": 0.0, "learning_rate": 2.0336959540678813e-06, "loss": 0.0507, "step": 3116 }, { "epoch": 7.190311418685121, "grad_norm": 0.0, "learning_rate": 2.0286749277707783e-06, "loss": 0.0411, "step": 3117 }, { "epoch": 7.192618223760093, "grad_norm": 0.0, "learning_rate": 2.0236594076059534e-06, "loss": 0.0759, "step": 3118 }, { "epoch": 7.194925028835064, "grad_norm": 0.0, "learning_rate": 2.0186493970378416e-06, "loss": 0.0616, "step": 3119 }, { "epoch": 7.197231833910035, "grad_norm": 0.0, "learning_rate": 2.013644899527074e-06, "loss": 0.0784, "step": 3120 }, { "epoch": 7.199538638985006, "grad_norm": 0.0, "learning_rate": 2.008645918530462e-06, "loss": 0.0583, "step": 3121 }, { "epoch": 7.201845444059977, "grad_norm": 0.0, "learning_rate": 2.0036524575010176e-06, "loss": 0.0802, "step": 3122 }, { "epoch": 7.204152249134948, "grad_norm": 0.0, "learning_rate": 1.9986645198879385e-06, "loss": 0.0623, "step": 3123 }, { "epoch": 7.206459054209919, "grad_norm": 0.0, "learning_rate": 1.9936821091366e-06, "loss": 0.0559, "step": 3124 }, { "epoch": 7.20876585928489, "grad_norm": 0.0, "learning_rate": 1.9887052286885654e-06, "loss": 0.0602, "step": 3125 }, { "epoch": 7.211072664359862, "grad_norm": 0.0, "learning_rate": 1.983733881981581e-06, "loss": 0.0458, "step": 3126 }, { "epoch": 7.213379469434833, "grad_norm": 0.0, "learning_rate": 1.9787680724495617e-06, "loss": 0.0493, "step": 3127 }, { "epoch": 7.215686274509804, "grad_norm": 0.0, "learning_rate": 1.9738078035226084e-06, "loss": 0.0159, "step": 3128 }, { "epoch": 7.217993079584775, "grad_norm": 0.0, "learning_rate": 1.9688530786269854e-06, "loss": 0.0317, "step": 3129 }, { "epoch": 7.220299884659746, "grad_norm": 0.0, "learning_rate": 1.9639039011851292e-06, "loss": 0.043, "step": 3130 }, { "epoch": 7.222606689734717, "grad_norm": 0.0, "learning_rate": 1.9589602746156476e-06, "loss": 0.0607, "step": 3131 }, { "epoch": 7.224913494809688, "grad_norm": 0.0, "learning_rate": 1.9540222023333165e-06, "loss": 0.0626, "step": 3132 }, { "epoch": 7.22722029988466, "grad_norm": 0.0, "learning_rate": 1.9490896877490715e-06, "loss": 0.0508, "step": 3133 }, { "epoch": 7.229527104959631, "grad_norm": 0.0, "learning_rate": 1.9441627342700067e-06, "loss": 0.0485, "step": 3134 }, { "epoch": 7.231833910034602, "grad_norm": 0.0, "learning_rate": 1.9392413452993787e-06, "loss": 0.0553, "step": 3135 }, { "epoch": 7.234140715109573, "grad_norm": 0.0, "learning_rate": 1.9343255242366022e-06, "loss": 0.0708, "step": 3136 }, { "epoch": 7.236447520184544, "grad_norm": 0.0, "learning_rate": 1.929415274477239e-06, "loss": 0.025, "step": 3137 }, { "epoch": 7.2387543252595155, "grad_norm": 0.0, "learning_rate": 1.9245105994130086e-06, "loss": 0.0508, "step": 3138 }, { "epoch": 7.2410611303344865, "grad_norm": 0.0, "learning_rate": 1.919611502431782e-06, "loss": 0.0773, "step": 3139 }, { "epoch": 7.243367935409458, "grad_norm": 0.0, "learning_rate": 1.914717986917569e-06, "loss": 0.0649, "step": 3140 }, { "epoch": 7.245674740484429, "grad_norm": 0.0, "learning_rate": 1.9098300562505266e-06, "loss": 0.0678, "step": 3141 }, { "epoch": 7.2479815455594006, "grad_norm": 0.0, "learning_rate": 1.9049477138069606e-06, "loss": 0.0514, "step": 3142 }, { "epoch": 7.250288350634372, "grad_norm": 0.0, "learning_rate": 1.9000709629593073e-06, "loss": 0.0478, "step": 3143 }, { "epoch": 7.252595155709343, "grad_norm": 0.0, "learning_rate": 1.895199807076148e-06, "loss": 0.0389, "step": 3144 }, { "epoch": 7.254901960784314, "grad_norm": 0.0, "learning_rate": 1.8903342495221977e-06, "loss": 0.0613, "step": 3145 }, { "epoch": 7.257208765859285, "grad_norm": 0.0, "learning_rate": 1.8854742936583005e-06, "loss": 0.0589, "step": 3146 }, { "epoch": 7.259515570934256, "grad_norm": 0.0, "learning_rate": 1.880619942841435e-06, "loss": 0.0916, "step": 3147 }, { "epoch": 7.261822376009227, "grad_norm": 0.0, "learning_rate": 1.8757712004247098e-06, "loss": 0.0377, "step": 3148 }, { "epoch": 7.264129181084199, "grad_norm": 0.0, "learning_rate": 1.870928069757353e-06, "loss": 0.0778, "step": 3149 }, { "epoch": 7.26643598615917, "grad_norm": 0.0, "learning_rate": 1.8660905541847208e-06, "loss": 0.0944, "step": 3150 }, { "epoch": 7.268742791234141, "grad_norm": 0.0, "learning_rate": 1.861258657048295e-06, "loss": 0.0726, "step": 3151 }, { "epoch": 7.271049596309112, "grad_norm": 0.0, "learning_rate": 1.856432381685669e-06, "loss": 0.0562, "step": 3152 }, { "epoch": 7.273356401384083, "grad_norm": 0.0, "learning_rate": 1.8516117314305526e-06, "loss": 0.0486, "step": 3153 }, { "epoch": 7.275663206459054, "grad_norm": 0.0, "learning_rate": 1.8467967096127782e-06, "loss": 0.0616, "step": 3154 }, { "epoch": 7.277970011534025, "grad_norm": 0.0, "learning_rate": 1.8419873195582815e-06, "loss": 0.0329, "step": 3155 }, { "epoch": 7.280276816608996, "grad_norm": 0.0, "learning_rate": 1.8371835645891134e-06, "loss": 0.0711, "step": 3156 }, { "epoch": 7.282583621683968, "grad_norm": 0.0, "learning_rate": 1.8323854480234348e-06, "loss": 0.0472, "step": 3157 }, { "epoch": 7.284890426758939, "grad_norm": 0.0, "learning_rate": 1.8275929731755039e-06, "loss": 0.1153, "step": 3158 }, { "epoch": 7.28719723183391, "grad_norm": 0.0, "learning_rate": 1.8228061433556866e-06, "loss": 0.0639, "step": 3159 }, { "epoch": 7.289504036908881, "grad_norm": 0.0, "learning_rate": 1.8180249618704536e-06, "loss": 0.0973, "step": 3160 }, { "epoch": 7.291810841983852, "grad_norm": 0.0, "learning_rate": 1.8132494320223636e-06, "loss": 0.0738, "step": 3161 }, { "epoch": 7.294117647058823, "grad_norm": 0.0, "learning_rate": 1.808479557110081e-06, "loss": 0.0338, "step": 3162 }, { "epoch": 7.296424452133794, "grad_norm": 0.0, "learning_rate": 1.8037153404283636e-06, "loss": 0.0735, "step": 3163 }, { "epoch": 7.2987312572087655, "grad_norm": 0.0, "learning_rate": 1.798956785268051e-06, "loss": 0.0903, "step": 3164 }, { "epoch": 7.301038062283737, "grad_norm": 0.0, "learning_rate": 1.7942038949160857e-06, "loss": 0.0522, "step": 3165 }, { "epoch": 7.3033448673587085, "grad_norm": 0.0, "learning_rate": 1.7894566726554874e-06, "loss": 0.0649, "step": 3166 }, { "epoch": 7.3056516724336795, "grad_norm": 0.0, "learning_rate": 1.7847151217653624e-06, "loss": 0.0606, "step": 3167 }, { "epoch": 7.307958477508651, "grad_norm": 0.0, "learning_rate": 1.7799792455209019e-06, "loss": 0.0346, "step": 3168 }, { "epoch": 7.310265282583622, "grad_norm": 0.0, "learning_rate": 1.7752490471933769e-06, "loss": 0.0786, "step": 3169 }, { "epoch": 7.312572087658593, "grad_norm": 0.0, "learning_rate": 1.7705245300501396e-06, "loss": 0.0558, "step": 3170 }, { "epoch": 7.314878892733564, "grad_norm": 0.0, "learning_rate": 1.765805697354608e-06, "loss": 0.0525, "step": 3171 }, { "epoch": 7.317185697808535, "grad_norm": 0.0, "learning_rate": 1.7610925523662836e-06, "loss": 0.1067, "step": 3172 }, { "epoch": 7.319492502883507, "grad_norm": 0.0, "learning_rate": 1.756385098340736e-06, "loss": 0.0629, "step": 3173 }, { "epoch": 7.321799307958478, "grad_norm": 0.0, "learning_rate": 1.7516833385296016e-06, "loss": 0.1039, "step": 3174 }, { "epoch": 7.324106113033449, "grad_norm": 0.0, "learning_rate": 1.7469872761805872e-06, "loss": 0.0501, "step": 3175 }, { "epoch": 7.32641291810842, "grad_norm": 0.0, "learning_rate": 1.742296914537459e-06, "loss": 0.0603, "step": 3176 }, { "epoch": 7.328719723183391, "grad_norm": 0.0, "learning_rate": 1.7376122568400533e-06, "loss": 0.0604, "step": 3177 }, { "epoch": 7.331026528258362, "grad_norm": 0.0, "learning_rate": 1.732933306324256e-06, "loss": 0.0519, "step": 3178 }, { "epoch": 7.333333333333333, "grad_norm": 0.0, "learning_rate": 1.7282600662220228e-06, "loss": 0.0477, "step": 3179 }, { "epoch": 7.335640138408304, "grad_norm": 0.0, "learning_rate": 1.7235925397613529e-06, "loss": 0.0714, "step": 3180 }, { "epoch": 7.337946943483276, "grad_norm": 0.0, "learning_rate": 1.7189307301663082e-06, "loss": 0.0665, "step": 3181 }, { "epoch": 7.340253748558247, "grad_norm": 0.0, "learning_rate": 1.714274640657001e-06, "loss": 0.0541, "step": 3182 }, { "epoch": 7.342560553633218, "grad_norm": 0.0, "learning_rate": 1.709624274449584e-06, "loss": 0.0394, "step": 3183 }, { "epoch": 7.344867358708189, "grad_norm": 0.0, "learning_rate": 1.704979634756264e-06, "loss": 0.0596, "step": 3184 }, { "epoch": 7.34717416378316, "grad_norm": 0.0, "learning_rate": 1.7003407247852944e-06, "loss": 0.0889, "step": 3185 }, { "epoch": 7.349480968858131, "grad_norm": 0.0, "learning_rate": 1.6957075477409623e-06, "loss": 0.0438, "step": 3186 }, { "epoch": 7.351787773933102, "grad_norm": 0.0, "learning_rate": 1.6910801068236015e-06, "loss": 0.0289, "step": 3187 }, { "epoch": 7.354094579008073, "grad_norm": 0.0, "learning_rate": 1.6864584052295841e-06, "loss": 0.0605, "step": 3188 }, { "epoch": 7.356401384083045, "grad_norm": 0.0, "learning_rate": 1.6818424461513129e-06, "loss": 0.0453, "step": 3189 }, { "epoch": 7.358708189158016, "grad_norm": 0.0, "learning_rate": 1.677232232777224e-06, "loss": 0.0547, "step": 3190 }, { "epoch": 7.361014994232987, "grad_norm": 0.0, "learning_rate": 1.6726277682917925e-06, "loss": 0.0683, "step": 3191 }, { "epoch": 7.3633217993079585, "grad_norm": 0.0, "learning_rate": 1.6680290558755119e-06, "loss": 0.0697, "step": 3192 }, { "epoch": 7.3656286043829295, "grad_norm": 0.0, "learning_rate": 1.6634360987049113e-06, "loss": 0.0563, "step": 3193 }, { "epoch": 7.367935409457901, "grad_norm": 0.0, "learning_rate": 1.6588488999525431e-06, "loss": 0.0469, "step": 3194 }, { "epoch": 7.370242214532872, "grad_norm": 0.0, "learning_rate": 1.6542674627869738e-06, "loss": 0.067, "step": 3195 }, { "epoch": 7.372549019607844, "grad_norm": 0.0, "learning_rate": 1.6496917903728016e-06, "loss": 0.0439, "step": 3196 }, { "epoch": 7.374855824682815, "grad_norm": 0.0, "learning_rate": 1.6451218858706374e-06, "loss": 0.0626, "step": 3197 }, { "epoch": 7.377162629757786, "grad_norm": 0.0, "learning_rate": 1.640557752437103e-06, "loss": 0.0657, "step": 3198 }, { "epoch": 7.379469434832757, "grad_norm": 0.0, "learning_rate": 1.6359993932248442e-06, "loss": 0.058, "step": 3199 }, { "epoch": 7.381776239907728, "grad_norm": 0.0, "learning_rate": 1.631446811382512e-06, "loss": 0.0659, "step": 3200 }, { "epoch": 7.384083044982699, "grad_norm": 0.0, "learning_rate": 1.6269000100547682e-06, "loss": 0.0648, "step": 3201 }, { "epoch": 7.38638985005767, "grad_norm": 0.0, "learning_rate": 1.6223589923822768e-06, "loss": 0.0468, "step": 3202 }, { "epoch": 7.388696655132641, "grad_norm": 0.0, "learning_rate": 1.6178237615017178e-06, "loss": 0.0381, "step": 3203 }, { "epoch": 7.391003460207612, "grad_norm": 0.0, "learning_rate": 1.6132943205457607e-06, "loss": 0.0495, "step": 3204 }, { "epoch": 7.393310265282584, "grad_norm": 0.0, "learning_rate": 1.6087706726430874e-06, "loss": 0.0557, "step": 3205 }, { "epoch": 7.395617070357555, "grad_norm": 0.0, "learning_rate": 1.6042528209183728e-06, "loss": 0.0451, "step": 3206 }, { "epoch": 7.397923875432526, "grad_norm": 0.0, "learning_rate": 1.599740768492286e-06, "loss": 0.0337, "step": 3207 }, { "epoch": 7.400230680507497, "grad_norm": 0.0, "learning_rate": 1.5952345184814955e-06, "loss": 0.06, "step": 3208 }, { "epoch": 7.402537485582468, "grad_norm": 0.0, "learning_rate": 1.5907340739986577e-06, "loss": 0.0425, "step": 3209 }, { "epoch": 7.404844290657439, "grad_norm": 0.0, "learning_rate": 1.5862394381524239e-06, "loss": 0.0343, "step": 3210 }, { "epoch": 7.40715109573241, "grad_norm": 0.0, "learning_rate": 1.5817506140474248e-06, "loss": 0.0714, "step": 3211 }, { "epoch": 7.409457900807382, "grad_norm": 0.0, "learning_rate": 1.5772676047842862e-06, "loss": 0.0728, "step": 3212 }, { "epoch": 7.411764705882353, "grad_norm": 0.0, "learning_rate": 1.5727904134596084e-06, "loss": 0.0851, "step": 3213 }, { "epoch": 7.414071510957324, "grad_norm": 0.0, "learning_rate": 1.5683190431659812e-06, "loss": 0.0352, "step": 3214 }, { "epoch": 7.416378316032295, "grad_norm": 0.0, "learning_rate": 1.563853496991966e-06, "loss": 0.0529, "step": 3215 }, { "epoch": 7.418685121107266, "grad_norm": 0.0, "learning_rate": 1.5593937780221092e-06, "loss": 0.0551, "step": 3216 }, { "epoch": 7.4209919261822375, "grad_norm": 0.0, "learning_rate": 1.5549398893369216e-06, "loss": 0.0309, "step": 3217 }, { "epoch": 7.4232987312572085, "grad_norm": 0.0, "learning_rate": 1.5504918340128982e-06, "loss": 0.0535, "step": 3218 }, { "epoch": 7.42560553633218, "grad_norm": 0.0, "learning_rate": 1.5460496151225002e-06, "loss": 0.0607, "step": 3219 }, { "epoch": 7.4279123414071515, "grad_norm": 0.0, "learning_rate": 1.5416132357341519e-06, "loss": 0.0805, "step": 3220 }, { "epoch": 7.430219146482123, "grad_norm": 0.0, "learning_rate": 1.5371826989122507e-06, "loss": 0.06, "step": 3221 }, { "epoch": 7.432525951557094, "grad_norm": 0.0, "learning_rate": 1.5327580077171589e-06, "loss": 0.0717, "step": 3222 }, { "epoch": 7.434832756632065, "grad_norm": 0.0, "learning_rate": 1.528339165205195e-06, "loss": 0.0406, "step": 3223 }, { "epoch": 7.437139561707036, "grad_norm": 0.0, "learning_rate": 1.5239261744286427e-06, "loss": 0.0716, "step": 3224 }, { "epoch": 7.439446366782007, "grad_norm": 0.0, "learning_rate": 1.5195190384357405e-06, "loss": 0.0502, "step": 3225 }, { "epoch": 7.441753171856978, "grad_norm": 0.0, "learning_rate": 1.5151177602706867e-06, "loss": 0.0856, "step": 3226 }, { "epoch": 7.444059976931949, "grad_norm": 0.0, "learning_rate": 1.5107223429736273e-06, "loss": 0.061, "step": 3227 }, { "epoch": 7.446366782006921, "grad_norm": 0.0, "learning_rate": 1.5063327895806668e-06, "loss": 0.0639, "step": 3228 }, { "epoch": 7.448673587081892, "grad_norm": 0.0, "learning_rate": 1.501949103123852e-06, "loss": 0.0854, "step": 3229 }, { "epoch": 7.450980392156863, "grad_norm": 0.0, "learning_rate": 1.4975712866311832e-06, "loss": 0.0461, "step": 3230 }, { "epoch": 7.453287197231834, "grad_norm": 0.0, "learning_rate": 1.4931993431266056e-06, "loss": 0.0487, "step": 3231 }, { "epoch": 7.455594002306805, "grad_norm": 0.0, "learning_rate": 1.4888332756300027e-06, "loss": 0.0459, "step": 3232 }, { "epoch": 7.457900807381776, "grad_norm": 0.0, "learning_rate": 1.4844730871572045e-06, "loss": 0.0656, "step": 3233 }, { "epoch": 7.460207612456747, "grad_norm": 0.0, "learning_rate": 1.48011878071998e-06, "loss": 0.0522, "step": 3234 }, { "epoch": 7.462514417531718, "grad_norm": 0.0, "learning_rate": 1.4757703593260286e-06, "loss": 0.1048, "step": 3235 }, { "epoch": 7.46482122260669, "grad_norm": 0.0, "learning_rate": 1.4714278259789916e-06, "loss": 0.0626, "step": 3236 }, { "epoch": 7.467128027681661, "grad_norm": 0.0, "learning_rate": 1.467091183678444e-06, "loss": 0.0416, "step": 3237 }, { "epoch": 7.469434832756632, "grad_norm": 0.0, "learning_rate": 1.4627604354198854e-06, "loss": 0.0274, "step": 3238 }, { "epoch": 7.471741637831603, "grad_norm": 0.0, "learning_rate": 1.4584355841947452e-06, "loss": 0.0511, "step": 3239 }, { "epoch": 7.474048442906574, "grad_norm": 0.0, "learning_rate": 1.4541166329903856e-06, "loss": 0.0541, "step": 3240 }, { "epoch": 7.476355247981545, "grad_norm": 0.0, "learning_rate": 1.449803584790086e-06, "loss": 0.0551, "step": 3241 }, { "epoch": 7.478662053056516, "grad_norm": 0.0, "learning_rate": 1.4454964425730533e-06, "loss": 0.0493, "step": 3242 }, { "epoch": 7.4809688581314875, "grad_norm": 0.0, "learning_rate": 1.4411952093144167e-06, "loss": 0.0551, "step": 3243 }, { "epoch": 7.483275663206459, "grad_norm": 0.0, "learning_rate": 1.4368998879852135e-06, "loss": 0.0272, "step": 3244 }, { "epoch": 7.4855824682814305, "grad_norm": 0.0, "learning_rate": 1.432610481552409e-06, "loss": 0.0434, "step": 3245 }, { "epoch": 7.4878892733564015, "grad_norm": 0.0, "learning_rate": 1.4283269929788779e-06, "loss": 0.0705, "step": 3246 }, { "epoch": 7.490196078431373, "grad_norm": 0.0, "learning_rate": 1.424049425223405e-06, "loss": 0.0922, "step": 3247 }, { "epoch": 7.492502883506344, "grad_norm": 0.0, "learning_rate": 1.4197777812406898e-06, "loss": 0.0512, "step": 3248 }, { "epoch": 7.494809688581315, "grad_norm": 0.0, "learning_rate": 1.4155120639813392e-06, "loss": 0.0299, "step": 3249 }, { "epoch": 7.497116493656286, "grad_norm": 0.0, "learning_rate": 1.4112522763918635e-06, "loss": 0.0392, "step": 3250 }, { "epoch": 7.499423298731257, "grad_norm": 0.0, "learning_rate": 1.406998421414676e-06, "loss": 0.0848, "step": 3251 }, { "epoch": 7.501730103806229, "grad_norm": 0.0, "learning_rate": 1.4027505019880972e-06, "loss": 0.0697, "step": 3252 }, { "epoch": 7.5040369088812, "grad_norm": 0.0, "learning_rate": 1.3985085210463479e-06, "loss": 0.0529, "step": 3253 }, { "epoch": 7.506343713956171, "grad_norm": 0.0, "learning_rate": 1.3942724815195386e-06, "loss": 0.0468, "step": 3254 }, { "epoch": 7.508650519031142, "grad_norm": 0.0, "learning_rate": 1.3900423863336842e-06, "loss": 0.0433, "step": 3255 }, { "epoch": 7.510957324106113, "grad_norm": 0.0, "learning_rate": 1.3858182384106943e-06, "loss": 0.0609, "step": 3256 }, { "epoch": 7.513264129181084, "grad_norm": 0.0, "learning_rate": 1.3816000406683604e-06, "loss": 0.0691, "step": 3257 }, { "epoch": 7.515570934256055, "grad_norm": 0.0, "learning_rate": 1.377387796020374e-06, "loss": 0.0436, "step": 3258 }, { "epoch": 7.517877739331027, "grad_norm": 0.0, "learning_rate": 1.3731815073763132e-06, "loss": 0.1054, "step": 3259 }, { "epoch": 7.520184544405998, "grad_norm": 0.0, "learning_rate": 1.368981177641636e-06, "loss": 0.0695, "step": 3260 }, { "epoch": 7.522491349480969, "grad_norm": 0.0, "learning_rate": 1.364786809717692e-06, "loss": 0.0531, "step": 3261 }, { "epoch": 7.52479815455594, "grad_norm": 0.0, "learning_rate": 1.3605984065017074e-06, "loss": 0.0568, "step": 3262 }, { "epoch": 7.527104959630911, "grad_norm": 0.0, "learning_rate": 1.3564159708867863e-06, "loss": 0.0382, "step": 3263 }, { "epoch": 7.529411764705882, "grad_norm": 0.0, "learning_rate": 1.3522395057619186e-06, "loss": 0.106, "step": 3264 }, { "epoch": 7.531718569780853, "grad_norm": 0.0, "learning_rate": 1.3480690140119657e-06, "loss": 0.0491, "step": 3265 }, { "epoch": 7.534025374855824, "grad_norm": 0.0, "learning_rate": 1.3439044985176608e-06, "loss": 0.0429, "step": 3266 }, { "epoch": 7.536332179930795, "grad_norm": 0.0, "learning_rate": 1.339745962155613e-06, "loss": 0.0597, "step": 3267 }, { "epoch": 7.538638985005767, "grad_norm": 0.0, "learning_rate": 1.3355934077983024e-06, "loss": 0.038, "step": 3268 }, { "epoch": 7.540945790080738, "grad_norm": 0.0, "learning_rate": 1.3314468383140687e-06, "loss": 0.0254, "step": 3269 }, { "epoch": 7.5432525951557095, "grad_norm": 0.0, "learning_rate": 1.3273062565671258e-06, "loss": 0.075, "step": 3270 }, { "epoch": 7.5455594002306805, "grad_norm": 0.0, "learning_rate": 1.323171665417552e-06, "loss": 0.0498, "step": 3271 }, { "epoch": 7.5478662053056516, "grad_norm": 0.0, "learning_rate": 1.3190430677212795e-06, "loss": 0.0499, "step": 3272 }, { "epoch": 7.550173010380623, "grad_norm": 0.0, "learning_rate": 1.3149204663301118e-06, "loss": 0.057, "step": 3273 }, { "epoch": 7.552479815455594, "grad_norm": 0.0, "learning_rate": 1.3108038640916988e-06, "loss": 0.1198, "step": 3274 }, { "epoch": 7.554786620530566, "grad_norm": 0.0, "learning_rate": 1.3066932638495566e-06, "loss": 0.0821, "step": 3275 }, { "epoch": 7.557093425605537, "grad_norm": 0.0, "learning_rate": 1.3025886684430467e-06, "loss": 0.1062, "step": 3276 }, { "epoch": 7.559400230680508, "grad_norm": 0.0, "learning_rate": 1.2984900807073919e-06, "loss": 0.1036, "step": 3277 }, { "epoch": 7.561707035755479, "grad_norm": 0.0, "learning_rate": 1.2943975034736566e-06, "loss": 0.0424, "step": 3278 }, { "epoch": 7.56401384083045, "grad_norm": 0.0, "learning_rate": 1.2903109395687597e-06, "loss": 0.0774, "step": 3279 }, { "epoch": 7.566320645905421, "grad_norm": 0.0, "learning_rate": 1.286230391815465e-06, "loss": 0.0709, "step": 3280 }, { "epoch": 7.568627450980392, "grad_norm": 0.0, "learning_rate": 1.282155863032377e-06, "loss": 0.0703, "step": 3281 }, { "epoch": 7.570934256055363, "grad_norm": 0.0, "learning_rate": 1.278087356033947e-06, "loss": 0.0727, "step": 3282 }, { "epoch": 7.573241061130334, "grad_norm": 0.0, "learning_rate": 1.2740248736304673e-06, "loss": 0.0583, "step": 3283 }, { "epoch": 7.575547866205306, "grad_norm": 0.0, "learning_rate": 1.2699684186280636e-06, "loss": 0.0757, "step": 3284 }, { "epoch": 7.577854671280277, "grad_norm": 0.0, "learning_rate": 1.2659179938287035e-06, "loss": 0.0598, "step": 3285 }, { "epoch": 7.580161476355248, "grad_norm": 0.0, "learning_rate": 1.2618736020301858e-06, "loss": 0.0559, "step": 3286 }, { "epoch": 7.582468281430219, "grad_norm": 0.0, "learning_rate": 1.2578352460261456e-06, "loss": 0.0466, "step": 3287 }, { "epoch": 7.58477508650519, "grad_norm": 0.0, "learning_rate": 1.2538029286060428e-06, "loss": 0.0302, "step": 3288 }, { "epoch": 7.587081891580161, "grad_norm": 0.0, "learning_rate": 1.2497766525551724e-06, "loss": 0.0643, "step": 3289 }, { "epoch": 7.589388696655132, "grad_norm": 0.0, "learning_rate": 1.2457564206546568e-06, "loss": 0.0832, "step": 3290 }, { "epoch": 7.591695501730104, "grad_norm": 0.0, "learning_rate": 1.2417422356814345e-06, "loss": 0.0414, "step": 3291 }, { "epoch": 7.594002306805075, "grad_norm": 0.0, "learning_rate": 1.2377341004082778e-06, "loss": 0.0648, "step": 3292 }, { "epoch": 7.596309111880046, "grad_norm": 0.0, "learning_rate": 1.233732017603776e-06, "loss": 0.0631, "step": 3293 }, { "epoch": 7.598615916955017, "grad_norm": 0.0, "learning_rate": 1.2297359900323346e-06, "loss": 0.046, "step": 3294 }, { "epoch": 7.600922722029988, "grad_norm": 0.0, "learning_rate": 1.2257460204541793e-06, "loss": 0.0659, "step": 3295 }, { "epoch": 7.6032295271049595, "grad_norm": 0.0, "learning_rate": 1.2217621116253564e-06, "loss": 0.0789, "step": 3296 }, { "epoch": 7.6055363321799305, "grad_norm": 0.0, "learning_rate": 1.2177842662977136e-06, "loss": 0.108, "step": 3297 }, { "epoch": 7.607843137254902, "grad_norm": 0.0, "learning_rate": 1.213812487218924e-06, "loss": 0.0773, "step": 3298 }, { "epoch": 7.610149942329873, "grad_norm": 0.0, "learning_rate": 1.2098467771324597e-06, "loss": 0.0648, "step": 3299 }, { "epoch": 7.612456747404845, "grad_norm": 0.0, "learning_rate": 1.2058871387776039e-06, "loss": 0.0613, "step": 3300 }, { "epoch": 7.614763552479816, "grad_norm": 0.0, "learning_rate": 1.2019335748894489e-06, "loss": 0.0753, "step": 3301 }, { "epoch": 7.617070357554787, "grad_norm": 0.0, "learning_rate": 1.1979860881988903e-06, "loss": 0.1092, "step": 3302 }, { "epoch": 7.619377162629758, "grad_norm": 0.0, "learning_rate": 1.19404468143262e-06, "loss": 0.056, "step": 3303 }, { "epoch": 7.621683967704729, "grad_norm": 0.0, "learning_rate": 1.1901093573131394e-06, "loss": 0.061, "step": 3304 }, { "epoch": 7.6239907727797, "grad_norm": 0.0, "learning_rate": 1.186180118558743e-06, "loss": 0.1186, "step": 3305 }, { "epoch": 7.626297577854672, "grad_norm": 0.0, "learning_rate": 1.1822569678835195e-06, "loss": 0.0442, "step": 3306 }, { "epoch": 7.628604382929643, "grad_norm": 0.0, "learning_rate": 1.1783399079973578e-06, "loss": 0.0402, "step": 3307 }, { "epoch": 7.630911188004614, "grad_norm": 0.0, "learning_rate": 1.1744289416059396e-06, "loss": 0.0333, "step": 3308 }, { "epoch": 7.633217993079585, "grad_norm": 0.0, "learning_rate": 1.1705240714107301e-06, "loss": 0.0816, "step": 3309 }, { "epoch": 7.635524798154556, "grad_norm": 0.0, "learning_rate": 1.1666253001089933e-06, "loss": 0.0477, "step": 3310 }, { "epoch": 7.637831603229527, "grad_norm": 0.0, "learning_rate": 1.1627326303937747e-06, "loss": 0.0679, "step": 3311 }, { "epoch": 7.640138408304498, "grad_norm": 0.0, "learning_rate": 1.1588460649539036e-06, "loss": 0.0394, "step": 3312 }, { "epoch": 7.642445213379469, "grad_norm": 0.0, "learning_rate": 1.1549656064739966e-06, "loss": 0.0435, "step": 3313 }, { "epoch": 7.64475201845444, "grad_norm": 0.0, "learning_rate": 1.1510912576344546e-06, "loss": 0.0425, "step": 3314 }, { "epoch": 7.647058823529412, "grad_norm": 0.0, "learning_rate": 1.1472230211114498e-06, "loss": 0.0747, "step": 3315 }, { "epoch": 7.649365628604383, "grad_norm": 0.0, "learning_rate": 1.1433608995769396e-06, "loss": 0.0654, "step": 3316 }, { "epoch": 7.651672433679354, "grad_norm": 0.0, "learning_rate": 1.1395048956986577e-06, "loss": 0.0555, "step": 3317 }, { "epoch": 7.653979238754325, "grad_norm": 0.0, "learning_rate": 1.1356550121401033e-06, "loss": 0.0873, "step": 3318 }, { "epoch": 7.656286043829296, "grad_norm": 0.0, "learning_rate": 1.1318112515605583e-06, "loss": 0.0944, "step": 3319 }, { "epoch": 7.658592848904267, "grad_norm": 0.0, "learning_rate": 1.1279736166150724e-06, "loss": 0.0675, "step": 3320 }, { "epoch": 7.660899653979238, "grad_norm": 0.0, "learning_rate": 1.124142109954459e-06, "loss": 0.026, "step": 3321 }, { "epoch": 7.66320645905421, "grad_norm": 0.0, "learning_rate": 1.1203167342253063e-06, "loss": 0.0339, "step": 3322 }, { "epoch": 7.665513264129181, "grad_norm": 0.0, "learning_rate": 1.1164974920699611e-06, "loss": 0.0733, "step": 3323 }, { "epoch": 7.6678200692041525, "grad_norm": 0.0, "learning_rate": 1.1126843861265347e-06, "loss": 0.0878, "step": 3324 }, { "epoch": 7.6701268742791235, "grad_norm": 0.0, "learning_rate": 1.108877419028902e-06, "loss": 0.0504, "step": 3325 }, { "epoch": 7.672433679354095, "grad_norm": 0.0, "learning_rate": 1.1050765934066998e-06, "loss": 0.0613, "step": 3326 }, { "epoch": 7.674740484429066, "grad_norm": 0.0, "learning_rate": 1.1012819118853147e-06, "loss": 0.0606, "step": 3327 }, { "epoch": 7.677047289504037, "grad_norm": 0.0, "learning_rate": 1.0974933770858964e-06, "loss": 0.0631, "step": 3328 }, { "epoch": 7.679354094579008, "grad_norm": 0.0, "learning_rate": 1.0937109916253474e-06, "loss": 0.0283, "step": 3329 }, { "epoch": 7.681660899653979, "grad_norm": 0.0, "learning_rate": 1.0899347581163222e-06, "loss": 0.0332, "step": 3330 }, { "epoch": 7.683967704728951, "grad_norm": 0.0, "learning_rate": 1.086164679167222e-06, "loss": 0.0379, "step": 3331 }, { "epoch": 7.686274509803922, "grad_norm": 0.0, "learning_rate": 1.0824007573822025e-06, "loss": 0.0849, "step": 3332 }, { "epoch": 7.688581314878893, "grad_norm": 0.0, "learning_rate": 1.0786429953611665e-06, "loss": 0.0531, "step": 3333 }, { "epoch": 7.690888119953864, "grad_norm": 0.0, "learning_rate": 1.0748913956997565e-06, "loss": 0.0651, "step": 3334 }, { "epoch": 7.693194925028835, "grad_norm": 0.0, "learning_rate": 1.0711459609893604e-06, "loss": 0.0748, "step": 3335 }, { "epoch": 7.695501730103806, "grad_norm": 0.0, "learning_rate": 1.0674066938171123e-06, "loss": 0.0979, "step": 3336 }, { "epoch": 7.697808535178777, "grad_norm": 0.0, "learning_rate": 1.0636735967658785e-06, "loss": 0.0557, "step": 3337 }, { "epoch": 7.700115340253749, "grad_norm": 0.0, "learning_rate": 1.0599466724142693e-06, "loss": 0.1222, "step": 3338 }, { "epoch": 7.70242214532872, "grad_norm": 0.0, "learning_rate": 1.0562259233366334e-06, "loss": 0.0241, "step": 3339 }, { "epoch": 7.704728950403691, "grad_norm": 0.0, "learning_rate": 1.0525113521030428e-06, "loss": 0.056, "step": 3340 }, { "epoch": 7.707035755478662, "grad_norm": 0.0, "learning_rate": 1.0488029612793138e-06, "loss": 0.0525, "step": 3341 }, { "epoch": 7.709342560553633, "grad_norm": 0.0, "learning_rate": 1.0451007534269908e-06, "loss": 0.0392, "step": 3342 }, { "epoch": 7.711649365628604, "grad_norm": 0.0, "learning_rate": 1.0414047311033404e-06, "loss": 0.0764, "step": 3343 }, { "epoch": 7.713956170703575, "grad_norm": 0.0, "learning_rate": 1.0377148968613659e-06, "loss": 0.0488, "step": 3344 }, { "epoch": 7.716262975778546, "grad_norm": 0.0, "learning_rate": 1.034031253249792e-06, "loss": 0.072, "step": 3345 }, { "epoch": 7.718569780853517, "grad_norm": 0.0, "learning_rate": 1.0303538028130678e-06, "loss": 0.0641, "step": 3346 }, { "epoch": 7.720876585928489, "grad_norm": 0.0, "learning_rate": 1.026682548091361e-06, "loss": 0.0507, "step": 3347 }, { "epoch": 7.72318339100346, "grad_norm": 0.0, "learning_rate": 1.0230174916205681e-06, "loss": 0.088, "step": 3348 }, { "epoch": 7.7254901960784315, "grad_norm": 0.0, "learning_rate": 1.0193586359322927e-06, "loss": 0.047, "step": 3349 }, { "epoch": 7.7277970011534025, "grad_norm": 0.0, "learning_rate": 1.0157059835538662e-06, "loss": 0.0644, "step": 3350 }, { "epoch": 7.730103806228374, "grad_norm": 0.0, "learning_rate": 1.012059537008332e-06, "loss": 0.0801, "step": 3351 }, { "epoch": 7.732410611303345, "grad_norm": 0.0, "learning_rate": 1.0084192988144392e-06, "loss": 0.0698, "step": 3352 }, { "epoch": 7.734717416378316, "grad_norm": 0.0, "learning_rate": 1.0047852714866591e-06, "loss": 0.0747, "step": 3353 }, { "epoch": 7.737024221453288, "grad_norm": 0.0, "learning_rate": 1.0011574575351702e-06, "loss": 0.0997, "step": 3354 }, { "epoch": 7.739331026528259, "grad_norm": 0.0, "learning_rate": 9.975358594658524e-07, "loss": 0.0408, "step": 3355 }, { "epoch": 7.74163783160323, "grad_norm": 0.0, "learning_rate": 9.939204797802992e-07, "loss": 0.0809, "step": 3356 }, { "epoch": 7.743944636678201, "grad_norm": 0.0, "learning_rate": 9.903113209758098e-07, "loss": 0.0504, "step": 3357 }, { "epoch": 7.746251441753172, "grad_norm": 0.0, "learning_rate": 9.867083855453775e-07, "loss": 0.1038, "step": 3358 }, { "epoch": 7.748558246828143, "grad_norm": 0.0, "learning_rate": 9.831116759777082e-07, "loss": 0.0819, "step": 3359 }, { "epoch": 7.750865051903114, "grad_norm": 0.0, "learning_rate": 9.795211947571993e-07, "loss": 0.0493, "step": 3360 }, { "epoch": 7.753171856978085, "grad_norm": 0.0, "learning_rate": 9.759369443639455e-07, "loss": 0.0709, "step": 3361 }, { "epoch": 7.755478662053056, "grad_norm": 0.0, "learning_rate": 9.723589272737443e-07, "loss": 0.0702, "step": 3362 }, { "epoch": 7.757785467128028, "grad_norm": 0.0, "learning_rate": 9.687871459580845e-07, "loss": 0.044, "step": 3363 }, { "epoch": 7.760092272202999, "grad_norm": 0.0, "learning_rate": 9.652216028841433e-07, "loss": 0.0334, "step": 3364 }, { "epoch": 7.76239907727797, "grad_norm": 0.0, "learning_rate": 9.616623005147952e-07, "loss": 0.034, "step": 3365 }, { "epoch": 7.764705882352941, "grad_norm": 0.0, "learning_rate": 9.58109241308599e-07, "loss": 0.0588, "step": 3366 }, { "epoch": 7.767012687427912, "grad_norm": 0.0, "learning_rate": 9.545624277198085e-07, "loss": 0.0564, "step": 3367 }, { "epoch": 7.769319492502883, "grad_norm": 0.0, "learning_rate": 9.510218621983524e-07, "loss": 0.0416, "step": 3368 }, { "epoch": 7.771626297577855, "grad_norm": 0.0, "learning_rate": 9.474875471898526e-07, "loss": 0.0475, "step": 3369 }, { "epoch": 7.773933102652826, "grad_norm": 0.0, "learning_rate": 9.439594851356093e-07, "loss": 0.053, "step": 3370 }, { "epoch": 7.776239907727797, "grad_norm": 0.0, "learning_rate": 9.404376784726054e-07, "loss": 0.0347, "step": 3371 }, { "epoch": 7.778546712802768, "grad_norm": 0.0, "learning_rate": 9.369221296335007e-07, "loss": 0.0756, "step": 3372 }, { "epoch": 7.780853517877739, "grad_norm": 0.0, "learning_rate": 9.334128410466359e-07, "loss": 0.0686, "step": 3373 }, { "epoch": 7.78316032295271, "grad_norm": 0.0, "learning_rate": 9.299098151360231e-07, "loss": 0.0525, "step": 3374 }, { "epoch": 7.7854671280276815, "grad_norm": 0.0, "learning_rate": 9.264130543213512e-07, "loss": 0.0576, "step": 3375 }, { "epoch": 7.7877739331026525, "grad_norm": 0.0, "learning_rate": 9.229225610179848e-07, "loss": 0.0756, "step": 3376 }, { "epoch": 7.790080738177624, "grad_norm": 0.0, "learning_rate": 9.194383376369509e-07, "loss": 0.1163, "step": 3377 }, { "epoch": 7.7923875432525955, "grad_norm": 0.0, "learning_rate": 9.15960386584952e-07, "loss": 0.0485, "step": 3378 }, { "epoch": 7.794694348327567, "grad_norm": 0.0, "learning_rate": 9.124887102643576e-07, "loss": 0.0641, "step": 3379 }, { "epoch": 7.797001153402538, "grad_norm": 0.0, "learning_rate": 9.090233110732006e-07, "loss": 0.0898, "step": 3380 }, { "epoch": 7.799307958477509, "grad_norm": 0.0, "learning_rate": 9.055641914051783e-07, "loss": 0.1169, "step": 3381 }, { "epoch": 7.80161476355248, "grad_norm": 0.0, "learning_rate": 9.021113536496551e-07, "loss": 0.0743, "step": 3382 }, { "epoch": 7.803921568627451, "grad_norm": 0.0, "learning_rate": 8.986648001916499e-07, "loss": 0.0438, "step": 3383 }, { "epoch": 7.806228373702422, "grad_norm": 0.0, "learning_rate": 8.952245334118415e-07, "loss": 0.0706, "step": 3384 }, { "epoch": 7.808535178777394, "grad_norm": 0.0, "learning_rate": 8.917905556865714e-07, "loss": 0.0812, "step": 3385 }, { "epoch": 7.810841983852365, "grad_norm": 0.0, "learning_rate": 8.883628693878299e-07, "loss": 0.0741, "step": 3386 }, { "epoch": 7.813148788927336, "grad_norm": 0.0, "learning_rate": 8.849414768832687e-07, "loss": 0.0599, "step": 3387 }, { "epoch": 7.815455594002307, "grad_norm": 0.0, "learning_rate": 8.815263805361884e-07, "loss": 0.0417, "step": 3388 }, { "epoch": 7.817762399077278, "grad_norm": 0.0, "learning_rate": 8.781175827055388e-07, "loss": 0.0605, "step": 3389 }, { "epoch": 7.820069204152249, "grad_norm": 0.0, "learning_rate": 8.747150857459219e-07, "loss": 0.0758, "step": 3390 }, { "epoch": 7.82237600922722, "grad_norm": 0.0, "learning_rate": 8.713188920075888e-07, "loss": 0.0649, "step": 3391 }, { "epoch": 7.824682814302191, "grad_norm": 0.0, "learning_rate": 8.679290038364319e-07, "loss": 0.0247, "step": 3392 }, { "epoch": 7.826989619377162, "grad_norm": 0.0, "learning_rate": 8.645454235739903e-07, "loss": 0.0609, "step": 3393 }, { "epoch": 7.829296424452134, "grad_norm": 0.0, "learning_rate": 8.611681535574501e-07, "loss": 0.0889, "step": 3394 }, { "epoch": 7.831603229527105, "grad_norm": 0.0, "learning_rate": 8.57797196119633e-07, "loss": 0.0408, "step": 3395 }, { "epoch": 7.833910034602076, "grad_norm": 0.0, "learning_rate": 8.544325535889997e-07, "loss": 0.0516, "step": 3396 }, { "epoch": 7.836216839677047, "grad_norm": 0.0, "learning_rate": 8.510742282896545e-07, "loss": 0.0728, "step": 3397 }, { "epoch": 7.838523644752018, "grad_norm": 0.0, "learning_rate": 8.47722222541334e-07, "loss": 0.0524, "step": 3398 }, { "epoch": 7.840830449826989, "grad_norm": 0.0, "learning_rate": 8.443765386594094e-07, "loss": 0.0723, "step": 3399 }, { "epoch": 7.8431372549019605, "grad_norm": 0.0, "learning_rate": 8.41037178954891e-07, "loss": 0.0693, "step": 3400 }, { "epoch": 7.845444059976932, "grad_norm": 0.0, "learning_rate": 8.377041457344104e-07, "loss": 0.0796, "step": 3401 }, { "epoch": 7.8477508650519034, "grad_norm": 0.0, "learning_rate": 8.343774413002382e-07, "loss": 0.0598, "step": 3402 }, { "epoch": 7.8500576701268745, "grad_norm": 0.0, "learning_rate": 8.310570679502716e-07, "loss": 0.0557, "step": 3403 }, { "epoch": 7.8523644752018456, "grad_norm": 0.0, "learning_rate": 8.277430279780296e-07, "loss": 0.0414, "step": 3404 }, { "epoch": 7.854671280276817, "grad_norm": 0.0, "learning_rate": 8.24435323672661e-07, "loss": 0.0704, "step": 3405 }, { "epoch": 7.856978085351788, "grad_norm": 0.0, "learning_rate": 8.211339573189414e-07, "loss": 0.0624, "step": 3406 }, { "epoch": 7.859284890426759, "grad_norm": 0.0, "learning_rate": 8.178389311972612e-07, "loss": 0.0697, "step": 3407 }, { "epoch": 7.86159169550173, "grad_norm": 0.0, "learning_rate": 8.145502475836331e-07, "loss": 0.0555, "step": 3408 }, { "epoch": 7.863898500576701, "grad_norm": 0.0, "learning_rate": 8.112679087496933e-07, "loss": 0.0638, "step": 3409 }, { "epoch": 7.866205305651673, "grad_norm": 0.0, "learning_rate": 8.079919169626926e-07, "loss": 0.0451, "step": 3410 }, { "epoch": 7.868512110726644, "grad_norm": 0.0, "learning_rate": 8.047222744854943e-07, "loss": 0.0907, "step": 3411 }, { "epoch": 7.870818915801615, "grad_norm": 0.0, "learning_rate": 8.014589835765807e-07, "loss": 0.0456, "step": 3412 }, { "epoch": 7.873125720876586, "grad_norm": 0.0, "learning_rate": 7.982020464900486e-07, "loss": 0.088, "step": 3413 }, { "epoch": 7.875432525951557, "grad_norm": 0.0, "learning_rate": 7.949514654755963e-07, "loss": 0.0493, "step": 3414 }, { "epoch": 7.877739331026528, "grad_norm": 0.0, "learning_rate": 7.917072427785422e-07, "loss": 0.0596, "step": 3415 }, { "epoch": 7.880046136101499, "grad_norm": 0.0, "learning_rate": 7.884693806398091e-07, "loss": 0.0937, "step": 3416 }, { "epoch": 7.882352941176471, "grad_norm": 0.0, "learning_rate": 7.852378812959227e-07, "loss": 0.0563, "step": 3417 }, { "epoch": 7.884659746251442, "grad_norm": 0.0, "learning_rate": 7.820127469790206e-07, "loss": 0.0735, "step": 3418 }, { "epoch": 7.886966551326413, "grad_norm": 0.0, "learning_rate": 7.787939799168342e-07, "loss": 0.0663, "step": 3419 }, { "epoch": 7.889273356401384, "grad_norm": 0.0, "learning_rate": 7.755815823327084e-07, "loss": 0.0578, "step": 3420 }, { "epoch": 7.891580161476355, "grad_norm": 0.0, "learning_rate": 7.723755564455771e-07, "loss": 0.0408, "step": 3421 }, { "epoch": 7.893886966551326, "grad_norm": 0.0, "learning_rate": 7.69175904469982e-07, "loss": 0.0551, "step": 3422 }, { "epoch": 7.896193771626297, "grad_norm": 0.0, "learning_rate": 7.659826286160565e-07, "loss": 0.0756, "step": 3423 }, { "epoch": 7.898500576701268, "grad_norm": 0.0, "learning_rate": 7.627957310895329e-07, "loss": 0.0715, "step": 3424 }, { "epoch": 7.900807381776239, "grad_norm": 0.0, "learning_rate": 7.596152140917368e-07, "loss": 0.0782, "step": 3425 }, { "epoch": 7.903114186851211, "grad_norm": 0.0, "learning_rate": 7.564410798195832e-07, "loss": 0.0404, "step": 3426 }, { "epoch": 7.905420991926182, "grad_norm": 0.0, "learning_rate": 7.532733304655848e-07, "loss": 0.0682, "step": 3427 }, { "epoch": 7.9077277970011535, "grad_norm": 0.0, "learning_rate": 7.501119682178392e-07, "loss": 0.0495, "step": 3428 }, { "epoch": 7.9100346020761245, "grad_norm": 0.0, "learning_rate": 7.46956995260033e-07, "loss": 0.0429, "step": 3429 }, { "epoch": 7.912341407151096, "grad_norm": 0.0, "learning_rate": 7.438084137714408e-07, "loss": 0.0505, "step": 3430 }, { "epoch": 7.914648212226067, "grad_norm": 0.0, "learning_rate": 7.406662259269193e-07, "loss": 0.0645, "step": 3431 }, { "epoch": 7.916955017301038, "grad_norm": 0.0, "learning_rate": 7.375304338969135e-07, "loss": 0.0962, "step": 3432 }, { "epoch": 7.91926182237601, "grad_norm": 0.0, "learning_rate": 7.344010398474455e-07, "loss": 0.0969, "step": 3433 }, { "epoch": 7.921568627450981, "grad_norm": 0.0, "learning_rate": 7.312780459401226e-07, "loss": 0.0487, "step": 3434 }, { "epoch": 7.923875432525952, "grad_norm": 0.0, "learning_rate": 7.281614543321269e-07, "loss": 0.033, "step": 3435 }, { "epoch": 7.926182237600923, "grad_norm": 0.0, "learning_rate": 7.250512671762211e-07, "loss": 0.0625, "step": 3436 }, { "epoch": 7.928489042675894, "grad_norm": 0.0, "learning_rate": 7.219474866207465e-07, "loss": 0.0999, "step": 3437 }, { "epoch": 7.930795847750865, "grad_norm": 0.0, "learning_rate": 7.188501148096117e-07, "loss": 0.0576, "step": 3438 }, { "epoch": 7.933102652825836, "grad_norm": 0.0, "learning_rate": 7.157591538823039e-07, "loss": 0.0615, "step": 3439 }, { "epoch": 7.935409457900807, "grad_norm": 0.0, "learning_rate": 7.126746059738832e-07, "loss": 0.0286, "step": 3440 }, { "epoch": 7.937716262975779, "grad_norm": 0.0, "learning_rate": 7.095964732149741e-07, "loss": 0.0593, "step": 3441 }, { "epoch": 7.94002306805075, "grad_norm": 0.0, "learning_rate": 7.065247577317747e-07, "loss": 0.0355, "step": 3442 }, { "epoch": 7.942329873125721, "grad_norm": 0.0, "learning_rate": 7.034594616460522e-07, "loss": 0.0586, "step": 3443 }, { "epoch": 7.944636678200692, "grad_norm": 0.0, "learning_rate": 7.004005870751341e-07, "loss": 0.0519, "step": 3444 }, { "epoch": 7.946943483275663, "grad_norm": 0.0, "learning_rate": 6.973481361319124e-07, "loss": 0.0582, "step": 3445 }, { "epoch": 7.949250288350634, "grad_norm": 0.0, "learning_rate": 6.943021109248471e-07, "loss": 0.0533, "step": 3446 }, { "epoch": 7.951557093425605, "grad_norm": 0.0, "learning_rate": 6.912625135579587e-07, "loss": 0.0247, "step": 3447 }, { "epoch": 7.953863898500577, "grad_norm": 0.0, "learning_rate": 6.88229346130822e-07, "loss": 0.0635, "step": 3448 }, { "epoch": 7.956170703575548, "grad_norm": 0.0, "learning_rate": 6.852026107385756e-07, "loss": 0.0514, "step": 3449 }, { "epoch": 7.958477508650519, "grad_norm": 0.0, "learning_rate": 6.821823094719171e-07, "loss": 0.0751, "step": 3450 }, { "epoch": 7.96078431372549, "grad_norm": 0.0, "learning_rate": 6.791684444170932e-07, "loss": 0.0462, "step": 3451 }, { "epoch": 7.963091118800461, "grad_norm": 0.0, "learning_rate": 6.761610176559086e-07, "loss": 0.0433, "step": 3452 }, { "epoch": 7.965397923875432, "grad_norm": 0.0, "learning_rate": 6.731600312657238e-07, "loss": 0.049, "step": 3453 }, { "epoch": 7.9677047289504035, "grad_norm": 0.0, "learning_rate": 6.701654873194429e-07, "loss": 0.0602, "step": 3454 }, { "epoch": 7.9700115340253745, "grad_norm": 0.0, "learning_rate": 6.671773878855281e-07, "loss": 0.0412, "step": 3455 }, { "epoch": 7.972318339100346, "grad_norm": 0.0, "learning_rate": 6.641957350279838e-07, "loss": 0.0414, "step": 3456 }, { "epoch": 7.9746251441753175, "grad_norm": 0.0, "learning_rate": 6.612205308063646e-07, "loss": 0.0854, "step": 3457 }, { "epoch": 7.976931949250289, "grad_norm": 0.0, "learning_rate": 6.582517772757702e-07, "loss": 0.0382, "step": 3458 }, { "epoch": 7.97923875432526, "grad_norm": 0.0, "learning_rate": 6.552894764868456e-07, "loss": 0.0701, "step": 3459 }, { "epoch": 7.981545559400231, "grad_norm": 0.0, "learning_rate": 6.523336304857764e-07, "loss": 0.0775, "step": 3460 }, { "epoch": 7.983852364475202, "grad_norm": 0.0, "learning_rate": 6.493842413142915e-07, "loss": 0.041, "step": 3461 }, { "epoch": 7.986159169550173, "grad_norm": 0.0, "learning_rate": 6.464413110096601e-07, "loss": 0.0456, "step": 3462 }, { "epoch": 7.988465974625144, "grad_norm": 0.0, "learning_rate": 6.435048416046863e-07, "loss": 0.0706, "step": 3463 }, { "epoch": 7.990772779700116, "grad_norm": 0.0, "learning_rate": 6.405748351277152e-07, "loss": 0.063, "step": 3464 }, { "epoch": 7.993079584775087, "grad_norm": 0.0, "learning_rate": 6.37651293602628e-07, "loss": 0.0883, "step": 3465 }, { "epoch": 7.995386389850058, "grad_norm": 0.0, "learning_rate": 6.34734219048837e-07, "loss": 0.0665, "step": 3466 }, { "epoch": 7.997693194925029, "grad_norm": 0.0, "learning_rate": 6.318236134812917e-07, "loss": 0.0869, "step": 3467 }, { "epoch": 8.0, "grad_norm": 0.0, "learning_rate": 6.28919478910468e-07, "loss": 0.062, "step": 3468 }, { "epoch": 8.002306805074971, "grad_norm": 0.0, "learning_rate": 6.260218173423749e-07, "loss": 0.0482, "step": 3469 }, { "epoch": 8.004613610149942, "grad_norm": 0.0, "learning_rate": 6.231306307785523e-07, "loss": 0.0636, "step": 3470 }, { "epoch": 8.006920415224913, "grad_norm": 0.0, "learning_rate": 6.202459212160638e-07, "loss": 0.0494, "step": 3471 }, { "epoch": 8.009227220299884, "grad_norm": 0.0, "learning_rate": 6.173676906475012e-07, "loss": 0.0207, "step": 3472 }, { "epoch": 8.011534025374855, "grad_norm": 0.0, "learning_rate": 6.144959410609785e-07, "loss": 0.0468, "step": 3473 }, { "epoch": 8.013840830449826, "grad_norm": 0.0, "learning_rate": 6.116306744401391e-07, "loss": 0.0413, "step": 3474 }, { "epoch": 8.016147635524797, "grad_norm": 0.0, "learning_rate": 6.087718927641406e-07, "loss": 0.0702, "step": 3475 }, { "epoch": 8.01845444059977, "grad_norm": 0.0, "learning_rate": 6.05919598007666e-07, "loss": 0.0205, "step": 3476 }, { "epoch": 8.020761245674741, "grad_norm": 0.0, "learning_rate": 6.030737921409169e-07, "loss": 0.0493, "step": 3477 }, { "epoch": 8.023068050749712, "grad_norm": 0.0, "learning_rate": 6.002344771296098e-07, "loss": 0.0458, "step": 3478 }, { "epoch": 8.025374855824683, "grad_norm": 0.0, "learning_rate": 5.974016549349837e-07, "loss": 0.0322, "step": 3479 }, { "epoch": 8.027681660899654, "grad_norm": 0.0, "learning_rate": 5.945753275137844e-07, "loss": 0.0534, "step": 3480 }, { "epoch": 8.029988465974625, "grad_norm": 0.0, "learning_rate": 5.917554968182803e-07, "loss": 0.0474, "step": 3481 }, { "epoch": 8.032295271049597, "grad_norm": 0.0, "learning_rate": 5.889421647962456e-07, "loss": 0.0325, "step": 3482 }, { "epoch": 8.034602076124568, "grad_norm": 0.0, "learning_rate": 5.861353333909692e-07, "loss": 0.0544, "step": 3483 }, { "epoch": 8.036908881199539, "grad_norm": 0.0, "learning_rate": 5.833350045412478e-07, "loss": 0.0373, "step": 3484 }, { "epoch": 8.03921568627451, "grad_norm": 0.0, "learning_rate": 5.805411801813865e-07, "loss": 0.0639, "step": 3485 }, { "epoch": 8.04152249134948, "grad_norm": 0.0, "learning_rate": 5.777538622412005e-07, "loss": 0.0287, "step": 3486 }, { "epoch": 8.043829296424452, "grad_norm": 0.0, "learning_rate": 5.749730526460073e-07, "loss": 0.0393, "step": 3487 }, { "epoch": 8.046136101499423, "grad_norm": 0.0, "learning_rate": 5.721987533166307e-07, "loss": 0.0167, "step": 3488 }, { "epoch": 8.048442906574394, "grad_norm": 0.0, "learning_rate": 5.694309661693942e-07, "loss": 0.028, "step": 3489 }, { "epoch": 8.050749711649365, "grad_norm": 0.0, "learning_rate": 5.666696931161308e-07, "loss": 0.0427, "step": 3490 }, { "epoch": 8.053056516724336, "grad_norm": 0.0, "learning_rate": 5.63914936064165e-07, "loss": 0.0417, "step": 3491 }, { "epoch": 8.055363321799309, "grad_norm": 0.0, "learning_rate": 5.611666969163243e-07, "loss": 0.0374, "step": 3492 }, { "epoch": 8.05767012687428, "grad_norm": 0.0, "learning_rate": 5.584249775709372e-07, "loss": 0.0284, "step": 3493 }, { "epoch": 8.059976931949251, "grad_norm": 0.0, "learning_rate": 5.556897799218208e-07, "loss": 0.0347, "step": 3494 }, { "epoch": 8.062283737024222, "grad_norm": 0.0, "learning_rate": 5.529611058582951e-07, "loss": 0.0523, "step": 3495 }, { "epoch": 8.064590542099193, "grad_norm": 0.0, "learning_rate": 5.502389572651723e-07, "loss": 0.0527, "step": 3496 }, { "epoch": 8.066897347174164, "grad_norm": 0.0, "learning_rate": 5.475233360227516e-07, "loss": 0.0447, "step": 3497 }, { "epoch": 8.069204152249135, "grad_norm": 0.0, "learning_rate": 5.448142440068316e-07, "loss": 0.0536, "step": 3498 }, { "epoch": 8.071510957324106, "grad_norm": 0.0, "learning_rate": 5.421116830886963e-07, "loss": 0.0412, "step": 3499 }, { "epoch": 8.073817762399077, "grad_norm": 0.0, "learning_rate": 5.394156551351182e-07, "loss": 0.0378, "step": 3500 }, { "epoch": 8.076124567474048, "grad_norm": 0.0, "learning_rate": 5.367261620083575e-07, "loss": 0.0751, "step": 3501 }, { "epoch": 8.07843137254902, "grad_norm": 0.0, "learning_rate": 5.340432055661637e-07, "loss": 0.0456, "step": 3502 }, { "epoch": 8.08073817762399, "grad_norm": 0.0, "learning_rate": 5.313667876617657e-07, "loss": 0.0622, "step": 3503 }, { "epoch": 8.083044982698961, "grad_norm": 0.0, "learning_rate": 5.286969101438821e-07, "loss": 0.0336, "step": 3504 }, { "epoch": 8.085351787773932, "grad_norm": 0.0, "learning_rate": 5.26033574856708e-07, "loss": 0.0245, "step": 3505 }, { "epoch": 8.087658592848904, "grad_norm": 0.0, "learning_rate": 5.233767836399217e-07, "loss": 0.0365, "step": 3506 }, { "epoch": 8.089965397923875, "grad_norm": 0.0, "learning_rate": 5.207265383286831e-07, "loss": 0.0685, "step": 3507 }, { "epoch": 8.092272202998847, "grad_norm": 0.0, "learning_rate": 5.180828407536287e-07, "loss": 0.1192, "step": 3508 }, { "epoch": 8.094579008073818, "grad_norm": 0.0, "learning_rate": 5.154456927408713e-07, "loss": 0.0367, "step": 3509 }, { "epoch": 8.09688581314879, "grad_norm": 0.0, "learning_rate": 5.128150961120026e-07, "loss": 0.0362, "step": 3510 }, { "epoch": 8.09919261822376, "grad_norm": 0.0, "learning_rate": 5.101910526840869e-07, "loss": 0.0588, "step": 3511 }, { "epoch": 8.101499423298732, "grad_norm": 0.0, "learning_rate": 5.075735642696611e-07, "loss": 0.021, "step": 3512 }, { "epoch": 8.103806228373703, "grad_norm": 0.0, "learning_rate": 5.049626326767366e-07, "loss": 0.0482, "step": 3513 }, { "epoch": 8.106113033448674, "grad_norm": 0.0, "learning_rate": 5.02358259708795e-07, "loss": 0.0466, "step": 3514 }, { "epoch": 8.108419838523645, "grad_norm": 0.0, "learning_rate": 4.997604471647844e-07, "loss": 0.0267, "step": 3515 }, { "epoch": 8.110726643598616, "grad_norm": 0.0, "learning_rate": 4.97169196839129e-07, "loss": 0.0535, "step": 3516 }, { "epoch": 8.113033448673587, "grad_norm": 0.0, "learning_rate": 4.945845105217118e-07, "loss": 0.07, "step": 3517 }, { "epoch": 8.115340253748558, "grad_norm": 0.0, "learning_rate": 4.920063899978833e-07, "loss": 0.0469, "step": 3518 }, { "epoch": 8.117647058823529, "grad_norm": 0.0, "learning_rate": 4.894348370484648e-07, "loss": 0.0617, "step": 3519 }, { "epoch": 8.1199538638985, "grad_norm": 0.0, "learning_rate": 4.868698534497362e-07, "loss": 0.0584, "step": 3520 }, { "epoch": 8.122260668973471, "grad_norm": 0.0, "learning_rate": 4.843114409734384e-07, "loss": 0.0244, "step": 3521 }, { "epoch": 8.124567474048442, "grad_norm": 0.0, "learning_rate": 4.817596013867765e-07, "loss": 0.032, "step": 3522 }, { "epoch": 8.126874279123413, "grad_norm": 0.0, "learning_rate": 4.792143364524138e-07, "loss": 0.0345, "step": 3523 }, { "epoch": 8.129181084198386, "grad_norm": 0.0, "learning_rate": 4.766756479284751e-07, "loss": 0.0448, "step": 3524 }, { "epoch": 8.131487889273357, "grad_norm": 0.0, "learning_rate": 4.7414353756853773e-07, "loss": 0.0547, "step": 3525 }, { "epoch": 8.133794694348328, "grad_norm": 0.0, "learning_rate": 4.7161800712163807e-07, "loss": 0.0548, "step": 3526 }, { "epoch": 8.1361014994233, "grad_norm": 0.0, "learning_rate": 4.6909905833226965e-07, "loss": 0.0439, "step": 3527 }, { "epoch": 8.13840830449827, "grad_norm": 0.0, "learning_rate": 4.6658669294037393e-07, "loss": 0.0415, "step": 3528 }, { "epoch": 8.140715109573241, "grad_norm": 0.0, "learning_rate": 4.6408091268134836e-07, "loss": 0.0361, "step": 3529 }, { "epoch": 8.143021914648212, "grad_norm": 0.0, "learning_rate": 4.61581719286045e-07, "loss": 0.0352, "step": 3530 }, { "epoch": 8.145328719723183, "grad_norm": 0.0, "learning_rate": 4.5908911448075746e-07, "loss": 0.0349, "step": 3531 }, { "epoch": 8.147635524798154, "grad_norm": 0.0, "learning_rate": 4.566030999872384e-07, "loss": 0.0402, "step": 3532 }, { "epoch": 8.149942329873126, "grad_norm": 0.0, "learning_rate": 4.5412367752268094e-07, "loss": 0.0494, "step": 3533 }, { "epoch": 8.152249134948097, "grad_norm": 0.0, "learning_rate": 4.5165084879972844e-07, "loss": 0.0373, "step": 3534 }, { "epoch": 8.154555940023068, "grad_norm": 0.0, "learning_rate": 4.491846155264667e-07, "loss": 0.0398, "step": 3535 }, { "epoch": 8.156862745098039, "grad_norm": 0.0, "learning_rate": 4.46724979406431e-07, "loss": 0.0559, "step": 3536 }, { "epoch": 8.15916955017301, "grad_norm": 0.0, "learning_rate": 4.4427194213859216e-07, "loss": 0.053, "step": 3537 }, { "epoch": 8.16147635524798, "grad_norm": 0.0, "learning_rate": 4.4182550541737033e-07, "loss": 0.0473, "step": 3538 }, { "epoch": 8.163783160322954, "grad_norm": 0.0, "learning_rate": 4.3938567093262275e-07, "loss": 0.0495, "step": 3539 }, { "epoch": 8.166089965397925, "grad_norm": 0.0, "learning_rate": 4.3695244036964567e-07, "loss": 0.0441, "step": 3540 }, { "epoch": 8.168396770472896, "grad_norm": 0.0, "learning_rate": 4.345258154091747e-07, "loss": 0.0558, "step": 3541 }, { "epoch": 8.170703575547867, "grad_norm": 0.0, "learning_rate": 4.3210579772738237e-07, "loss": 0.0543, "step": 3542 }, { "epoch": 8.173010380622838, "grad_norm": 0.0, "learning_rate": 4.296923889958771e-07, "loss": 0.0341, "step": 3543 }, { "epoch": 8.175317185697809, "grad_norm": 0.0, "learning_rate": 4.272855908817042e-07, "loss": 0.0347, "step": 3544 }, { "epoch": 8.17762399077278, "grad_norm": 0.0, "learning_rate": 4.2488540504734056e-07, "loss": 0.0309, "step": 3545 }, { "epoch": 8.179930795847751, "grad_norm": 0.0, "learning_rate": 4.224918331506955e-07, "loss": 0.0706, "step": 3546 }, { "epoch": 8.182237600922722, "grad_norm": 0.0, "learning_rate": 4.2010487684511105e-07, "loss": 0.0663, "step": 3547 }, { "epoch": 8.184544405997693, "grad_norm": 0.0, "learning_rate": 4.177245377793604e-07, "loss": 0.0244, "step": 3548 }, { "epoch": 8.186851211072664, "grad_norm": 0.0, "learning_rate": 4.1535081759764286e-07, "loss": 0.0535, "step": 3549 }, { "epoch": 8.189158016147635, "grad_norm": 0.0, "learning_rate": 4.12983717939589e-07, "loss": 0.0504, "step": 3550 }, { "epoch": 8.191464821222606, "grad_norm": 0.0, "learning_rate": 4.106232404402544e-07, "loss": 0.0318, "step": 3551 }, { "epoch": 8.193771626297577, "grad_norm": 0.0, "learning_rate": 4.082693867301224e-07, "loss": 0.0486, "step": 3552 }, { "epoch": 8.196078431372548, "grad_norm": 0.0, "learning_rate": 4.0592215843509585e-07, "loss": 0.0426, "step": 3553 }, { "epoch": 8.19838523644752, "grad_norm": 0.0, "learning_rate": 4.035815571765089e-07, "loss": 0.0453, "step": 3554 }, { "epoch": 8.200692041522492, "grad_norm": 0.0, "learning_rate": 4.012475845711106e-07, "loss": 0.0419, "step": 3555 }, { "epoch": 8.202998846597463, "grad_norm": 0.0, "learning_rate": 3.9892024223107673e-07, "loss": 0.0507, "step": 3556 }, { "epoch": 8.205305651672434, "grad_norm": 0.0, "learning_rate": 3.965995317640026e-07, "loss": 0.0784, "step": 3557 }, { "epoch": 8.207612456747405, "grad_norm": 0.0, "learning_rate": 3.9428545477289913e-07, "loss": 0.0807, "step": 3558 }, { "epoch": 8.209919261822376, "grad_norm": 0.0, "learning_rate": 3.919780128561979e-07, "loss": 0.0427, "step": 3559 }, { "epoch": 8.212226066897347, "grad_norm": 0.0, "learning_rate": 3.8967720760774816e-07, "loss": 0.0362, "step": 3560 }, { "epoch": 8.214532871972319, "grad_norm": 0.0, "learning_rate": 3.8738304061681107e-07, "loss": 0.0414, "step": 3561 }, { "epoch": 8.21683967704729, "grad_norm": 0.0, "learning_rate": 3.850955134680678e-07, "loss": 0.0578, "step": 3562 }, { "epoch": 8.21914648212226, "grad_norm": 0.0, "learning_rate": 3.8281462774161004e-07, "loss": 0.0481, "step": 3563 }, { "epoch": 8.221453287197232, "grad_norm": 0.0, "learning_rate": 3.8054038501294077e-07, "loss": 0.0385, "step": 3564 }, { "epoch": 8.223760092272203, "grad_norm": 0.0, "learning_rate": 3.7827278685297785e-07, "loss": 0.0376, "step": 3565 }, { "epoch": 8.226066897347174, "grad_norm": 0.0, "learning_rate": 3.7601183482804504e-07, "loss": 0.0359, "step": 3566 }, { "epoch": 8.228373702422145, "grad_norm": 0.0, "learning_rate": 3.7375753049987974e-07, "loss": 0.0836, "step": 3567 }, { "epoch": 8.230680507497116, "grad_norm": 0.0, "learning_rate": 3.715098754256241e-07, "loss": 0.0459, "step": 3568 }, { "epoch": 8.232987312572087, "grad_norm": 0.0, "learning_rate": 3.692688711578296e-07, "loss": 0.0712, "step": 3569 }, { "epoch": 8.235294117647058, "grad_norm": 0.0, "learning_rate": 3.6703451924445467e-07, "loss": 0.0495, "step": 3570 }, { "epoch": 8.23760092272203, "grad_norm": 0.0, "learning_rate": 3.6480682122885804e-07, "loss": 0.0249, "step": 3571 }, { "epoch": 8.239907727797002, "grad_norm": 0.0, "learning_rate": 3.625857786498055e-07, "loss": 0.0351, "step": 3572 }, { "epoch": 8.242214532871973, "grad_norm": 0.0, "learning_rate": 3.603713930414676e-07, "loss": 0.0366, "step": 3573 }, { "epoch": 8.244521337946944, "grad_norm": 0.0, "learning_rate": 3.58163665933412e-07, "loss": 0.0524, "step": 3574 }, { "epoch": 8.246828143021915, "grad_norm": 0.0, "learning_rate": 3.55962598850611e-07, "loss": 0.0403, "step": 3575 }, { "epoch": 8.249134948096886, "grad_norm": 0.0, "learning_rate": 3.5376819331343404e-07, "loss": 0.0553, "step": 3576 }, { "epoch": 8.251441753171857, "grad_norm": 0.0, "learning_rate": 3.515804508376508e-07, "loss": 0.0668, "step": 3577 }, { "epoch": 8.253748558246828, "grad_norm": 0.0, "learning_rate": 3.4939937293442694e-07, "loss": 0.064, "step": 3578 }, { "epoch": 8.2560553633218, "grad_norm": 0.0, "learning_rate": 3.472249611103273e-07, "loss": 0.0678, "step": 3579 }, { "epoch": 8.25836216839677, "grad_norm": 0.0, "learning_rate": 3.450572168673072e-07, "loss": 0.0779, "step": 3580 }, { "epoch": 8.260668973471741, "grad_norm": 0.0, "learning_rate": 3.428961417027221e-07, "loss": 0.0559, "step": 3581 }, { "epoch": 8.262975778546712, "grad_norm": 0.0, "learning_rate": 3.4074173710931804e-07, "loss": 0.0407, "step": 3582 }, { "epoch": 8.265282583621683, "grad_norm": 0.0, "learning_rate": 3.385940045752323e-07, "loss": 0.0753, "step": 3583 }, { "epoch": 8.267589388696654, "grad_norm": 0.0, "learning_rate": 3.3645294558399487e-07, "loss": 0.0595, "step": 3584 }, { "epoch": 8.269896193771626, "grad_norm": 0.0, "learning_rate": 3.3431856161452835e-07, "loss": 0.0238, "step": 3585 }, { "epoch": 8.272202998846598, "grad_norm": 0.0, "learning_rate": 3.3219085414114003e-07, "loss": 0.0222, "step": 3586 }, { "epoch": 8.27450980392157, "grad_norm": 0.0, "learning_rate": 3.3006982463352764e-07, "loss": 0.0723, "step": 3587 }, { "epoch": 8.27681660899654, "grad_norm": 0.0, "learning_rate": 3.2795547455677813e-07, "loss": 0.0424, "step": 3588 }, { "epoch": 8.279123414071512, "grad_norm": 0.0, "learning_rate": 3.2584780537136206e-07, "loss": 0.075, "step": 3589 }, { "epoch": 8.281430219146483, "grad_norm": 0.0, "learning_rate": 3.237468185331327e-07, "loss": 0.0207, "step": 3590 }, { "epoch": 8.283737024221454, "grad_norm": 0.0, "learning_rate": 3.2165251549333585e-07, "loss": 0.0536, "step": 3591 }, { "epoch": 8.286043829296425, "grad_norm": 0.0, "learning_rate": 3.1956489769859213e-07, "loss": 0.0461, "step": 3592 }, { "epoch": 8.288350634371396, "grad_norm": 0.0, "learning_rate": 3.1748396659090797e-07, "loss": 0.0496, "step": 3593 }, { "epoch": 8.290657439446367, "grad_norm": 0.0, "learning_rate": 3.1540972360767254e-07, "loss": 0.0602, "step": 3594 }, { "epoch": 8.292964244521338, "grad_norm": 0.0, "learning_rate": 3.1334217018165194e-07, "loss": 0.0399, "step": 3595 }, { "epoch": 8.295271049596309, "grad_norm": 0.0, "learning_rate": 3.112813077409926e-07, "loss": 0.0379, "step": 3596 }, { "epoch": 8.29757785467128, "grad_norm": 0.0, "learning_rate": 3.0922713770922155e-07, "loss": 0.0432, "step": 3597 }, { "epoch": 8.299884659746251, "grad_norm": 0.0, "learning_rate": 3.07179661505238e-07, "loss": 0.0528, "step": 3598 }, { "epoch": 8.302191464821222, "grad_norm": 0.0, "learning_rate": 3.051388805433231e-07, "loss": 0.0486, "step": 3599 }, { "epoch": 8.304498269896193, "grad_norm": 0.0, "learning_rate": 3.0310479623313125e-07, "loss": 0.0549, "step": 3600 }, { "epoch": 8.306805074971164, "grad_norm": 0.0, "learning_rate": 3.010774099796898e-07, "loss": 0.0388, "step": 3601 }, { "epoch": 8.309111880046135, "grad_norm": 0.0, "learning_rate": 2.9905672318339963e-07, "loss": 0.0413, "step": 3602 }, { "epoch": 8.311418685121108, "grad_norm": 0.0, "learning_rate": 2.970427372400353e-07, "loss": 0.0446, "step": 3603 }, { "epoch": 8.313725490196079, "grad_norm": 0.0, "learning_rate": 2.950354535407429e-07, "loss": 0.0374, "step": 3604 }, { "epoch": 8.31603229527105, "grad_norm": 0.0, "learning_rate": 2.930348734720379e-07, "loss": 0.0554, "step": 3605 }, { "epoch": 8.318339100346021, "grad_norm": 0.0, "learning_rate": 2.910409984158058e-07, "loss": 0.0817, "step": 3606 }, { "epoch": 8.320645905420992, "grad_norm": 0.0, "learning_rate": 2.8905382974930173e-07, "loss": 0.039, "step": 3607 }, { "epoch": 8.322952710495963, "grad_norm": 0.0, "learning_rate": 2.8707336884514436e-07, "loss": 0.0512, "step": 3608 }, { "epoch": 8.325259515570934, "grad_norm": 0.0, "learning_rate": 2.8509961707132496e-07, "loss": 0.0671, "step": 3609 }, { "epoch": 8.327566320645905, "grad_norm": 0.0, "learning_rate": 2.831325757911985e-07, "loss": 0.0655, "step": 3610 }, { "epoch": 8.329873125720876, "grad_norm": 0.0, "learning_rate": 2.8117224636347917e-07, "loss": 0.0403, "step": 3611 }, { "epoch": 8.332179930795848, "grad_norm": 0.0, "learning_rate": 2.7921863014225504e-07, "loss": 0.0571, "step": 3612 }, { "epoch": 8.334486735870819, "grad_norm": 0.0, "learning_rate": 2.772717284769677e-07, "loss": 0.0557, "step": 3613 }, { "epoch": 8.33679354094579, "grad_norm": 0.0, "learning_rate": 2.753315427124259e-07, "loss": 0.0535, "step": 3614 }, { "epoch": 8.33910034602076, "grad_norm": 0.0, "learning_rate": 2.733980741887987e-07, "loss": 0.0681, "step": 3615 }, { "epoch": 8.341407151095732, "grad_norm": 0.0, "learning_rate": 2.714713242416156e-07, "loss": 0.0559, "step": 3616 }, { "epoch": 8.343713956170703, "grad_norm": 0.0, "learning_rate": 2.6955129420176193e-07, "loss": 0.0557, "step": 3617 }, { "epoch": 8.346020761245676, "grad_norm": 0.0, "learning_rate": 2.676379853954858e-07, "loss": 0.0493, "step": 3618 }, { "epoch": 8.348327566320647, "grad_norm": 0.0, "learning_rate": 2.6573139914439104e-07, "loss": 0.0439, "step": 3619 }, { "epoch": 8.350634371395618, "grad_norm": 0.0, "learning_rate": 2.6383153676543537e-07, "loss": 0.0433, "step": 3620 }, { "epoch": 8.352941176470589, "grad_norm": 0.0, "learning_rate": 2.6193839957093683e-07, "loss": 0.0524, "step": 3621 }, { "epoch": 8.35524798154556, "grad_norm": 0.0, "learning_rate": 2.6005198886856486e-07, "loss": 0.0676, "step": 3622 }, { "epoch": 8.35755478662053, "grad_norm": 0.0, "learning_rate": 2.581723059613428e-07, "loss": 0.0551, "step": 3623 }, { "epoch": 8.359861591695502, "grad_norm": 0.0, "learning_rate": 2.5629935214764866e-07, "loss": 0.0528, "step": 3624 }, { "epoch": 8.362168396770473, "grad_norm": 0.0, "learning_rate": 2.5443312872120763e-07, "loss": 0.0622, "step": 3625 }, { "epoch": 8.364475201845444, "grad_norm": 0.0, "learning_rate": 2.5257363697110406e-07, "loss": 0.0417, "step": 3626 }, { "epoch": 8.366782006920415, "grad_norm": 0.0, "learning_rate": 2.507208781817638e-07, "loss": 0.0681, "step": 3627 }, { "epoch": 8.369088811995386, "grad_norm": 0.0, "learning_rate": 2.4887485363296883e-07, "loss": 0.0397, "step": 3628 }, { "epoch": 8.371395617070357, "grad_norm": 0.0, "learning_rate": 2.4703556459984456e-07, "loss": 0.0186, "step": 3629 }, { "epoch": 8.373702422145328, "grad_norm": 0.0, "learning_rate": 2.45203012352867e-07, "loss": 0.0466, "step": 3630 }, { "epoch": 8.3760092272203, "grad_norm": 0.0, "learning_rate": 2.433771981578581e-07, "loss": 0.0663, "step": 3631 }, { "epoch": 8.37831603229527, "grad_norm": 0.0, "learning_rate": 2.4155812327598337e-07, "loss": 0.0394, "step": 3632 }, { "epoch": 8.380622837370241, "grad_norm": 0.0, "learning_rate": 2.3974578896375555e-07, "loss": 0.052, "step": 3633 }, { "epoch": 8.382929642445214, "grad_norm": 0.0, "learning_rate": 2.3794019647303325e-07, "loss": 0.0452, "step": 3634 }, { "epoch": 8.385236447520185, "grad_norm": 0.0, "learning_rate": 2.361413470510121e-07, "loss": 0.0378, "step": 3635 }, { "epoch": 8.387543252595156, "grad_norm": 0.0, "learning_rate": 2.3434924194023712e-07, "loss": 0.0776, "step": 3636 }, { "epoch": 8.389850057670127, "grad_norm": 0.0, "learning_rate": 2.3256388237858806e-07, "loss": 0.0532, "step": 3637 }, { "epoch": 8.392156862745098, "grad_norm": 0.0, "learning_rate": 2.307852695992907e-07, "loss": 0.0653, "step": 3638 }, { "epoch": 8.39446366782007, "grad_norm": 0.0, "learning_rate": 2.2901340483090785e-07, "loss": 0.0465, "step": 3639 }, { "epoch": 8.39677047289504, "grad_norm": 0.0, "learning_rate": 2.2724828929734156e-07, "loss": 0.0754, "step": 3640 }, { "epoch": 8.399077277970012, "grad_norm": 0.0, "learning_rate": 2.25489924217831e-07, "loss": 0.0579, "step": 3641 }, { "epoch": 8.401384083044983, "grad_norm": 0.0, "learning_rate": 2.2373831080695463e-07, "loss": 0.0261, "step": 3642 }, { "epoch": 8.403690888119954, "grad_norm": 0.0, "learning_rate": 2.2199345027462572e-07, "loss": 0.0806, "step": 3643 }, { "epoch": 8.405997693194925, "grad_norm": 0.0, "learning_rate": 2.202553438260946e-07, "loss": 0.0538, "step": 3644 }, { "epoch": 8.408304498269896, "grad_norm": 0.0, "learning_rate": 2.1852399266194312e-07, "loss": 0.0474, "step": 3645 }, { "epoch": 8.410611303344867, "grad_norm": 0.0, "learning_rate": 2.1679939797809024e-07, "loss": 0.0601, "step": 3646 }, { "epoch": 8.412918108419838, "grad_norm": 0.0, "learning_rate": 2.1508156096578748e-07, "loss": 0.0686, "step": 3647 }, { "epoch": 8.415224913494809, "grad_norm": 0.0, "learning_rate": 2.1337048281161565e-07, "loss": 0.059, "step": 3648 }, { "epoch": 8.41753171856978, "grad_norm": 0.0, "learning_rate": 2.1166616469749047e-07, "loss": 0.0093, "step": 3649 }, { "epoch": 8.419838523644753, "grad_norm": 0.0, "learning_rate": 2.0996860780065575e-07, "loss": 0.0509, "step": 3650 }, { "epoch": 8.422145328719724, "grad_norm": 0.0, "learning_rate": 2.082778132936858e-07, "loss": 0.0346, "step": 3651 }, { "epoch": 8.424452133794695, "grad_norm": 0.0, "learning_rate": 2.0659378234448524e-07, "loss": 0.0575, "step": 3652 }, { "epoch": 8.426758938869666, "grad_norm": 0.0, "learning_rate": 2.0491651611628582e-07, "loss": 0.049, "step": 3653 }, { "epoch": 8.429065743944637, "grad_norm": 0.0, "learning_rate": 2.0324601576764525e-07, "loss": 0.0489, "step": 3654 }, { "epoch": 8.431372549019608, "grad_norm": 0.0, "learning_rate": 2.0158228245244826e-07, "loss": 0.0622, "step": 3655 }, { "epoch": 8.43367935409458, "grad_norm": 0.0, "learning_rate": 1.9992531731991005e-07, "loss": 0.0422, "step": 3656 }, { "epoch": 8.43598615916955, "grad_norm": 0.0, "learning_rate": 1.9827512151456175e-07, "loss": 0.0513, "step": 3657 }, { "epoch": 8.438292964244521, "grad_norm": 0.0, "learning_rate": 1.96631696176266e-07, "loss": 0.0459, "step": 3658 }, { "epoch": 8.440599769319492, "grad_norm": 0.0, "learning_rate": 1.9499504244020694e-07, "loss": 0.0429, "step": 3659 }, { "epoch": 8.442906574394463, "grad_norm": 0.0, "learning_rate": 1.933651614368892e-07, "loss": 0.0404, "step": 3660 }, { "epoch": 8.445213379469434, "grad_norm": 0.0, "learning_rate": 1.917420542921433e-07, "loss": 0.0518, "step": 3661 }, { "epoch": 8.447520184544405, "grad_norm": 0.0, "learning_rate": 1.9012572212711467e-07, "loss": 0.0562, "step": 3662 }, { "epoch": 8.449826989619377, "grad_norm": 0.0, "learning_rate": 1.885161660582746e-07, "loss": 0.037, "step": 3663 }, { "epoch": 8.452133794694348, "grad_norm": 0.0, "learning_rate": 1.8691338719741048e-07, "loss": 0.03, "step": 3664 }, { "epoch": 8.45444059976932, "grad_norm": 0.0, "learning_rate": 1.8531738665163112e-07, "loss": 0.0562, "step": 3665 }, { "epoch": 8.456747404844291, "grad_norm": 0.0, "learning_rate": 1.8372816552336025e-07, "loss": 0.0785, "step": 3666 }, { "epoch": 8.459054209919262, "grad_norm": 0.0, "learning_rate": 1.82145724910342e-07, "loss": 0.0763, "step": 3667 }, { "epoch": 8.461361014994234, "grad_norm": 0.0, "learning_rate": 1.8057006590563419e-07, "loss": 0.0488, "step": 3668 }, { "epoch": 8.463667820069205, "grad_norm": 0.0, "learning_rate": 1.7900118959761181e-07, "loss": 0.0448, "step": 3669 }, { "epoch": 8.465974625144176, "grad_norm": 0.0, "learning_rate": 1.7743909706996242e-07, "loss": 0.0847, "step": 3670 }, { "epoch": 8.468281430219147, "grad_norm": 0.0, "learning_rate": 1.7588378940169293e-07, "loss": 0.0688, "step": 3671 }, { "epoch": 8.470588235294118, "grad_norm": 0.0, "learning_rate": 1.7433526766711727e-07, "loss": 0.0592, "step": 3672 }, { "epoch": 8.472895040369089, "grad_norm": 0.0, "learning_rate": 1.7279353293586765e-07, "loss": 0.0327, "step": 3673 }, { "epoch": 8.47520184544406, "grad_norm": 0.0, "learning_rate": 1.7125858627288328e-07, "loss": 0.041, "step": 3674 }, { "epoch": 8.477508650519031, "grad_norm": 0.0, "learning_rate": 1.6973042873841827e-07, "loss": 0.0528, "step": 3675 }, { "epoch": 8.479815455594002, "grad_norm": 0.0, "learning_rate": 1.6820906138803384e-07, "loss": 0.035, "step": 3676 }, { "epoch": 8.482122260668973, "grad_norm": 0.0, "learning_rate": 1.6669448527260602e-07, "loss": 0.0282, "step": 3677 }, { "epoch": 8.484429065743944, "grad_norm": 0.0, "learning_rate": 1.651867014383146e-07, "loss": 0.0589, "step": 3678 }, { "epoch": 8.486735870818915, "grad_norm": 0.0, "learning_rate": 1.6368571092665098e-07, "loss": 0.0452, "step": 3679 }, { "epoch": 8.489042675893886, "grad_norm": 0.0, "learning_rate": 1.6219151477441243e-07, "loss": 0.0537, "step": 3680 }, { "epoch": 8.491349480968857, "grad_norm": 0.0, "learning_rate": 1.6070411401370335e-07, "loss": 0.0574, "step": 3681 }, { "epoch": 8.49365628604383, "grad_norm": 0.0, "learning_rate": 1.5922350967193524e-07, "loss": 0.0508, "step": 3682 }, { "epoch": 8.495963091118801, "grad_norm": 0.0, "learning_rate": 1.5774970277182333e-07, "loss": 0.0788, "step": 3683 }, { "epoch": 8.498269896193772, "grad_norm": 0.0, "learning_rate": 1.5628269433139e-07, "loss": 0.0739, "step": 3684 }, { "epoch": 8.500576701268743, "grad_norm": 0.0, "learning_rate": 1.5482248536395904e-07, "loss": 0.0577, "step": 3685 }, { "epoch": 8.502883506343714, "grad_norm": 0.0, "learning_rate": 1.5336907687815817e-07, "loss": 0.0515, "step": 3686 }, { "epoch": 8.505190311418685, "grad_norm": 0.0, "learning_rate": 1.519224698779198e-07, "loss": 0.042, "step": 3687 }, { "epoch": 8.507497116493656, "grad_norm": 0.0, "learning_rate": 1.504826653624758e-07, "loss": 0.0478, "step": 3688 }, { "epoch": 8.509803921568627, "grad_norm": 0.0, "learning_rate": 1.4904966432635947e-07, "loss": 0.0551, "step": 3689 }, { "epoch": 8.512110726643598, "grad_norm": 0.0, "learning_rate": 1.4762346775940794e-07, "loss": 0.0471, "step": 3690 }, { "epoch": 8.51441753171857, "grad_norm": 0.0, "learning_rate": 1.4620407664675319e-07, "loss": 0.0351, "step": 3691 }, { "epoch": 8.51672433679354, "grad_norm": 0.0, "learning_rate": 1.447914919688298e-07, "loss": 0.0439, "step": 3692 }, { "epoch": 8.519031141868512, "grad_norm": 0.0, "learning_rate": 1.4338571470137063e-07, "loss": 0.0632, "step": 3693 }, { "epoch": 8.521337946943483, "grad_norm": 0.0, "learning_rate": 1.419867458154034e-07, "loss": 0.047, "step": 3694 }, { "epoch": 8.523644752018454, "grad_norm": 0.0, "learning_rate": 1.405945862772573e-07, "loss": 0.082, "step": 3695 }, { "epoch": 8.525951557093425, "grad_norm": 0.0, "learning_rate": 1.3920923704855648e-07, "loss": 0.0179, "step": 3696 }, { "epoch": 8.528258362168398, "grad_norm": 0.0, "learning_rate": 1.3783069908621772e-07, "loss": 0.0698, "step": 3697 }, { "epoch": 8.530565167243369, "grad_norm": 0.0, "learning_rate": 1.3645897334245817e-07, "loss": 0.0695, "step": 3698 }, { "epoch": 8.53287197231834, "grad_norm": 0.0, "learning_rate": 1.350940607647866e-07, "loss": 0.0427, "step": 3699 }, { "epoch": 8.53517877739331, "grad_norm": 0.0, "learning_rate": 1.337359622960044e-07, "loss": 0.0732, "step": 3700 }, { "epoch": 8.537485582468282, "grad_norm": 0.0, "learning_rate": 1.323846788742078e-07, "loss": 0.0537, "step": 3701 }, { "epoch": 8.539792387543253, "grad_norm": 0.0, "learning_rate": 1.3104021143278911e-07, "loss": 0.0578, "step": 3702 }, { "epoch": 8.542099192618224, "grad_norm": 0.0, "learning_rate": 1.2970256090042432e-07, "loss": 0.0512, "step": 3703 }, { "epoch": 8.544405997693195, "grad_norm": 0.0, "learning_rate": 1.2837172820108769e-07, "loss": 0.0335, "step": 3704 }, { "epoch": 8.546712802768166, "grad_norm": 0.0, "learning_rate": 1.2704771425404382e-07, "loss": 0.0445, "step": 3705 }, { "epoch": 8.549019607843137, "grad_norm": 0.0, "learning_rate": 1.2573051997384122e-07, "loss": 0.0621, "step": 3706 }, { "epoch": 8.551326412918108, "grad_norm": 0.0, "learning_rate": 1.2442014627032318e-07, "loss": 0.0537, "step": 3707 }, { "epoch": 8.55363321799308, "grad_norm": 0.0, "learning_rate": 1.231165940486234e-07, "loss": 0.0701, "step": 3708 }, { "epoch": 8.55594002306805, "grad_norm": 0.0, "learning_rate": 1.2181986420915615e-07, "loss": 0.0784, "step": 3709 }, { "epoch": 8.558246828143021, "grad_norm": 0.0, "learning_rate": 1.2052995764763042e-07, "loss": 0.0334, "step": 3710 }, { "epoch": 8.560553633217992, "grad_norm": 0.0, "learning_rate": 1.192468752550402e-07, "loss": 0.0545, "step": 3711 }, { "epoch": 8.562860438292965, "grad_norm": 0.0, "learning_rate": 1.1797061791766207e-07, "loss": 0.03, "step": 3712 }, { "epoch": 8.565167243367936, "grad_norm": 0.0, "learning_rate": 1.1670118651706197e-07, "loss": 0.04, "step": 3713 }, { "epoch": 8.567474048442907, "grad_norm": 0.0, "learning_rate": 1.1543858193009183e-07, "loss": 0.0356, "step": 3714 }, { "epoch": 8.569780853517878, "grad_norm": 0.0, "learning_rate": 1.1418280502888401e-07, "loss": 0.0513, "step": 3715 }, { "epoch": 8.57208765859285, "grad_norm": 0.0, "learning_rate": 1.1293385668085688e-07, "loss": 0.0596, "step": 3716 }, { "epoch": 8.57439446366782, "grad_norm": 0.0, "learning_rate": 1.1169173774871478e-07, "loss": 0.059, "step": 3717 }, { "epoch": 8.576701268742791, "grad_norm": 0.0, "learning_rate": 1.1045644909043917e-07, "loss": 0.035, "step": 3718 }, { "epoch": 8.579008073817763, "grad_norm": 0.0, "learning_rate": 1.0922799155929753e-07, "loss": 0.0669, "step": 3719 }, { "epoch": 8.581314878892734, "grad_norm": 0.0, "learning_rate": 1.0800636600383662e-07, "loss": 0.014, "step": 3720 }, { "epoch": 8.583621683967705, "grad_norm": 0.0, "learning_rate": 1.0679157326788592e-07, "loss": 0.0525, "step": 3721 }, { "epoch": 8.585928489042676, "grad_norm": 0.0, "learning_rate": 1.055836141905553e-07, "loss": 0.0443, "step": 3722 }, { "epoch": 8.588235294117647, "grad_norm": 0.0, "learning_rate": 1.0438248960623065e-07, "loss": 0.0509, "step": 3723 }, { "epoch": 8.590542099192618, "grad_norm": 0.0, "learning_rate": 1.0318820034458165e-07, "loss": 0.045, "step": 3724 }, { "epoch": 8.592848904267589, "grad_norm": 0.0, "learning_rate": 1.0200074723055397e-07, "loss": 0.0686, "step": 3725 }, { "epoch": 8.59515570934256, "grad_norm": 0.0, "learning_rate": 1.0082013108437038e-07, "loss": 0.0316, "step": 3726 }, { "epoch": 8.597462514417531, "grad_norm": 0.0, "learning_rate": 9.964635272153633e-08, "loss": 0.0675, "step": 3727 }, { "epoch": 8.599769319492502, "grad_norm": 0.0, "learning_rate": 9.84794129528266e-08, "loss": 0.069, "step": 3728 }, { "epoch": 8.602076124567475, "grad_norm": 0.0, "learning_rate": 9.731931258429638e-08, "loss": 0.0713, "step": 3729 }, { "epoch": 8.604382929642446, "grad_norm": 0.0, "learning_rate": 9.616605241727917e-08, "loss": 0.0709, "step": 3730 }, { "epoch": 8.606689734717417, "grad_norm": 0.0, "learning_rate": 9.50196332483766e-08, "loss": 0.0451, "step": 3731 }, { "epoch": 8.608996539792388, "grad_norm": 0.0, "learning_rate": 9.388005586947191e-08, "loss": 0.0461, "step": 3732 }, { "epoch": 8.611303344867359, "grad_norm": 0.0, "learning_rate": 9.274732106771989e-08, "loss": 0.0423, "step": 3733 }, { "epoch": 8.61361014994233, "grad_norm": 0.0, "learning_rate": 9.162142962554576e-08, "loss": 0.0422, "step": 3734 }, { "epoch": 8.615916955017301, "grad_norm": 0.0, "learning_rate": 9.0502382320653e-08, "loss": 0.0529, "step": 3735 }, { "epoch": 8.618223760092272, "grad_norm": 0.0, "learning_rate": 8.939017992601329e-08, "loss": 0.0478, "step": 3736 }, { "epoch": 8.620530565167243, "grad_norm": 0.0, "learning_rate": 8.82848232098732e-08, "loss": 0.0453, "step": 3737 }, { "epoch": 8.622837370242214, "grad_norm": 0.0, "learning_rate": 8.718631293574753e-08, "loss": 0.0475, "step": 3738 }, { "epoch": 8.625144175317185, "grad_norm": 0.0, "learning_rate": 8.609464986242711e-08, "loss": 0.0317, "step": 3739 }, { "epoch": 8.627450980392156, "grad_norm": 0.0, "learning_rate": 8.500983474396762e-08, "loss": 0.0308, "step": 3740 }, { "epoch": 8.629757785467127, "grad_norm": 0.0, "learning_rate": 8.393186832969746e-08, "loss": 0.0585, "step": 3741 }, { "epoch": 8.632064590542099, "grad_norm": 0.0, "learning_rate": 8.286075136421435e-08, "loss": 0.0764, "step": 3742 }, { "epoch": 8.63437139561707, "grad_norm": 0.0, "learning_rate": 8.179648458738309e-08, "loss": 0.054, "step": 3743 }, { "epoch": 8.636678200692042, "grad_norm": 0.0, "learning_rate": 8.07390687343379e-08, "loss": 0.0274, "step": 3744 }, { "epoch": 8.638985005767013, "grad_norm": 0.0, "learning_rate": 7.968850453548227e-08, "loss": 0.0487, "step": 3745 }, { "epoch": 8.641291810841984, "grad_norm": 0.0, "learning_rate": 7.864479271648462e-08, "loss": 0.049, "step": 3746 }, { "epoch": 8.643598615916956, "grad_norm": 0.0, "learning_rate": 7.760793399827937e-08, "loss": 0.0169, "step": 3747 }, { "epoch": 8.645905420991927, "grad_norm": 0.0, "learning_rate": 7.65779290970714e-08, "loss": 0.0374, "step": 3748 }, { "epoch": 8.648212226066898, "grad_norm": 0.0, "learning_rate": 7.555477872432715e-08, "loss": 0.0454, "step": 3749 }, { "epoch": 8.650519031141869, "grad_norm": 0.0, "learning_rate": 7.453848358678018e-08, "loss": 0.0651, "step": 3750 }, { "epoch": 8.65282583621684, "grad_norm": 0.0, "learning_rate": 7.352904438642893e-08, "loss": 0.0817, "step": 3751 }, { "epoch": 8.65513264129181, "grad_norm": 0.0, "learning_rate": 7.25264618205357e-08, "loss": 0.0477, "step": 3752 }, { "epoch": 8.657439446366782, "grad_norm": 0.0, "learning_rate": 7.153073658162646e-08, "loss": 0.0903, "step": 3753 }, { "epoch": 8.659746251441753, "grad_norm": 0.0, "learning_rate": 7.054186935749219e-08, "loss": 0.0465, "step": 3754 }, { "epoch": 8.662053056516724, "grad_norm": 0.0, "learning_rate": 6.955986083118426e-08, "loss": 0.037, "step": 3755 }, { "epoch": 8.664359861591695, "grad_norm": 0.0, "learning_rate": 6.858471168101788e-08, "loss": 0.0456, "step": 3756 }, { "epoch": 8.666666666666666, "grad_norm": 0.0, "learning_rate": 6.761642258056977e-08, "loss": 0.0485, "step": 3757 }, { "epoch": 8.668973471741637, "grad_norm": 0.0, "learning_rate": 6.665499419867937e-08, "loss": 0.0538, "step": 3758 }, { "epoch": 8.671280276816608, "grad_norm": 0.0, "learning_rate": 6.570042719944436e-08, "loss": 0.055, "step": 3759 }, { "epoch": 8.67358708189158, "grad_norm": 0.0, "learning_rate": 6.475272224222507e-08, "loss": 0.0294, "step": 3760 }, { "epoch": 8.675893886966552, "grad_norm": 0.0, "learning_rate": 6.381187998164229e-08, "loss": 0.0259, "step": 3761 }, { "epoch": 8.678200692041523, "grad_norm": 0.0, "learning_rate": 6.287790106757396e-08, "loss": 0.0298, "step": 3762 }, { "epoch": 8.680507497116494, "grad_norm": 0.0, "learning_rate": 6.19507861451607e-08, "loss": 0.0427, "step": 3763 }, { "epoch": 8.682814302191465, "grad_norm": 0.0, "learning_rate": 6.103053585480023e-08, "loss": 0.0535, "step": 3764 }, { "epoch": 8.685121107266436, "grad_norm": 0.0, "learning_rate": 6.011715083214742e-08, "loss": 0.0358, "step": 3765 }, { "epoch": 8.687427912341407, "grad_norm": 0.0, "learning_rate": 5.921063170811647e-08, "loss": 0.0274, "step": 3766 }, { "epoch": 8.689734717416378, "grad_norm": 0.0, "learning_rate": 5.831097910887873e-08, "loss": 0.0286, "step": 3767 }, { "epoch": 8.69204152249135, "grad_norm": 0.0, "learning_rate": 5.7418193655861545e-08, "loss": 0.0371, "step": 3768 }, { "epoch": 8.69434832756632, "grad_norm": 0.0, "learning_rate": 5.6532275965751614e-08, "loss": 0.0626, "step": 3769 }, { "epoch": 8.696655132641292, "grad_norm": 0.0, "learning_rate": 5.5653226650487225e-08, "loss": 0.0761, "step": 3770 }, { "epoch": 8.698961937716263, "grad_norm": 0.0, "learning_rate": 5.4781046317267103e-08, "loss": 0.0517, "step": 3771 }, { "epoch": 8.701268742791234, "grad_norm": 0.0, "learning_rate": 5.391573556854157e-08, "loss": 0.0544, "step": 3772 }, { "epoch": 8.703575547866205, "grad_norm": 0.0, "learning_rate": 5.305729500201917e-08, "loss": 0.0455, "step": 3773 }, { "epoch": 8.705882352941176, "grad_norm": 0.0, "learning_rate": 5.220572521066003e-08, "loss": 0.0851, "step": 3774 }, { "epoch": 8.708189158016147, "grad_norm": 0.0, "learning_rate": 5.136102678268029e-08, "loss": 0.0421, "step": 3775 }, { "epoch": 8.71049596309112, "grad_norm": 0.0, "learning_rate": 5.052320030154767e-08, "loss": 0.0531, "step": 3776 }, { "epoch": 8.71280276816609, "grad_norm": 0.0, "learning_rate": 4.9692246345985905e-08, "loss": 0.0491, "step": 3777 }, { "epoch": 8.715109573241062, "grad_norm": 0.0, "learning_rate": 4.88681654899692e-08, "loss": 0.0546, "step": 3778 }, { "epoch": 8.717416378316033, "grad_norm": 0.0, "learning_rate": 4.8050958302726655e-08, "loss": 0.0575, "step": 3779 }, { "epoch": 8.719723183391004, "grad_norm": 0.0, "learning_rate": 4.7240625348735636e-08, "loss": 0.042, "step": 3780 }, { "epoch": 8.722029988465975, "grad_norm": 0.0, "learning_rate": 4.643716718772839e-08, "loss": 0.0495, "step": 3781 }, { "epoch": 8.724336793540946, "grad_norm": 0.0, "learning_rate": 4.564058437468877e-08, "loss": 0.0416, "step": 3782 }, { "epoch": 8.726643598615917, "grad_norm": 0.0, "learning_rate": 4.485087745984884e-08, "loss": 0.0392, "step": 3783 }, { "epoch": 8.728950403690888, "grad_norm": 0.0, "learning_rate": 4.406804698869338e-08, "loss": 0.0665, "step": 3784 }, { "epoch": 8.731257208765859, "grad_norm": 0.0, "learning_rate": 4.329209350195651e-08, "loss": 0.0424, "step": 3785 }, { "epoch": 8.73356401384083, "grad_norm": 0.0, "learning_rate": 4.252301753562171e-08, "loss": 0.0569, "step": 3786 }, { "epoch": 8.735870818915801, "grad_norm": 0.0, "learning_rate": 4.176081962092182e-08, "loss": 0.0381, "step": 3787 }, { "epoch": 8.738177623990772, "grad_norm": 0.0, "learning_rate": 4.100550028434125e-08, "loss": 0.0428, "step": 3788 }, { "epoch": 8.740484429065743, "grad_norm": 0.0, "learning_rate": 4.025706004760932e-08, "loss": 0.0629, "step": 3789 }, { "epoch": 8.742791234140714, "grad_norm": 0.0, "learning_rate": 3.951549942770694e-08, "loss": 0.0428, "step": 3790 }, { "epoch": 8.745098039215687, "grad_norm": 0.0, "learning_rate": 3.878081893685992e-08, "loss": 0.0391, "step": 3791 }, { "epoch": 8.747404844290658, "grad_norm": 0.0, "learning_rate": 3.805301908254455e-08, "loss": 0.0449, "step": 3792 }, { "epoch": 8.74971164936563, "grad_norm": 0.0, "learning_rate": 3.7332100367482027e-08, "loss": 0.0383, "step": 3793 }, { "epoch": 8.7520184544406, "grad_norm": 0.0, "learning_rate": 3.6618063289642904e-08, "loss": 0.0366, "step": 3794 }, { "epoch": 8.754325259515571, "grad_norm": 0.0, "learning_rate": 3.591090834224153e-08, "loss": 0.04, "step": 3795 }, { "epoch": 8.756632064590542, "grad_norm": 0.0, "learning_rate": 3.521063601373942e-08, "loss": 0.0465, "step": 3796 }, { "epoch": 8.758938869665513, "grad_norm": 0.0, "learning_rate": 3.451724678784518e-08, "loss": 0.0501, "step": 3797 }, { "epoch": 8.761245674740485, "grad_norm": 0.0, "learning_rate": 3.383074114351237e-08, "loss": 0.0495, "step": 3798 }, { "epoch": 8.763552479815456, "grad_norm": 0.0, "learning_rate": 3.315111955493944e-08, "loss": 0.0644, "step": 3799 }, { "epoch": 8.765859284890427, "grad_norm": 0.0, "learning_rate": 3.247838249156976e-08, "loss": 0.0526, "step": 3800 }, { "epoch": 8.768166089965398, "grad_norm": 0.0, "learning_rate": 3.181253041809052e-08, "loss": 0.0469, "step": 3801 }, { "epoch": 8.770472895040369, "grad_norm": 0.0, "learning_rate": 3.115356379443601e-08, "loss": 0.054, "step": 3802 }, { "epoch": 8.77277970011534, "grad_norm": 0.0, "learning_rate": 3.0501483075779936e-08, "loss": 0.0529, "step": 3803 }, { "epoch": 8.77508650519031, "grad_norm": 0.0, "learning_rate": 2.9856288712544204e-08, "loss": 0.0523, "step": 3804 }, { "epoch": 8.777393310265282, "grad_norm": 0.0, "learning_rate": 2.9217981150390095e-08, "loss": 0.0506, "step": 3805 }, { "epoch": 8.779700115340253, "grad_norm": 0.0, "learning_rate": 2.858656083022604e-08, "loss": 0.0314, "step": 3806 }, { "epoch": 8.782006920415224, "grad_norm": 0.0, "learning_rate": 2.796202818819871e-08, "loss": 0.0507, "step": 3807 }, { "epoch": 8.784313725490197, "grad_norm": 0.0, "learning_rate": 2.7344383655699692e-08, "loss": 0.0575, "step": 3808 }, { "epoch": 8.786620530565168, "grad_norm": 0.0, "learning_rate": 2.6733627659363272e-08, "loss": 0.0515, "step": 3809 }, { "epoch": 8.788927335640139, "grad_norm": 0.0, "learning_rate": 2.6129760621063095e-08, "loss": 0.0359, "step": 3810 }, { "epoch": 8.79123414071511, "grad_norm": 0.0, "learning_rate": 2.5532782957917724e-08, "loss": 0.07, "step": 3811 }, { "epoch": 8.793540945790081, "grad_norm": 0.0, "learning_rate": 2.4942695082281752e-08, "loss": 0.0737, "step": 3812 }, { "epoch": 8.795847750865052, "grad_norm": 0.0, "learning_rate": 2.4359497401758026e-08, "loss": 0.0466, "step": 3813 }, { "epoch": 8.798154555940023, "grad_norm": 0.0, "learning_rate": 2.378319031918208e-08, "loss": 0.0445, "step": 3814 }, { "epoch": 8.800461361014994, "grad_norm": 0.0, "learning_rate": 2.3213774232635487e-08, "loss": 0.0343, "step": 3815 }, { "epoch": 8.802768166089965, "grad_norm": 0.0, "learning_rate": 2.265124953543918e-08, "loss": 0.0638, "step": 3816 }, { "epoch": 8.805074971164936, "grad_norm": 0.0, "learning_rate": 2.2095616616150117e-08, "loss": 0.0574, "step": 3817 }, { "epoch": 8.807381776239907, "grad_norm": 0.0, "learning_rate": 2.1546875858570182e-08, "loss": 0.0248, "step": 3818 }, { "epoch": 8.809688581314878, "grad_norm": 0.0, "learning_rate": 2.1005027641736176e-08, "loss": 0.0355, "step": 3819 }, { "epoch": 8.81199538638985, "grad_norm": 0.0, "learning_rate": 2.0470072339926482e-08, "loss": 0.0497, "step": 3820 }, { "epoch": 8.81430219146482, "grad_norm": 0.0, "learning_rate": 1.9942010322655527e-08, "loss": 0.0469, "step": 3821 }, { "epoch": 8.816608996539792, "grad_norm": 0.0, "learning_rate": 1.9420841954681525e-08, "loss": 0.0696, "step": 3822 }, { "epoch": 8.818915801614764, "grad_norm": 0.0, "learning_rate": 1.8906567595994295e-08, "loss": 0.0671, "step": 3823 }, { "epoch": 8.821222606689735, "grad_norm": 0.0, "learning_rate": 1.8399187601827462e-08, "loss": 0.02, "step": 3824 }, { "epoch": 8.823529411764707, "grad_norm": 0.0, "learning_rate": 1.7898702322648453e-08, "loss": 0.0804, "step": 3825 }, { "epoch": 8.825836216839678, "grad_norm": 0.0, "learning_rate": 1.7405112104164067e-08, "loss": 0.03, "step": 3826 }, { "epoch": 8.828143021914649, "grad_norm": 0.0, "learning_rate": 1.6918417287318245e-08, "loss": 0.0701, "step": 3827 }, { "epoch": 8.83044982698962, "grad_norm": 0.0, "learning_rate": 1.6438618208290957e-08, "loss": 0.0492, "step": 3828 }, { "epoch": 8.83275663206459, "grad_norm": 0.0, "learning_rate": 1.596571519850043e-08, "loss": 0.0481, "step": 3829 }, { "epoch": 8.835063437139562, "grad_norm": 0.0, "learning_rate": 1.5499708584600924e-08, "loss": 0.0575, "step": 3830 }, { "epoch": 8.837370242214533, "grad_norm": 0.0, "learning_rate": 1.5040598688482732e-08, "loss": 0.0702, "step": 3831 }, { "epoch": 8.839677047289504, "grad_norm": 0.0, "learning_rate": 1.4588385827272178e-08, "loss": 0.0487, "step": 3832 }, { "epoch": 8.841983852364475, "grad_norm": 0.0, "learning_rate": 1.414307031333273e-08, "loss": 0.0686, "step": 3833 }, { "epoch": 8.844290657439446, "grad_norm": 0.0, "learning_rate": 1.370465245426167e-08, "loss": 0.022, "step": 3834 }, { "epoch": 8.846597462514417, "grad_norm": 0.0, "learning_rate": 1.3273132552893419e-08, "loss": 0.0449, "step": 3835 }, { "epoch": 8.848904267589388, "grad_norm": 0.0, "learning_rate": 1.2848510907296219e-08, "loss": 0.0684, "step": 3836 }, { "epoch": 8.85121107266436, "grad_norm": 0.0, "learning_rate": 1.2430787810776556e-08, "loss": 0.0274, "step": 3837 }, { "epoch": 8.853517877739332, "grad_norm": 0.0, "learning_rate": 1.2019963551871405e-08, "loss": 0.047, "step": 3838 }, { "epoch": 8.855824682814303, "grad_norm": 0.0, "learning_rate": 1.161603841435488e-08, "loss": 0.0406, "step": 3839 }, { "epoch": 8.858131487889274, "grad_norm": 0.0, "learning_rate": 1.1219012677234908e-08, "loss": 0.0443, "step": 3840 }, { "epoch": 8.860438292964245, "grad_norm": 0.0, "learning_rate": 1.0828886614754342e-08, "loss": 0.0609, "step": 3841 }, { "epoch": 8.862745098039216, "grad_norm": 0.0, "learning_rate": 1.0445660496390952e-08, "loss": 0.0612, "step": 3842 }, { "epoch": 8.865051903114187, "grad_norm": 0.0, "learning_rate": 1.0069334586854106e-08, "loss": 0.0184, "step": 3843 }, { "epoch": 8.867358708189158, "grad_norm": 0.0, "learning_rate": 9.699909146086983e-09, "loss": 0.0416, "step": 3844 }, { "epoch": 8.86966551326413, "grad_norm": 0.0, "learning_rate": 9.337384429269903e-09, "loss": 0.0467, "step": 3845 }, { "epoch": 8.8719723183391, "grad_norm": 0.0, "learning_rate": 8.981760686811448e-09, "loss": 0.0423, "step": 3846 }, { "epoch": 8.874279123414071, "grad_norm": 0.0, "learning_rate": 8.633038164358454e-09, "loss": 0.0558, "step": 3847 }, { "epoch": 8.876585928489042, "grad_norm": 0.0, "learning_rate": 8.29121710278713e-09, "loss": 0.0414, "step": 3848 }, { "epoch": 8.878892733564014, "grad_norm": 0.0, "learning_rate": 7.956297738207496e-09, "loss": 0.0519, "step": 3849 }, { "epoch": 8.881199538638985, "grad_norm": 0.0, "learning_rate": 7.628280301963387e-09, "loss": 0.0487, "step": 3850 }, { "epoch": 8.883506343713956, "grad_norm": 0.0, "learning_rate": 7.3071650206291145e-09, "loss": 0.0668, "step": 3851 }, { "epoch": 8.885813148788927, "grad_norm": 0.0, "learning_rate": 6.992952116013918e-09, "loss": 0.056, "step": 3852 }, { "epoch": 8.888119953863898, "grad_norm": 0.0, "learning_rate": 6.685641805158627e-09, "loss": 0.0361, "step": 3853 }, { "epoch": 8.890426758938869, "grad_norm": 0.0, "learning_rate": 6.385234300332332e-09, "loss": 0.029, "step": 3854 }, { "epoch": 8.892733564013842, "grad_norm": 0.0, "learning_rate": 6.091729809042379e-09, "loss": 0.0551, "step": 3855 }, { "epoch": 8.895040369088813, "grad_norm": 0.0, "learning_rate": 5.805128534024373e-09, "loss": 0.0743, "step": 3856 }, { "epoch": 8.897347174163784, "grad_norm": 0.0, "learning_rate": 5.525430673244403e-09, "loss": 0.0317, "step": 3857 }, { "epoch": 8.899653979238755, "grad_norm": 0.0, "learning_rate": 5.252636419902368e-09, "loss": 0.04, "step": 3858 }, { "epoch": 8.901960784313726, "grad_norm": 0.0, "learning_rate": 4.986745962428652e-09, "loss": 0.0331, "step": 3859 }, { "epoch": 8.904267589388697, "grad_norm": 0.0, "learning_rate": 4.727759484486338e-09, "loss": 0.0233, "step": 3860 }, { "epoch": 8.906574394463668, "grad_norm": 0.0, "learning_rate": 4.475677164966774e-09, "loss": 0.0524, "step": 3861 }, { "epoch": 8.908881199538639, "grad_norm": 0.0, "learning_rate": 4.230499177994007e-09, "loss": 0.0442, "step": 3862 }, { "epoch": 8.91118800461361, "grad_norm": 0.0, "learning_rate": 3.9922256929247895e-09, "loss": 0.0691, "step": 3863 }, { "epoch": 8.913494809688581, "grad_norm": 0.0, "learning_rate": 3.760856874341912e-09, "loss": 0.0736, "step": 3864 }, { "epoch": 8.915801614763552, "grad_norm": 0.0, "learning_rate": 3.536392882064199e-09, "loss": 0.0648, "step": 3865 }, { "epoch": 8.918108419838523, "grad_norm": 0.0, "learning_rate": 3.3188338711365175e-09, "loss": 0.0514, "step": 3866 }, { "epoch": 8.920415224913494, "grad_norm": 0.0, "learning_rate": 3.1081799918375454e-09, "loss": 0.0382, "step": 3867 }, { "epoch": 8.922722029988465, "grad_norm": 0.0, "learning_rate": 2.9044313896731126e-09, "loss": 0.0419, "step": 3868 }, { "epoch": 8.925028835063436, "grad_norm": 0.0, "learning_rate": 2.7075882053828605e-09, "loss": 0.0269, "step": 3869 }, { "epoch": 8.92733564013841, "grad_norm": 0.0, "learning_rate": 2.5176505749346937e-09, "loss": 0.0328, "step": 3870 }, { "epoch": 8.92964244521338, "grad_norm": 0.0, "learning_rate": 2.3346186295247763e-09, "loss": 0.0458, "step": 3871 }, { "epoch": 8.931949250288351, "grad_norm": 0.0, "learning_rate": 2.1584924955819763e-09, "loss": 0.04, "step": 3872 }, { "epoch": 8.934256055363322, "grad_norm": 0.0, "learning_rate": 1.9892722947645328e-09, "loss": 0.0318, "step": 3873 }, { "epoch": 8.936562860438293, "grad_norm": 0.0, "learning_rate": 1.8269581439600559e-09, "loss": 0.0616, "step": 3874 }, { "epoch": 8.938869665513264, "grad_norm": 0.0, "learning_rate": 1.6715501552855285e-09, "loss": 0.0592, "step": 3875 }, { "epoch": 8.941176470588236, "grad_norm": 0.0, "learning_rate": 1.5230484360873043e-09, "loss": 0.0343, "step": 3876 }, { "epoch": 8.943483275663207, "grad_norm": 0.0, "learning_rate": 1.3814530889433298e-09, "loss": 0.0836, "step": 3877 }, { "epoch": 8.945790080738178, "grad_norm": 0.0, "learning_rate": 1.2467642116575919e-09, "loss": 0.0385, "step": 3878 }, { "epoch": 8.948096885813149, "grad_norm": 0.0, "learning_rate": 1.1189818972656697e-09, "loss": 0.0335, "step": 3879 }, { "epoch": 8.95040369088812, "grad_norm": 0.0, "learning_rate": 9.981062340336246e-10, "loss": 0.0468, "step": 3880 }, { "epoch": 8.95271049596309, "grad_norm": 0.0, "learning_rate": 8.841373054546687e-10, "loss": 0.0504, "step": 3881 }, { "epoch": 8.955017301038062, "grad_norm": 0.0, "learning_rate": 7.770751902513862e-10, "loss": 0.0481, "step": 3882 }, { "epoch": 8.957324106113033, "grad_norm": 0.0, "learning_rate": 6.769199623779532e-10, "loss": 0.049, "step": 3883 }, { "epoch": 8.959630911188004, "grad_norm": 0.0, "learning_rate": 5.836716910134766e-10, "loss": 0.0435, "step": 3884 }, { "epoch": 8.961937716262975, "grad_norm": 0.0, "learning_rate": 4.973304405697654e-10, "loss": 0.0492, "step": 3885 }, { "epoch": 8.964244521337946, "grad_norm": 0.0, "learning_rate": 4.178962706857803e-10, "loss": 0.0373, "step": 3886 }, { "epoch": 8.966551326412919, "grad_norm": 0.0, "learning_rate": 3.4536923623096353e-10, "loss": 0.0651, "step": 3887 }, { "epoch": 8.96885813148789, "grad_norm": 0.0, "learning_rate": 2.797493873019086e-10, "loss": 0.0579, "step": 3888 }, { "epoch": 8.971164936562861, "grad_norm": 0.0, "learning_rate": 2.2103676922680117e-10, "loss": 0.0408, "step": 3889 }, { "epoch": 8.973471741637832, "grad_norm": 0.0, "learning_rate": 1.6923142255764745e-10, "loss": 0.0368, "step": 3890 }, { "epoch": 8.975778546712803, "grad_norm": 0.0, "learning_rate": 1.2433338308137645e-10, "loss": 0.0217, "step": 3891 }, { "epoch": 8.978085351787774, "grad_norm": 0.0, "learning_rate": 8.634268181095806e-11, "loss": 0.0379, "step": 3892 }, { "epoch": 8.980392156862745, "grad_norm": 0.0, "learning_rate": 5.525934498651353e-11, "loss": 0.0243, "step": 3893 }, { "epoch": 8.982698961937716, "grad_norm": 0.0, "learning_rate": 3.108339407975613e-11, "loss": 0.0125, "step": 3894 }, { "epoch": 8.985005767012687, "grad_norm": 0.0, "learning_rate": 1.381484578955039e-11, "loss": 0.0306, "step": 3895 }, { "epoch": 8.987312572087658, "grad_norm": 0.0, "learning_rate": 3.4537120441324733e-12, "loss": 0.0753, "step": 3896 }, { "epoch": 8.98961937716263, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.0315, "step": 3897 }, { "epoch": 8.98961937716263, "step": 3897, "total_flos": 5.517435394662072e+17, "train_loss": 0.40523916085876577, "train_runtime": 135721.8739, "train_samples_per_second": 1.839, "train_steps_per_second": 0.029 } ], "logging_steps": 1.0, "max_steps": 3897, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 1000, "total_flos": 5.517435394662072e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }