{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 4450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4925373134328358e-07, "loss": 2.9424, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.9850746268656716e-07, "loss": 2.9098, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.4776119402985074e-07, "loss": 2.8686, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.970149253731343e-07, "loss": 2.8448, "step": 4 }, { "epoch": 0.0, "learning_rate": 7.462686567164179e-07, "loss": 2.7325, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.955223880597015e-07, "loss": 2.4807, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.044776119402985e-06, "loss": 2.2651, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.1940298507462686e-06, "loss": 2.1474, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.3432835820895524e-06, "loss": 1.9929, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.4925373134328358e-06, "loss": 1.9472, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.6417910447761196e-06, "loss": 1.7901, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.791044776119403e-06, "loss": 1.7208, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.9402985074626867e-06, "loss": 1.6611, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.08955223880597e-06, "loss": 1.6317, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.238805970149254e-06, "loss": 1.4784, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.3880597014925373e-06, "loss": 1.4518, "step": 16 }, { "epoch": 0.0, "learning_rate": 2.537313432835821e-06, "loss": 1.3491, "step": 17 }, { "epoch": 0.0, "learning_rate": 2.686567164179105e-06, "loss": 1.3477, "step": 18 }, { "epoch": 0.0, "learning_rate": 2.835820895522388e-06, "loss": 1.2402, "step": 19 }, { "epoch": 0.0, "learning_rate": 2.9850746268656716e-06, "loss": 1.2339, "step": 20 }, { "epoch": 0.0, "learning_rate": 3.1343283582089558e-06, "loss": 1.2066, "step": 21 }, { "epoch": 0.0, "learning_rate": 3.283582089552239e-06, "loss": 1.1026, "step": 22 }, { "epoch": 0.01, "learning_rate": 3.4328358208955225e-06, "loss": 1.1315, "step": 23 }, { "epoch": 0.01, "learning_rate": 3.582089552238806e-06, "loss": 1.0464, "step": 24 }, { "epoch": 0.01, "learning_rate": 3.73134328358209e-06, "loss": 1.0477, "step": 25 }, { "epoch": 0.01, "learning_rate": 3.8805970149253735e-06, "loss": 0.9811, "step": 26 }, { "epoch": 0.01, "learning_rate": 4.029850746268657e-06, "loss": 0.9803, "step": 27 }, { "epoch": 0.01, "learning_rate": 4.17910447761194e-06, "loss": 0.997, "step": 28 }, { "epoch": 0.01, "learning_rate": 4.3283582089552236e-06, "loss": 0.9998, "step": 29 }, { "epoch": 0.01, "learning_rate": 4.477611940298508e-06, "loss": 0.9274, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.626865671641791e-06, "loss": 0.8905, "step": 31 }, { "epoch": 0.01, "learning_rate": 4.7761194029850745e-06, "loss": 0.9071, "step": 32 }, { "epoch": 0.01, "learning_rate": 4.925373134328359e-06, "loss": 0.9137, "step": 33 }, { "epoch": 0.01, "learning_rate": 5.074626865671642e-06, "loss": 0.8308, "step": 34 }, { "epoch": 0.01, "learning_rate": 5.2238805970149255e-06, "loss": 0.8978, "step": 35 }, { "epoch": 0.01, "learning_rate": 5.37313432835821e-06, "loss": 0.8822, "step": 36 }, { "epoch": 0.01, "learning_rate": 5.522388059701493e-06, "loss": 0.8868, "step": 37 }, { "epoch": 0.01, "learning_rate": 5.671641791044776e-06, "loss": 0.8038, "step": 38 }, { "epoch": 0.01, "learning_rate": 5.820895522388061e-06, "loss": 0.7792, "step": 39 }, { "epoch": 0.01, "learning_rate": 5.970149253731343e-06, "loss": 0.7413, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.119402985074627e-06, "loss": 0.7852, "step": 41 }, { "epoch": 0.01, "learning_rate": 6.2686567164179116e-06, "loss": 0.8028, "step": 42 }, { "epoch": 0.01, "learning_rate": 6.417910447761194e-06, "loss": 0.7787, "step": 43 }, { "epoch": 0.01, "learning_rate": 6.567164179104478e-06, "loss": 0.7509, "step": 44 }, { "epoch": 0.01, "learning_rate": 6.7164179104477625e-06, "loss": 0.7251, "step": 45 }, { "epoch": 0.01, "learning_rate": 6.865671641791045e-06, "loss": 0.7868, "step": 46 }, { "epoch": 0.01, "learning_rate": 7.014925373134329e-06, "loss": 0.7271, "step": 47 }, { "epoch": 0.01, "learning_rate": 7.164179104477612e-06, "loss": 0.7216, "step": 48 }, { "epoch": 0.01, "learning_rate": 7.313432835820896e-06, "loss": 0.7089, "step": 49 }, { "epoch": 0.01, "learning_rate": 7.46268656716418e-06, "loss": 0.7202, "step": 50 }, { "epoch": 0.01, "learning_rate": 7.611940298507463e-06, "loss": 0.711, "step": 51 }, { "epoch": 0.01, "learning_rate": 7.761194029850747e-06, "loss": 0.7445, "step": 52 }, { "epoch": 0.01, "learning_rate": 7.91044776119403e-06, "loss": 0.6369, "step": 53 }, { "epoch": 0.01, "learning_rate": 8.059701492537314e-06, "loss": 0.742, "step": 54 }, { "epoch": 0.01, "learning_rate": 8.208955223880599e-06, "loss": 0.7537, "step": 55 }, { "epoch": 0.01, "learning_rate": 8.35820895522388e-06, "loss": 0.7147, "step": 56 }, { "epoch": 0.01, "learning_rate": 8.507462686567165e-06, "loss": 0.7042, "step": 57 }, { "epoch": 0.01, "learning_rate": 8.656716417910447e-06, "loss": 0.6836, "step": 58 }, { "epoch": 0.01, "learning_rate": 8.805970149253732e-06, "loss": 0.671, "step": 59 }, { "epoch": 0.01, "learning_rate": 8.955223880597016e-06, "loss": 0.7374, "step": 60 }, { "epoch": 0.01, "learning_rate": 9.104477611940299e-06, "loss": 0.6923, "step": 61 }, { "epoch": 0.01, "learning_rate": 9.253731343283582e-06, "loss": 0.6576, "step": 62 }, { "epoch": 0.01, "learning_rate": 9.402985074626867e-06, "loss": 0.6501, "step": 63 }, { "epoch": 0.01, "learning_rate": 9.552238805970149e-06, "loss": 0.63, "step": 64 }, { "epoch": 0.01, "learning_rate": 9.701492537313434e-06, "loss": 0.6656, "step": 65 }, { "epoch": 0.01, "learning_rate": 9.850746268656717e-06, "loss": 0.6877, "step": 66 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 0.6197, "step": 67 }, { "epoch": 0.02, "learning_rate": 1.0149253731343284e-05, "loss": 0.6716, "step": 68 }, { "epoch": 0.02, "learning_rate": 1.029850746268657e-05, "loss": 0.6322, "step": 69 }, { "epoch": 0.02, "learning_rate": 1.0447761194029851e-05, "loss": 0.6397, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.0597014925373134e-05, "loss": 0.6483, "step": 71 }, { "epoch": 0.02, "learning_rate": 1.074626865671642e-05, "loss": 0.7015, "step": 72 }, { "epoch": 0.02, "learning_rate": 1.0895522388059703e-05, "loss": 0.6987, "step": 73 }, { "epoch": 0.02, "learning_rate": 1.1044776119402986e-05, "loss": 0.6632, "step": 74 }, { "epoch": 0.02, "learning_rate": 1.1194029850746268e-05, "loss": 0.6601, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.1343283582089553e-05, "loss": 0.6652, "step": 76 }, { "epoch": 0.02, "learning_rate": 1.1492537313432836e-05, "loss": 0.6711, "step": 77 }, { "epoch": 0.02, "learning_rate": 1.1641791044776121e-05, "loss": 0.6518, "step": 78 }, { "epoch": 0.02, "learning_rate": 1.1791044776119405e-05, "loss": 0.622, "step": 79 }, { "epoch": 0.02, "learning_rate": 1.1940298507462686e-05, "loss": 0.5948, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.2089552238805971e-05, "loss": 0.6547, "step": 81 }, { "epoch": 0.02, "learning_rate": 1.2238805970149255e-05, "loss": 0.6487, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.2388059701492538e-05, "loss": 0.6588, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.2537313432835823e-05, "loss": 0.6229, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.2686567164179107e-05, "loss": 0.6437, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.2835820895522388e-05, "loss": 0.6784, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.2985074626865673e-05, "loss": 0.6305, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.3134328358208957e-05, "loss": 0.6177, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.328358208955224e-05, "loss": 0.6502, "step": 89 }, { "epoch": 0.02, "learning_rate": 1.3432835820895525e-05, "loss": 0.6424, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.3582089552238807e-05, "loss": 0.6342, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.373134328358209e-05, "loss": 0.5863, "step": 92 }, { "epoch": 0.02, "learning_rate": 1.3880597014925375e-05, "loss": 0.6625, "step": 93 }, { "epoch": 0.02, "learning_rate": 1.4029850746268658e-05, "loss": 0.6183, "step": 94 }, { "epoch": 0.02, "learning_rate": 1.4179104477611942e-05, "loss": 0.6499, "step": 95 }, { "epoch": 0.02, "learning_rate": 1.4328358208955224e-05, "loss": 0.6268, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.4477611940298509e-05, "loss": 0.6383, "step": 97 }, { "epoch": 0.02, "learning_rate": 1.4626865671641792e-05, "loss": 0.6549, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.4776119402985077e-05, "loss": 0.5932, "step": 99 }, { "epoch": 0.02, "learning_rate": 1.492537313432836e-05, "loss": 0.6789, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.5074626865671642e-05, "loss": 0.613, "step": 101 }, { "epoch": 0.02, "learning_rate": 1.5223880597014925e-05, "loss": 0.6067, "step": 102 }, { "epoch": 0.02, "learning_rate": 1.537313432835821e-05, "loss": 0.6554, "step": 103 }, { "epoch": 0.02, "learning_rate": 1.5522388059701494e-05, "loss": 0.6312, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.5671641791044777e-05, "loss": 0.6762, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.582089552238806e-05, "loss": 0.5969, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.5970149253731344e-05, "loss": 0.5952, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.6119402985074627e-05, "loss": 0.605, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.626865671641791e-05, "loss": 0.6441, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.6417910447761197e-05, "loss": 0.5536, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.6567164179104477e-05, "loss": 0.642, "step": 111 }, { "epoch": 0.03, "learning_rate": 1.671641791044776e-05, "loss": 0.6029, "step": 112 }, { "epoch": 0.03, "learning_rate": 1.6865671641791048e-05, "loss": 0.6327, "step": 113 }, { "epoch": 0.03, "learning_rate": 1.701492537313433e-05, "loss": 0.5893, "step": 114 }, { "epoch": 0.03, "learning_rate": 1.7164179104477614e-05, "loss": 0.6374, "step": 115 }, { "epoch": 0.03, "learning_rate": 1.7313432835820894e-05, "loss": 0.5994, "step": 116 }, { "epoch": 0.03, "learning_rate": 1.746268656716418e-05, "loss": 0.5745, "step": 117 }, { "epoch": 0.03, "learning_rate": 1.7611940298507464e-05, "loss": 0.6285, "step": 118 }, { "epoch": 0.03, "learning_rate": 1.7761194029850748e-05, "loss": 0.637, "step": 119 }, { "epoch": 0.03, "learning_rate": 1.791044776119403e-05, "loss": 0.5995, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.8059701492537314e-05, "loss": 0.6471, "step": 121 }, { "epoch": 0.03, "learning_rate": 1.8208955223880598e-05, "loss": 0.6385, "step": 122 }, { "epoch": 0.03, "learning_rate": 1.835820895522388e-05, "loss": 0.6335, "step": 123 }, { "epoch": 0.03, "learning_rate": 1.8507462686567165e-05, "loss": 0.6062, "step": 124 }, { "epoch": 0.03, "learning_rate": 1.865671641791045e-05, "loss": 0.6101, "step": 125 }, { "epoch": 0.03, "learning_rate": 1.8805970149253735e-05, "loss": 0.5775, "step": 126 }, { "epoch": 0.03, "learning_rate": 1.8955223880597015e-05, "loss": 0.6171, "step": 127 }, { "epoch": 0.03, "learning_rate": 1.9104477611940298e-05, "loss": 0.5884, "step": 128 }, { "epoch": 0.03, "learning_rate": 1.9253731343283585e-05, "loss": 0.5943, "step": 129 }, { "epoch": 0.03, "learning_rate": 1.9402985074626868e-05, "loss": 0.6159, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.955223880597015e-05, "loss": 0.6008, "step": 131 }, { "epoch": 0.03, "learning_rate": 1.9701492537313435e-05, "loss": 0.6319, "step": 132 }, { "epoch": 0.03, "learning_rate": 1.9850746268656718e-05, "loss": 0.6221, "step": 133 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.6367, "step": 134 }, { "epoch": 0.03, "learning_rate": 1.9999997350848118e-05, "loss": 0.657, "step": 135 }, { "epoch": 0.03, "learning_rate": 1.9999989403393866e-05, "loss": 0.6187, "step": 136 }, { "epoch": 0.03, "learning_rate": 1.9999976157641464e-05, "loss": 0.6426, "step": 137 }, { "epoch": 0.03, "learning_rate": 1.999995761359792e-05, "loss": 0.6095, "step": 138 }, { "epoch": 0.03, "learning_rate": 1.999993377127307e-05, "loss": 0.6115, "step": 139 }, { "epoch": 0.03, "learning_rate": 1.999990463067954e-05, "loss": 0.6802, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.9999870191832765e-05, "loss": 0.6333, "step": 141 }, { "epoch": 0.03, "learning_rate": 1.9999830454751003e-05, "loss": 0.6554, "step": 142 }, { "epoch": 0.03, "learning_rate": 1.99997854194553e-05, "loss": 0.6483, "step": 143 }, { "epoch": 0.03, "learning_rate": 1.999973508596952e-05, "loss": 0.6156, "step": 144 }, { "epoch": 0.03, "learning_rate": 1.999967945432033e-05, "loss": 0.6482, "step": 145 }, { "epoch": 0.03, "learning_rate": 1.9999618524537208e-05, "loss": 0.5955, "step": 146 }, { "epoch": 0.03, "learning_rate": 1.9999552296652432e-05, "loss": 0.6012, "step": 147 }, { "epoch": 0.03, "learning_rate": 1.9999480770701095e-05, "loss": 0.6706, "step": 148 }, { "epoch": 0.03, "learning_rate": 1.999940394672109e-05, "loss": 0.6263, "step": 149 }, { "epoch": 0.03, "learning_rate": 1.9999321824753125e-05, "loss": 0.6028, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.999923440484071e-05, "loss": 0.6137, "step": 151 }, { "epoch": 0.03, "learning_rate": 1.999914168703016e-05, "loss": 0.6236, "step": 152 }, { "epoch": 0.03, "learning_rate": 1.9999043671370603e-05, "loss": 0.6104, "step": 153 }, { "epoch": 0.03, "learning_rate": 1.9998940357913964e-05, "loss": 0.6079, "step": 154 }, { "epoch": 0.03, "learning_rate": 1.9998831746714994e-05, "loss": 0.5446, "step": 155 }, { "epoch": 0.04, "learning_rate": 1.9998717837831224e-05, "loss": 0.6852, "step": 156 }, { "epoch": 0.04, "learning_rate": 1.9998598631323017e-05, "loss": 0.6051, "step": 157 }, { "epoch": 0.04, "learning_rate": 1.999847412725353e-05, "loss": 0.6249, "step": 158 }, { "epoch": 0.04, "learning_rate": 1.9998344325688727e-05, "loss": 0.6208, "step": 159 }, { "epoch": 0.04, "learning_rate": 1.999820922669738e-05, "loss": 0.6253, "step": 160 }, { "epoch": 0.04, "learning_rate": 1.999806883035107e-05, "loss": 0.6177, "step": 161 }, { "epoch": 0.04, "learning_rate": 1.9997923136724186e-05, "loss": 0.624, "step": 162 }, { "epoch": 0.04, "learning_rate": 1.9997772145893914e-05, "loss": 0.5745, "step": 163 }, { "epoch": 0.04, "learning_rate": 1.999761585794026e-05, "loss": 0.6234, "step": 164 }, { "epoch": 0.04, "learning_rate": 1.9997454272946027e-05, "loss": 0.64, "step": 165 }, { "epoch": 0.04, "learning_rate": 1.9997287390996833e-05, "loss": 0.6008, "step": 166 }, { "epoch": 0.04, "learning_rate": 1.999711521218109e-05, "loss": 0.6439, "step": 167 }, { "epoch": 0.04, "learning_rate": 1.999693773659002e-05, "loss": 0.5974, "step": 168 }, { "epoch": 0.04, "learning_rate": 1.9996754964317668e-05, "loss": 0.5813, "step": 169 }, { "epoch": 0.04, "learning_rate": 1.9996566895460865e-05, "loss": 0.6548, "step": 170 }, { "epoch": 0.04, "learning_rate": 1.9996373530119256e-05, "loss": 0.6141, "step": 171 }, { "epoch": 0.04, "learning_rate": 1.999617486839529e-05, "loss": 0.631, "step": 172 }, { "epoch": 0.04, "learning_rate": 1.9995970910394228e-05, "loss": 0.6312, "step": 173 }, { "epoch": 0.04, "learning_rate": 1.999576165622413e-05, "loss": 0.6265, "step": 174 }, { "epoch": 0.04, "learning_rate": 1.9995547105995864e-05, "loss": 0.5838, "step": 175 }, { "epoch": 0.04, "learning_rate": 1.9995327259823113e-05, "loss": 0.6492, "step": 176 }, { "epoch": 0.04, "learning_rate": 1.999510211782235e-05, "loss": 0.5847, "step": 177 }, { "epoch": 0.04, "learning_rate": 1.999487168011286e-05, "loss": 0.6139, "step": 178 }, { "epoch": 0.04, "learning_rate": 1.9994635946816748e-05, "loss": 0.6375, "step": 179 }, { "epoch": 0.04, "learning_rate": 1.9994394918058903e-05, "loss": 0.6228, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.9994148593967028e-05, "loss": 0.5879, "step": 181 }, { "epoch": 0.04, "learning_rate": 1.999389697467164e-05, "loss": 0.5797, "step": 182 }, { "epoch": 0.04, "learning_rate": 1.999364006030605e-05, "loss": 0.5878, "step": 183 }, { "epoch": 0.04, "learning_rate": 1.999337785100638e-05, "loss": 0.5799, "step": 184 }, { "epoch": 0.04, "learning_rate": 1.999311034691156e-05, "loss": 0.5841, "step": 185 }, { "epoch": 0.04, "learning_rate": 1.9992837548163315e-05, "loss": 0.5958, "step": 186 }, { "epoch": 0.04, "learning_rate": 1.9992559454906187e-05, "loss": 0.5898, "step": 187 }, { "epoch": 0.04, "learning_rate": 1.9992276067287517e-05, "loss": 0.5765, "step": 188 }, { "epoch": 0.04, "learning_rate": 1.9991987385457452e-05, "loss": 0.6059, "step": 189 }, { "epoch": 0.04, "learning_rate": 1.9991693409568946e-05, "loss": 0.5889, "step": 190 }, { "epoch": 0.04, "learning_rate": 1.999139413977775e-05, "loss": 0.5896, "step": 191 }, { "epoch": 0.04, "learning_rate": 1.999108957624244e-05, "loss": 0.6576, "step": 192 }, { "epoch": 0.04, "learning_rate": 1.9990779719124363e-05, "loss": 0.5974, "step": 193 }, { "epoch": 0.04, "learning_rate": 1.9990464568587708e-05, "loss": 0.598, "step": 194 }, { "epoch": 0.04, "learning_rate": 1.9990144124799443e-05, "loss": 0.6064, "step": 195 }, { "epoch": 0.04, "learning_rate": 1.9989818387929356e-05, "loss": 0.5995, "step": 196 }, { "epoch": 0.04, "learning_rate": 1.9989487358150023e-05, "loss": 0.57, "step": 197 }, { "epoch": 0.04, "learning_rate": 1.998915103563684e-05, "loss": 0.6037, "step": 198 }, { "epoch": 0.04, "learning_rate": 1.9988809420567998e-05, "loss": 0.5332, "step": 199 }, { "epoch": 0.04, "learning_rate": 1.9988462513124495e-05, "loss": 0.6337, "step": 200 }, { "epoch": 0.05, "learning_rate": 1.9988110313490136e-05, "loss": 0.5917, "step": 201 }, { "epoch": 0.05, "learning_rate": 1.9987752821851526e-05, "loss": 0.6021, "step": 202 }, { "epoch": 0.05, "learning_rate": 1.998739003839807e-05, "loss": 0.6108, "step": 203 }, { "epoch": 0.05, "learning_rate": 1.998702196332199e-05, "loss": 0.5925, "step": 204 }, { "epoch": 0.05, "learning_rate": 1.9986648596818294e-05, "loss": 0.5994, "step": 205 }, { "epoch": 0.05, "learning_rate": 1.9986269939084812e-05, "loss": 0.6164, "step": 206 }, { "epoch": 0.05, "learning_rate": 1.9985885990322164e-05, "loss": 0.6032, "step": 207 }, { "epoch": 0.05, "learning_rate": 1.9985496750733774e-05, "loss": 0.5955, "step": 208 }, { "epoch": 0.05, "learning_rate": 1.998510222052588e-05, "loss": 0.6044, "step": 209 }, { "epoch": 0.05, "learning_rate": 1.9984702399907515e-05, "loss": 0.5981, "step": 210 }, { "epoch": 0.05, "learning_rate": 1.998429728909051e-05, "loss": 0.5678, "step": 211 }, { "epoch": 0.05, "learning_rate": 1.9983886888289515e-05, "loss": 0.594, "step": 212 }, { "epoch": 0.05, "learning_rate": 1.9983471197721962e-05, "loss": 0.5733, "step": 213 }, { "epoch": 0.05, "learning_rate": 1.9983050217608106e-05, "loss": 0.5905, "step": 214 }, { "epoch": 0.05, "learning_rate": 1.998262394817099e-05, "loss": 0.5729, "step": 215 }, { "epoch": 0.05, "learning_rate": 1.9982192389636458e-05, "loss": 0.5897, "step": 216 }, { "epoch": 0.05, "learning_rate": 1.9981755542233175e-05, "loss": 0.5993, "step": 217 }, { "epoch": 0.05, "learning_rate": 1.998131340619259e-05, "loss": 0.6062, "step": 218 }, { "epoch": 0.05, "learning_rate": 1.998086598174896e-05, "loss": 0.5992, "step": 219 }, { "epoch": 0.05, "learning_rate": 1.998041326913935e-05, "loss": 0.5814, "step": 220 }, { "epoch": 0.05, "learning_rate": 1.9979955268603616e-05, "loss": 0.5864, "step": 221 }, { "epoch": 0.05, "learning_rate": 1.9979491980384417e-05, "loss": 0.6009, "step": 222 }, { "epoch": 0.05, "learning_rate": 1.997902340472722e-05, "loss": 0.5987, "step": 223 }, { "epoch": 0.05, "learning_rate": 1.9978549541880295e-05, "loss": 0.5852, "step": 224 }, { "epoch": 0.05, "learning_rate": 1.9978070392094698e-05, "loss": 0.6245, "step": 225 }, { "epoch": 0.05, "learning_rate": 1.997758595562431e-05, "loss": 0.5456, "step": 226 }, { "epoch": 0.05, "learning_rate": 1.9977096232725797e-05, "loss": 0.6054, "step": 227 }, { "epoch": 0.05, "learning_rate": 1.9976601223658623e-05, "loss": 0.6083, "step": 228 }, { "epoch": 0.05, "learning_rate": 1.9976100928685063e-05, "loss": 0.6116, "step": 229 }, { "epoch": 0.05, "learning_rate": 1.997559534807019e-05, "loss": 0.6294, "step": 230 }, { "epoch": 0.05, "learning_rate": 1.997508448208187e-05, "loss": 0.5802, "step": 231 }, { "epoch": 0.05, "learning_rate": 1.997456833099078e-05, "loss": 0.566, "step": 232 }, { "epoch": 0.05, "learning_rate": 1.9974046895070396e-05, "loss": 0.5852, "step": 233 }, { "epoch": 0.05, "learning_rate": 1.9973520174596983e-05, "loss": 0.6328, "step": 234 }, { "epoch": 0.05, "learning_rate": 1.997298816984962e-05, "loss": 0.5667, "step": 235 }, { "epoch": 0.05, "learning_rate": 1.997245088111017e-05, "loss": 0.634, "step": 236 }, { "epoch": 0.05, "learning_rate": 1.9971908308663315e-05, "loss": 0.6198, "step": 237 }, { "epoch": 0.05, "learning_rate": 1.9971360452796523e-05, "loss": 0.6037, "step": 238 }, { "epoch": 0.05, "learning_rate": 1.9970807313800063e-05, "loss": 0.5489, "step": 239 }, { "epoch": 0.05, "learning_rate": 1.9970248891967007e-05, "loss": 0.5869, "step": 240 }, { "epoch": 0.05, "learning_rate": 1.996968518759322e-05, "loss": 0.6163, "step": 241 }, { "epoch": 0.05, "learning_rate": 1.9969116200977373e-05, "loss": 0.5748, "step": 242 }, { "epoch": 0.05, "learning_rate": 1.9968541932420935e-05, "loss": 0.5954, "step": 243 }, { "epoch": 0.05, "learning_rate": 1.9967962382228166e-05, "loss": 0.5739, "step": 244 }, { "epoch": 0.06, "learning_rate": 1.996737755070613e-05, "loss": 0.6033, "step": 245 }, { "epoch": 0.06, "learning_rate": 1.9966787438164693e-05, "loss": 0.6019, "step": 246 }, { "epoch": 0.06, "learning_rate": 1.9966192044916507e-05, "loss": 0.6006, "step": 247 }, { "epoch": 0.06, "learning_rate": 1.9965591371277034e-05, "loss": 0.5751, "step": 248 }, { "epoch": 0.06, "learning_rate": 1.996498541756453e-05, "loss": 0.5691, "step": 249 }, { "epoch": 0.06, "learning_rate": 1.9964374184100047e-05, "loss": 0.6058, "step": 250 }, { "epoch": 0.06, "learning_rate": 1.9963757671207436e-05, "loss": 0.5329, "step": 251 }, { "epoch": 0.06, "learning_rate": 1.996313587921334e-05, "loss": 0.6058, "step": 252 }, { "epoch": 0.06, "learning_rate": 1.9962508808447204e-05, "loss": 0.5507, "step": 253 }, { "epoch": 0.06, "learning_rate": 1.9961876459241274e-05, "loss": 0.5773, "step": 254 }, { "epoch": 0.06, "learning_rate": 1.996123883193059e-05, "loss": 0.5793, "step": 255 }, { "epoch": 0.06, "learning_rate": 1.996059592685297e-05, "loss": 0.5764, "step": 256 }, { "epoch": 0.06, "learning_rate": 1.9959947744349064e-05, "loss": 0.5901, "step": 257 }, { "epoch": 0.06, "learning_rate": 1.9959294284762286e-05, "loss": 0.5606, "step": 258 }, { "epoch": 0.06, "learning_rate": 1.995863554843887e-05, "loss": 0.6009, "step": 259 }, { "epoch": 0.06, "learning_rate": 1.9957971535727822e-05, "loss": 0.5392, "step": 260 }, { "epoch": 0.06, "learning_rate": 1.9957302246980965e-05, "loss": 0.5803, "step": 261 }, { "epoch": 0.06, "learning_rate": 1.9956627682552903e-05, "loss": 0.6084, "step": 262 }, { "epoch": 0.06, "learning_rate": 1.9955947842801047e-05, "loss": 0.6047, "step": 263 }, { "epoch": 0.06, "learning_rate": 1.9955262728085592e-05, "loss": 0.621, "step": 264 }, { "epoch": 0.06, "learning_rate": 1.995457233876953e-05, "loss": 0.5403, "step": 265 }, { "epoch": 0.06, "learning_rate": 1.995387667521866e-05, "loss": 0.5782, "step": 266 }, { "epoch": 0.06, "learning_rate": 1.9953175737801556e-05, "loss": 0.5601, "step": 267 }, { "epoch": 0.06, "learning_rate": 1.99524695268896e-05, "loss": 0.5447, "step": 268 }, { "epoch": 0.06, "learning_rate": 1.9951758042856963e-05, "loss": 0.5647, "step": 269 }, { "epoch": 0.06, "learning_rate": 1.995104128608061e-05, "loss": 0.6164, "step": 270 }, { "epoch": 0.06, "learning_rate": 1.9950319256940305e-05, "loss": 0.6024, "step": 271 }, { "epoch": 0.06, "learning_rate": 1.9949591955818593e-05, "loss": 0.6078, "step": 272 }, { "epoch": 0.06, "learning_rate": 1.9948859383100828e-05, "loss": 0.5643, "step": 273 }, { "epoch": 0.06, "learning_rate": 1.994812153917515e-05, "loss": 0.6216, "step": 274 }, { "epoch": 0.06, "learning_rate": 1.9947378424432483e-05, "loss": 0.5851, "step": 275 }, { "epoch": 0.06, "learning_rate": 1.9946630039266555e-05, "loss": 0.5531, "step": 276 }, { "epoch": 0.06, "learning_rate": 1.994587638407389e-05, "loss": 0.6278, "step": 277 }, { "epoch": 0.06, "learning_rate": 1.9945117459253786e-05, "loss": 0.5937, "step": 278 }, { "epoch": 0.06, "learning_rate": 1.994435326520835e-05, "loss": 0.5803, "step": 279 }, { "epoch": 0.06, "learning_rate": 1.994358380234248e-05, "loss": 0.6126, "step": 280 }, { "epoch": 0.06, "learning_rate": 1.9942809071063855e-05, "loss": 0.5797, "step": 281 }, { "epoch": 0.06, "learning_rate": 1.9942029071782955e-05, "loss": 0.5799, "step": 282 }, { "epoch": 0.06, "learning_rate": 1.9941243804913037e-05, "loss": 0.5789, "step": 283 }, { "epoch": 0.06, "learning_rate": 1.9940453270870174e-05, "loss": 0.5182, "step": 284 }, { "epoch": 0.06, "learning_rate": 1.9939657470073203e-05, "loss": 0.6202, "step": 285 }, { "epoch": 0.06, "learning_rate": 1.9938856402943774e-05, "loss": 0.563, "step": 286 }, { "epoch": 0.06, "learning_rate": 1.9938050069906305e-05, "loss": 0.5976, "step": 287 }, { "epoch": 0.06, "learning_rate": 1.9937238471388025e-05, "loss": 0.606, "step": 288 }, { "epoch": 0.06, "learning_rate": 1.9936421607818942e-05, "loss": 0.5715, "step": 289 }, { "epoch": 0.07, "learning_rate": 1.993559947963185e-05, "loss": 0.564, "step": 290 }, { "epoch": 0.07, "learning_rate": 1.993477208726234e-05, "loss": 0.5667, "step": 291 }, { "epoch": 0.07, "learning_rate": 1.9933939431148795e-05, "loss": 0.5894, "step": 292 }, { "epoch": 0.07, "learning_rate": 1.9933101511732377e-05, "loss": 0.5954, "step": 293 }, { "epoch": 0.07, "learning_rate": 1.993225832945704e-05, "loss": 0.5605, "step": 294 }, { "epoch": 0.07, "learning_rate": 1.9931409884769527e-05, "loss": 0.5512, "step": 295 }, { "epoch": 0.07, "learning_rate": 1.9930556178119372e-05, "loss": 0.5757, "step": 296 }, { "epoch": 0.07, "learning_rate": 1.9929697209958897e-05, "loss": 0.5705, "step": 297 }, { "epoch": 0.07, "learning_rate": 1.9928832980743206e-05, "loss": 0.5987, "step": 298 }, { "epoch": 0.07, "learning_rate": 1.9927963490930195e-05, "loss": 0.6009, "step": 299 }, { "epoch": 0.07, "learning_rate": 1.992708874098054e-05, "loss": 0.5355, "step": 300 }, { "epoch": 0.07, "learning_rate": 1.992620873135772e-05, "loss": 0.5726, "step": 301 }, { "epoch": 0.07, "learning_rate": 1.992532346252799e-05, "loss": 0.5802, "step": 302 }, { "epoch": 0.07, "learning_rate": 1.9924432934960384e-05, "loss": 0.5517, "step": 303 }, { "epoch": 0.07, "learning_rate": 1.9923537149126738e-05, "loss": 0.5687, "step": 304 }, { "epoch": 0.07, "learning_rate": 1.9922636105501663e-05, "loss": 0.5787, "step": 305 }, { "epoch": 0.07, "learning_rate": 1.992172980456256e-05, "loss": 0.557, "step": 306 }, { "epoch": 0.07, "learning_rate": 1.992081824678962e-05, "loss": 0.5711, "step": 307 }, { "epoch": 0.07, "learning_rate": 1.9919901432665803e-05, "loss": 0.5604, "step": 308 }, { "epoch": 0.07, "learning_rate": 1.9918979362676875e-05, "loss": 0.5881, "step": 309 }, { "epoch": 0.07, "learning_rate": 1.9918052037311368e-05, "loss": 0.5723, "step": 310 }, { "epoch": 0.07, "learning_rate": 1.9917119457060616e-05, "loss": 0.5486, "step": 311 }, { "epoch": 0.07, "learning_rate": 1.9916181622418724e-05, "loss": 0.5447, "step": 312 }, { "epoch": 0.07, "learning_rate": 1.9915238533882582e-05, "loss": 0.5343, "step": 313 }, { "epoch": 0.07, "learning_rate": 1.9914290191951875e-05, "loss": 0.5858, "step": 314 }, { "epoch": 0.07, "learning_rate": 1.991333659712906e-05, "loss": 0.5059, "step": 315 }, { "epoch": 0.07, "learning_rate": 1.9912377749919373e-05, "loss": 0.5632, "step": 316 }, { "epoch": 0.07, "learning_rate": 1.9911413650830853e-05, "loss": 0.5848, "step": 317 }, { "epoch": 0.07, "learning_rate": 1.9910444300374297e-05, "loss": 0.5874, "step": 318 }, { "epoch": 0.07, "learning_rate": 1.990946969906331e-05, "loss": 0.5378, "step": 319 }, { "epoch": 0.07, "learning_rate": 1.9908489847414252e-05, "loss": 0.5811, "step": 320 }, { "epoch": 0.07, "learning_rate": 1.9907504745946283e-05, "loss": 0.597, "step": 321 }, { "epoch": 0.07, "learning_rate": 1.9906514395181343e-05, "loss": 0.5279, "step": 322 }, { "epoch": 0.07, "learning_rate": 1.9905518795644146e-05, "loss": 0.5492, "step": 323 }, { "epoch": 0.07, "learning_rate": 1.9904517947862193e-05, "loss": 0.5384, "step": 324 }, { "epoch": 0.07, "learning_rate": 1.990351185236576e-05, "loss": 0.549, "step": 325 }, { "epoch": 0.07, "learning_rate": 1.990250050968791e-05, "loss": 0.5617, "step": 326 }, { "epoch": 0.07, "learning_rate": 1.990148392036449e-05, "loss": 0.5541, "step": 327 }, { "epoch": 0.07, "learning_rate": 1.99004620849341e-05, "loss": 0.5556, "step": 328 }, { "epoch": 0.07, "learning_rate": 1.989943500393816e-05, "loss": 0.5537, "step": 329 }, { "epoch": 0.07, "learning_rate": 1.9898402677920842e-05, "loss": 0.5568, "step": 330 }, { "epoch": 0.07, "learning_rate": 1.9897365107429102e-05, "loss": 0.5648, "step": 331 }, { "epoch": 0.07, "learning_rate": 1.9896322293012672e-05, "loss": 0.525, "step": 332 }, { "epoch": 0.07, "learning_rate": 1.9895274235224076e-05, "loss": 0.5971, "step": 333 }, { "epoch": 0.08, "learning_rate": 1.9894220934618598e-05, "loss": 0.5742, "step": 334 }, { "epoch": 0.08, "learning_rate": 1.9893162391754318e-05, "loss": 0.5691, "step": 335 }, { "epoch": 0.08, "learning_rate": 1.989209860719208e-05, "loss": 0.5735, "step": 336 }, { "epoch": 0.08, "learning_rate": 1.9891029581495504e-05, "loss": 0.5683, "step": 337 }, { "epoch": 0.08, "learning_rate": 1.9889955315231e-05, "loss": 0.5749, "step": 338 }, { "epoch": 0.08, "learning_rate": 1.988887580896774e-05, "loss": 0.5167, "step": 339 }, { "epoch": 0.08, "learning_rate": 1.9887791063277686e-05, "loss": 0.5609, "step": 340 }, { "epoch": 0.08, "learning_rate": 1.9886701078735565e-05, "loss": 0.5368, "step": 341 }, { "epoch": 0.08, "learning_rate": 1.9885605855918887e-05, "loss": 0.5725, "step": 342 }, { "epoch": 0.08, "learning_rate": 1.9884505395407932e-05, "loss": 0.5688, "step": 343 }, { "epoch": 0.08, "learning_rate": 1.9883399697785756e-05, "loss": 0.5663, "step": 344 }, { "epoch": 0.08, "learning_rate": 1.9882288763638195e-05, "loss": 0.5442, "step": 345 }, { "epoch": 0.08, "learning_rate": 1.9881172593553854e-05, "loss": 0.5543, "step": 346 }, { "epoch": 0.08, "learning_rate": 1.9880051188124112e-05, "loss": 0.601, "step": 347 }, { "epoch": 0.08, "learning_rate": 1.9878924547943126e-05, "loss": 0.5329, "step": 348 }, { "epoch": 0.08, "learning_rate": 1.9877792673607823e-05, "loss": 0.5659, "step": 349 }, { "epoch": 0.08, "learning_rate": 1.9876655565717902e-05, "loss": 0.5787, "step": 350 }, { "epoch": 0.08, "learning_rate": 1.9875513224875845e-05, "loss": 0.5396, "step": 351 }, { "epoch": 0.08, "learning_rate": 1.9874365651686894e-05, "loss": 0.5941, "step": 352 }, { "epoch": 0.08, "learning_rate": 1.9873212846759065e-05, "loss": 0.571, "step": 353 }, { "epoch": 0.08, "learning_rate": 1.9872054810703155e-05, "loss": 0.5732, "step": 354 }, { "epoch": 0.08, "learning_rate": 1.9870891544132724e-05, "loss": 0.5822, "step": 355 }, { "epoch": 0.08, "learning_rate": 1.9869723047664103e-05, "loss": 0.5424, "step": 356 }, { "epoch": 0.08, "learning_rate": 1.9868549321916398e-05, "loss": 0.5332, "step": 357 }, { "epoch": 0.08, "learning_rate": 1.9867370367511492e-05, "loss": 0.5708, "step": 358 }, { "epoch": 0.08, "learning_rate": 1.986618618507402e-05, "loss": 0.5752, "step": 359 }, { "epoch": 0.08, "learning_rate": 1.9864996775231407e-05, "loss": 0.5441, "step": 360 }, { "epoch": 0.08, "learning_rate": 1.986380213861383e-05, "loss": 0.6102, "step": 361 }, { "epoch": 0.08, "learning_rate": 1.9862602275854254e-05, "loss": 0.5668, "step": 362 }, { "epoch": 0.08, "learning_rate": 1.9861397187588393e-05, "loss": 0.5771, "step": 363 }, { "epoch": 0.08, "learning_rate": 1.9860186874454746e-05, "loss": 0.5392, "step": 364 }, { "epoch": 0.08, "learning_rate": 1.9858971337094567e-05, "loss": 0.5451, "step": 365 }, { "epoch": 0.08, "learning_rate": 1.985775057615189e-05, "loss": 0.5352, "step": 366 }, { "epoch": 0.08, "learning_rate": 1.9856524592273512e-05, "loss": 0.5801, "step": 367 }, { "epoch": 0.08, "learning_rate": 1.9855293386108995e-05, "loss": 0.5342, "step": 368 }, { "epoch": 0.08, "learning_rate": 1.9854056958310667e-05, "loss": 0.5302, "step": 369 }, { "epoch": 0.08, "learning_rate": 1.9852815309533623e-05, "loss": 0.5474, "step": 370 }, { "epoch": 0.08, "learning_rate": 1.9851568440435737e-05, "loss": 0.5524, "step": 371 }, { "epoch": 0.08, "learning_rate": 1.9850316351677626e-05, "loss": 0.524, "step": 372 }, { "epoch": 0.08, "learning_rate": 1.984905904392269e-05, "loss": 0.5942, "step": 373 }, { "epoch": 0.08, "learning_rate": 1.984779651783709e-05, "loss": 0.5811, "step": 374 }, { "epoch": 0.08, "learning_rate": 1.9846528774089753e-05, "loss": 0.53, "step": 375 }, { "epoch": 0.08, "learning_rate": 1.984525581335236e-05, "loss": 0.5613, "step": 376 }, { "epoch": 0.08, "learning_rate": 1.9843977636299372e-05, "loss": 0.5852, "step": 377 }, { "epoch": 0.08, "learning_rate": 1.9842694243608e-05, "loss": 0.5806, "step": 378 }, { "epoch": 0.09, "learning_rate": 1.9841405635958225e-05, "loss": 0.568, "step": 379 }, { "epoch": 0.09, "learning_rate": 1.9840111814032798e-05, "loss": 0.5671, "step": 380 }, { "epoch": 0.09, "learning_rate": 1.9838812778517215e-05, "loss": 0.5387, "step": 381 }, { "epoch": 0.09, "learning_rate": 1.9837508530099754e-05, "loss": 0.5839, "step": 382 }, { "epoch": 0.09, "learning_rate": 1.983619906947144e-05, "loss": 0.5282, "step": 383 }, { "epoch": 0.09, "learning_rate": 1.983488439732606e-05, "loss": 0.592, "step": 384 }, { "epoch": 0.09, "learning_rate": 1.983356451436018e-05, "loss": 0.5899, "step": 385 }, { "epoch": 0.09, "learning_rate": 1.9832239421273103e-05, "loss": 0.5341, "step": 386 }, { "epoch": 0.09, "learning_rate": 1.983090911876691e-05, "loss": 0.5823, "step": 387 }, { "epoch": 0.09, "learning_rate": 1.982957360754643e-05, "loss": 0.5376, "step": 388 }, { "epoch": 0.09, "learning_rate": 1.9828232888319263e-05, "loss": 0.57, "step": 389 }, { "epoch": 0.09, "learning_rate": 1.982688696179576e-05, "loss": 0.5573, "step": 390 }, { "epoch": 0.09, "learning_rate": 1.9825535828689037e-05, "loss": 0.5601, "step": 391 }, { "epoch": 0.09, "learning_rate": 1.982417948971496e-05, "loss": 0.5845, "step": 392 }, { "epoch": 0.09, "learning_rate": 1.9822817945592162e-05, "loss": 0.5773, "step": 393 }, { "epoch": 0.09, "learning_rate": 1.9821451197042028e-05, "loss": 0.551, "step": 394 }, { "epoch": 0.09, "learning_rate": 1.9820079244788706e-05, "loss": 0.538, "step": 395 }, { "epoch": 0.09, "learning_rate": 1.9818702089559097e-05, "loss": 0.5364, "step": 396 }, { "epoch": 0.09, "learning_rate": 1.9817319732082858e-05, "loss": 0.574, "step": 397 }, { "epoch": 0.09, "learning_rate": 1.9815932173092408e-05, "loss": 0.5628, "step": 398 }, { "epoch": 0.09, "learning_rate": 1.981453941332291e-05, "loss": 0.533, "step": 399 }, { "epoch": 0.09, "learning_rate": 1.98131414535123e-05, "loss": 0.5631, "step": 400 }, { "epoch": 0.09, "learning_rate": 1.9811738294401252e-05, "loss": 0.5534, "step": 401 }, { "epoch": 0.09, "learning_rate": 1.981032993673321e-05, "loss": 0.6124, "step": 402 }, { "epoch": 0.09, "learning_rate": 1.9808916381254354e-05, "loss": 0.5635, "step": 403 }, { "epoch": 0.09, "learning_rate": 1.980749762871364e-05, "loss": 0.5677, "step": 404 }, { "epoch": 0.09, "learning_rate": 1.9806073679862758e-05, "loss": 0.5489, "step": 405 }, { "epoch": 0.09, "learning_rate": 1.9804644535456164e-05, "loss": 0.5668, "step": 406 }, { "epoch": 0.09, "learning_rate": 1.9803210196251057e-05, "loss": 0.5632, "step": 407 }, { "epoch": 0.09, "learning_rate": 1.98017706630074e-05, "loss": 0.6127, "step": 408 }, { "epoch": 0.09, "learning_rate": 1.98003259364879e-05, "loss": 0.5534, "step": 409 }, { "epoch": 0.09, "learning_rate": 1.9798876017458012e-05, "loss": 0.5445, "step": 410 }, { "epoch": 0.09, "learning_rate": 1.979742090668595e-05, "loss": 0.583, "step": 411 }, { "epoch": 0.09, "learning_rate": 1.979596060494268e-05, "loss": 0.5853, "step": 412 }, { "epoch": 0.09, "learning_rate": 1.9794495113001904e-05, "loss": 0.5507, "step": 413 }, { "epoch": 0.09, "learning_rate": 1.9793024431640096e-05, "loss": 0.5314, "step": 414 }, { "epoch": 0.09, "learning_rate": 1.979154856163646e-05, "loss": 0.5555, "step": 415 }, { "epoch": 0.09, "learning_rate": 1.9790067503772966e-05, "loss": 0.5869, "step": 416 }, { "epoch": 0.09, "learning_rate": 1.978858125883431e-05, "loss": 0.5456, "step": 417 }, { "epoch": 0.09, "learning_rate": 1.9787089827607957e-05, "loss": 0.5486, "step": 418 }, { "epoch": 0.09, "learning_rate": 1.9785593210884112e-05, "loss": 0.5819, "step": 419 }, { "epoch": 0.09, "learning_rate": 1.9784091409455728e-05, "loss": 0.5534, "step": 420 }, { "epoch": 0.09, "learning_rate": 1.9782584424118506e-05, "loss": 0.5679, "step": 421 }, { "epoch": 0.09, "learning_rate": 1.978107225567089e-05, "loss": 0.5691, "step": 422 }, { "epoch": 0.1, "learning_rate": 1.9779554904914077e-05, "loss": 0.563, "step": 423 }, { "epoch": 0.1, "learning_rate": 1.9778032372652e-05, "loss": 0.538, "step": 424 }, { "epoch": 0.1, "learning_rate": 1.9776504659691347e-05, "loss": 0.5584, "step": 425 }, { "epoch": 0.1, "learning_rate": 1.9774971766841543e-05, "loss": 0.5328, "step": 426 }, { "epoch": 0.1, "learning_rate": 1.9773433694914768e-05, "loss": 0.5549, "step": 427 }, { "epoch": 0.1, "learning_rate": 1.9771890444725934e-05, "loss": 0.5278, "step": 428 }, { "epoch": 0.1, "learning_rate": 1.97703420170927e-05, "loss": 0.5911, "step": 429 }, { "epoch": 0.1, "learning_rate": 1.9768788412835473e-05, "loss": 0.5239, "step": 430 }, { "epoch": 0.1, "learning_rate": 1.9767229632777402e-05, "loss": 0.5409, "step": 431 }, { "epoch": 0.1, "learning_rate": 1.976566567774437e-05, "loss": 0.5152, "step": 432 }, { "epoch": 0.1, "learning_rate": 1.976409654856501e-05, "loss": 0.5207, "step": 433 }, { "epoch": 0.1, "learning_rate": 1.9762522246070697e-05, "loss": 0.561, "step": 434 }, { "epoch": 0.1, "learning_rate": 1.976094277109555e-05, "loss": 0.5391, "step": 435 }, { "epoch": 0.1, "learning_rate": 1.9759358124476405e-05, "loss": 0.5381, "step": 436 }, { "epoch": 0.1, "learning_rate": 1.975776830705287e-05, "loss": 0.5462, "step": 437 }, { "epoch": 0.1, "learning_rate": 1.9756173319667278e-05, "loss": 0.5513, "step": 438 }, { "epoch": 0.1, "learning_rate": 1.9754573163164697e-05, "loss": 0.5939, "step": 439 }, { "epoch": 0.1, "learning_rate": 1.9752967838392942e-05, "loss": 0.5317, "step": 440 }, { "epoch": 0.1, "learning_rate": 1.9751357346202563e-05, "loss": 0.5789, "step": 441 }, { "epoch": 0.1, "learning_rate": 1.974974168744684e-05, "loss": 0.5512, "step": 442 }, { "epoch": 0.1, "learning_rate": 1.9748120862981808e-05, "loss": 0.5112, "step": 443 }, { "epoch": 0.1, "learning_rate": 1.9746494873666226e-05, "loss": 0.5692, "step": 444 }, { "epoch": 0.1, "learning_rate": 1.974486372036159e-05, "loss": 0.5155, "step": 445 }, { "epoch": 0.1, "learning_rate": 1.9743227403932135e-05, "loss": 0.5538, "step": 446 }, { "epoch": 0.1, "learning_rate": 1.9741585925244835e-05, "loss": 0.5506, "step": 447 }, { "epoch": 0.1, "learning_rate": 1.973993928516939e-05, "loss": 0.529, "step": 448 }, { "epoch": 0.1, "learning_rate": 1.9738287484578243e-05, "loss": 0.5612, "step": 449 }, { "epoch": 0.1, "learning_rate": 1.973663052434657e-05, "loss": 0.5436, "step": 450 }, { "epoch": 0.1, "learning_rate": 1.9734968405352275e-05, "loss": 0.5115, "step": 451 }, { "epoch": 0.1, "learning_rate": 1.9733301128476e-05, "loss": 0.5514, "step": 452 }, { "epoch": 0.1, "learning_rate": 1.973162869460112e-05, "loss": 0.5602, "step": 453 }, { "epoch": 0.1, "learning_rate": 1.972995110461374e-05, "loss": 0.5617, "step": 454 }, { "epoch": 0.1, "learning_rate": 1.97282683594027e-05, "loss": 0.526, "step": 455 }, { "epoch": 0.1, "learning_rate": 1.972658045985957e-05, "loss": 0.5522, "step": 456 }, { "epoch": 0.1, "learning_rate": 1.972488740687865e-05, "loss": 0.5715, "step": 457 }, { "epoch": 0.1, "learning_rate": 1.9723189201356968e-05, "loss": 0.5341, "step": 458 }, { "epoch": 0.1, "learning_rate": 1.9721485844194282e-05, "loss": 0.4744, "step": 459 }, { "epoch": 0.1, "learning_rate": 1.9719777336293094e-05, "loss": 0.5641, "step": 460 }, { "epoch": 0.1, "learning_rate": 1.9718063678558615e-05, "loss": 0.5417, "step": 461 }, { "epoch": 0.1, "learning_rate": 1.9716344871898792e-05, "loss": 0.5523, "step": 462 }, { "epoch": 0.1, "learning_rate": 1.9714620917224305e-05, "loss": 0.5501, "step": 463 }, { "epoch": 0.1, "learning_rate": 1.9712891815448557e-05, "loss": 0.5752, "step": 464 }, { "epoch": 0.1, "learning_rate": 1.9711157567487674e-05, "loss": 0.572, "step": 465 }, { "epoch": 0.1, "learning_rate": 1.9709418174260523e-05, "loss": 0.5238, "step": 466 }, { "epoch": 0.1, "learning_rate": 1.9707673636688677e-05, "loss": 0.5475, "step": 467 }, { "epoch": 0.11, "learning_rate": 1.970592395569645e-05, "loss": 0.556, "step": 468 }, { "epoch": 0.11, "learning_rate": 1.9704169132210874e-05, "loss": 0.5823, "step": 469 }, { "epoch": 0.11, "learning_rate": 1.9702409167161712e-05, "loss": 0.5285, "step": 470 }, { "epoch": 0.11, "learning_rate": 1.9700644061481442e-05, "loss": 0.5316, "step": 471 }, { "epoch": 0.11, "learning_rate": 1.9698873816105272e-05, "loss": 0.5686, "step": 472 }, { "epoch": 0.11, "learning_rate": 1.9697098431971136e-05, "loss": 0.5513, "step": 473 }, { "epoch": 0.11, "learning_rate": 1.969531791001968e-05, "loss": 0.5679, "step": 474 }, { "epoch": 0.11, "learning_rate": 1.9693532251194283e-05, "loss": 0.5299, "step": 475 }, { "epoch": 0.11, "learning_rate": 1.969174145644104e-05, "loss": 0.5442, "step": 476 }, { "epoch": 0.11, "learning_rate": 1.9689945526708765e-05, "loss": 0.5331, "step": 477 }, { "epoch": 0.11, "learning_rate": 1.9688144462949004e-05, "loss": 0.553, "step": 478 }, { "epoch": 0.11, "learning_rate": 1.9686338266116006e-05, "loss": 0.5739, "step": 479 }, { "epoch": 0.11, "learning_rate": 1.968452693716676e-05, "loss": 0.5578, "step": 480 }, { "epoch": 0.11, "learning_rate": 1.9682710477060954e-05, "loss": 0.5392, "step": 481 }, { "epoch": 0.11, "learning_rate": 1.9680888886761002e-05, "loss": 0.5887, "step": 482 }, { "epoch": 0.11, "learning_rate": 1.9679062167232048e-05, "loss": 0.5649, "step": 483 }, { "epoch": 0.11, "learning_rate": 1.9677230319441936e-05, "loss": 0.5572, "step": 484 }, { "epoch": 0.11, "learning_rate": 1.9675393344361237e-05, "loss": 0.5477, "step": 485 }, { "epoch": 0.11, "learning_rate": 1.9673551242963236e-05, "loss": 0.551, "step": 486 }, { "epoch": 0.11, "learning_rate": 1.967170401622393e-05, "loss": 0.5604, "step": 487 }, { "epoch": 0.11, "learning_rate": 1.9669851665122043e-05, "loss": 0.5437, "step": 488 }, { "epoch": 0.11, "learning_rate": 1.9667994190639007e-05, "loss": 0.5254, "step": 489 }, { "epoch": 0.11, "learning_rate": 1.9666131593758962e-05, "loss": 0.5304, "step": 490 }, { "epoch": 0.11, "learning_rate": 1.966426387546877e-05, "loss": 0.5573, "step": 491 }, { "epoch": 0.11, "learning_rate": 1.966239103675801e-05, "loss": 0.5551, "step": 492 }, { "epoch": 0.11, "learning_rate": 1.966051307861896e-05, "loss": 0.5131, "step": 493 }, { "epoch": 0.11, "learning_rate": 1.965863000204663e-05, "loss": 0.542, "step": 494 }, { "epoch": 0.11, "learning_rate": 1.965674180803872e-05, "loss": 0.5594, "step": 495 }, { "epoch": 0.11, "learning_rate": 1.9654848497595663e-05, "loss": 0.5533, "step": 496 }, { "epoch": 0.11, "learning_rate": 1.9652950071720587e-05, "loss": 0.5625, "step": 497 }, { "epoch": 0.11, "learning_rate": 1.9651046531419335e-05, "loss": 0.5427, "step": 498 }, { "epoch": 0.11, "learning_rate": 1.9649137877700462e-05, "loss": 0.5346, "step": 499 }, { "epoch": 0.11, "learning_rate": 1.9647224111575228e-05, "loss": 0.5445, "step": 500 }, { "epoch": 0.11, "learning_rate": 1.9645305234057613e-05, "loss": 0.5351, "step": 501 }, { "epoch": 0.11, "learning_rate": 1.9643381246164285e-05, "loss": 0.5578, "step": 502 }, { "epoch": 0.11, "learning_rate": 1.964145214891464e-05, "loss": 0.552, "step": 503 }, { "epoch": 0.11, "learning_rate": 1.9639517943330768e-05, "loss": 0.551, "step": 504 }, { "epoch": 0.11, "learning_rate": 1.963757863043747e-05, "loss": 0.5495, "step": 505 }, { "epoch": 0.11, "learning_rate": 1.9635634211262257e-05, "loss": 0.5445, "step": 506 }, { "epoch": 0.11, "learning_rate": 1.9633684686835335e-05, "loss": 0.5601, "step": 507 }, { "epoch": 0.11, "learning_rate": 1.9631730058189624e-05, "loss": 0.5197, "step": 508 }, { "epoch": 0.11, "learning_rate": 1.962977032636075e-05, "loss": 0.5491, "step": 509 }, { "epoch": 0.11, "learning_rate": 1.962780549238703e-05, "loss": 0.5328, "step": 510 }, { "epoch": 0.11, "learning_rate": 1.96258355573095e-05, "loss": 0.5093, "step": 511 }, { "epoch": 0.12, "learning_rate": 1.9623860522171886e-05, "loss": 0.5626, "step": 512 }, { "epoch": 0.12, "learning_rate": 1.9621880388020628e-05, "loss": 0.5049, "step": 513 }, { "epoch": 0.12, "learning_rate": 1.9619895155904855e-05, "loss": 0.5375, "step": 514 }, { "epoch": 0.12, "learning_rate": 1.9617904826876407e-05, "loss": 0.5333, "step": 515 }, { "epoch": 0.12, "learning_rate": 1.9615909401989817e-05, "loss": 0.5302, "step": 516 }, { "epoch": 0.12, "learning_rate": 1.961390888230233e-05, "loss": 0.5429, "step": 517 }, { "epoch": 0.12, "learning_rate": 1.9611903268873868e-05, "loss": 0.5562, "step": 518 }, { "epoch": 0.12, "learning_rate": 1.9609892562767082e-05, "loss": 0.5007, "step": 519 }, { "epoch": 0.12, "learning_rate": 1.9607876765047293e-05, "loss": 0.5201, "step": 520 }, { "epoch": 0.12, "learning_rate": 1.960585587678254e-05, "loss": 0.5481, "step": 521 }, { "epoch": 0.12, "learning_rate": 1.960382989904354e-05, "loss": 0.5493, "step": 522 }, { "epoch": 0.12, "learning_rate": 1.9601798832903733e-05, "loss": 0.5654, "step": 523 }, { "epoch": 0.12, "learning_rate": 1.959976267943923e-05, "loss": 0.5279, "step": 524 }, { "epoch": 0.12, "learning_rate": 1.9597721439728845e-05, "loss": 0.5234, "step": 525 }, { "epoch": 0.12, "learning_rate": 1.9595675114854094e-05, "loss": 0.5345, "step": 526 }, { "epoch": 0.12, "learning_rate": 1.959362370589918e-05, "loss": 0.5639, "step": 527 }, { "epoch": 0.12, "learning_rate": 1.9591567213951002e-05, "loss": 0.5482, "step": 528 }, { "epoch": 0.12, "learning_rate": 1.9589505640099156e-05, "loss": 0.5542, "step": 529 }, { "epoch": 0.12, "learning_rate": 1.9587438985435917e-05, "loss": 0.525, "step": 530 }, { "epoch": 0.12, "learning_rate": 1.958536725105627e-05, "loss": 0.5371, "step": 531 }, { "epoch": 0.12, "learning_rate": 1.9583290438057877e-05, "loss": 0.5236, "step": 532 }, { "epoch": 0.12, "learning_rate": 1.9581208547541105e-05, "loss": 0.5036, "step": 533 }, { "epoch": 0.12, "learning_rate": 1.957912158060899e-05, "loss": 0.5395, "step": 534 }, { "epoch": 0.12, "learning_rate": 1.957702953836728e-05, "loss": 0.5553, "step": 535 }, { "epoch": 0.12, "learning_rate": 1.95749324219244e-05, "loss": 0.5469, "step": 536 }, { "epoch": 0.12, "learning_rate": 1.9572830232391467e-05, "loss": 0.5426, "step": 537 }, { "epoch": 0.12, "learning_rate": 1.9570722970882285e-05, "loss": 0.5208, "step": 538 }, { "epoch": 0.12, "learning_rate": 1.9568610638513343e-05, "loss": 0.5371, "step": 539 }, { "epoch": 0.12, "learning_rate": 1.956649323640382e-05, "loss": 0.5667, "step": 540 }, { "epoch": 0.12, "learning_rate": 1.9564370765675578e-05, "loss": 0.515, "step": 541 }, { "epoch": 0.12, "learning_rate": 1.956224322745317e-05, "loss": 0.5907, "step": 542 }, { "epoch": 0.12, "learning_rate": 1.956011062286383e-05, "loss": 0.5414, "step": 543 }, { "epoch": 0.12, "learning_rate": 1.9557972953037476e-05, "loss": 0.5185, "step": 544 }, { "epoch": 0.12, "learning_rate": 1.9555830219106707e-05, "loss": 0.5261, "step": 545 }, { "epoch": 0.12, "learning_rate": 1.955368242220681e-05, "loss": 0.5152, "step": 546 }, { "epoch": 0.12, "learning_rate": 1.955152956347576e-05, "loss": 0.5646, "step": 547 }, { "epoch": 0.12, "learning_rate": 1.9549371644054197e-05, "loss": 0.5984, "step": 548 }, { "epoch": 0.12, "learning_rate": 1.954720866508546e-05, "loss": 0.5741, "step": 549 }, { "epoch": 0.12, "learning_rate": 1.9545040627715554e-05, "loss": 0.5166, "step": 550 }, { "epoch": 0.12, "learning_rate": 1.9542867533093174e-05, "loss": 0.5386, "step": 551 }, { "epoch": 0.12, "learning_rate": 1.9540689382369694e-05, "loss": 0.5093, "step": 552 }, { "epoch": 0.12, "learning_rate": 1.9538506176699166e-05, "loss": 0.5508, "step": 553 }, { "epoch": 0.12, "learning_rate": 1.9536317917238312e-05, "loss": 0.5804, "step": 554 }, { "epoch": 0.12, "learning_rate": 1.9534124605146544e-05, "loss": 0.5624, "step": 555 }, { "epoch": 0.12, "learning_rate": 1.9531926241585943e-05, "loss": 0.5594, "step": 556 }, { "epoch": 0.13, "learning_rate": 1.9529722827721264e-05, "loss": 0.5535, "step": 557 }, { "epoch": 0.13, "learning_rate": 1.9527514364719953e-05, "loss": 0.5588, "step": 558 }, { "epoch": 0.13, "learning_rate": 1.952530085375211e-05, "loss": 0.5597, "step": 559 }, { "epoch": 0.13, "learning_rate": 1.952308229599053e-05, "loss": 0.5092, "step": 560 }, { "epoch": 0.13, "learning_rate": 1.9520858692610665e-05, "loss": 0.5528, "step": 561 }, { "epoch": 0.13, "learning_rate": 1.9518630044790654e-05, "loss": 0.5325, "step": 562 }, { "epoch": 0.13, "learning_rate": 1.9516396353711297e-05, "loss": 0.5471, "step": 563 }, { "epoch": 0.13, "learning_rate": 1.9514157620556072e-05, "loss": 0.5039, "step": 564 }, { "epoch": 0.13, "learning_rate": 1.9511913846511132e-05, "loss": 0.5345, "step": 565 }, { "epoch": 0.13, "learning_rate": 1.950966503276529e-05, "loss": 0.5175, "step": 566 }, { "epoch": 0.13, "learning_rate": 1.9507411180510042e-05, "loss": 0.5353, "step": 567 }, { "epoch": 0.13, "learning_rate": 1.950515229093954e-05, "loss": 0.5496, "step": 568 }, { "epoch": 0.13, "learning_rate": 1.9502888365250622e-05, "loss": 0.5364, "step": 569 }, { "epoch": 0.13, "learning_rate": 1.950061940464278e-05, "loss": 0.5433, "step": 570 }, { "epoch": 0.13, "learning_rate": 1.9498345410318178e-05, "loss": 0.5622, "step": 571 }, { "epoch": 0.13, "learning_rate": 1.9496066383481646e-05, "loss": 0.5356, "step": 572 }, { "epoch": 0.13, "learning_rate": 1.9493782325340683e-05, "loss": 0.5339, "step": 573 }, { "epoch": 0.13, "learning_rate": 1.949149323710545e-05, "loss": 0.5144, "step": 574 }, { "epoch": 0.13, "learning_rate": 1.9489199119988783e-05, "loss": 0.5595, "step": 575 }, { "epoch": 0.13, "learning_rate": 1.9486899975206165e-05, "loss": 0.5729, "step": 576 }, { "epoch": 0.13, "learning_rate": 1.948459580397576e-05, "loss": 0.5453, "step": 577 }, { "epoch": 0.13, "learning_rate": 1.9482286607518386e-05, "loss": 0.5828, "step": 578 }, { "epoch": 0.13, "learning_rate": 1.9479972387057523e-05, "loss": 0.5571, "step": 579 }, { "epoch": 0.13, "learning_rate": 1.947765314381932e-05, "loss": 0.4995, "step": 580 }, { "epoch": 0.13, "learning_rate": 1.9475328879032574e-05, "loss": 0.5327, "step": 581 }, { "epoch": 0.13, "learning_rate": 1.9472999593928764e-05, "loss": 0.5024, "step": 582 }, { "epoch": 0.13, "learning_rate": 1.9470665289742006e-05, "loss": 0.5334, "step": 583 }, { "epoch": 0.13, "learning_rate": 1.9468325967709084e-05, "loss": 0.5548, "step": 584 }, { "epoch": 0.13, "learning_rate": 1.9465981629069453e-05, "loss": 0.5674, "step": 585 }, { "epoch": 0.13, "learning_rate": 1.9463632275065203e-05, "loss": 0.5364, "step": 586 }, { "epoch": 0.13, "learning_rate": 1.9461277906941097e-05, "loss": 0.5551, "step": 587 }, { "epoch": 0.13, "learning_rate": 1.9458918525944553e-05, "loss": 0.5298, "step": 588 }, { "epoch": 0.13, "learning_rate": 1.9456554133325642e-05, "loss": 0.5272, "step": 589 }, { "epoch": 0.13, "learning_rate": 1.9454184730337093e-05, "loss": 0.5117, "step": 590 }, { "epoch": 0.13, "learning_rate": 1.945181031823428e-05, "loss": 0.5545, "step": 591 }, { "epoch": 0.13, "learning_rate": 1.9449430898275248e-05, "loss": 0.5265, "step": 592 }, { "epoch": 0.13, "learning_rate": 1.944704647172068e-05, "loss": 0.5394, "step": 593 }, { "epoch": 0.13, "learning_rate": 1.944465703983392e-05, "loss": 0.548, "step": 594 }, { "epoch": 0.13, "learning_rate": 1.9442262603880957e-05, "loss": 0.5477, "step": 595 }, { "epoch": 0.13, "learning_rate": 1.9439863165130444e-05, "loss": 0.5387, "step": 596 }, { "epoch": 0.13, "learning_rate": 1.943745872485367e-05, "loss": 0.5454, "step": 597 }, { "epoch": 0.13, "learning_rate": 1.9435049284324586e-05, "loss": 0.5336, "step": 598 }, { "epoch": 0.13, "learning_rate": 1.943263484481978e-05, "loss": 0.5285, "step": 599 }, { "epoch": 0.13, "learning_rate": 1.94302154076185e-05, "loss": 0.5137, "step": 600 }, { "epoch": 0.14, "learning_rate": 1.9427790974002637e-05, "loss": 0.4834, "step": 601 }, { "epoch": 0.14, "learning_rate": 1.942536154525673e-05, "loss": 0.5463, "step": 602 }, { "epoch": 0.14, "learning_rate": 1.942292712266796e-05, "loss": 0.5428, "step": 603 }, { "epoch": 0.14, "learning_rate": 1.9420487707526163e-05, "loss": 0.5015, "step": 604 }, { "epoch": 0.14, "learning_rate": 1.9418043301123815e-05, "loss": 0.5148, "step": 605 }, { "epoch": 0.14, "learning_rate": 1.9415593904756032e-05, "loss": 0.5335, "step": 606 }, { "epoch": 0.14, "learning_rate": 1.9413139519720584e-05, "loss": 0.536, "step": 607 }, { "epoch": 0.14, "learning_rate": 1.9410680147317877e-05, "loss": 0.5345, "step": 608 }, { "epoch": 0.14, "learning_rate": 1.9408215788850958e-05, "loss": 0.5503, "step": 609 }, { "epoch": 0.14, "learning_rate": 1.9405746445625523e-05, "loss": 0.5265, "step": 610 }, { "epoch": 0.14, "learning_rate": 1.9403272118949902e-05, "loss": 0.5391, "step": 611 }, { "epoch": 0.14, "learning_rate": 1.940079281013507e-05, "loss": 0.5085, "step": 612 }, { "epoch": 0.14, "learning_rate": 1.9398308520494643e-05, "loss": 0.552, "step": 613 }, { "epoch": 0.14, "learning_rate": 1.939581925134487e-05, "loss": 0.5474, "step": 614 }, { "epoch": 0.14, "learning_rate": 1.939332500400464e-05, "loss": 0.5479, "step": 615 }, { "epoch": 0.14, "learning_rate": 1.9390825779795485e-05, "loss": 0.5486, "step": 616 }, { "epoch": 0.14, "learning_rate": 1.9388321580041565e-05, "loss": 0.5246, "step": 617 }, { "epoch": 0.14, "learning_rate": 1.9385812406069685e-05, "loss": 0.5241, "step": 618 }, { "epoch": 0.14, "learning_rate": 1.938329825920928e-05, "loss": 0.5651, "step": 619 }, { "epoch": 0.14, "learning_rate": 1.9380779140792422e-05, "loss": 0.5596, "step": 620 }, { "epoch": 0.14, "learning_rate": 1.9378255052153818e-05, "loss": 0.5516, "step": 621 }, { "epoch": 0.14, "learning_rate": 1.9375725994630803e-05, "loss": 0.5254, "step": 622 }, { "epoch": 0.14, "learning_rate": 1.937319196956335e-05, "loss": 0.5227, "step": 623 }, { "epoch": 0.14, "learning_rate": 1.9370652978294065e-05, "loss": 0.5401, "step": 624 }, { "epoch": 0.14, "learning_rate": 1.936810902216818e-05, "loss": 0.5559, "step": 625 }, { "epoch": 0.14, "learning_rate": 1.936556010253356e-05, "loss": 0.5729, "step": 626 }, { "epoch": 0.14, "learning_rate": 1.9363006220740703e-05, "loss": 0.5519, "step": 627 }, { "epoch": 0.14, "learning_rate": 1.936044737814273e-05, "loss": 0.5162, "step": 628 }, { "epoch": 0.14, "learning_rate": 1.9357883576095395e-05, "loss": 0.5489, "step": 629 }, { "epoch": 0.14, "learning_rate": 1.9355314815957076e-05, "loss": 0.5598, "step": 630 }, { "epoch": 0.14, "learning_rate": 1.9352741099088785e-05, "loss": 0.5161, "step": 631 }, { "epoch": 0.14, "learning_rate": 1.9350162426854152e-05, "loss": 0.5741, "step": 632 }, { "epoch": 0.14, "learning_rate": 1.9347578800619432e-05, "loss": 0.5475, "step": 633 }, { "epoch": 0.14, "learning_rate": 1.9344990221753518e-05, "loss": 0.5569, "step": 634 }, { "epoch": 0.14, "learning_rate": 1.934239669162791e-05, "loss": 0.5085, "step": 635 }, { "epoch": 0.14, "learning_rate": 1.9339798211616744e-05, "loss": 0.5236, "step": 636 }, { "epoch": 0.14, "learning_rate": 1.9337194783096768e-05, "loss": 0.5649, "step": 637 }, { "epoch": 0.14, "learning_rate": 1.9334586407447365e-05, "loss": 0.5396, "step": 638 }, { "epoch": 0.14, "learning_rate": 1.9331973086050524e-05, "loss": 0.5271, "step": 639 }, { "epoch": 0.14, "learning_rate": 1.9329354820290866e-05, "loss": 0.5117, "step": 640 }, { "epoch": 0.14, "learning_rate": 1.932673161155563e-05, "loss": 0.544, "step": 641 }, { "epoch": 0.14, "learning_rate": 1.9324103461234666e-05, "loss": 0.5235, "step": 642 }, { "epoch": 0.14, "learning_rate": 1.9321470370720452e-05, "loss": 0.5442, "step": 643 }, { "epoch": 0.14, "learning_rate": 1.9318832341408078e-05, "loss": 0.4943, "step": 644 }, { "epoch": 0.14, "learning_rate": 1.9316189374695256e-05, "loss": 0.5429, "step": 645 }, { "epoch": 0.15, "learning_rate": 1.93135414719823e-05, "loss": 0.4783, "step": 646 }, { "epoch": 0.15, "learning_rate": 1.931088863467216e-05, "loss": 0.5457, "step": 647 }, { "epoch": 0.15, "learning_rate": 1.9308230864170383e-05, "loss": 0.5264, "step": 648 }, { "epoch": 0.15, "learning_rate": 1.930556816188514e-05, "loss": 0.5283, "step": 649 }, { "epoch": 0.15, "learning_rate": 1.9302900529227213e-05, "loss": 0.4711, "step": 650 }, { "epoch": 0.15, "learning_rate": 1.9300227967609987e-05, "loss": 0.5448, "step": 651 }, { "epoch": 0.15, "learning_rate": 1.9297550478449477e-05, "loss": 0.5093, "step": 652 }, { "epoch": 0.15, "learning_rate": 1.929486806316429e-05, "loss": 0.5153, "step": 653 }, { "epoch": 0.15, "learning_rate": 1.9292180723175656e-05, "loss": 0.5146, "step": 654 }, { "epoch": 0.15, "learning_rate": 1.9289488459907404e-05, "loss": 0.5206, "step": 655 }, { "epoch": 0.15, "learning_rate": 1.928679127478598e-05, "loss": 0.5511, "step": 656 }, { "epoch": 0.15, "learning_rate": 1.9284089169240436e-05, "loss": 0.5222, "step": 657 }, { "epoch": 0.15, "learning_rate": 1.928138214470243e-05, "loss": 0.5119, "step": 658 }, { "epoch": 0.15, "learning_rate": 1.9278670202606222e-05, "loss": 0.5044, "step": 659 }, { "epoch": 0.15, "learning_rate": 1.9275953344388686e-05, "loss": 0.5242, "step": 660 }, { "epoch": 0.15, "learning_rate": 1.927323157148929e-05, "loss": 0.5054, "step": 661 }, { "epoch": 0.15, "learning_rate": 1.9270504885350118e-05, "loss": 0.514, "step": 662 }, { "epoch": 0.15, "learning_rate": 1.9267773287415845e-05, "loss": 0.5677, "step": 663 }, { "epoch": 0.15, "learning_rate": 1.926503677913376e-05, "loss": 0.5457, "step": 664 }, { "epoch": 0.15, "learning_rate": 1.9262295361953746e-05, "loss": 0.5352, "step": 665 }, { "epoch": 0.15, "learning_rate": 1.925954903732829e-05, "loss": 0.5237, "step": 666 }, { "epoch": 0.15, "learning_rate": 1.9256797806712478e-05, "loss": 0.5091, "step": 667 }, { "epoch": 0.15, "learning_rate": 1.9254041671563995e-05, "loss": 0.5196, "step": 668 }, { "epoch": 0.15, "learning_rate": 1.9251280633343125e-05, "loss": 0.5577, "step": 669 }, { "epoch": 0.15, "learning_rate": 1.924851469351275e-05, "loss": 0.5189, "step": 670 }, { "epoch": 0.15, "learning_rate": 1.9245743853538352e-05, "loss": 0.5602, "step": 671 }, { "epoch": 0.15, "learning_rate": 1.9242968114888003e-05, "loss": 0.5693, "step": 672 }, { "epoch": 0.15, "learning_rate": 1.9240187479032372e-05, "loss": 0.5298, "step": 673 }, { "epoch": 0.15, "learning_rate": 1.9237401947444725e-05, "loss": 0.52, "step": 674 }, { "epoch": 0.15, "learning_rate": 1.9234611521600924e-05, "loss": 0.5373, "step": 675 }, { "epoch": 0.15, "learning_rate": 1.923181620297942e-05, "loss": 0.528, "step": 676 }, { "epoch": 0.15, "learning_rate": 1.9229015993061257e-05, "loss": 0.5444, "step": 677 }, { "epoch": 0.15, "learning_rate": 1.9226210893330074e-05, "loss": 0.5063, "step": 678 }, { "epoch": 0.15, "learning_rate": 1.9223400905272093e-05, "loss": 0.53, "step": 679 }, { "epoch": 0.15, "learning_rate": 1.9220586030376135e-05, "loss": 0.5404, "step": 680 }, { "epoch": 0.15, "learning_rate": 1.9217766270133607e-05, "loss": 0.5367, "step": 681 }, { "epoch": 0.15, "learning_rate": 1.92149416260385e-05, "loss": 0.5491, "step": 682 }, { "epoch": 0.15, "learning_rate": 1.92121120995874e-05, "loss": 0.5221, "step": 683 }, { "epoch": 0.15, "learning_rate": 1.9209277692279475e-05, "loss": 0.5469, "step": 684 }, { "epoch": 0.15, "learning_rate": 1.9206438405616476e-05, "loss": 0.5049, "step": 685 }, { "epoch": 0.15, "learning_rate": 1.920359424110275e-05, "loss": 0.5399, "step": 686 }, { "epoch": 0.15, "learning_rate": 1.9200745200245216e-05, "loss": 0.5401, "step": 687 }, { "epoch": 0.15, "learning_rate": 1.9197891284553387e-05, "loss": 0.4945, "step": 688 }, { "epoch": 0.15, "learning_rate": 1.919503249553935e-05, "loss": 0.4748, "step": 689 }, { "epoch": 0.16, "learning_rate": 1.919216883471778e-05, "loss": 0.5266, "step": 690 }, { "epoch": 0.16, "learning_rate": 1.9189300303605932e-05, "loss": 0.5288, "step": 691 }, { "epoch": 0.16, "learning_rate": 1.9186426903723644e-05, "loss": 0.5057, "step": 692 }, { "epoch": 0.16, "learning_rate": 1.9183548636593322e-05, "loss": 0.5272, "step": 693 }, { "epoch": 0.16, "learning_rate": 1.918066550373997e-05, "loss": 0.5269, "step": 694 }, { "epoch": 0.16, "learning_rate": 1.917777750669115e-05, "loss": 0.5308, "step": 695 }, { "epoch": 0.16, "learning_rate": 1.9174884646977014e-05, "loss": 0.525, "step": 696 }, { "epoch": 0.16, "learning_rate": 1.917198692613029e-05, "loss": 0.5341, "step": 697 }, { "epoch": 0.16, "learning_rate": 1.9169084345686274e-05, "loss": 0.5381, "step": 698 }, { "epoch": 0.16, "learning_rate": 1.9166176907182845e-05, "loss": 0.519, "step": 699 }, { "epoch": 0.16, "learning_rate": 1.916326461216045e-05, "loss": 0.5377, "step": 700 }, { "epoch": 0.16, "learning_rate": 1.9160347462162113e-05, "loss": 0.5276, "step": 701 }, { "epoch": 0.16, "learning_rate": 1.9157425458733426e-05, "loss": 0.532, "step": 702 }, { "epoch": 0.16, "learning_rate": 1.9154498603422557e-05, "loss": 0.5429, "step": 703 }, { "epoch": 0.16, "learning_rate": 1.915156689778024e-05, "loss": 0.517, "step": 704 }, { "epoch": 0.16, "learning_rate": 1.914863034335979e-05, "loss": 0.5067, "step": 705 }, { "epoch": 0.16, "learning_rate": 1.9145688941717074e-05, "loss": 0.5427, "step": 706 }, { "epoch": 0.16, "learning_rate": 1.914274269441054e-05, "loss": 0.5245, "step": 707 }, { "epoch": 0.16, "learning_rate": 1.9139791603001198e-05, "loss": 0.5232, "step": 708 }, { "epoch": 0.16, "learning_rate": 1.9136835669052624e-05, "loss": 0.4819, "step": 709 }, { "epoch": 0.16, "learning_rate": 1.9133874894130966e-05, "loss": 0.5229, "step": 710 }, { "epoch": 0.16, "learning_rate": 1.913090927980493e-05, "loss": 0.5101, "step": 711 }, { "epoch": 0.16, "learning_rate": 1.9127938827645787e-05, "loss": 0.5573, "step": 712 }, { "epoch": 0.16, "learning_rate": 1.912496353922738e-05, "loss": 0.5116, "step": 713 }, { "epoch": 0.16, "learning_rate": 1.9121983416126095e-05, "loss": 0.5498, "step": 714 }, { "epoch": 0.16, "learning_rate": 1.91189984599209e-05, "loss": 0.5366, "step": 715 }, { "epoch": 0.16, "learning_rate": 1.911600867219332e-05, "loss": 0.4954, "step": 716 }, { "epoch": 0.16, "learning_rate": 1.9113014054527424e-05, "loss": 0.5175, "step": 717 }, { "epoch": 0.16, "learning_rate": 1.911001460850986e-05, "loss": 0.5273, "step": 718 }, { "epoch": 0.16, "learning_rate": 1.910701033572982e-05, "loss": 0.5483, "step": 719 }, { "epoch": 0.16, "learning_rate": 1.910400123777906e-05, "loss": 0.4995, "step": 720 }, { "epoch": 0.16, "learning_rate": 1.9100987316251894e-05, "loss": 0.5231, "step": 721 }, { "epoch": 0.16, "learning_rate": 1.9097968572745188e-05, "loss": 0.5291, "step": 722 }, { "epoch": 0.16, "learning_rate": 1.9094945008858364e-05, "loss": 0.5064, "step": 723 }, { "epoch": 0.16, "learning_rate": 1.90919166261934e-05, "loss": 0.4729, "step": 724 }, { "epoch": 0.16, "learning_rate": 1.908888342635482e-05, "loss": 0.4921, "step": 725 }, { "epoch": 0.16, "learning_rate": 1.908584541094971e-05, "loss": 0.513, "step": 726 }, { "epoch": 0.16, "learning_rate": 1.9082802581587704e-05, "loss": 0.5192, "step": 727 }, { "epoch": 0.16, "learning_rate": 1.9079754939880983e-05, "loss": 0.5331, "step": 728 }, { "epoch": 0.16, "learning_rate": 1.9076702487444275e-05, "loss": 0.5193, "step": 729 }, { "epoch": 0.16, "learning_rate": 1.907364522589487e-05, "loss": 0.541, "step": 730 }, { "epoch": 0.16, "learning_rate": 1.9070583156852596e-05, "loss": 0.5109, "step": 731 }, { "epoch": 0.16, "learning_rate": 1.9067516281939826e-05, "loss": 0.5459, "step": 732 }, { "epoch": 0.16, "learning_rate": 1.906444460278149e-05, "loss": 0.497, "step": 733 }, { "epoch": 0.16, "learning_rate": 1.9061368121005053e-05, "loss": 0.5286, "step": 734 }, { "epoch": 0.17, "learning_rate": 1.905828683824053e-05, "loss": 0.4925, "step": 735 }, { "epoch": 0.17, "learning_rate": 1.9055200756120474e-05, "loss": 0.5307, "step": 736 }, { "epoch": 0.17, "learning_rate": 1.9052109876279993e-05, "loss": 0.577, "step": 737 }, { "epoch": 0.17, "learning_rate": 1.9049014200356724e-05, "loss": 0.5427, "step": 738 }, { "epoch": 0.17, "learning_rate": 1.904591372999085e-05, "loss": 0.5159, "step": 739 }, { "epoch": 0.17, "learning_rate": 1.904280846682509e-05, "loss": 0.4942, "step": 740 }, { "epoch": 0.17, "learning_rate": 1.9039698412504718e-05, "loss": 0.5326, "step": 741 }, { "epoch": 0.17, "learning_rate": 1.9036583568677528e-05, "loss": 0.5257, "step": 742 }, { "epoch": 0.17, "learning_rate": 1.903346393699386e-05, "loss": 0.5521, "step": 743 }, { "epoch": 0.17, "learning_rate": 1.9030339519106588e-05, "loss": 0.5154, "step": 744 }, { "epoch": 0.17, "learning_rate": 1.9027210316671125e-05, "loss": 0.5459, "step": 745 }, { "epoch": 0.17, "learning_rate": 1.902407633134542e-05, "loss": 0.5594, "step": 746 }, { "epoch": 0.17, "learning_rate": 1.902093756478995e-05, "loss": 0.5376, "step": 747 }, { "epoch": 0.17, "learning_rate": 1.901779401866773e-05, "loss": 0.5083, "step": 748 }, { "epoch": 0.17, "learning_rate": 1.9014645694644302e-05, "loss": 0.5566, "step": 749 }, { "epoch": 0.17, "learning_rate": 1.9011492594387753e-05, "loss": 0.5092, "step": 750 }, { "epoch": 0.17, "learning_rate": 1.9008334719568684e-05, "loss": 0.5143, "step": 751 }, { "epoch": 0.17, "learning_rate": 1.9005172071860235e-05, "loss": 0.522, "step": 752 }, { "epoch": 0.17, "learning_rate": 1.9002004652938074e-05, "loss": 0.5097, "step": 753 }, { "epoch": 0.17, "learning_rate": 1.8998832464480396e-05, "loss": 0.5102, "step": 754 }, { "epoch": 0.17, "learning_rate": 1.8995655508167922e-05, "loss": 0.5173, "step": 755 }, { "epoch": 0.17, "learning_rate": 1.8992473785683897e-05, "loss": 0.5222, "step": 756 }, { "epoch": 0.17, "learning_rate": 1.89892872987141e-05, "loss": 0.4981, "step": 757 }, { "epoch": 0.17, "learning_rate": 1.8986096048946826e-05, "loss": 0.498, "step": 758 }, { "epoch": 0.17, "learning_rate": 1.8982900038072892e-05, "loss": 0.5313, "step": 759 }, { "epoch": 0.17, "learning_rate": 1.8979699267785646e-05, "loss": 0.5152, "step": 760 }, { "epoch": 0.17, "learning_rate": 1.8976493739780952e-05, "loss": 0.5414, "step": 761 }, { "epoch": 0.17, "learning_rate": 1.8973283455757197e-05, "loss": 0.4805, "step": 762 }, { "epoch": 0.17, "learning_rate": 1.8970068417415288e-05, "loss": 0.5163, "step": 763 }, { "epoch": 0.17, "learning_rate": 1.8966848626458647e-05, "loss": 0.5331, "step": 764 }, { "epoch": 0.17, "learning_rate": 1.896362408459322e-05, "loss": 0.5089, "step": 765 }, { "epoch": 0.17, "learning_rate": 1.8960394793527464e-05, "loss": 0.4686, "step": 766 }, { "epoch": 0.17, "learning_rate": 1.8957160754972357e-05, "loss": 0.5306, "step": 767 }, { "epoch": 0.17, "learning_rate": 1.8953921970641396e-05, "loss": 0.5207, "step": 768 }, { "epoch": 0.17, "learning_rate": 1.895067844225058e-05, "loss": 0.5262, "step": 769 }, { "epoch": 0.17, "learning_rate": 1.8947430171518432e-05, "loss": 0.5165, "step": 770 }, { "epoch": 0.17, "learning_rate": 1.8944177160165983e-05, "loss": 0.5051, "step": 771 }, { "epoch": 0.17, "learning_rate": 1.8940919409916776e-05, "loss": 0.5139, "step": 772 }, { "epoch": 0.17, "learning_rate": 1.893765692249687e-05, "loss": 0.5344, "step": 773 }, { "epoch": 0.17, "learning_rate": 1.893438969963483e-05, "loss": 0.5316, "step": 774 }, { "epoch": 0.17, "learning_rate": 1.8931117743061725e-05, "loss": 0.5145, "step": 775 }, { "epoch": 0.17, "learning_rate": 1.8927841054511145e-05, "loss": 0.5289, "step": 776 }, { "epoch": 0.17, "learning_rate": 1.892455963571917e-05, "loss": 0.543, "step": 777 }, { "epoch": 0.17, "learning_rate": 1.89212734884244e-05, "loss": 0.5524, "step": 778 }, { "epoch": 0.18, "learning_rate": 1.8917982614367933e-05, "loss": 0.5654, "step": 779 }, { "epoch": 0.18, "learning_rate": 1.891468701529338e-05, "loss": 0.5513, "step": 780 }, { "epoch": 0.18, "learning_rate": 1.8911386692946844e-05, "loss": 0.5316, "step": 781 }, { "epoch": 0.18, "learning_rate": 1.8908081649076937e-05, "loss": 0.5007, "step": 782 }, { "epoch": 0.18, "learning_rate": 1.8904771885434775e-05, "loss": 0.5174, "step": 783 }, { "epoch": 0.18, "learning_rate": 1.890145740377397e-05, "loss": 0.4697, "step": 784 }, { "epoch": 0.18, "learning_rate": 1.8898138205850628e-05, "loss": 0.4904, "step": 785 }, { "epoch": 0.18, "learning_rate": 1.889481429342337e-05, "loss": 0.5215, "step": 786 }, { "epoch": 0.18, "learning_rate": 1.8891485668253304e-05, "loss": 0.5374, "step": 787 }, { "epoch": 0.18, "learning_rate": 1.8888152332104033e-05, "loss": 0.5114, "step": 788 }, { "epoch": 0.18, "learning_rate": 1.8884814286741663e-05, "loss": 0.5229, "step": 789 }, { "epoch": 0.18, "learning_rate": 1.888147153393479e-05, "loss": 0.5509, "step": 790 }, { "epoch": 0.18, "learning_rate": 1.887812407545451e-05, "loss": 0.5802, "step": 791 }, { "epoch": 0.18, "learning_rate": 1.88747719130744e-05, "loss": 0.5164, "step": 792 }, { "epoch": 0.18, "learning_rate": 1.8871415048570544e-05, "loss": 0.527, "step": 793 }, { "epoch": 0.18, "learning_rate": 1.8868053483721507e-05, "loss": 0.5117, "step": 794 }, { "epoch": 0.18, "learning_rate": 1.8864687220308354e-05, "loss": 0.5477, "step": 795 }, { "epoch": 0.18, "learning_rate": 1.886131626011463e-05, "loss": 0.5591, "step": 796 }, { "epoch": 0.18, "learning_rate": 1.885794060492637e-05, "loss": 0.54, "step": 797 }, { "epoch": 0.18, "learning_rate": 1.8854560256532098e-05, "loss": 0.5242, "step": 798 }, { "epoch": 0.18, "learning_rate": 1.8851175216722834e-05, "loss": 0.5462, "step": 799 }, { "epoch": 0.18, "learning_rate": 1.8847785487292066e-05, "loss": 0.5388, "step": 800 }, { "epoch": 0.18, "learning_rate": 1.884439107003578e-05, "loss": 0.5339, "step": 801 }, { "epoch": 0.18, "learning_rate": 1.884099196675244e-05, "loss": 0.4998, "step": 802 }, { "epoch": 0.18, "learning_rate": 1.883758817924299e-05, "loss": 0.5502, "step": 803 }, { "epoch": 0.18, "learning_rate": 1.8834179709310868e-05, "loss": 0.5407, "step": 804 }, { "epoch": 0.18, "learning_rate": 1.883076655876198e-05, "loss": 0.4855, "step": 805 }, { "epoch": 0.18, "learning_rate": 1.8827348729404716e-05, "loss": 0.4779, "step": 806 }, { "epoch": 0.18, "learning_rate": 1.882392622304995e-05, "loss": 0.5058, "step": 807 }, { "epoch": 0.18, "learning_rate": 1.882049904151103e-05, "loss": 0.5257, "step": 808 }, { "epoch": 0.18, "learning_rate": 1.8817067186603774e-05, "loss": 0.5322, "step": 809 }, { "epoch": 0.18, "learning_rate": 1.881363066014649e-05, "loss": 0.524, "step": 810 }, { "epoch": 0.18, "learning_rate": 1.881018946395995e-05, "loss": 0.5501, "step": 811 }, { "epoch": 0.18, "learning_rate": 1.8806743599867407e-05, "loss": 0.5485, "step": 812 }, { "epoch": 0.18, "learning_rate": 1.880329306969458e-05, "loss": 0.5121, "step": 813 }, { "epoch": 0.18, "learning_rate": 1.8799837875269672e-05, "loss": 0.5205, "step": 814 }, { "epoch": 0.18, "learning_rate": 1.8796378018423344e-05, "loss": 0.5109, "step": 815 }, { "epoch": 0.18, "learning_rate": 1.8792913500988733e-05, "loss": 0.4899, "step": 816 }, { "epoch": 0.18, "learning_rate": 1.8789444324801452e-05, "loss": 0.5392, "step": 817 }, { "epoch": 0.18, "learning_rate": 1.878597049169957e-05, "loss": 0.4911, "step": 818 }, { "epoch": 0.18, "learning_rate": 1.878249200352363e-05, "loss": 0.5305, "step": 819 }, { "epoch": 0.18, "learning_rate": 1.8779008862116642e-05, "loss": 0.5158, "step": 820 }, { "epoch": 0.18, "learning_rate": 1.877552106932408e-05, "loss": 0.5139, "step": 821 }, { "epoch": 0.18, "learning_rate": 1.8772028626993885e-05, "loss": 0.5003, "step": 822 }, { "epoch": 0.18, "learning_rate": 1.8768531536976452e-05, "loss": 0.5141, "step": 823 }, { "epoch": 0.19, "learning_rate": 1.8765029801124653e-05, "loss": 0.55, "step": 824 }, { "epoch": 0.19, "learning_rate": 1.8761523421293808e-05, "loss": 0.5115, "step": 825 }, { "epoch": 0.19, "learning_rate": 1.8758012399341708e-05, "loss": 0.4824, "step": 826 }, { "epoch": 0.19, "learning_rate": 1.8754496737128597e-05, "loss": 0.5175, "step": 827 }, { "epoch": 0.19, "learning_rate": 1.875097643651718e-05, "loss": 0.5222, "step": 828 }, { "epoch": 0.19, "learning_rate": 1.8747451499372623e-05, "loss": 0.5098, "step": 829 }, { "epoch": 0.19, "learning_rate": 1.8743921927562537e-05, "loss": 0.5224, "step": 830 }, { "epoch": 0.19, "learning_rate": 1.8740387722957002e-05, "loss": 0.5285, "step": 831 }, { "epoch": 0.19, "learning_rate": 1.8736848887428546e-05, "loss": 0.514, "step": 832 }, { "epoch": 0.19, "learning_rate": 1.8733305422852152e-05, "loss": 0.5211, "step": 833 }, { "epoch": 0.19, "learning_rate": 1.872975733110525e-05, "loss": 0.5151, "step": 834 }, { "epoch": 0.19, "learning_rate": 1.8726204614067734e-05, "loss": 0.5057, "step": 835 }, { "epoch": 0.19, "learning_rate": 1.872264727362194e-05, "loss": 0.5294, "step": 836 }, { "epoch": 0.19, "learning_rate": 1.871908531165265e-05, "loss": 0.5164, "step": 837 }, { "epoch": 0.19, "learning_rate": 1.8715518730047106e-05, "loss": 0.5135, "step": 838 }, { "epoch": 0.19, "learning_rate": 1.8711947530694986e-05, "loss": 0.4829, "step": 839 }, { "epoch": 0.19, "learning_rate": 1.8708371715488425e-05, "loss": 0.4908, "step": 840 }, { "epoch": 0.19, "learning_rate": 1.870479128632199e-05, "loss": 0.5134, "step": 841 }, { "epoch": 0.19, "learning_rate": 1.8701206245092717e-05, "loss": 0.5237, "step": 842 }, { "epoch": 0.19, "learning_rate": 1.869761659370005e-05, "loss": 0.5092, "step": 843 }, { "epoch": 0.19, "learning_rate": 1.869402233404591e-05, "loss": 0.5119, "step": 844 }, { "epoch": 0.19, "learning_rate": 1.8690423468034638e-05, "loss": 0.5052, "step": 845 }, { "epoch": 0.19, "learning_rate": 1.8686819997573024e-05, "loss": 0.5128, "step": 846 }, { "epoch": 0.19, "learning_rate": 1.86832119245703e-05, "loss": 0.5132, "step": 847 }, { "epoch": 0.19, "learning_rate": 1.867959925093813e-05, "loss": 0.528, "step": 848 }, { "epoch": 0.19, "learning_rate": 1.867598197859061e-05, "loss": 0.5246, "step": 849 }, { "epoch": 0.19, "learning_rate": 1.8672360109444295e-05, "loss": 0.4924, "step": 850 }, { "epoch": 0.19, "learning_rate": 1.866873364541815e-05, "loss": 0.5059, "step": 851 }, { "epoch": 0.19, "learning_rate": 1.8665102588433596e-05, "loss": 0.5278, "step": 852 }, { "epoch": 0.19, "learning_rate": 1.866146694041447e-05, "loss": 0.5023, "step": 853 }, { "epoch": 0.19, "learning_rate": 1.865782670328705e-05, "loss": 0.532, "step": 854 }, { "epoch": 0.19, "learning_rate": 1.865418187898004e-05, "loss": 0.5202, "step": 855 }, { "epoch": 0.19, "learning_rate": 1.865053246942459e-05, "loss": 0.5198, "step": 856 }, { "epoch": 0.19, "learning_rate": 1.8646878476554257e-05, "loss": 0.5212, "step": 857 }, { "epoch": 0.19, "learning_rate": 1.8643219902305042e-05, "loss": 0.5221, "step": 858 }, { "epoch": 0.19, "learning_rate": 1.8639556748615372e-05, "loss": 0.543, "step": 859 }, { "epoch": 0.19, "learning_rate": 1.8635889017426092e-05, "loss": 0.5375, "step": 860 }, { "epoch": 0.19, "learning_rate": 1.8632216710680478e-05, "loss": 0.538, "step": 861 }, { "epoch": 0.19, "learning_rate": 1.862853983032423e-05, "loss": 0.5319, "step": 862 }, { "epoch": 0.19, "learning_rate": 1.8624858378305474e-05, "loss": 0.536, "step": 863 }, { "epoch": 0.19, "learning_rate": 1.862117235657475e-05, "loss": 0.4896, "step": 864 }, { "epoch": 0.19, "learning_rate": 1.8617481767085028e-05, "loss": 0.5335, "step": 865 }, { "epoch": 0.19, "learning_rate": 1.8613786611791694e-05, "loss": 0.4978, "step": 866 }, { "epoch": 0.19, "learning_rate": 1.861008689265255e-05, "loss": 0.51, "step": 867 }, { "epoch": 0.2, "learning_rate": 1.860638261162783e-05, "loss": 0.5372, "step": 868 }, { "epoch": 0.2, "learning_rate": 1.860267377068016e-05, "loss": 0.5188, "step": 869 }, { "epoch": 0.2, "learning_rate": 1.859896037177461e-05, "loss": 0.5538, "step": 870 }, { "epoch": 0.2, "learning_rate": 1.8595242416878638e-05, "loss": 0.5331, "step": 871 }, { "epoch": 0.2, "learning_rate": 1.859151990796214e-05, "loss": 0.5182, "step": 872 }, { "epoch": 0.2, "learning_rate": 1.8587792846997412e-05, "loss": 0.5192, "step": 873 }, { "epoch": 0.2, "learning_rate": 1.8584061235959165e-05, "loss": 0.5341, "step": 874 }, { "epoch": 0.2, "learning_rate": 1.8580325076824513e-05, "loss": 0.5204, "step": 875 }, { "epoch": 0.2, "learning_rate": 1.8576584371572992e-05, "loss": 0.5417, "step": 876 }, { "epoch": 0.2, "learning_rate": 1.8572839122186542e-05, "loss": 0.5107, "step": 877 }, { "epoch": 0.2, "learning_rate": 1.856908933064951e-05, "loss": 0.5003, "step": 878 }, { "epoch": 0.2, "learning_rate": 1.8565334998948648e-05, "loss": 0.4674, "step": 879 }, { "epoch": 0.2, "learning_rate": 1.8561576129073116e-05, "loss": 0.4987, "step": 880 }, { "epoch": 0.2, "learning_rate": 1.8557812723014476e-05, "loss": 0.4869, "step": 881 }, { "epoch": 0.2, "learning_rate": 1.8554044782766697e-05, "loss": 0.4915, "step": 882 }, { "epoch": 0.2, "learning_rate": 1.8550272310326144e-05, "loss": 0.5134, "step": 883 }, { "epoch": 0.2, "learning_rate": 1.854649530769159e-05, "loss": 0.509, "step": 884 }, { "epoch": 0.2, "learning_rate": 1.8542713776864206e-05, "loss": 0.5134, "step": 885 }, { "epoch": 0.2, "learning_rate": 1.8538927719847566e-05, "loss": 0.5203, "step": 886 }, { "epoch": 0.2, "learning_rate": 1.853513713864763e-05, "loss": 0.4635, "step": 887 }, { "epoch": 0.2, "learning_rate": 1.8531342035272768e-05, "loss": 0.5061, "step": 888 }, { "epoch": 0.2, "learning_rate": 1.852754241173374e-05, "loss": 0.5296, "step": 889 }, { "epoch": 0.2, "learning_rate": 1.85237382700437e-05, "loss": 0.5254, "step": 890 }, { "epoch": 0.2, "learning_rate": 1.8519929612218204e-05, "loss": 0.5262, "step": 891 }, { "epoch": 0.2, "learning_rate": 1.8516116440275187e-05, "loss": 0.5401, "step": 892 }, { "epoch": 0.2, "learning_rate": 1.8512298756234986e-05, "loss": 0.5625, "step": 893 }, { "epoch": 0.2, "learning_rate": 1.8508476562120332e-05, "loss": 0.4837, "step": 894 }, { "epoch": 0.2, "learning_rate": 1.850464985995633e-05, "loss": 0.5439, "step": 895 }, { "epoch": 0.2, "learning_rate": 1.8500818651770487e-05, "loss": 0.5117, "step": 896 }, { "epoch": 0.2, "learning_rate": 1.8496982939592696e-05, "loss": 0.5224, "step": 897 }, { "epoch": 0.2, "learning_rate": 1.8493142725455228e-05, "loss": 0.5258, "step": 898 }, { "epoch": 0.2, "learning_rate": 1.848929801139275e-05, "loss": 0.4972, "step": 899 }, { "epoch": 0.2, "learning_rate": 1.8485448799442312e-05, "loss": 0.5101, "step": 900 }, { "epoch": 0.2, "learning_rate": 1.8481595091643333e-05, "loss": 0.5259, "step": 901 }, { "epoch": 0.2, "learning_rate": 1.8477736890037633e-05, "loss": 0.5338, "step": 902 }, { "epoch": 0.2, "learning_rate": 1.84738741966694e-05, "loss": 0.5447, "step": 903 }, { "epoch": 0.2, "learning_rate": 1.8470007013585206e-05, "loss": 0.5512, "step": 904 }, { "epoch": 0.2, "learning_rate": 1.8466135342834005e-05, "loss": 0.5182, "step": 905 }, { "epoch": 0.2, "learning_rate": 1.8462259186467123e-05, "loss": 0.5138, "step": 906 }, { "epoch": 0.2, "learning_rate": 1.845837854653827e-05, "loss": 0.5393, "step": 907 }, { "epoch": 0.2, "learning_rate": 1.845449342510352e-05, "loss": 0.5193, "step": 908 }, { "epoch": 0.2, "learning_rate": 1.8450603824221334e-05, "loss": 0.4944, "step": 909 }, { "epoch": 0.2, "learning_rate": 1.8446709745952537e-05, "loss": 0.5378, "step": 910 }, { "epoch": 0.2, "learning_rate": 1.8442811192360334e-05, "loss": 0.533, "step": 911 }, { "epoch": 0.2, "learning_rate": 1.8438908165510298e-05, "loss": 0.5059, "step": 912 }, { "epoch": 0.21, "learning_rate": 1.843500066747036e-05, "loss": 0.5145, "step": 913 }, { "epoch": 0.21, "learning_rate": 1.8431088700310846e-05, "loss": 0.5241, "step": 914 }, { "epoch": 0.21, "learning_rate": 1.8427172266104425e-05, "loss": 0.5435, "step": 915 }, { "epoch": 0.21, "learning_rate": 1.8423251366926148e-05, "loss": 0.5419, "step": 916 }, { "epoch": 0.21, "learning_rate": 1.8419326004853424e-05, "loss": 0.5291, "step": 917 }, { "epoch": 0.21, "learning_rate": 1.8415396181966027e-05, "loss": 0.5179, "step": 918 }, { "epoch": 0.21, "learning_rate": 1.84114619003461e-05, "loss": 0.5445, "step": 919 }, { "epoch": 0.21, "learning_rate": 1.8407523162078148e-05, "loss": 0.5195, "step": 920 }, { "epoch": 0.21, "learning_rate": 1.8403579969249024e-05, "loss": 0.5003, "step": 921 }, { "epoch": 0.21, "learning_rate": 1.8399632323947964e-05, "loss": 0.5136, "step": 922 }, { "epoch": 0.21, "learning_rate": 1.839568022826654e-05, "loss": 0.5252, "step": 923 }, { "epoch": 0.21, "learning_rate": 1.83917236842987e-05, "loss": 0.482, "step": 924 }, { "epoch": 0.21, "learning_rate": 1.8387762694140732e-05, "loss": 0.5112, "step": 925 }, { "epoch": 0.21, "learning_rate": 1.8383797259891298e-05, "loss": 0.5183, "step": 926 }, { "epoch": 0.21, "learning_rate": 1.83798273836514e-05, "loss": 0.5122, "step": 927 }, { "epoch": 0.21, "learning_rate": 1.8375853067524402e-05, "loss": 0.5089, "step": 928 }, { "epoch": 0.21, "learning_rate": 1.8371874313616017e-05, "loss": 0.5066, "step": 929 }, { "epoch": 0.21, "learning_rate": 1.8367891124034307e-05, "loss": 0.5215, "step": 930 }, { "epoch": 0.21, "learning_rate": 1.836390350088969e-05, "loss": 0.5181, "step": 931 }, { "epoch": 0.21, "learning_rate": 1.835991144629493e-05, "loss": 0.5119, "step": 932 }, { "epoch": 0.21, "learning_rate": 1.8355914962365136e-05, "loss": 0.5308, "step": 933 }, { "epoch": 0.21, "learning_rate": 1.8351914051217773e-05, "loss": 0.4999, "step": 934 }, { "epoch": 0.21, "learning_rate": 1.8347908714972633e-05, "loss": 0.4762, "step": 935 }, { "epoch": 0.21, "learning_rate": 1.8343898955751876e-05, "loss": 0.4898, "step": 936 }, { "epoch": 0.21, "learning_rate": 1.833988477567999e-05, "loss": 0.511, "step": 937 }, { "epoch": 0.21, "learning_rate": 1.833586617688381e-05, "loss": 0.4996, "step": 938 }, { "epoch": 0.21, "learning_rate": 1.833184316149251e-05, "loss": 0.5235, "step": 939 }, { "epoch": 0.21, "learning_rate": 1.8327815731637612e-05, "loss": 0.5052, "step": 940 }, { "epoch": 0.21, "learning_rate": 1.8323783889452963e-05, "loss": 0.513, "step": 941 }, { "epoch": 0.21, "learning_rate": 1.831974763707476e-05, "loss": 0.5137, "step": 942 }, { "epoch": 0.21, "learning_rate": 1.831570697664153e-05, "loss": 0.5061, "step": 943 }, { "epoch": 0.21, "learning_rate": 1.8311661910294138e-05, "loss": 0.5032, "step": 944 }, { "epoch": 0.21, "learning_rate": 1.8307612440175782e-05, "loss": 0.5344, "step": 945 }, { "epoch": 0.21, "learning_rate": 1.8303558568431997e-05, "loss": 0.5218, "step": 946 }, { "epoch": 0.21, "learning_rate": 1.8299500297210647e-05, "loss": 0.491, "step": 947 }, { "epoch": 0.21, "learning_rate": 1.8295437628661927e-05, "loss": 0.4958, "step": 948 }, { "epoch": 0.21, "learning_rate": 1.829137056493836e-05, "loss": 0.5623, "step": 949 }, { "epoch": 0.21, "learning_rate": 1.82872991081948e-05, "loss": 0.5079, "step": 950 }, { "epoch": 0.21, "learning_rate": 1.828322326058843e-05, "loss": 0.5302, "step": 951 }, { "epoch": 0.21, "learning_rate": 1.827914302427876e-05, "loss": 0.5233, "step": 952 }, { "epoch": 0.21, "learning_rate": 1.8275058401427622e-05, "loss": 0.5158, "step": 953 }, { "epoch": 0.21, "learning_rate": 1.8270969394199173e-05, "loss": 0.5244, "step": 954 }, { "epoch": 0.21, "learning_rate": 1.826687600475989e-05, "loss": 0.4706, "step": 955 }, { "epoch": 0.21, "learning_rate": 1.8262778235278577e-05, "loss": 0.5301, "step": 956 }, { "epoch": 0.22, "learning_rate": 1.825867608792636e-05, "loss": 0.506, "step": 957 }, { "epoch": 0.22, "learning_rate": 1.8254569564876676e-05, "loss": 0.5441, "step": 958 }, { "epoch": 0.22, "learning_rate": 1.825045866830529e-05, "loss": 0.5061, "step": 959 }, { "epoch": 0.22, "learning_rate": 1.8246343400390278e-05, "loss": 0.4991, "step": 960 }, { "epoch": 0.22, "learning_rate": 1.8242223763312034e-05, "loss": 0.5153, "step": 961 }, { "epoch": 0.22, "learning_rate": 1.8238099759253268e-05, "loss": 0.5044, "step": 962 }, { "epoch": 0.22, "learning_rate": 1.8233971390398996e-05, "loss": 0.4837, "step": 963 }, { "epoch": 0.22, "learning_rate": 1.8229838658936566e-05, "loss": 0.4741, "step": 964 }, { "epoch": 0.22, "learning_rate": 1.8225701567055612e-05, "loss": 0.5127, "step": 965 }, { "epoch": 0.22, "learning_rate": 1.8221560116948103e-05, "loss": 0.5139, "step": 966 }, { "epoch": 0.22, "learning_rate": 1.8217414310808292e-05, "loss": 0.4942, "step": 967 }, { "epoch": 0.22, "learning_rate": 1.8213264150832765e-05, "loss": 0.5267, "step": 968 }, { "epoch": 0.22, "learning_rate": 1.8209109639220393e-05, "loss": 0.5102, "step": 969 }, { "epoch": 0.22, "learning_rate": 1.820495077817237e-05, "loss": 0.5181, "step": 970 }, { "epoch": 0.22, "learning_rate": 1.820078756989218e-05, "loss": 0.5317, "step": 971 }, { "epoch": 0.22, "learning_rate": 1.8196620016585626e-05, "loss": 0.5008, "step": 972 }, { "epoch": 0.22, "learning_rate": 1.8192448120460794e-05, "loss": 0.5, "step": 973 }, { "epoch": 0.22, "learning_rate": 1.818827188372809e-05, "loss": 0.5014, "step": 974 }, { "epoch": 0.22, "learning_rate": 1.8184091308600208e-05, "loss": 0.4742, "step": 975 }, { "epoch": 0.22, "learning_rate": 1.8179906397292142e-05, "loss": 0.5486, "step": 976 }, { "epoch": 0.22, "learning_rate": 1.8175717152021187e-05, "loss": 0.5168, "step": 977 }, { "epoch": 0.22, "learning_rate": 1.817152357500693e-05, "loss": 0.5019, "step": 978 }, { "epoch": 0.22, "learning_rate": 1.816732566847126e-05, "loss": 0.499, "step": 979 }, { "epoch": 0.22, "learning_rate": 1.8163123434638354e-05, "loss": 0.5381, "step": 980 }, { "epoch": 0.22, "learning_rate": 1.815891687573468e-05, "loss": 0.5238, "step": 981 }, { "epoch": 0.22, "learning_rate": 1.8154705993989e-05, "loss": 0.4929, "step": 982 }, { "epoch": 0.22, "learning_rate": 1.8150490791632372e-05, "loss": 0.501, "step": 983 }, { "epoch": 0.22, "learning_rate": 1.8146271270898138e-05, "loss": 0.5137, "step": 984 }, { "epoch": 0.22, "learning_rate": 1.8142047434021924e-05, "loss": 0.5206, "step": 985 }, { "epoch": 0.22, "learning_rate": 1.8137819283241647e-05, "loss": 0.5254, "step": 986 }, { "epoch": 0.22, "learning_rate": 1.8133586820797515e-05, "loss": 0.4925, "step": 987 }, { "epoch": 0.22, "learning_rate": 1.8129350048932012e-05, "loss": 0.5117, "step": 988 }, { "epoch": 0.22, "learning_rate": 1.8125108969889908e-05, "loss": 0.5027, "step": 989 }, { "epoch": 0.22, "learning_rate": 1.8120863585918257e-05, "loss": 0.5232, "step": 990 }, { "epoch": 0.22, "learning_rate": 1.811661389926639e-05, "loss": 0.5102, "step": 991 }, { "epoch": 0.22, "learning_rate": 1.8112359912185923e-05, "loss": 0.4902, "step": 992 }, { "epoch": 0.22, "learning_rate": 1.8108101626930748e-05, "loss": 0.5254, "step": 993 }, { "epoch": 0.22, "learning_rate": 1.810383904575703e-05, "loss": 0.5072, "step": 994 }, { "epoch": 0.22, "learning_rate": 1.8099572170923214e-05, "loss": 0.5047, "step": 995 }, { "epoch": 0.22, "learning_rate": 1.8095301004690027e-05, "loss": 0.4921, "step": 996 }, { "epoch": 0.22, "learning_rate": 1.8091025549320455e-05, "loss": 0.5166, "step": 997 }, { "epoch": 0.22, "learning_rate": 1.8086745807079766e-05, "loss": 0.5039, "step": 998 }, { "epoch": 0.22, "learning_rate": 1.8082461780235497e-05, "loss": 0.4882, "step": 999 }, { "epoch": 0.22, "learning_rate": 1.8078173471057457e-05, "loss": 0.4988, "step": 1000 }, { "epoch": 0.22, "learning_rate": 1.8073880881817723e-05, "loss": 0.5353, "step": 1001 }, { "epoch": 0.23, "learning_rate": 1.806958401479064e-05, "loss": 0.521, "step": 1002 }, { "epoch": 0.23, "learning_rate": 1.8065282872252813e-05, "loss": 0.4987, "step": 1003 }, { "epoch": 0.23, "learning_rate": 1.8060977456483127e-05, "loss": 0.5152, "step": 1004 }, { "epoch": 0.23, "learning_rate": 1.8056667769762715e-05, "loss": 0.5385, "step": 1005 }, { "epoch": 0.23, "learning_rate": 1.805235381437498e-05, "loss": 0.5061, "step": 1006 }, { "epoch": 0.23, "learning_rate": 1.804803559260559e-05, "loss": 0.5259, "step": 1007 }, { "epoch": 0.23, "learning_rate": 1.8043713106742472e-05, "loss": 0.4961, "step": 1008 }, { "epoch": 0.23, "learning_rate": 1.80393863590758e-05, "loss": 0.5029, "step": 1009 }, { "epoch": 0.23, "learning_rate": 1.803505535189803e-05, "loss": 0.488, "step": 1010 }, { "epoch": 0.23, "learning_rate": 1.803072008750385e-05, "loss": 0.5162, "step": 1011 }, { "epoch": 0.23, "learning_rate": 1.802638056819022e-05, "loss": 0.4841, "step": 1012 }, { "epoch": 0.23, "learning_rate": 1.802203679625635e-05, "loss": 0.5034, "step": 1013 }, { "epoch": 0.23, "learning_rate": 1.80176887740037e-05, "loss": 0.5212, "step": 1014 }, { "epoch": 0.23, "learning_rate": 1.8013336503735987e-05, "loss": 0.4979, "step": 1015 }, { "epoch": 0.23, "learning_rate": 1.800897998775917e-05, "loss": 0.5345, "step": 1016 }, { "epoch": 0.23, "learning_rate": 1.8004619228381467e-05, "loss": 0.5262, "step": 1017 }, { "epoch": 0.23, "learning_rate": 1.8000254227913346e-05, "loss": 0.4846, "step": 1018 }, { "epoch": 0.23, "learning_rate": 1.7995884988667513e-05, "loss": 0.512, "step": 1019 }, { "epoch": 0.23, "learning_rate": 1.7991511512958917e-05, "loss": 0.5346, "step": 1020 }, { "epoch": 0.23, "learning_rate": 1.7987133803104768e-05, "loss": 0.5234, "step": 1021 }, { "epoch": 0.23, "learning_rate": 1.7982751861424504e-05, "loss": 0.5157, "step": 1022 }, { "epoch": 0.23, "learning_rate": 1.797836569023981e-05, "loss": 0.5391, "step": 1023 }, { "epoch": 0.23, "learning_rate": 1.797397529187462e-05, "loss": 0.524, "step": 1024 }, { "epoch": 0.23, "learning_rate": 1.7969580668655097e-05, "loss": 0.5179, "step": 1025 }, { "epoch": 0.23, "learning_rate": 1.7965181822909642e-05, "loss": 0.5156, "step": 1026 }, { "epoch": 0.23, "learning_rate": 1.79607787569689e-05, "loss": 0.5352, "step": 1027 }, { "epoch": 0.23, "learning_rate": 1.7956371473165747e-05, "loss": 0.5005, "step": 1028 }, { "epoch": 0.23, "learning_rate": 1.79519599738353e-05, "loss": 0.504, "step": 1029 }, { "epoch": 0.23, "learning_rate": 1.79475442613149e-05, "loss": 0.5428, "step": 1030 }, { "epoch": 0.23, "learning_rate": 1.794312433794413e-05, "loss": 0.4791, "step": 1031 }, { "epoch": 0.23, "learning_rate": 1.7938700206064798e-05, "loss": 0.4958, "step": 1032 }, { "epoch": 0.23, "learning_rate": 1.7934271868020942e-05, "loss": 0.4947, "step": 1033 }, { "epoch": 0.23, "learning_rate": 1.7929839326158838e-05, "loss": 0.5113, "step": 1034 }, { "epoch": 0.23, "learning_rate": 1.792540258282697e-05, "loss": 0.4701, "step": 1035 }, { "epoch": 0.23, "learning_rate": 1.7920961640376068e-05, "loss": 0.4506, "step": 1036 }, { "epoch": 0.23, "learning_rate": 1.791651650115907e-05, "loss": 0.5299, "step": 1037 }, { "epoch": 0.23, "learning_rate": 1.791206716753115e-05, "loss": 0.4958, "step": 1038 }, { "epoch": 0.23, "learning_rate": 1.7907613641849705e-05, "loss": 0.5194, "step": 1039 }, { "epoch": 0.23, "learning_rate": 1.7903155926474343e-05, "loss": 0.506, "step": 1040 }, { "epoch": 0.23, "learning_rate": 1.7898694023766896e-05, "loss": 0.5006, "step": 1041 }, { "epoch": 0.23, "learning_rate": 1.7894227936091417e-05, "loss": 0.5419, "step": 1042 }, { "epoch": 0.23, "learning_rate": 1.7889757665814177e-05, "loss": 0.5033, "step": 1043 }, { "epoch": 0.23, "learning_rate": 1.788528321530366e-05, "loss": 0.5469, "step": 1044 }, { "epoch": 0.23, "learning_rate": 1.7880804586930563e-05, "loss": 0.473, "step": 1045 }, { "epoch": 0.24, "learning_rate": 1.7876321783067802e-05, "loss": 0.5145, "step": 1046 }, { "epoch": 0.24, "learning_rate": 1.7871834806090502e-05, "loss": 0.4747, "step": 1047 }, { "epoch": 0.24, "learning_rate": 1.7867343658376e-05, "loss": 0.5527, "step": 1048 }, { "epoch": 0.24, "learning_rate": 1.7862848342303845e-05, "loss": 0.4766, "step": 1049 }, { "epoch": 0.24, "learning_rate": 1.7858348860255785e-05, "loss": 0.5027, "step": 1050 }, { "epoch": 0.24, "learning_rate": 1.785384521461579e-05, "loss": 0.5021, "step": 1051 }, { "epoch": 0.24, "learning_rate": 1.7849337407770023e-05, "loss": 0.5199, "step": 1052 }, { "epoch": 0.24, "learning_rate": 1.7844825442106855e-05, "loss": 0.5315, "step": 1053 }, { "epoch": 0.24, "learning_rate": 1.7840309320016875e-05, "loss": 0.4961, "step": 1054 }, { "epoch": 0.24, "learning_rate": 1.7835789043892847e-05, "loss": 0.5354, "step": 1055 }, { "epoch": 0.24, "learning_rate": 1.7831264616129758e-05, "loss": 0.498, "step": 1056 }, { "epoch": 0.24, "learning_rate": 1.7826736039124782e-05, "loss": 0.4903, "step": 1057 }, { "epoch": 0.24, "learning_rate": 1.7822203315277306e-05, "loss": 0.509, "step": 1058 }, { "epoch": 0.24, "learning_rate": 1.7817666446988896e-05, "loss": 0.4892, "step": 1059 }, { "epoch": 0.24, "learning_rate": 1.781312543666333e-05, "loss": 0.5146, "step": 1060 }, { "epoch": 0.24, "learning_rate": 1.7808580286706564e-05, "loss": 0.5067, "step": 1061 }, { "epoch": 0.24, "learning_rate": 1.7804030999526765e-05, "loss": 0.4904, "step": 1062 }, { "epoch": 0.24, "learning_rate": 1.7799477577534282e-05, "loss": 0.5514, "step": 1063 }, { "epoch": 0.24, "learning_rate": 1.7794920023141648e-05, "loss": 0.4859, "step": 1064 }, { "epoch": 0.24, "learning_rate": 1.7790358338763604e-05, "loss": 0.4832, "step": 1065 }, { "epoch": 0.24, "learning_rate": 1.7785792526817068e-05, "loss": 0.4922, "step": 1066 }, { "epoch": 0.24, "learning_rate": 1.7781222589721138e-05, "loss": 0.5529, "step": 1067 }, { "epoch": 0.24, "learning_rate": 1.7776648529897113e-05, "loss": 0.5481, "step": 1068 }, { "epoch": 0.24, "learning_rate": 1.7772070349768466e-05, "loss": 0.4607, "step": 1069 }, { "epoch": 0.24, "learning_rate": 1.7767488051760858e-05, "loss": 0.5176, "step": 1070 }, { "epoch": 0.24, "learning_rate": 1.7762901638302127e-05, "loss": 0.4931, "step": 1071 }, { "epoch": 0.24, "learning_rate": 1.7758311111822294e-05, "loss": 0.5111, "step": 1072 }, { "epoch": 0.24, "learning_rate": 1.775371647475356e-05, "loss": 0.5205, "step": 1073 }, { "epoch": 0.24, "learning_rate": 1.7749117729530306e-05, "loss": 0.5154, "step": 1074 }, { "epoch": 0.24, "learning_rate": 1.774451487858908e-05, "loss": 0.5195, "step": 1075 }, { "epoch": 0.24, "learning_rate": 1.7739907924368623e-05, "loss": 0.4991, "step": 1076 }, { "epoch": 0.24, "learning_rate": 1.773529686930983e-05, "loss": 0.5471, "step": 1077 }, { "epoch": 0.24, "learning_rate": 1.773068171585578e-05, "loss": 0.4861, "step": 1078 }, { "epoch": 0.24, "learning_rate": 1.772606246645173e-05, "loss": 0.5427, "step": 1079 }, { "epoch": 0.24, "learning_rate": 1.7721439123545084e-05, "loss": 0.5343, "step": 1080 }, { "epoch": 0.24, "learning_rate": 1.771681168958544e-05, "loss": 0.527, "step": 1081 }, { "epoch": 0.24, "learning_rate": 1.7712180167024548e-05, "loss": 0.4827, "step": 1082 }, { "epoch": 0.24, "learning_rate": 1.7707544558316332e-05, "loss": 0.5142, "step": 1083 }, { "epoch": 0.24, "learning_rate": 1.770290486591688e-05, "loss": 0.495, "step": 1084 }, { "epoch": 0.24, "learning_rate": 1.7698261092284436e-05, "loss": 0.5319, "step": 1085 }, { "epoch": 0.24, "learning_rate": 1.769361323987942e-05, "loss": 0.4946, "step": 1086 }, { "epoch": 0.24, "learning_rate": 1.76889613111644e-05, "loss": 0.5158, "step": 1087 }, { "epoch": 0.24, "learning_rate": 1.7684305308604108e-05, "loss": 0.5486, "step": 1088 }, { "epoch": 0.24, "learning_rate": 1.7679645234665442e-05, "loss": 0.5405, "step": 1089 }, { "epoch": 0.24, "learning_rate": 1.7674981091817444e-05, "loss": 0.5162, "step": 1090 }, { "epoch": 0.25, "learning_rate": 1.767031288253132e-05, "loss": 0.5105, "step": 1091 }, { "epoch": 0.25, "learning_rate": 1.7665640609280433e-05, "loss": 0.5238, "step": 1092 }, { "epoch": 0.25, "learning_rate": 1.7660964274540292e-05, "loss": 0.488, "step": 1093 }, { "epoch": 0.25, "learning_rate": 1.7656283880788565e-05, "loss": 0.5113, "step": 1094 }, { "epoch": 0.25, "learning_rate": 1.765159943050506e-05, "loss": 0.5103, "step": 1095 }, { "epoch": 0.25, "learning_rate": 1.7646910926171747e-05, "loss": 0.4766, "step": 1096 }, { "epoch": 0.25, "learning_rate": 1.7642218370272736e-05, "loss": 0.4974, "step": 1097 }, { "epoch": 0.25, "learning_rate": 1.7637521765294284e-05, "loss": 0.5085, "step": 1098 }, { "epoch": 0.25, "learning_rate": 1.7632821113724797e-05, "loss": 0.5064, "step": 1099 }, { "epoch": 0.25, "learning_rate": 1.7628116418054824e-05, "loss": 0.5261, "step": 1100 }, { "epoch": 0.25, "learning_rate": 1.7623407680777053e-05, "loss": 0.4704, "step": 1101 }, { "epoch": 0.25, "learning_rate": 1.7618694904386315e-05, "loss": 0.4866, "step": 1102 }, { "epoch": 0.25, "learning_rate": 1.7613978091379586e-05, "loss": 0.4729, "step": 1103 }, { "epoch": 0.25, "learning_rate": 1.7609257244255977e-05, "loss": 0.4943, "step": 1104 }, { "epoch": 0.25, "learning_rate": 1.7604532365516734e-05, "loss": 0.5507, "step": 1105 }, { "epoch": 0.25, "learning_rate": 1.7599803457665243e-05, "loss": 0.4772, "step": 1106 }, { "epoch": 0.25, "learning_rate": 1.7595070523207015e-05, "loss": 0.4912, "step": 1107 }, { "epoch": 0.25, "learning_rate": 1.7590333564649717e-05, "loss": 0.5361, "step": 1108 }, { "epoch": 0.25, "learning_rate": 1.758559258450312e-05, "loss": 0.4802, "step": 1109 }, { "epoch": 0.25, "learning_rate": 1.7580847585279144e-05, "loss": 0.551, "step": 1110 }, { "epoch": 0.25, "learning_rate": 1.757609856949184e-05, "loss": 0.5266, "step": 1111 }, { "epoch": 0.25, "learning_rate": 1.757134553965737e-05, "loss": 0.5117, "step": 1112 }, { "epoch": 0.25, "learning_rate": 1.7566588498294046e-05, "loss": 0.4991, "step": 1113 }, { "epoch": 0.25, "learning_rate": 1.756182744792228e-05, "loss": 0.5085, "step": 1114 }, { "epoch": 0.25, "learning_rate": 1.7557062391064633e-05, "loss": 0.5055, "step": 1115 }, { "epoch": 0.25, "learning_rate": 1.7552293330245765e-05, "loss": 0.49, "step": 1116 }, { "epoch": 0.25, "learning_rate": 1.754752026799248e-05, "loss": 0.5155, "step": 1117 }, { "epoch": 0.25, "learning_rate": 1.7542743206833684e-05, "loss": 0.5027, "step": 1118 }, { "epoch": 0.25, "learning_rate": 1.7537962149300412e-05, "loss": 0.4673, "step": 1119 }, { "epoch": 0.25, "learning_rate": 1.753317709792581e-05, "loss": 0.4931, "step": 1120 }, { "epoch": 0.25, "learning_rate": 1.7528388055245153e-05, "loss": 0.5097, "step": 1121 }, { "epoch": 0.25, "learning_rate": 1.7523595023795814e-05, "loss": 0.5259, "step": 1122 }, { "epoch": 0.25, "learning_rate": 1.751879800611729e-05, "loss": 0.5131, "step": 1123 }, { "epoch": 0.25, "learning_rate": 1.7513997004751178e-05, "loss": 0.5298, "step": 1124 }, { "epoch": 0.25, "learning_rate": 1.7509192022241205e-05, "loss": 0.515, "step": 1125 }, { "epoch": 0.25, "learning_rate": 1.750438306113319e-05, "loss": 0.4882, "step": 1126 }, { "epoch": 0.25, "learning_rate": 1.7499570123975075e-05, "loss": 0.5134, "step": 1127 }, { "epoch": 0.25, "learning_rate": 1.7494753213316888e-05, "loss": 0.5009, "step": 1128 }, { "epoch": 0.25, "learning_rate": 1.7489932331710785e-05, "loss": 0.4808, "step": 1129 }, { "epoch": 0.25, "learning_rate": 1.7485107481711014e-05, "loss": 0.4909, "step": 1130 }, { "epoch": 0.25, "learning_rate": 1.748027866587392e-05, "loss": 0.4714, "step": 1131 }, { "epoch": 0.25, "learning_rate": 1.747544588675796e-05, "loss": 0.5454, "step": 1132 }, { "epoch": 0.25, "learning_rate": 1.7470609146923695e-05, "loss": 0.5059, "step": 1133 }, { "epoch": 0.25, "learning_rate": 1.7465768448933768e-05, "loss": 0.4873, "step": 1134 }, { "epoch": 0.26, "learning_rate": 1.7460923795352926e-05, "loss": 0.454, "step": 1135 }, { "epoch": 0.26, "learning_rate": 1.745607518874802e-05, "loss": 0.4971, "step": 1136 }, { "epoch": 0.26, "learning_rate": 1.7451222631687987e-05, "loss": 0.4919, "step": 1137 }, { "epoch": 0.26, "learning_rate": 1.7446366126743857e-05, "loss": 0.5046, "step": 1138 }, { "epoch": 0.26, "learning_rate": 1.7441505676488758e-05, "loss": 0.4647, "step": 1139 }, { "epoch": 0.26, "learning_rate": 1.74366412834979e-05, "loss": 0.5173, "step": 1140 }, { "epoch": 0.26, "learning_rate": 1.7431772950348586e-05, "loss": 0.5043, "step": 1141 }, { "epoch": 0.26, "learning_rate": 1.742690067962021e-05, "loss": 0.5095, "step": 1142 }, { "epoch": 0.26, "learning_rate": 1.742202447389425e-05, "loss": 0.474, "step": 1143 }, { "epoch": 0.26, "learning_rate": 1.7417144335754265e-05, "loss": 0.5317, "step": 1144 }, { "epoch": 0.26, "learning_rate": 1.74122602677859e-05, "loss": 0.522, "step": 1145 }, { "epoch": 0.26, "learning_rate": 1.7407372272576885e-05, "loss": 0.5185, "step": 1146 }, { "epoch": 0.26, "learning_rate": 1.7402480352717025e-05, "loss": 0.5305, "step": 1147 }, { "epoch": 0.26, "learning_rate": 1.7397584510798208e-05, "loss": 0.4716, "step": 1148 }, { "epoch": 0.26, "learning_rate": 1.7392684749414406e-05, "loss": 0.4977, "step": 1149 }, { "epoch": 0.26, "learning_rate": 1.7387781071161655e-05, "loss": 0.488, "step": 1150 }, { "epoch": 0.26, "learning_rate": 1.7382873478638073e-05, "loss": 0.4914, "step": 1151 }, { "epoch": 0.26, "learning_rate": 1.737796197444386e-05, "loss": 0.5373, "step": 1152 }, { "epoch": 0.26, "learning_rate": 1.7373046561181263e-05, "loss": 0.507, "step": 1153 }, { "epoch": 0.26, "learning_rate": 1.7368127241454634e-05, "loss": 0.5415, "step": 1154 }, { "epoch": 0.26, "learning_rate": 1.736320401787037e-05, "loss": 0.4854, "step": 1155 }, { "epoch": 0.26, "learning_rate": 1.7358276893036947e-05, "loss": 0.4632, "step": 1156 }, { "epoch": 0.26, "learning_rate": 1.7353345869564903e-05, "loss": 0.5065, "step": 1157 }, { "epoch": 0.26, "learning_rate": 1.7348410950066844e-05, "loss": 0.5397, "step": 1158 }, { "epoch": 0.26, "learning_rate": 1.7343472137157444e-05, "loss": 0.4781, "step": 1159 }, { "epoch": 0.26, "learning_rate": 1.7338529433453433e-05, "loss": 0.5158, "step": 1160 }, { "epoch": 0.26, "learning_rate": 1.733358284157361e-05, "loss": 0.4977, "step": 1161 }, { "epoch": 0.26, "learning_rate": 1.7328632364138823e-05, "loss": 0.5179, "step": 1162 }, { "epoch": 0.26, "learning_rate": 1.7323678003771988e-05, "loss": 0.491, "step": 1163 }, { "epoch": 0.26, "learning_rate": 1.7318719763098077e-05, "loss": 0.5349, "step": 1164 }, { "epoch": 0.26, "learning_rate": 1.7313757644744114e-05, "loss": 0.505, "step": 1165 }, { "epoch": 0.26, "learning_rate": 1.7308791651339183e-05, "loss": 0.5306, "step": 1166 }, { "epoch": 0.26, "learning_rate": 1.7303821785514417e-05, "loss": 0.5337, "step": 1167 }, { "epoch": 0.26, "learning_rate": 1.7298848049903e-05, "loss": 0.4586, "step": 1168 }, { "epoch": 0.26, "learning_rate": 1.729387044714017e-05, "loss": 0.4967, "step": 1169 }, { "epoch": 0.26, "learning_rate": 1.728888897986321e-05, "loss": 0.4973, "step": 1170 }, { "epoch": 0.26, "learning_rate": 1.728390365071146e-05, "loss": 0.4843, "step": 1171 }, { "epoch": 0.26, "learning_rate": 1.727891446232629e-05, "loss": 0.5229, "step": 1172 }, { "epoch": 0.26, "learning_rate": 1.7273921417351126e-05, "loss": 0.5191, "step": 1173 }, { "epoch": 0.26, "learning_rate": 1.7268924518431437e-05, "loss": 0.5006, "step": 1174 }, { "epoch": 0.26, "learning_rate": 1.726392376821473e-05, "loss": 0.5029, "step": 1175 }, { "epoch": 0.26, "learning_rate": 1.725891916935056e-05, "loss": 0.4643, "step": 1176 }, { "epoch": 0.26, "learning_rate": 1.7253910724490507e-05, "loss": 0.4879, "step": 1177 }, { "epoch": 0.26, "learning_rate": 1.72488984362882e-05, "loss": 0.4855, "step": 1178 }, { "epoch": 0.26, "learning_rate": 1.7243882307399302e-05, "loss": 0.489, "step": 1179 }, { "epoch": 0.27, "learning_rate": 1.7238862340481513e-05, "loss": 0.5006, "step": 1180 }, { "epoch": 0.27, "learning_rate": 1.723383853819456e-05, "loss": 0.4767, "step": 1181 }, { "epoch": 0.27, "learning_rate": 1.722881090320021e-05, "loss": 0.4846, "step": 1182 }, { "epoch": 0.27, "learning_rate": 1.722377943816225e-05, "loss": 0.4751, "step": 1183 }, { "epoch": 0.27, "learning_rate": 1.721874414574651e-05, "loss": 0.5198, "step": 1184 }, { "epoch": 0.27, "learning_rate": 1.721370502862084e-05, "loss": 0.517, "step": 1185 }, { "epoch": 0.27, "learning_rate": 1.720866208945512e-05, "loss": 0.4994, "step": 1186 }, { "epoch": 0.27, "learning_rate": 1.720361533092124e-05, "loss": 0.4731, "step": 1187 }, { "epoch": 0.27, "learning_rate": 1.719856475569314e-05, "loss": 0.5207, "step": 1188 }, { "epoch": 0.27, "learning_rate": 1.719351036644676e-05, "loss": 0.4999, "step": 1189 }, { "epoch": 0.27, "learning_rate": 1.718845216586007e-05, "loss": 0.4792, "step": 1190 }, { "epoch": 0.27, "learning_rate": 1.718339015661306e-05, "loss": 0.4809, "step": 1191 }, { "epoch": 0.27, "learning_rate": 1.7178324341387736e-05, "loss": 0.4865, "step": 1192 }, { "epoch": 0.27, "learning_rate": 1.7173254722868123e-05, "loss": 0.501, "step": 1193 }, { "epoch": 0.27, "learning_rate": 1.7168181303740256e-05, "loss": 0.5065, "step": 1194 }, { "epoch": 0.27, "learning_rate": 1.7163104086692185e-05, "loss": 0.513, "step": 1195 }, { "epoch": 0.27, "learning_rate": 1.7158023074413976e-05, "loss": 0.4949, "step": 1196 }, { "epoch": 0.27, "learning_rate": 1.7152938269597706e-05, "loss": 0.4901, "step": 1197 }, { "epoch": 0.27, "learning_rate": 1.7147849674937455e-05, "loss": 0.4761, "step": 1198 }, { "epoch": 0.27, "learning_rate": 1.7142757293129318e-05, "loss": 0.5105, "step": 1199 }, { "epoch": 0.27, "learning_rate": 1.713766112687139e-05, "loss": 0.5209, "step": 1200 }, { "epoch": 0.27, "learning_rate": 1.713256117886378e-05, "loss": 0.4615, "step": 1201 }, { "epoch": 0.27, "learning_rate": 1.7127457451808588e-05, "loss": 0.4968, "step": 1202 }, { "epoch": 0.27, "learning_rate": 1.7122349948409934e-05, "loss": 0.4909, "step": 1203 }, { "epoch": 0.27, "learning_rate": 1.711723867137392e-05, "loss": 0.465, "step": 1204 }, { "epoch": 0.27, "learning_rate": 1.7112123623408658e-05, "loss": 0.4852, "step": 1205 }, { "epoch": 0.27, "learning_rate": 1.7107004807224254e-05, "loss": 0.51, "step": 1206 }, { "epoch": 0.27, "learning_rate": 1.7101882225532812e-05, "loss": 0.4906, "step": 1207 }, { "epoch": 0.27, "learning_rate": 1.7096755881048437e-05, "loss": 0.4948, "step": 1208 }, { "epoch": 0.27, "learning_rate": 1.709162577648722e-05, "loss": 0.4984, "step": 1209 }, { "epoch": 0.27, "learning_rate": 1.708649191456725e-05, "loss": 0.5245, "step": 1210 }, { "epoch": 0.27, "learning_rate": 1.708135429800859e-05, "loss": 0.4753, "step": 1211 }, { "epoch": 0.27, "learning_rate": 1.7076212929533316e-05, "loss": 0.4878, "step": 1212 }, { "epoch": 0.27, "learning_rate": 1.7071067811865477e-05, "loss": 0.4743, "step": 1213 }, { "epoch": 0.27, "learning_rate": 1.706591894773112e-05, "loss": 0.5254, "step": 1214 }, { "epoch": 0.27, "learning_rate": 1.7060766339858253e-05, "loss": 0.4913, "step": 1215 }, { "epoch": 0.27, "learning_rate": 1.70556099909769e-05, "loss": 0.5112, "step": 1216 }, { "epoch": 0.27, "learning_rate": 1.7050449903819052e-05, "loss": 0.5281, "step": 1217 }, { "epoch": 0.27, "learning_rate": 1.7045286081118668e-05, "loss": 0.4992, "step": 1218 }, { "epoch": 0.27, "learning_rate": 1.7040118525611705e-05, "loss": 0.4951, "step": 1219 }, { "epoch": 0.27, "learning_rate": 1.703494724003609e-05, "loss": 0.532, "step": 1220 }, { "epoch": 0.27, "learning_rate": 1.702977222713173e-05, "loss": 0.4947, "step": 1221 }, { "epoch": 0.27, "learning_rate": 1.70245934896405e-05, "loss": 0.4838, "step": 1222 }, { "epoch": 0.27, "learning_rate": 1.701941103030625e-05, "loss": 0.4889, "step": 1223 }, { "epoch": 0.28, "learning_rate": 1.7014224851874814e-05, "loss": 0.5177, "step": 1224 }, { "epoch": 0.28, "learning_rate": 1.7009034957093978e-05, "loss": 0.477, "step": 1225 }, { "epoch": 0.28, "learning_rate": 1.700384134871351e-05, "loss": 0.5211, "step": 1226 }, { "epoch": 0.28, "learning_rate": 1.699864402948514e-05, "loss": 0.4704, "step": 1227 }, { "epoch": 0.28, "learning_rate": 1.6993443002162567e-05, "loss": 0.4757, "step": 1228 }, { "epoch": 0.28, "learning_rate": 1.698823826950145e-05, "loss": 0.4655, "step": 1229 }, { "epoch": 0.28, "learning_rate": 1.698302983425942e-05, "loss": 0.4974, "step": 1230 }, { "epoch": 0.28, "learning_rate": 1.6977817699196057e-05, "loss": 0.4954, "step": 1231 }, { "epoch": 0.28, "learning_rate": 1.6972601867072917e-05, "loss": 0.4874, "step": 1232 }, { "epoch": 0.28, "learning_rate": 1.6967382340653497e-05, "loss": 0.5108, "step": 1233 }, { "epoch": 0.28, "learning_rate": 1.696215912270327e-05, "loss": 0.5094, "step": 1234 }, { "epoch": 0.28, "learning_rate": 1.695693221598965e-05, "loss": 0.4998, "step": 1235 }, { "epoch": 0.28, "learning_rate": 1.695170162328201e-05, "loss": 0.5084, "step": 1236 }, { "epoch": 0.28, "learning_rate": 1.694646734735168e-05, "loss": 0.4867, "step": 1237 }, { "epoch": 0.28, "learning_rate": 1.6941229390971935e-05, "loss": 0.4609, "step": 1238 }, { "epoch": 0.28, "learning_rate": 1.693598775691801e-05, "loss": 0.514, "step": 1239 }, { "epoch": 0.28, "learning_rate": 1.693074244796707e-05, "loss": 0.5274, "step": 1240 }, { "epoch": 0.28, "learning_rate": 1.6925493466898255e-05, "loss": 0.5056, "step": 1241 }, { "epoch": 0.28, "learning_rate": 1.6920240816492623e-05, "loss": 0.5156, "step": 1242 }, { "epoch": 0.28, "learning_rate": 1.691498449953319e-05, "loss": 0.5097, "step": 1243 }, { "epoch": 0.28, "learning_rate": 1.6909724518804916e-05, "loss": 0.5147, "step": 1244 }, { "epoch": 0.28, "learning_rate": 1.6904460877094693e-05, "loss": 0.4639, "step": 1245 }, { "epoch": 0.28, "learning_rate": 1.6899193577191364e-05, "loss": 0.5121, "step": 1246 }, { "epoch": 0.28, "learning_rate": 1.68939226218857e-05, "loss": 0.5082, "step": 1247 }, { "epoch": 0.28, "learning_rate": 1.6888648013970413e-05, "loss": 0.5055, "step": 1248 }, { "epoch": 0.28, "learning_rate": 1.6883369756240157e-05, "loss": 0.5186, "step": 1249 }, { "epoch": 0.28, "learning_rate": 1.6878087851491506e-05, "loss": 0.511, "step": 1250 }, { "epoch": 0.28, "learning_rate": 1.6872802302522978e-05, "loss": 0.4991, "step": 1251 }, { "epoch": 0.28, "learning_rate": 1.6867513112135012e-05, "loss": 0.5189, "step": 1252 }, { "epoch": 0.28, "learning_rate": 1.686222028312999e-05, "loss": 0.5096, "step": 1253 }, { "epoch": 0.28, "learning_rate": 1.6856923818312205e-05, "loss": 0.5152, "step": 1254 }, { "epoch": 0.28, "learning_rate": 1.6851623720487888e-05, "loss": 0.4808, "step": 1255 }, { "epoch": 0.28, "learning_rate": 1.6846319992465198e-05, "loss": 0.4995, "step": 1256 }, { "epoch": 0.28, "learning_rate": 1.6841012637054204e-05, "loss": 0.5052, "step": 1257 }, { "epoch": 0.28, "learning_rate": 1.6835701657066905e-05, "loss": 0.5231, "step": 1258 }, { "epoch": 0.28, "learning_rate": 1.683038705531722e-05, "loss": 0.5134, "step": 1259 }, { "epoch": 0.28, "learning_rate": 1.682506883462099e-05, "loss": 0.5035, "step": 1260 }, { "epoch": 0.28, "learning_rate": 1.6819746997795966e-05, "loss": 0.4816, "step": 1261 }, { "epoch": 0.28, "learning_rate": 1.6814421547661817e-05, "loss": 0.4491, "step": 1262 }, { "epoch": 0.28, "learning_rate": 1.6809092487040133e-05, "loss": 0.4796, "step": 1263 }, { "epoch": 0.28, "learning_rate": 1.680375981875441e-05, "loss": 0.5144, "step": 1264 }, { "epoch": 0.28, "learning_rate": 1.679842354563006e-05, "loss": 0.499, "step": 1265 }, { "epoch": 0.28, "learning_rate": 1.67930836704944e-05, "loss": 0.4667, "step": 1266 }, { "epoch": 0.28, "learning_rate": 1.6787740196176657e-05, "loss": 0.4742, "step": 1267 }, { "epoch": 0.28, "learning_rate": 1.6782393125507966e-05, "loss": 0.4878, "step": 1268 }, { "epoch": 0.29, "learning_rate": 1.6777042461321374e-05, "loss": 0.4929, "step": 1269 }, { "epoch": 0.29, "learning_rate": 1.6771688206451814e-05, "loss": 0.4963, "step": 1270 }, { "epoch": 0.29, "learning_rate": 1.6766330363736142e-05, "loss": 0.4913, "step": 1271 }, { "epoch": 0.29, "learning_rate": 1.6760968936013108e-05, "loss": 0.482, "step": 1272 }, { "epoch": 0.29, "learning_rate": 1.675560392612335e-05, "loss": 0.5076, "step": 1273 }, { "epoch": 0.29, "learning_rate": 1.6750235336909415e-05, "loss": 0.4999, "step": 1274 }, { "epoch": 0.29, "learning_rate": 1.6744863171215752e-05, "loss": 0.492, "step": 1275 }, { "epoch": 0.29, "learning_rate": 1.673948743188869e-05, "loss": 0.5075, "step": 1276 }, { "epoch": 0.29, "learning_rate": 1.673410812177646e-05, "loss": 0.4845, "step": 1277 }, { "epoch": 0.29, "learning_rate": 1.672872524372919e-05, "loss": 0.4722, "step": 1278 }, { "epoch": 0.29, "learning_rate": 1.6723338800598886e-05, "loss": 0.4888, "step": 1279 }, { "epoch": 0.29, "learning_rate": 1.6717948795239453e-05, "loss": 0.4285, "step": 1280 }, { "epoch": 0.29, "learning_rate": 1.6712555230506678e-05, "loss": 0.4977, "step": 1281 }, { "epoch": 0.29, "learning_rate": 1.670715810925823e-05, "loss": 0.5047, "step": 1282 }, { "epoch": 0.29, "learning_rate": 1.670175743435368e-05, "loss": 0.4918, "step": 1283 }, { "epoch": 0.29, "learning_rate": 1.669635320865446e-05, "loss": 0.511, "step": 1284 }, { "epoch": 0.29, "learning_rate": 1.6690945435023897e-05, "loss": 0.4789, "step": 1285 }, { "epoch": 0.29, "learning_rate": 1.668553411632719e-05, "loss": 0.522, "step": 1286 }, { "epoch": 0.29, "learning_rate": 1.6680119255431423e-05, "loss": 0.5134, "step": 1287 }, { "epoch": 0.29, "learning_rate": 1.6674700855205553e-05, "loss": 0.5271, "step": 1288 }, { "epoch": 0.29, "learning_rate": 1.6669278918520413e-05, "loss": 0.4623, "step": 1289 }, { "epoch": 0.29, "learning_rate": 1.666385344824871e-05, "loss": 0.5001, "step": 1290 }, { "epoch": 0.29, "learning_rate": 1.6658424447265023e-05, "loss": 0.4898, "step": 1291 }, { "epoch": 0.29, "learning_rate": 1.6652991918445804e-05, "loss": 0.4725, "step": 1292 }, { "epoch": 0.29, "learning_rate": 1.6647555864669366e-05, "loss": 0.5107, "step": 1293 }, { "epoch": 0.29, "learning_rate": 1.66421162888159e-05, "loss": 0.4776, "step": 1294 }, { "epoch": 0.29, "learning_rate": 1.6636673193767456e-05, "loss": 0.5479, "step": 1295 }, { "epoch": 0.29, "learning_rate": 1.6631226582407954e-05, "loss": 0.483, "step": 1296 }, { "epoch": 0.29, "learning_rate": 1.6625776457623174e-05, "loss": 0.4964, "step": 1297 }, { "epoch": 0.29, "learning_rate": 1.662032282230075e-05, "loss": 0.4687, "step": 1298 }, { "epoch": 0.29, "learning_rate": 1.6614865679330195e-05, "loss": 0.5008, "step": 1299 }, { "epoch": 0.29, "learning_rate": 1.6609405031602865e-05, "loss": 0.5477, "step": 1300 }, { "epoch": 0.29, "learning_rate": 1.6603940882011974e-05, "loss": 0.4806, "step": 1301 }, { "epoch": 0.29, "learning_rate": 1.6598473233452597e-05, "loss": 0.4919, "step": 1302 }, { "epoch": 0.29, "learning_rate": 1.6593002088821658e-05, "loss": 0.5029, "step": 1303 }, { "epoch": 0.29, "learning_rate": 1.658752745101794e-05, "loss": 0.4907, "step": 1304 }, { "epoch": 0.29, "learning_rate": 1.6582049322942067e-05, "loss": 0.5188, "step": 1305 }, { "epoch": 0.29, "learning_rate": 1.6576567707496524e-05, "loss": 0.4808, "step": 1306 }, { "epoch": 0.29, "learning_rate": 1.657108260758563e-05, "loss": 0.4824, "step": 1307 }, { "epoch": 0.29, "learning_rate": 1.6565594026115564e-05, "loss": 0.4908, "step": 1308 }, { "epoch": 0.29, "learning_rate": 1.656010196599434e-05, "loss": 0.495, "step": 1309 }, { "epoch": 0.29, "learning_rate": 1.6554606430131817e-05, "loss": 0.4744, "step": 1310 }, { "epoch": 0.29, "learning_rate": 1.65491074214397e-05, "loss": 0.4911, "step": 1311 }, { "epoch": 0.29, "learning_rate": 1.654360494283153e-05, "loss": 0.4921, "step": 1312 }, { "epoch": 0.3, "learning_rate": 1.6538098997222688e-05, "loss": 0.5168, "step": 1313 }, { "epoch": 0.3, "learning_rate": 1.653258958753039e-05, "loss": 0.4879, "step": 1314 }, { "epoch": 0.3, "learning_rate": 1.6527076716673686e-05, "loss": 0.5032, "step": 1315 }, { "epoch": 0.3, "learning_rate": 1.6521560387573466e-05, "loss": 0.5252, "step": 1316 }, { "epoch": 0.3, "learning_rate": 1.6516040603152448e-05, "loss": 0.4907, "step": 1317 }, { "epoch": 0.3, "learning_rate": 1.651051736633518e-05, "loss": 0.5139, "step": 1318 }, { "epoch": 0.3, "learning_rate": 1.6504990680048047e-05, "loss": 0.4808, "step": 1319 }, { "epoch": 0.3, "learning_rate": 1.649946054721925e-05, "loss": 0.4883, "step": 1320 }, { "epoch": 0.3, "learning_rate": 1.6493926970778815e-05, "loss": 0.4836, "step": 1321 }, { "epoch": 0.3, "learning_rate": 1.648838995365861e-05, "loss": 0.4902, "step": 1322 }, { "epoch": 0.3, "learning_rate": 1.6482849498792307e-05, "loss": 0.4642, "step": 1323 }, { "epoch": 0.3, "learning_rate": 1.6477305609115415e-05, "loss": 0.4776, "step": 1324 }, { "epoch": 0.3, "learning_rate": 1.647175828756525e-05, "loss": 0.4582, "step": 1325 }, { "epoch": 0.3, "learning_rate": 1.6466207537080953e-05, "loss": 0.492, "step": 1326 }, { "epoch": 0.3, "learning_rate": 1.6460653360603475e-05, "loss": 0.4967, "step": 1327 }, { "epoch": 0.3, "learning_rate": 1.6455095761075597e-05, "loss": 0.4653, "step": 1328 }, { "epoch": 0.3, "learning_rate": 1.6449534741441893e-05, "loss": 0.4633, "step": 1329 }, { "epoch": 0.3, "learning_rate": 1.644397030464877e-05, "loss": 0.4929, "step": 1330 }, { "epoch": 0.3, "learning_rate": 1.6438402453644437e-05, "loss": 0.4927, "step": 1331 }, { "epoch": 0.3, "learning_rate": 1.6432831191378896e-05, "loss": 0.4923, "step": 1332 }, { "epoch": 0.3, "learning_rate": 1.6427256520803985e-05, "loss": 0.5285, "step": 1333 }, { "epoch": 0.3, "learning_rate": 1.6421678444873327e-05, "loss": 0.5001, "step": 1334 }, { "epoch": 0.3, "learning_rate": 1.641609696654236e-05, "loss": 0.4798, "step": 1335 }, { "epoch": 0.3, "learning_rate": 1.641051208876832e-05, "loss": 0.5381, "step": 1336 }, { "epoch": 0.3, "learning_rate": 1.6404923814510242e-05, "loss": 0.5153, "step": 1337 }, { "epoch": 0.3, "learning_rate": 1.6399332146728964e-05, "loss": 0.5087, "step": 1338 }, { "epoch": 0.3, "learning_rate": 1.6393737088387126e-05, "loss": 0.4985, "step": 1339 }, { "epoch": 0.3, "learning_rate": 1.6388138642449155e-05, "loss": 0.483, "step": 1340 }, { "epoch": 0.3, "learning_rate": 1.638253681188128e-05, "loss": 0.459, "step": 1341 }, { "epoch": 0.3, "learning_rate": 1.637693159965152e-05, "loss": 0.506, "step": 1342 }, { "epoch": 0.3, "learning_rate": 1.637132300872969e-05, "loss": 0.4949, "step": 1343 }, { "epoch": 0.3, "learning_rate": 1.6365711042087385e-05, "loss": 0.4922, "step": 1344 }, { "epoch": 0.3, "learning_rate": 1.6360095702698e-05, "loss": 0.4816, "step": 1345 }, { "epoch": 0.3, "learning_rate": 1.6354476993536713e-05, "loss": 0.4819, "step": 1346 }, { "epoch": 0.3, "learning_rate": 1.6348854917580485e-05, "loss": 0.4758, "step": 1347 }, { "epoch": 0.3, "learning_rate": 1.634322947780806e-05, "loss": 0.4952, "step": 1348 }, { "epoch": 0.3, "learning_rate": 1.6337600677199973e-05, "loss": 0.492, "step": 1349 }, { "epoch": 0.3, "learning_rate": 1.6331968518738533e-05, "loss": 0.4746, "step": 1350 }, { "epoch": 0.3, "learning_rate": 1.6326333005407824e-05, "loss": 0.5055, "step": 1351 }, { "epoch": 0.3, "learning_rate": 1.6320694140193712e-05, "loss": 0.4904, "step": 1352 }, { "epoch": 0.3, "learning_rate": 1.6315051926083844e-05, "loss": 0.4674, "step": 1353 }, { "epoch": 0.3, "learning_rate": 1.6309406366067633e-05, "loss": 0.5123, "step": 1354 }, { "epoch": 0.3, "learning_rate": 1.630375746313627e-05, "loss": 0.4885, "step": 1355 }, { "epoch": 0.3, "learning_rate": 1.6298105220282715e-05, "loss": 0.4938, "step": 1356 }, { "epoch": 0.3, "learning_rate": 1.6292449640501694e-05, "loss": 0.5116, "step": 1357 }, { "epoch": 0.31, "learning_rate": 1.628679072678971e-05, "loss": 0.5247, "step": 1358 }, { "epoch": 0.31, "learning_rate": 1.6281128482145027e-05, "loss": 0.4852, "step": 1359 }, { "epoch": 0.31, "learning_rate": 1.627546290956767e-05, "loss": 0.4757, "step": 1360 }, { "epoch": 0.31, "learning_rate": 1.626979401205944e-05, "loss": 0.5122, "step": 1361 }, { "epoch": 0.31, "learning_rate": 1.626412179262388e-05, "loss": 0.4861, "step": 1362 }, { "epoch": 0.31, "learning_rate": 1.6258446254266313e-05, "loss": 0.483, "step": 1363 }, { "epoch": 0.31, "learning_rate": 1.6252767399993807e-05, "loss": 0.4777, "step": 1364 }, { "epoch": 0.31, "learning_rate": 1.6247085232815193e-05, "loss": 0.4879, "step": 1365 }, { "epoch": 0.31, "learning_rate": 1.6241399755741055e-05, "loss": 0.4577, "step": 1366 }, { "epoch": 0.31, "learning_rate": 1.623571097178373e-05, "loss": 0.5206, "step": 1367 }, { "epoch": 0.31, "learning_rate": 1.6230018883957314e-05, "loss": 0.4554, "step": 1368 }, { "epoch": 0.31, "learning_rate": 1.6224323495277646e-05, "loss": 0.4775, "step": 1369 }, { "epoch": 0.31, "learning_rate": 1.621862480876231e-05, "loss": 0.5077, "step": 1370 }, { "epoch": 0.31, "learning_rate": 1.6212922827430652e-05, "loss": 0.5158, "step": 1371 }, { "epoch": 0.31, "learning_rate": 1.6207217554303747e-05, "loss": 0.5002, "step": 1372 }, { "epoch": 0.31, "learning_rate": 1.620150899240443e-05, "loss": 0.4717, "step": 1373 }, { "epoch": 0.31, "learning_rate": 1.619579714475726e-05, "loss": 0.4767, "step": 1374 }, { "epoch": 0.31, "learning_rate": 1.619008201438856e-05, "loss": 0.496, "step": 1375 }, { "epoch": 0.31, "learning_rate": 1.618436360432637e-05, "loss": 0.4679, "step": 1376 }, { "epoch": 0.31, "learning_rate": 1.617864191760048e-05, "loss": 0.4938, "step": 1377 }, { "epoch": 0.31, "learning_rate": 1.6172916957242416e-05, "loss": 0.5001, "step": 1378 }, { "epoch": 0.31, "learning_rate": 1.6167188726285433e-05, "loss": 0.4722, "step": 1379 }, { "epoch": 0.31, "learning_rate": 1.616145722776452e-05, "loss": 0.509, "step": 1380 }, { "epoch": 0.31, "learning_rate": 1.615572246471641e-05, "loss": 0.4791, "step": 1381 }, { "epoch": 0.31, "learning_rate": 1.614998444017954e-05, "loss": 0.5072, "step": 1382 }, { "epoch": 0.31, "learning_rate": 1.6144243157194093e-05, "loss": 0.4935, "step": 1383 }, { "epoch": 0.31, "learning_rate": 1.6138498618801982e-05, "loss": 0.4839, "step": 1384 }, { "epoch": 0.31, "learning_rate": 1.6132750828046838e-05, "loss": 0.5151, "step": 1385 }, { "epoch": 0.31, "learning_rate": 1.6126999787974006e-05, "loss": 0.5286, "step": 1386 }, { "epoch": 0.31, "learning_rate": 1.6121245501630567e-05, "loss": 0.4773, "step": 1387 }, { "epoch": 0.31, "learning_rate": 1.611548797206532e-05, "loss": 0.5001, "step": 1388 }, { "epoch": 0.31, "learning_rate": 1.6109727202328778e-05, "loss": 0.4866, "step": 1389 }, { "epoch": 0.31, "learning_rate": 1.6103963195473164e-05, "loss": 0.4674, "step": 1390 }, { "epoch": 0.31, "learning_rate": 1.609819595455243e-05, "loss": 0.4827, "step": 1391 }, { "epoch": 0.31, "learning_rate": 1.6092425482622237e-05, "loss": 0.4633, "step": 1392 }, { "epoch": 0.31, "learning_rate": 1.6086651782739954e-05, "loss": 0.521, "step": 1393 }, { "epoch": 0.31, "learning_rate": 1.6080874857964666e-05, "loss": 0.488, "step": 1394 }, { "epoch": 0.31, "learning_rate": 1.6075094711357156e-05, "loss": 0.5181, "step": 1395 }, { "epoch": 0.31, "learning_rate": 1.6069311345979927e-05, "loss": 0.4907, "step": 1396 }, { "epoch": 0.31, "learning_rate": 1.6063524764897177e-05, "loss": 0.4976, "step": 1397 }, { "epoch": 0.31, "learning_rate": 1.605773497117482e-05, "loss": 0.4757, "step": 1398 }, { "epoch": 0.31, "learning_rate": 1.605194196788046e-05, "loss": 0.5034, "step": 1399 }, { "epoch": 0.31, "learning_rate": 1.6046145758083404e-05, "loss": 0.4848, "step": 1400 }, { "epoch": 0.31, "learning_rate": 1.604034634485466e-05, "loss": 0.4979, "step": 1401 }, { "epoch": 0.32, "learning_rate": 1.603454373126694e-05, "loss": 0.5244, "step": 1402 }, { "epoch": 0.32, "learning_rate": 1.6028737920394636e-05, "loss": 0.5206, "step": 1403 }, { "epoch": 0.32, "learning_rate": 1.602292891531385e-05, "loss": 0.488, "step": 1404 }, { "epoch": 0.32, "learning_rate": 1.601711671910237e-05, "loss": 0.5023, "step": 1405 }, { "epoch": 0.32, "learning_rate": 1.6011301334839662e-05, "loss": 0.5222, "step": 1406 }, { "epoch": 0.32, "learning_rate": 1.6005482765606906e-05, "loss": 0.499, "step": 1407 }, { "epoch": 0.32, "learning_rate": 1.5999661014486956e-05, "loss": 0.5027, "step": 1408 }, { "epoch": 0.32, "learning_rate": 1.599383608456435e-05, "loss": 0.4948, "step": 1409 }, { "epoch": 0.32, "learning_rate": 1.598800797892531e-05, "loss": 0.5122, "step": 1410 }, { "epoch": 0.32, "learning_rate": 1.5982176700657745e-05, "loss": 0.5015, "step": 1411 }, { "epoch": 0.32, "learning_rate": 1.5976342252851244e-05, "loss": 0.5014, "step": 1412 }, { "epoch": 0.32, "learning_rate": 1.5970504638597075e-05, "loss": 0.456, "step": 1413 }, { "epoch": 0.32, "learning_rate": 1.5964663860988186e-05, "loss": 0.4918, "step": 1414 }, { "epoch": 0.32, "learning_rate": 1.5958819923119192e-05, "loss": 0.5013, "step": 1415 }, { "epoch": 0.32, "learning_rate": 1.5952972828086394e-05, "loss": 0.5218, "step": 1416 }, { "epoch": 0.32, "learning_rate": 1.5947122578987757e-05, "loss": 0.5385, "step": 1417 }, { "epoch": 0.32, "learning_rate": 1.594126917892292e-05, "loss": 0.4744, "step": 1418 }, { "epoch": 0.32, "learning_rate": 1.59354126309932e-05, "loss": 0.4599, "step": 1419 }, { "epoch": 0.32, "learning_rate": 1.5929552938301567e-05, "loss": 0.4924, "step": 1420 }, { "epoch": 0.32, "learning_rate": 1.5923690103952668e-05, "loss": 0.4827, "step": 1421 }, { "epoch": 0.32, "learning_rate": 1.5917824131052806e-05, "loss": 0.5007, "step": 1422 }, { "epoch": 0.32, "learning_rate": 1.591195502270995e-05, "loss": 0.4955, "step": 1423 }, { "epoch": 0.32, "learning_rate": 1.5906082782033744e-05, "loss": 0.4889, "step": 1424 }, { "epoch": 0.32, "learning_rate": 1.5900207412135468e-05, "loss": 0.4902, "step": 1425 }, { "epoch": 0.32, "learning_rate": 1.5894328916128075e-05, "loss": 0.4995, "step": 1426 }, { "epoch": 0.32, "learning_rate": 1.588844729712617e-05, "loss": 0.4703, "step": 1427 }, { "epoch": 0.32, "learning_rate": 1.5882562558246014e-05, "loss": 0.4797, "step": 1428 }, { "epoch": 0.32, "learning_rate": 1.5876674702605524e-05, "loss": 0.4778, "step": 1429 }, { "epoch": 0.32, "learning_rate": 1.5870783733324257e-05, "loss": 0.4876, "step": 1430 }, { "epoch": 0.32, "learning_rate": 1.5864889653523433e-05, "loss": 0.504, "step": 1431 }, { "epoch": 0.32, "learning_rate": 1.585899246632592e-05, "loss": 0.5209, "step": 1432 }, { "epoch": 0.32, "learning_rate": 1.5853092174856214e-05, "loss": 0.4614, "step": 1433 }, { "epoch": 0.32, "learning_rate": 1.5847188782240473e-05, "loss": 0.4497, "step": 1434 }, { "epoch": 0.32, "learning_rate": 1.58412822916065e-05, "loss": 0.4984, "step": 1435 }, { "epoch": 0.32, "learning_rate": 1.5835372706083725e-05, "loss": 0.4932, "step": 1436 }, { "epoch": 0.32, "learning_rate": 1.5829460028803232e-05, "loss": 0.5048, "step": 1437 }, { "epoch": 0.32, "learning_rate": 1.5823544262897735e-05, "loss": 0.4939, "step": 1438 }, { "epoch": 0.32, "learning_rate": 1.5817625411501583e-05, "loss": 0.5165, "step": 1439 }, { "epoch": 0.32, "learning_rate": 1.5811703477750775e-05, "loss": 0.4907, "step": 1440 }, { "epoch": 0.32, "learning_rate": 1.5805778464782916e-05, "loss": 0.478, "step": 1441 }, { "epoch": 0.32, "learning_rate": 1.5799850375737267e-05, "loss": 0.4898, "step": 1442 }, { "epoch": 0.32, "learning_rate": 1.5793919213754704e-05, "loss": 0.4914, "step": 1443 }, { "epoch": 0.32, "learning_rate": 1.5787984981977745e-05, "loss": 0.4645, "step": 1444 }, { "epoch": 0.32, "learning_rate": 1.578204768355052e-05, "loss": 0.5136, "step": 1445 }, { "epoch": 0.32, "learning_rate": 1.577610732161879e-05, "loss": 0.5084, "step": 1446 }, { "epoch": 0.33, "learning_rate": 1.5770163899329943e-05, "loss": 0.5194, "step": 1447 }, { "epoch": 0.33, "learning_rate": 1.5764217419832982e-05, "loss": 0.452, "step": 1448 }, { "epoch": 0.33, "learning_rate": 1.5758267886278533e-05, "loss": 0.48, "step": 1449 }, { "epoch": 0.33, "learning_rate": 1.5752315301818838e-05, "loss": 0.4657, "step": 1450 }, { "epoch": 0.33, "learning_rate": 1.5746359669607758e-05, "loss": 0.4911, "step": 1451 }, { "epoch": 0.33, "learning_rate": 1.574040099280077e-05, "loss": 0.4621, "step": 1452 }, { "epoch": 0.33, "learning_rate": 1.5734439274554962e-05, "loss": 0.4848, "step": 1453 }, { "epoch": 0.33, "learning_rate": 1.572847451802903e-05, "loss": 0.4892, "step": 1454 }, { "epoch": 0.33, "learning_rate": 1.5722506726383286e-05, "loss": 0.4904, "step": 1455 }, { "epoch": 0.33, "learning_rate": 1.5716535902779645e-05, "loss": 0.5016, "step": 1456 }, { "epoch": 0.33, "learning_rate": 1.5710562050381633e-05, "loss": 0.5146, "step": 1457 }, { "epoch": 0.33, "learning_rate": 1.5704585172354377e-05, "loss": 0.4597, "step": 1458 }, { "epoch": 0.33, "learning_rate": 1.5698605271864606e-05, "loss": 0.4817, "step": 1459 }, { "epoch": 0.33, "learning_rate": 1.5692622352080662e-05, "loss": 0.5041, "step": 1460 }, { "epoch": 0.33, "learning_rate": 1.568663641617247e-05, "loss": 0.4708, "step": 1461 }, { "epoch": 0.33, "learning_rate": 1.568064746731156e-05, "loss": 0.4637, "step": 1462 }, { "epoch": 0.33, "learning_rate": 1.5674655508671062e-05, "loss": 0.4981, "step": 1463 }, { "epoch": 0.33, "learning_rate": 1.56686605434257e-05, "loss": 0.5135, "step": 1464 }, { "epoch": 0.33, "learning_rate": 1.5662662574751785e-05, "loss": 0.5004, "step": 1465 }, { "epoch": 0.33, "learning_rate": 1.5656661605827216e-05, "loss": 0.5115, "step": 1466 }, { "epoch": 0.33, "learning_rate": 1.5650657639831502e-05, "loss": 0.4956, "step": 1467 }, { "epoch": 0.33, "learning_rate": 1.5644650679945727e-05, "loss": 0.4734, "step": 1468 }, { "epoch": 0.33, "learning_rate": 1.5638640729352548e-05, "loss": 0.493, "step": 1469 }, { "epoch": 0.33, "learning_rate": 1.5632627791236225e-05, "loss": 0.4778, "step": 1470 }, { "epoch": 0.33, "learning_rate": 1.56266118687826e-05, "loss": 0.5071, "step": 1471 }, { "epoch": 0.33, "learning_rate": 1.5620592965179087e-05, "loss": 0.5102, "step": 1472 }, { "epoch": 0.33, "learning_rate": 1.5614571083614683e-05, "loss": 0.486, "step": 1473 }, { "epoch": 0.33, "learning_rate": 1.5608546227279967e-05, "loss": 0.4621, "step": 1474 }, { "epoch": 0.33, "learning_rate": 1.560251839936709e-05, "loss": 0.5261, "step": 1475 }, { "epoch": 0.33, "learning_rate": 1.5596487603069783e-05, "loss": 0.4564, "step": 1476 }, { "epoch": 0.33, "learning_rate": 1.559045384158333e-05, "loss": 0.4781, "step": 1477 }, { "epoch": 0.33, "learning_rate": 1.5584417118104615e-05, "loss": 0.5345, "step": 1478 }, { "epoch": 0.33, "learning_rate": 1.557837743583208e-05, "loss": 0.4876, "step": 1479 }, { "epoch": 0.33, "learning_rate": 1.5572334797965717e-05, "loss": 0.5084, "step": 1480 }, { "epoch": 0.33, "learning_rate": 1.556628920770711e-05, "loss": 0.4879, "step": 1481 }, { "epoch": 0.33, "learning_rate": 1.5560240668259393e-05, "loss": 0.4943, "step": 1482 }, { "epoch": 0.33, "learning_rate": 1.5554189182827268e-05, "loss": 0.5084, "step": 1483 }, { "epoch": 0.33, "learning_rate": 1.5548134754616998e-05, "loss": 0.4918, "step": 1484 }, { "epoch": 0.33, "learning_rate": 1.5542077386836395e-05, "loss": 0.4877, "step": 1485 }, { "epoch": 0.33, "learning_rate": 1.5536017082694846e-05, "loss": 0.5167, "step": 1486 }, { "epoch": 0.33, "learning_rate": 1.552995384540328e-05, "loss": 0.5108, "step": 1487 }, { "epoch": 0.33, "learning_rate": 1.5523887678174178e-05, "loss": 0.4718, "step": 1488 }, { "epoch": 0.33, "learning_rate": 1.551781858422159e-05, "loss": 0.5017, "step": 1489 }, { "epoch": 0.33, "learning_rate": 1.55117465667611e-05, "loss": 0.4397, "step": 1490 }, { "epoch": 0.34, "learning_rate": 1.5505671629009852e-05, "loss": 0.5203, "step": 1491 }, { "epoch": 0.34, "learning_rate": 1.549959377418653e-05, "loss": 0.503, "step": 1492 }, { "epoch": 0.34, "learning_rate": 1.5493513005511358e-05, "loss": 0.5154, "step": 1493 }, { "epoch": 0.34, "learning_rate": 1.5487429326206126e-05, "loss": 0.5104, "step": 1494 }, { "epoch": 0.34, "learning_rate": 1.5481342739494148e-05, "loss": 0.4941, "step": 1495 }, { "epoch": 0.34, "learning_rate": 1.5475253248600276e-05, "loss": 0.5284, "step": 1496 }, { "epoch": 0.34, "learning_rate": 1.5469160856750914e-05, "loss": 0.4769, "step": 1497 }, { "epoch": 0.34, "learning_rate": 1.5463065567173995e-05, "loss": 0.4532, "step": 1498 }, { "epoch": 0.34, "learning_rate": 1.5456967383098983e-05, "loss": 0.4731, "step": 1499 }, { "epoch": 0.34, "learning_rate": 1.545086630775689e-05, "loss": 0.4815, "step": 1500 }, { "epoch": 0.34, "learning_rate": 1.5444762344380243e-05, "loss": 0.5104, "step": 1501 }, { "epoch": 0.34, "learning_rate": 1.5438655496203114e-05, "loss": 0.4798, "step": 1502 }, { "epoch": 0.34, "learning_rate": 1.5432545766461095e-05, "loss": 0.5049, "step": 1503 }, { "epoch": 0.34, "learning_rate": 1.54264331583913e-05, "loss": 0.4828, "step": 1504 }, { "epoch": 0.34, "learning_rate": 1.542031767523238e-05, "loss": 0.5139, "step": 1505 }, { "epoch": 0.34, "learning_rate": 1.541419932022451e-05, "loss": 0.4705, "step": 1506 }, { "epoch": 0.34, "learning_rate": 1.5408078096609368e-05, "loss": 0.4765, "step": 1507 }, { "epoch": 0.34, "learning_rate": 1.5401954007630168e-05, "loss": 0.4736, "step": 1508 }, { "epoch": 0.34, "learning_rate": 1.5395827056531643e-05, "loss": 0.494, "step": 1509 }, { "epoch": 0.34, "learning_rate": 1.5389697246560037e-05, "loss": 0.454, "step": 1510 }, { "epoch": 0.34, "learning_rate": 1.5383564580963105e-05, "loss": 0.5111, "step": 1511 }, { "epoch": 0.34, "learning_rate": 1.5377429062990122e-05, "loss": 0.4948, "step": 1512 }, { "epoch": 0.34, "learning_rate": 1.537129069589187e-05, "loss": 0.4529, "step": 1513 }, { "epoch": 0.34, "learning_rate": 1.5365149482920646e-05, "loss": 0.5093, "step": 1514 }, { "epoch": 0.34, "learning_rate": 1.535900542733025e-05, "loss": 0.4938, "step": 1515 }, { "epoch": 0.34, "learning_rate": 1.5352858532375988e-05, "loss": 0.4982, "step": 1516 }, { "epoch": 0.34, "learning_rate": 1.534670880131467e-05, "loss": 0.5055, "step": 1517 }, { "epoch": 0.34, "learning_rate": 1.534055623740462e-05, "loss": 0.487, "step": 1518 }, { "epoch": 0.34, "learning_rate": 1.533440084390564e-05, "loss": 0.5035, "step": 1519 }, { "epoch": 0.34, "learning_rate": 1.532824262407905e-05, "loss": 0.4967, "step": 1520 }, { "epoch": 0.34, "learning_rate": 1.5322081581187664e-05, "loss": 0.514, "step": 1521 }, { "epoch": 0.34, "learning_rate": 1.531591771849579e-05, "loss": 0.4873, "step": 1522 }, { "epoch": 0.34, "learning_rate": 1.5309751039269226e-05, "loss": 0.4903, "step": 1523 }, { "epoch": 0.34, "learning_rate": 1.5303581546775263e-05, "loss": 0.4903, "step": 1524 }, { "epoch": 0.34, "learning_rate": 1.5297409244282694e-05, "loss": 0.4659, "step": 1525 }, { "epoch": 0.34, "learning_rate": 1.529123413506179e-05, "loss": 0.4424, "step": 1526 }, { "epoch": 0.34, "learning_rate": 1.528505622238431e-05, "loss": 0.4855, "step": 1527 }, { "epoch": 0.34, "learning_rate": 1.5278875509523493e-05, "loss": 0.4916, "step": 1528 }, { "epoch": 0.34, "learning_rate": 1.5272691999754084e-05, "loss": 0.4788, "step": 1529 }, { "epoch": 0.34, "learning_rate": 1.526650569635228e-05, "loss": 0.4822, "step": 1530 }, { "epoch": 0.34, "learning_rate": 1.526031660259578e-05, "loss": 0.5076, "step": 1531 }, { "epoch": 0.34, "learning_rate": 1.5254124721763753e-05, "loss": 0.497, "step": 1532 }, { "epoch": 0.34, "learning_rate": 1.5247930057136842e-05, "loss": 0.4693, "step": 1533 }, { "epoch": 0.34, "learning_rate": 1.5241732611997174e-05, "loss": 0.468, "step": 1534 }, { "epoch": 0.34, "learning_rate": 1.5235532389628338e-05, "loss": 0.4703, "step": 1535 }, { "epoch": 0.35, "learning_rate": 1.5229329393315401e-05, "loss": 0.5163, "step": 1536 }, { "epoch": 0.35, "learning_rate": 1.5223123626344904e-05, "loss": 0.5213, "step": 1537 }, { "epoch": 0.35, "learning_rate": 1.5216915092004847e-05, "loss": 0.5008, "step": 1538 }, { "epoch": 0.35, "learning_rate": 1.52107037935847e-05, "loss": 0.4711, "step": 1539 }, { "epoch": 0.35, "learning_rate": 1.5204489734375397e-05, "loss": 0.4813, "step": 1540 }, { "epoch": 0.35, "learning_rate": 1.5198272917669337e-05, "loss": 0.4752, "step": 1541 }, { "epoch": 0.35, "learning_rate": 1.519205334676038e-05, "loss": 0.4964, "step": 1542 }, { "epoch": 0.35, "learning_rate": 1.5185831024943837e-05, "loss": 0.4577, "step": 1543 }, { "epoch": 0.35, "learning_rate": 1.517960595551649e-05, "loss": 0.4979, "step": 1544 }, { "epoch": 0.35, "learning_rate": 1.5173378141776569e-05, "loss": 0.4945, "step": 1545 }, { "epoch": 0.35, "learning_rate": 1.5167147587023756e-05, "loss": 0.5134, "step": 1546 }, { "epoch": 0.35, "learning_rate": 1.516091429455919e-05, "loss": 0.4805, "step": 1547 }, { "epoch": 0.35, "learning_rate": 1.5154678267685456e-05, "loss": 0.4871, "step": 1548 }, { "epoch": 0.35, "learning_rate": 1.5148439509706596e-05, "loss": 0.4755, "step": 1549 }, { "epoch": 0.35, "learning_rate": 1.5142198023928088e-05, "loss": 0.4947, "step": 1550 }, { "epoch": 0.35, "learning_rate": 1.5135953813656863e-05, "loss": 0.4979, "step": 1551 }, { "epoch": 0.35, "learning_rate": 1.5129706882201291e-05, "loss": 0.4821, "step": 1552 }, { "epoch": 0.35, "learning_rate": 1.5123457232871191e-05, "loss": 0.4973, "step": 1553 }, { "epoch": 0.35, "learning_rate": 1.5117204868977815e-05, "loss": 0.502, "step": 1554 }, { "epoch": 0.35, "learning_rate": 1.5110949793833856e-05, "loss": 0.4676, "step": 1555 }, { "epoch": 0.35, "learning_rate": 1.5104692010753437e-05, "loss": 0.5063, "step": 1556 }, { "epoch": 0.35, "learning_rate": 1.5098431523052126e-05, "loss": 0.4689, "step": 1557 }, { "epoch": 0.35, "learning_rate": 1.5092168334046925e-05, "loss": 0.5009, "step": 1558 }, { "epoch": 0.35, "learning_rate": 1.5085902447056249e-05, "loss": 0.4661, "step": 1559 }, { "epoch": 0.35, "learning_rate": 1.5079633865399968e-05, "loss": 0.458, "step": 1560 }, { "epoch": 0.35, "learning_rate": 1.507336259239936e-05, "loss": 0.4762, "step": 1561 }, { "epoch": 0.35, "learning_rate": 1.5067088631377136e-05, "loss": 0.4756, "step": 1562 }, { "epoch": 0.35, "learning_rate": 1.5060811985657433e-05, "loss": 0.4446, "step": 1563 }, { "epoch": 0.35, "learning_rate": 1.505453265856581e-05, "loss": 0.4781, "step": 1564 }, { "epoch": 0.35, "learning_rate": 1.5048250653429243e-05, "loss": 0.5098, "step": 1565 }, { "epoch": 0.35, "learning_rate": 1.504196597357613e-05, "loss": 0.4982, "step": 1566 }, { "epoch": 0.35, "learning_rate": 1.5035678622336283e-05, "loss": 0.5096, "step": 1567 }, { "epoch": 0.35, "learning_rate": 1.5029388603040934e-05, "loss": 0.4726, "step": 1568 }, { "epoch": 0.35, "learning_rate": 1.5023095919022728e-05, "loss": 0.4697, "step": 1569 }, { "epoch": 0.35, "learning_rate": 1.5016800573615718e-05, "loss": 0.4652, "step": 1570 }, { "epoch": 0.35, "learning_rate": 1.5010502570155365e-05, "loss": 0.4772, "step": 1571 }, { "epoch": 0.35, "learning_rate": 1.500420191197855e-05, "loss": 0.5201, "step": 1572 }, { "epoch": 0.35, "learning_rate": 1.4997898602423551e-05, "loss": 0.476, "step": 1573 }, { "epoch": 0.35, "learning_rate": 1.499159264483005e-05, "loss": 0.4884, "step": 1574 }, { "epoch": 0.35, "learning_rate": 1.4985284042539136e-05, "loss": 0.4962, "step": 1575 }, { "epoch": 0.35, "learning_rate": 1.49789727988933e-05, "loss": 0.511, "step": 1576 }, { "epoch": 0.35, "learning_rate": 1.497265891723643e-05, "loss": 0.4746, "step": 1577 }, { "epoch": 0.35, "learning_rate": 1.4966342400913812e-05, "loss": 0.4594, "step": 1578 }, { "epoch": 0.35, "learning_rate": 1.4960023253272125e-05, "loss": 0.4835, "step": 1579 }, { "epoch": 0.36, "learning_rate": 1.495370147765945e-05, "loss": 0.4674, "step": 1580 }, { "epoch": 0.36, "learning_rate": 1.4947377077425253e-05, "loss": 0.4854, "step": 1581 }, { "epoch": 0.36, "learning_rate": 1.4941050055920397e-05, "loss": 0.4821, "step": 1582 }, { "epoch": 0.36, "learning_rate": 1.4934720416497126e-05, "loss": 0.5002, "step": 1583 }, { "epoch": 0.36, "learning_rate": 1.4928388162509078e-05, "loss": 0.4765, "step": 1584 }, { "epoch": 0.36, "learning_rate": 1.4922053297311267e-05, "loss": 0.4798, "step": 1585 }, { "epoch": 0.36, "learning_rate": 1.4915715824260109e-05, "loss": 0.4906, "step": 1586 }, { "epoch": 0.36, "learning_rate": 1.4909375746713382e-05, "loss": 0.4647, "step": 1587 }, { "epoch": 0.36, "learning_rate": 1.4903033068030249e-05, "loss": 0.4587, "step": 1588 }, { "epoch": 0.36, "learning_rate": 1.489668779157126e-05, "loss": 0.4646, "step": 1589 }, { "epoch": 0.36, "learning_rate": 1.4890339920698334e-05, "loss": 0.5053, "step": 1590 }, { "epoch": 0.36, "learning_rate": 1.4883989458774764e-05, "loss": 0.4779, "step": 1591 }, { "epoch": 0.36, "learning_rate": 1.4877636409165216e-05, "loss": 0.4851, "step": 1592 }, { "epoch": 0.36, "learning_rate": 1.4871280775235734e-05, "loss": 0.4812, "step": 1593 }, { "epoch": 0.36, "learning_rate": 1.4864922560353722e-05, "loss": 0.4943, "step": 1594 }, { "epoch": 0.36, "learning_rate": 1.4858561767887954e-05, "loss": 0.4562, "step": 1595 }, { "epoch": 0.36, "learning_rate": 1.4852198401208578e-05, "loss": 0.4969, "step": 1596 }, { "epoch": 0.36, "learning_rate": 1.4845832463687091e-05, "loss": 0.468, "step": 1597 }, { "epoch": 0.36, "learning_rate": 1.4839463958696363e-05, "loss": 0.4843, "step": 1598 }, { "epoch": 0.36, "learning_rate": 1.4833092889610624e-05, "loss": 0.48, "step": 1599 }, { "epoch": 0.36, "learning_rate": 1.4826719259805456e-05, "loss": 0.5226, "step": 1600 }, { "epoch": 0.36, "learning_rate": 1.4820343072657804e-05, "loss": 0.4819, "step": 1601 }, { "epoch": 0.36, "learning_rate": 1.4813964331545962e-05, "loss": 0.5131, "step": 1602 }, { "epoch": 0.36, "learning_rate": 1.4807583039849589e-05, "loss": 0.4694, "step": 1603 }, { "epoch": 0.36, "learning_rate": 1.4801199200949678e-05, "loss": 0.5022, "step": 1604 }, { "epoch": 0.36, "learning_rate": 1.4794812818228588e-05, "loss": 0.4915, "step": 1605 }, { "epoch": 0.36, "learning_rate": 1.4788423895070012e-05, "loss": 0.4762, "step": 1606 }, { "epoch": 0.36, "learning_rate": 1.4782032434859001e-05, "loss": 0.4679, "step": 1607 }, { "epoch": 0.36, "learning_rate": 1.4775638440981943e-05, "loss": 0.4532, "step": 1608 }, { "epoch": 0.36, "learning_rate": 1.4769241916826571e-05, "loss": 0.4752, "step": 1609 }, { "epoch": 0.36, "learning_rate": 1.4762842865781952e-05, "loss": 0.4797, "step": 1610 }, { "epoch": 0.36, "learning_rate": 1.4756441291238508e-05, "loss": 0.4759, "step": 1611 }, { "epoch": 0.36, "learning_rate": 1.4750037196587979e-05, "loss": 0.4907, "step": 1612 }, { "epoch": 0.36, "learning_rate": 1.4743630585223452e-05, "loss": 0.505, "step": 1613 }, { "epoch": 0.36, "learning_rate": 1.4737221460539344e-05, "loss": 0.4606, "step": 1614 }, { "epoch": 0.36, "learning_rate": 1.4730809825931403e-05, "loss": 0.501, "step": 1615 }, { "epoch": 0.36, "learning_rate": 1.472439568479671e-05, "loss": 0.4748, "step": 1616 }, { "epoch": 0.36, "learning_rate": 1.4717979040533674e-05, "loss": 0.5108, "step": 1617 }, { "epoch": 0.36, "learning_rate": 1.4711559896542017e-05, "loss": 0.5001, "step": 1618 }, { "epoch": 0.36, "learning_rate": 1.4705138256222813e-05, "loss": 0.4475, "step": 1619 }, { "epoch": 0.36, "learning_rate": 1.4698714122978429e-05, "loss": 0.4852, "step": 1620 }, { "epoch": 0.36, "learning_rate": 1.469228750021257e-05, "loss": 0.4331, "step": 1621 }, { "epoch": 0.36, "learning_rate": 1.4685858391330255e-05, "loss": 0.4688, "step": 1622 }, { "epoch": 0.36, "learning_rate": 1.4679426799737826e-05, "loss": 0.446, "step": 1623 }, { "epoch": 0.36, "learning_rate": 1.467299272884293e-05, "loss": 0.5024, "step": 1624 }, { "epoch": 0.37, "learning_rate": 1.4666556182054537e-05, "loss": 0.4893, "step": 1625 }, { "epoch": 0.37, "learning_rate": 1.466011716278292e-05, "loss": 0.4637, "step": 1626 }, { "epoch": 0.37, "learning_rate": 1.465367567443967e-05, "loss": 0.4901, "step": 1627 }, { "epoch": 0.37, "learning_rate": 1.4647231720437687e-05, "loss": 0.4959, "step": 1628 }, { "epoch": 0.37, "learning_rate": 1.4640785304191169e-05, "loss": 0.442, "step": 1629 }, { "epoch": 0.37, "learning_rate": 1.463433642911562e-05, "loss": 0.4738, "step": 1630 }, { "epoch": 0.37, "learning_rate": 1.462788509862786e-05, "loss": 0.4826, "step": 1631 }, { "epoch": 0.37, "learning_rate": 1.4621431316145993e-05, "loss": 0.4951, "step": 1632 }, { "epoch": 0.37, "learning_rate": 1.461497508508943e-05, "loss": 0.4685, "step": 1633 }, { "epoch": 0.37, "learning_rate": 1.4608516408878875e-05, "loss": 0.4682, "step": 1634 }, { "epoch": 0.37, "learning_rate": 1.4602055290936335e-05, "loss": 0.4626, "step": 1635 }, { "epoch": 0.37, "learning_rate": 1.4595591734685108e-05, "loss": 0.5299, "step": 1636 }, { "epoch": 0.37, "learning_rate": 1.4589125743549779e-05, "loss": 0.474, "step": 1637 }, { "epoch": 0.37, "learning_rate": 1.4582657320956226e-05, "loss": 0.5018, "step": 1638 }, { "epoch": 0.37, "learning_rate": 1.457618647033162e-05, "loss": 0.4918, "step": 1639 }, { "epoch": 0.37, "learning_rate": 1.456971319510441e-05, "loss": 0.481, "step": 1640 }, { "epoch": 0.37, "learning_rate": 1.4563237498704337e-05, "loss": 0.4628, "step": 1641 }, { "epoch": 0.37, "learning_rate": 1.4556759384562418e-05, "loss": 0.4955, "step": 1642 }, { "epoch": 0.37, "learning_rate": 1.4550278856110957e-05, "loss": 0.4535, "step": 1643 }, { "epoch": 0.37, "learning_rate": 1.4543795916783536e-05, "loss": 0.481, "step": 1644 }, { "epoch": 0.37, "learning_rate": 1.453731057001501e-05, "loss": 0.5013, "step": 1645 }, { "epoch": 0.37, "learning_rate": 1.4530822819241517e-05, "loss": 0.4806, "step": 1646 }, { "epoch": 0.37, "learning_rate": 1.4524332667900463e-05, "loss": 0.4839, "step": 1647 }, { "epoch": 0.37, "learning_rate": 1.4517840119430524e-05, "loss": 0.4949, "step": 1648 }, { "epoch": 0.37, "learning_rate": 1.451134517727165e-05, "loss": 0.4813, "step": 1649 }, { "epoch": 0.37, "learning_rate": 1.4504847844865064e-05, "loss": 0.4841, "step": 1650 }, { "epoch": 0.37, "learning_rate": 1.4498348125653246e-05, "loss": 0.5146, "step": 1651 }, { "epoch": 0.37, "learning_rate": 1.4491846023079945e-05, "loss": 0.4847, "step": 1652 }, { "epoch": 0.37, "learning_rate": 1.448534154059017e-05, "loss": 0.4912, "step": 1653 }, { "epoch": 0.37, "learning_rate": 1.4478834681630199e-05, "loss": 0.4724, "step": 1654 }, { "epoch": 0.37, "learning_rate": 1.4472325449647559e-05, "loss": 0.5138, "step": 1655 }, { "epoch": 0.37, "learning_rate": 1.446581384809104e-05, "loss": 0.4955, "step": 1656 }, { "epoch": 0.37, "learning_rate": 1.4459299880410687e-05, "loss": 0.4796, "step": 1657 }, { "epoch": 0.37, "learning_rate": 1.4452783550057799e-05, "loss": 0.458, "step": 1658 }, { "epoch": 0.37, "learning_rate": 1.4446264860484924e-05, "loss": 0.4683, "step": 1659 }, { "epoch": 0.37, "learning_rate": 1.4439743815145861e-05, "loss": 0.4657, "step": 1660 }, { "epoch": 0.37, "learning_rate": 1.4433220417495659e-05, "loss": 0.462, "step": 1661 }, { "epoch": 0.37, "learning_rate": 1.4426694670990612e-05, "loss": 0.4699, "step": 1662 }, { "epoch": 0.37, "learning_rate": 1.4420166579088264e-05, "loss": 0.4664, "step": 1663 }, { "epoch": 0.37, "learning_rate": 1.4413636145247386e-05, "loss": 0.4864, "step": 1664 }, { "epoch": 0.37, "learning_rate": 1.4407103372928006e-05, "loss": 0.4819, "step": 1665 }, { "epoch": 0.37, "learning_rate": 1.4400568265591388e-05, "loss": 0.496, "step": 1666 }, { "epoch": 0.37, "learning_rate": 1.4394030826700025e-05, "loss": 0.4976, "step": 1667 }, { "epoch": 0.37, "learning_rate": 1.4387491059717653e-05, "loss": 0.4727, "step": 1668 }, { "epoch": 0.38, "learning_rate": 1.438094896810924e-05, "loss": 0.4678, "step": 1669 }, { "epoch": 0.38, "learning_rate": 1.4374404555340984e-05, "loss": 0.491, "step": 1670 }, { "epoch": 0.38, "learning_rate": 1.436785782488031e-05, "loss": 0.451, "step": 1671 }, { "epoch": 0.38, "learning_rate": 1.4361308780195884e-05, "loss": 0.5047, "step": 1672 }, { "epoch": 0.38, "learning_rate": 1.4354757424757581e-05, "loss": 0.4994, "step": 1673 }, { "epoch": 0.38, "learning_rate": 1.434820376203651e-05, "loss": 0.4989, "step": 1674 }, { "epoch": 0.38, "learning_rate": 1.4341647795505003e-05, "loss": 0.4244, "step": 1675 }, { "epoch": 0.38, "learning_rate": 1.4335089528636606e-05, "loss": 0.4909, "step": 1676 }, { "epoch": 0.38, "learning_rate": 1.432852896490609e-05, "loss": 0.4938, "step": 1677 }, { "epoch": 0.38, "learning_rate": 1.432196610778944e-05, "loss": 0.4408, "step": 1678 }, { "epoch": 0.38, "learning_rate": 1.4315400960763861e-05, "loss": 0.4331, "step": 1679 }, { "epoch": 0.38, "learning_rate": 1.4308833527307764e-05, "loss": 0.4886, "step": 1680 }, { "epoch": 0.38, "learning_rate": 1.4302263810900772e-05, "loss": 0.4697, "step": 1681 }, { "epoch": 0.38, "learning_rate": 1.4295691815023726e-05, "loss": 0.4776, "step": 1682 }, { "epoch": 0.38, "learning_rate": 1.4289117543158668e-05, "loss": 0.4687, "step": 1683 }, { "epoch": 0.38, "learning_rate": 1.4282540998788846e-05, "loss": 0.4817, "step": 1684 }, { "epoch": 0.38, "learning_rate": 1.4275962185398708e-05, "loss": 0.5022, "step": 1685 }, { "epoch": 0.38, "learning_rate": 1.4269381106473916e-05, "loss": 0.4891, "step": 1686 }, { "epoch": 0.38, "learning_rate": 1.4262797765501327e-05, "loss": 0.4938, "step": 1687 }, { "epoch": 0.38, "learning_rate": 1.4256212165968986e-05, "loss": 0.5019, "step": 1688 }, { "epoch": 0.38, "learning_rate": 1.4249624311366151e-05, "loss": 0.4975, "step": 1689 }, { "epoch": 0.38, "learning_rate": 1.4243034205183266e-05, "loss": 0.4907, "step": 1690 }, { "epoch": 0.38, "learning_rate": 1.4236441850911969e-05, "loss": 0.4779, "step": 1691 }, { "epoch": 0.38, "learning_rate": 1.4229847252045086e-05, "loss": 0.5081, "step": 1692 }, { "epoch": 0.38, "learning_rate": 1.4223250412076643e-05, "loss": 0.5134, "step": 1693 }, { "epoch": 0.38, "learning_rate": 1.421665133450184e-05, "loss": 0.4427, "step": 1694 }, { "epoch": 0.38, "learning_rate": 1.4210050022817075e-05, "loss": 0.4615, "step": 1695 }, { "epoch": 0.38, "learning_rate": 1.4203446480519913e-05, "loss": 0.4831, "step": 1696 }, { "epoch": 0.38, "learning_rate": 1.4196840711109123e-05, "loss": 0.4705, "step": 1697 }, { "epoch": 0.38, "learning_rate": 1.4190232718084638e-05, "loss": 0.5002, "step": 1698 }, { "epoch": 0.38, "learning_rate": 1.4183622504947571e-05, "loss": 0.4956, "step": 1699 }, { "epoch": 0.38, "learning_rate": 1.4177010075200214e-05, "loss": 0.482, "step": 1700 }, { "epoch": 0.38, "learning_rate": 1.4170395432346035e-05, "loss": 0.461, "step": 1701 }, { "epoch": 0.38, "learning_rate": 1.4163778579889675e-05, "loss": 0.4669, "step": 1702 }, { "epoch": 0.38, "learning_rate": 1.4157159521336938e-05, "loss": 0.4622, "step": 1703 }, { "epoch": 0.38, "learning_rate": 1.4150538260194806e-05, "loss": 0.5019, "step": 1704 }, { "epoch": 0.38, "learning_rate": 1.4143914799971423e-05, "loss": 0.4828, "step": 1705 }, { "epoch": 0.38, "learning_rate": 1.4137289144176101e-05, "loss": 0.502, "step": 1706 }, { "epoch": 0.38, "learning_rate": 1.4130661296319313e-05, "loss": 0.4845, "step": 1707 }, { "epoch": 0.38, "learning_rate": 1.4124031259912689e-05, "loss": 0.4867, "step": 1708 }, { "epoch": 0.38, "learning_rate": 1.411739903846903e-05, "loss": 0.4745, "step": 1709 }, { "epoch": 0.38, "learning_rate": 1.411076463550229e-05, "loss": 0.5114, "step": 1710 }, { "epoch": 0.38, "learning_rate": 1.410412805452757e-05, "loss": 0.4813, "step": 1711 }, { "epoch": 0.38, "learning_rate": 1.4097489299061135e-05, "loss": 0.4749, "step": 1712 }, { "epoch": 0.38, "learning_rate": 1.4090848372620405e-05, "loss": 0.4939, "step": 1713 }, { "epoch": 0.39, "learning_rate": 1.4084205278723937e-05, "loss": 0.4265, "step": 1714 }, { "epoch": 0.39, "learning_rate": 1.4077560020891447e-05, "loss": 0.5007, "step": 1715 }, { "epoch": 0.39, "learning_rate": 1.4070912602643793e-05, "loss": 0.4735, "step": 1716 }, { "epoch": 0.39, "learning_rate": 1.4064263027502984e-05, "loss": 0.4825, "step": 1717 }, { "epoch": 0.39, "learning_rate": 1.4057611298992162e-05, "loss": 0.4922, "step": 1718 }, { "epoch": 0.39, "learning_rate": 1.4050957420635615e-05, "loss": 0.474, "step": 1719 }, { "epoch": 0.39, "learning_rate": 1.404430139595877e-05, "loss": 0.4622, "step": 1720 }, { "epoch": 0.39, "learning_rate": 1.4037643228488197e-05, "loss": 0.4539, "step": 1721 }, { "epoch": 0.39, "learning_rate": 1.4030982921751586e-05, "loss": 0.4956, "step": 1722 }, { "epoch": 0.39, "learning_rate": 1.4024320479277776e-05, "loss": 0.4452, "step": 1723 }, { "epoch": 0.39, "learning_rate": 1.4017655904596727e-05, "loss": 0.4489, "step": 1724 }, { "epoch": 0.39, "learning_rate": 1.4010989201239538e-05, "loss": 0.4658, "step": 1725 }, { "epoch": 0.39, "learning_rate": 1.400432037273843e-05, "loss": 0.4788, "step": 1726 }, { "epoch": 0.39, "learning_rate": 1.3997649422626749e-05, "loss": 0.4756, "step": 1727 }, { "epoch": 0.39, "learning_rate": 1.3990976354438965e-05, "loss": 0.4865, "step": 1728 }, { "epoch": 0.39, "learning_rate": 1.3984301171710677e-05, "loss": 0.5211, "step": 1729 }, { "epoch": 0.39, "learning_rate": 1.3977623877978599e-05, "loss": 0.4972, "step": 1730 }, { "epoch": 0.39, "learning_rate": 1.3970944476780559e-05, "loss": 0.474, "step": 1731 }, { "epoch": 0.39, "learning_rate": 1.3964262971655513e-05, "loss": 0.466, "step": 1732 }, { "epoch": 0.39, "learning_rate": 1.3957579366143521e-05, "loss": 0.4821, "step": 1733 }, { "epoch": 0.39, "learning_rate": 1.3950893663785765e-05, "loss": 0.4615, "step": 1734 }, { "epoch": 0.39, "learning_rate": 1.3944205868124525e-05, "loss": 0.4787, "step": 1735 }, { "epoch": 0.39, "learning_rate": 1.3937515982703206e-05, "loss": 0.4711, "step": 1736 }, { "epoch": 0.39, "learning_rate": 1.3930824011066312e-05, "loss": 0.4826, "step": 1737 }, { "epoch": 0.39, "learning_rate": 1.3924129956759449e-05, "loss": 0.4819, "step": 1738 }, { "epoch": 0.39, "learning_rate": 1.391743382332933e-05, "loss": 0.4739, "step": 1739 }, { "epoch": 0.39, "learning_rate": 1.3910735614323774e-05, "loss": 0.4915, "step": 1740 }, { "epoch": 0.39, "learning_rate": 1.3904035333291695e-05, "loss": 0.4678, "step": 1741 }, { "epoch": 0.39, "learning_rate": 1.38973329837831e-05, "loss": 0.485, "step": 1742 }, { "epoch": 0.39, "learning_rate": 1.3890628569349103e-05, "loss": 0.4837, "step": 1743 }, { "epoch": 0.39, "learning_rate": 1.3883922093541903e-05, "loss": 0.4964, "step": 1744 }, { "epoch": 0.39, "learning_rate": 1.38772135599148e-05, "loss": 0.4838, "step": 1745 }, { "epoch": 0.39, "learning_rate": 1.3870502972022175e-05, "loss": 0.4794, "step": 1746 }, { "epoch": 0.39, "learning_rate": 1.3863790333419499e-05, "loss": 0.4627, "step": 1747 }, { "epoch": 0.39, "learning_rate": 1.3857075647663335e-05, "loss": 0.4783, "step": 1748 }, { "epoch": 0.39, "learning_rate": 1.385035891831133e-05, "loss": 0.4583, "step": 1749 }, { "epoch": 0.39, "learning_rate": 1.3843640148922202e-05, "loss": 0.4824, "step": 1750 }, { "epoch": 0.39, "learning_rate": 1.3836919343055769e-05, "loss": 0.4652, "step": 1751 }, { "epoch": 0.39, "learning_rate": 1.3830196504272914e-05, "loss": 0.4586, "step": 1752 }, { "epoch": 0.39, "learning_rate": 1.3823471636135599e-05, "loss": 0.4674, "step": 1753 }, { "epoch": 0.39, "learning_rate": 1.3816744742206868e-05, "loss": 0.4727, "step": 1754 }, { "epoch": 0.39, "learning_rate": 1.3810015826050833e-05, "loss": 0.4431, "step": 1755 }, { "epoch": 0.39, "learning_rate": 1.3803284891232675e-05, "loss": 0.4662, "step": 1756 }, { "epoch": 0.39, "learning_rate": 1.3796551941318651e-05, "loss": 0.46, "step": 1757 }, { "epoch": 0.4, "learning_rate": 1.3789816979876082e-05, "loss": 0.4814, "step": 1758 }, { "epoch": 0.4, "learning_rate": 1.3783080010473351e-05, "loss": 0.4549, "step": 1759 }, { "epoch": 0.4, "learning_rate": 1.3776341036679914e-05, "loss": 0.4793, "step": 1760 }, { "epoch": 0.4, "learning_rate": 1.3769600062066284e-05, "loss": 0.5005, "step": 1761 }, { "epoch": 0.4, "learning_rate": 1.376285709020403e-05, "loss": 0.4704, "step": 1762 }, { "epoch": 0.4, "learning_rate": 1.3756112124665786e-05, "loss": 0.4978, "step": 1763 }, { "epoch": 0.4, "learning_rate": 1.374936516902524e-05, "loss": 0.4461, "step": 1764 }, { "epoch": 0.4, "learning_rate": 1.3742616226857133e-05, "loss": 0.4498, "step": 1765 }, { "epoch": 0.4, "learning_rate": 1.3735865301737262e-05, "loss": 0.4971, "step": 1766 }, { "epoch": 0.4, "learning_rate": 1.3729112397242465e-05, "loss": 0.4798, "step": 1767 }, { "epoch": 0.4, "learning_rate": 1.3722357516950643e-05, "loss": 0.4823, "step": 1768 }, { "epoch": 0.4, "learning_rate": 1.3715600664440738e-05, "loss": 0.4634, "step": 1769 }, { "epoch": 0.4, "learning_rate": 1.3708841843292731e-05, "loss": 0.4618, "step": 1770 }, { "epoch": 0.4, "learning_rate": 1.370208105708765e-05, "loss": 0.465, "step": 1771 }, { "epoch": 0.4, "learning_rate": 1.369531830940757e-05, "loss": 0.4551, "step": 1772 }, { "epoch": 0.4, "learning_rate": 1.3688553603835596e-05, "loss": 0.4797, "step": 1773 }, { "epoch": 0.4, "learning_rate": 1.3681786943955876e-05, "loss": 0.4943, "step": 1774 }, { "epoch": 0.4, "learning_rate": 1.3675018333353594e-05, "loss": 0.4696, "step": 1775 }, { "epoch": 0.4, "learning_rate": 1.366824777561496e-05, "loss": 0.4848, "step": 1776 }, { "epoch": 0.4, "learning_rate": 1.3661475274327227e-05, "loss": 0.4538, "step": 1777 }, { "epoch": 0.4, "learning_rate": 1.3654700833078667e-05, "loss": 0.4848, "step": 1778 }, { "epoch": 0.4, "learning_rate": 1.3647924455458588e-05, "loss": 0.4806, "step": 1779 }, { "epoch": 0.4, "learning_rate": 1.3641146145057321e-05, "loss": 0.4477, "step": 1780 }, { "epoch": 0.4, "learning_rate": 1.363436590546622e-05, "loss": 0.4211, "step": 1781 }, { "epoch": 0.4, "learning_rate": 1.3627583740277656e-05, "loss": 0.465, "step": 1782 }, { "epoch": 0.4, "learning_rate": 1.3620799653085039e-05, "loss": 0.4912, "step": 1783 }, { "epoch": 0.4, "learning_rate": 1.3614013647482774e-05, "loss": 0.5016, "step": 1784 }, { "epoch": 0.4, "learning_rate": 1.3607225727066298e-05, "loss": 0.4677, "step": 1785 }, { "epoch": 0.4, "learning_rate": 1.3600435895432051e-05, "loss": 0.4906, "step": 1786 }, { "epoch": 0.4, "learning_rate": 1.3593644156177499e-05, "loss": 0.4527, "step": 1787 }, { "epoch": 0.4, "learning_rate": 1.3586850512901111e-05, "loss": 0.4762, "step": 1788 }, { "epoch": 0.4, "learning_rate": 1.3580054969202362e-05, "loss": 0.4833, "step": 1789 }, { "epoch": 0.4, "learning_rate": 1.3573257528681738e-05, "loss": 0.4999, "step": 1790 }, { "epoch": 0.4, "learning_rate": 1.3566458194940734e-05, "loss": 0.4686, "step": 1791 }, { "epoch": 0.4, "learning_rate": 1.3559656971581839e-05, "loss": 0.4749, "step": 1792 }, { "epoch": 0.4, "learning_rate": 1.3552853862208549e-05, "loss": 0.4644, "step": 1793 }, { "epoch": 0.4, "learning_rate": 1.3546048870425356e-05, "loss": 0.4945, "step": 1794 }, { "epoch": 0.4, "learning_rate": 1.3539241999837758e-05, "loss": 0.4638, "step": 1795 }, { "epoch": 0.4, "learning_rate": 1.3532433254052235e-05, "loss": 0.4798, "step": 1796 }, { "epoch": 0.4, "learning_rate": 1.352562263667627e-05, "loss": 0.4758, "step": 1797 }, { "epoch": 0.4, "learning_rate": 1.351881015131833e-05, "loss": 0.4898, "step": 1798 }, { "epoch": 0.4, "learning_rate": 1.3511995801587886e-05, "loss": 0.4472, "step": 1799 }, { "epoch": 0.4, "learning_rate": 1.3505179591095386e-05, "loss": 0.4786, "step": 1800 }, { "epoch": 0.4, "learning_rate": 1.3498361523452258e-05, "loss": 0.49, "step": 1801 }, { "epoch": 0.4, "learning_rate": 1.3491541602270924e-05, "loss": 0.4964, "step": 1802 }, { "epoch": 0.41, "learning_rate": 1.348471983116479e-05, "loss": 0.4698, "step": 1803 }, { "epoch": 0.41, "learning_rate": 1.3477896213748232e-05, "loss": 0.5035, "step": 1804 }, { "epoch": 0.41, "learning_rate": 1.3471070753636614e-05, "loss": 0.4661, "step": 1805 }, { "epoch": 0.41, "learning_rate": 1.3464243454446268e-05, "loss": 0.484, "step": 1806 }, { "epoch": 0.41, "learning_rate": 1.3457414319794508e-05, "loss": 0.4942, "step": 1807 }, { "epoch": 0.41, "learning_rate": 1.345058335329962e-05, "loss": 0.5089, "step": 1808 }, { "epoch": 0.41, "learning_rate": 1.3443750558580847e-05, "loss": 0.4634, "step": 1809 }, { "epoch": 0.41, "learning_rate": 1.343691593925842e-05, "loss": 0.4868, "step": 1810 }, { "epoch": 0.41, "learning_rate": 1.3430079498953527e-05, "loss": 0.4651, "step": 1811 }, { "epoch": 0.41, "learning_rate": 1.342324124128832e-05, "loss": 0.4906, "step": 1812 }, { "epoch": 0.41, "learning_rate": 1.3416401169885915e-05, "loss": 0.4688, "step": 1813 }, { "epoch": 0.41, "learning_rate": 1.340955928837039e-05, "loss": 0.4802, "step": 1814 }, { "epoch": 0.41, "learning_rate": 1.3402715600366784e-05, "loss": 0.4659, "step": 1815 }, { "epoch": 0.41, "learning_rate": 1.3395870109501088e-05, "loss": 0.4798, "step": 1816 }, { "epoch": 0.41, "learning_rate": 1.3389022819400253e-05, "loss": 0.4544, "step": 1817 }, { "epoch": 0.41, "learning_rate": 1.3382173733692177e-05, "loss": 0.4804, "step": 1818 }, { "epoch": 0.41, "learning_rate": 1.3375322856005719e-05, "loss": 0.4562, "step": 1819 }, { "epoch": 0.41, "learning_rate": 1.3368470189970681e-05, "loss": 0.4553, "step": 1820 }, { "epoch": 0.41, "learning_rate": 1.336161573921781e-05, "loss": 0.4704, "step": 1821 }, { "epoch": 0.41, "learning_rate": 1.3354759507378808e-05, "loss": 0.451, "step": 1822 }, { "epoch": 0.41, "learning_rate": 1.334790149808631e-05, "loss": 0.4528, "step": 1823 }, { "epoch": 0.41, "learning_rate": 1.3341041714973901e-05, "loss": 0.4473, "step": 1824 }, { "epoch": 0.41, "learning_rate": 1.3334180161676098e-05, "loss": 0.4649, "step": 1825 }, { "epoch": 0.41, "learning_rate": 1.3327316841828366e-05, "loss": 0.4902, "step": 1826 }, { "epoch": 0.41, "learning_rate": 1.3320451759067097e-05, "loss": 0.48, "step": 1827 }, { "epoch": 0.41, "learning_rate": 1.3313584917029621e-05, "loss": 0.4666, "step": 1828 }, { "epoch": 0.41, "learning_rate": 1.3306716319354197e-05, "loss": 0.4878, "step": 1829 }, { "epoch": 0.41, "learning_rate": 1.3299845969680024e-05, "loss": 0.4712, "step": 1830 }, { "epoch": 0.41, "learning_rate": 1.3292973871647212e-05, "loss": 0.471, "step": 1831 }, { "epoch": 0.41, "learning_rate": 1.3286100028896815e-05, "loss": 0.4931, "step": 1832 }, { "epoch": 0.41, "learning_rate": 1.3279224445070801e-05, "loss": 0.4539, "step": 1833 }, { "epoch": 0.41, "learning_rate": 1.3272347123812063e-05, "loss": 0.4566, "step": 1834 }, { "epoch": 0.41, "learning_rate": 1.3265468068764416e-05, "loss": 0.4562, "step": 1835 }, { "epoch": 0.41, "learning_rate": 1.3258587283572592e-05, "loss": 0.4504, "step": 1836 }, { "epoch": 0.41, "learning_rate": 1.325170477188224e-05, "loss": 0.4655, "step": 1837 }, { "epoch": 0.41, "learning_rate": 1.3244820537339918e-05, "loss": 0.486, "step": 1838 }, { "epoch": 0.41, "learning_rate": 1.3237934583593112e-05, "loss": 0.4962, "step": 1839 }, { "epoch": 0.41, "learning_rate": 1.3231046914290204e-05, "loss": 0.4804, "step": 1840 }, { "epoch": 0.41, "learning_rate": 1.3224157533080492e-05, "loss": 0.4876, "step": 1841 }, { "epoch": 0.41, "learning_rate": 1.3217266443614179e-05, "loss": 0.5055, "step": 1842 }, { "epoch": 0.41, "learning_rate": 1.3210373649542373e-05, "loss": 0.4851, "step": 1843 }, { "epoch": 0.41, "learning_rate": 1.320347915451709e-05, "loss": 0.458, "step": 1844 }, { "epoch": 0.41, "learning_rate": 1.3196582962191233e-05, "loss": 0.4997, "step": 1845 }, { "epoch": 0.41, "learning_rate": 1.3189685076218624e-05, "loss": 0.4873, "step": 1846 }, { "epoch": 0.42, "learning_rate": 1.318278550025397e-05, "loss": 0.4802, "step": 1847 }, { "epoch": 0.42, "learning_rate": 1.3175884237952875e-05, "loss": 0.5033, "step": 1848 }, { "epoch": 0.42, "learning_rate": 1.3168981292971832e-05, "loss": 0.4922, "step": 1849 }, { "epoch": 0.42, "learning_rate": 1.316207666896824e-05, "loss": 0.4708, "step": 1850 }, { "epoch": 0.42, "learning_rate": 1.3155170369600376e-05, "loss": 0.46, "step": 1851 }, { "epoch": 0.42, "learning_rate": 1.3148262398527404e-05, "loss": 0.4419, "step": 1852 }, { "epoch": 0.42, "learning_rate": 1.3141352759409376e-05, "loss": 0.4736, "step": 1853 }, { "epoch": 0.42, "learning_rate": 1.3134441455907237e-05, "loss": 0.4528, "step": 1854 }, { "epoch": 0.42, "learning_rate": 1.3127528491682798e-05, "loss": 0.4626, "step": 1855 }, { "epoch": 0.42, "learning_rate": 1.3120613870398758e-05, "loss": 0.4612, "step": 1856 }, { "epoch": 0.42, "learning_rate": 1.3113697595718695e-05, "loss": 0.4736, "step": 1857 }, { "epoch": 0.42, "learning_rate": 1.3106779671307064e-05, "loss": 0.4678, "step": 1858 }, { "epoch": 0.42, "learning_rate": 1.3099860100829185e-05, "loss": 0.502, "step": 1859 }, { "epoch": 0.42, "learning_rate": 1.3092938887951263e-05, "loss": 0.4599, "step": 1860 }, { "epoch": 0.42, "learning_rate": 1.3086016036340363e-05, "loss": 0.4855, "step": 1861 }, { "epoch": 0.42, "learning_rate": 1.3079091549664425e-05, "loss": 0.4648, "step": 1862 }, { "epoch": 0.42, "learning_rate": 1.307216543159225e-05, "loss": 0.4969, "step": 1863 }, { "epoch": 0.42, "learning_rate": 1.3065237685793503e-05, "loss": 0.4626, "step": 1864 }, { "epoch": 0.42, "learning_rate": 1.3058308315938724e-05, "loss": 0.4504, "step": 1865 }, { "epoch": 0.42, "learning_rate": 1.3051377325699295e-05, "loss": 0.4875, "step": 1866 }, { "epoch": 0.42, "learning_rate": 1.3044444718747467e-05, "loss": 0.4945, "step": 1867 }, { "epoch": 0.42, "learning_rate": 1.3037510498756343e-05, "loss": 0.4446, "step": 1868 }, { "epoch": 0.42, "learning_rate": 1.303057466939989e-05, "loss": 0.4377, "step": 1869 }, { "epoch": 0.42, "learning_rate": 1.3023637234352917e-05, "loss": 0.4774, "step": 1870 }, { "epoch": 0.42, "learning_rate": 1.3016698197291092e-05, "loss": 0.4815, "step": 1871 }, { "epoch": 0.42, "learning_rate": 1.3009757561890916e-05, "loss": 0.4747, "step": 1872 }, { "epoch": 0.42, "learning_rate": 1.3002815331829763e-05, "loss": 0.4466, "step": 1873 }, { "epoch": 0.42, "learning_rate": 1.2995871510785829e-05, "loss": 0.3933, "step": 1874 }, { "epoch": 0.42, "learning_rate": 1.2988926102438164e-05, "loss": 0.4816, "step": 1875 }, { "epoch": 0.42, "learning_rate": 1.2981979110466655e-05, "loss": 0.449, "step": 1876 }, { "epoch": 0.42, "learning_rate": 1.297503053855203e-05, "loss": 0.4874, "step": 1877 }, { "epoch": 0.42, "learning_rate": 1.2968080390375855e-05, "loss": 0.4594, "step": 1878 }, { "epoch": 0.42, "learning_rate": 1.2961128669620528e-05, "loss": 0.4564, "step": 1879 }, { "epoch": 0.42, "learning_rate": 1.2954175379969279e-05, "loss": 0.4637, "step": 1880 }, { "epoch": 0.42, "learning_rate": 1.2947220525106177e-05, "loss": 0.4642, "step": 1881 }, { "epoch": 0.42, "learning_rate": 1.2940264108716113e-05, "loss": 0.462, "step": 1882 }, { "epoch": 0.42, "learning_rate": 1.293330613448481e-05, "loss": 0.5087, "step": 1883 }, { "epoch": 0.42, "learning_rate": 1.2926346606098807e-05, "loss": 0.4516, "step": 1884 }, { "epoch": 0.42, "learning_rate": 1.2919385527245485e-05, "loss": 0.4476, "step": 1885 }, { "epoch": 0.42, "learning_rate": 1.2912422901613027e-05, "loss": 0.4858, "step": 1886 }, { "epoch": 0.42, "learning_rate": 1.2905458732890444e-05, "loss": 0.4528, "step": 1887 }, { "epoch": 0.42, "learning_rate": 1.2898493024767565e-05, "loss": 0.4764, "step": 1888 }, { "epoch": 0.42, "learning_rate": 1.2891525780935035e-05, "loss": 0.5108, "step": 1889 }, { "epoch": 0.42, "learning_rate": 1.2884557005084311e-05, "loss": 0.4934, "step": 1890 }, { "epoch": 0.42, "learning_rate": 1.2877586700907663e-05, "loss": 0.4716, "step": 1891 }, { "epoch": 0.43, "learning_rate": 1.2870614872098163e-05, "loss": 0.5236, "step": 1892 }, { "epoch": 0.43, "learning_rate": 1.286364152234971e-05, "loss": 0.4773, "step": 1893 }, { "epoch": 0.43, "learning_rate": 1.2856666655356988e-05, "loss": 0.4587, "step": 1894 }, { "epoch": 0.43, "learning_rate": 1.2849690274815494e-05, "loss": 0.4392, "step": 1895 }, { "epoch": 0.43, "learning_rate": 1.2842712384421527e-05, "loss": 0.4574, "step": 1896 }, { "epoch": 0.43, "learning_rate": 1.2835732987872189e-05, "loss": 0.4742, "step": 1897 }, { "epoch": 0.43, "learning_rate": 1.2828752088865373e-05, "loss": 0.4612, "step": 1898 }, { "epoch": 0.43, "learning_rate": 1.282176969109977e-05, "loss": 0.4817, "step": 1899 }, { "epoch": 0.43, "learning_rate": 1.2814785798274866e-05, "loss": 0.456, "step": 1900 }, { "epoch": 0.43, "learning_rate": 1.2807800414090945e-05, "loss": 0.4591, "step": 1901 }, { "epoch": 0.43, "learning_rate": 1.2800813542249073e-05, "loss": 0.4919, "step": 1902 }, { "epoch": 0.43, "learning_rate": 1.2793825186451106e-05, "loss": 0.4606, "step": 1903 }, { "epoch": 0.43, "learning_rate": 1.2786835350399682e-05, "loss": 0.4729, "step": 1904 }, { "epoch": 0.43, "learning_rate": 1.2779844037798238e-05, "loss": 0.4767, "step": 1905 }, { "epoch": 0.43, "learning_rate": 1.2772851252350982e-05, "loss": 0.4707, "step": 1906 }, { "epoch": 0.43, "learning_rate": 1.2765856997762898e-05, "loss": 0.4558, "step": 1907 }, { "epoch": 0.43, "learning_rate": 1.275886127773976e-05, "loss": 0.4821, "step": 1908 }, { "epoch": 0.43, "learning_rate": 1.2751864095988112e-05, "loss": 0.4768, "step": 1909 }, { "epoch": 0.43, "learning_rate": 1.2744865456215272e-05, "loss": 0.4859, "step": 1910 }, { "epoch": 0.43, "learning_rate": 1.2737865362129331e-05, "loss": 0.437, "step": 1911 }, { "epoch": 0.43, "learning_rate": 1.2730863817439157e-05, "loss": 0.4844, "step": 1912 }, { "epoch": 0.43, "learning_rate": 1.2723860825854377e-05, "loss": 0.4746, "step": 1913 }, { "epoch": 0.43, "learning_rate": 1.2716856391085384e-05, "loss": 0.4737, "step": 1914 }, { "epoch": 0.43, "learning_rate": 1.2709850516843351e-05, "loss": 0.4424, "step": 1915 }, { "epoch": 0.43, "learning_rate": 1.2702843206840197e-05, "loss": 0.4796, "step": 1916 }, { "epoch": 0.43, "learning_rate": 1.2695834464788606e-05, "loss": 0.4685, "step": 1917 }, { "epoch": 0.43, "learning_rate": 1.2688824294402022e-05, "loss": 0.4544, "step": 1918 }, { "epoch": 0.43, "learning_rate": 1.2681812699394653e-05, "loss": 0.4899, "step": 1919 }, { "epoch": 0.43, "learning_rate": 1.267479968348145e-05, "loss": 0.4683, "step": 1920 }, { "epoch": 0.43, "learning_rate": 1.266778525037812e-05, "loss": 0.4587, "step": 1921 }, { "epoch": 0.43, "learning_rate": 1.2660769403801127e-05, "loss": 0.4776, "step": 1922 }, { "epoch": 0.43, "learning_rate": 1.2653752147467677e-05, "loss": 0.4969, "step": 1923 }, { "epoch": 0.43, "learning_rate": 1.2646733485095727e-05, "loss": 0.4746, "step": 1924 }, { "epoch": 0.43, "learning_rate": 1.2639713420403979e-05, "loss": 0.4631, "step": 1925 }, { "epoch": 0.43, "learning_rate": 1.2632691957111874e-05, "loss": 0.4869, "step": 1926 }, { "epoch": 0.43, "learning_rate": 1.2625669098939595e-05, "loss": 0.461, "step": 1927 }, { "epoch": 0.43, "learning_rate": 1.2618644849608068e-05, "loss": 0.4728, "step": 1928 }, { "epoch": 0.43, "learning_rate": 1.2611619212838954e-05, "loss": 0.4757, "step": 1929 }, { "epoch": 0.43, "learning_rate": 1.260459219235465e-05, "loss": 0.42, "step": 1930 }, { "epoch": 0.43, "learning_rate": 1.259756379187828e-05, "loss": 0.46, "step": 1931 }, { "epoch": 0.43, "learning_rate": 1.2590534015133708e-05, "loss": 0.4625, "step": 1932 }, { "epoch": 0.43, "learning_rate": 1.2583502865845523e-05, "loss": 0.45, "step": 1933 }, { "epoch": 0.43, "learning_rate": 1.2576470347739043e-05, "loss": 0.4828, "step": 1934 }, { "epoch": 0.43, "learning_rate": 1.2569436464540304e-05, "loss": 0.4353, "step": 1935 }, { "epoch": 0.44, "learning_rate": 1.2562401219976077e-05, "loss": 0.4658, "step": 1936 }, { "epoch": 0.44, "learning_rate": 1.2555364617773844e-05, "loss": 0.4834, "step": 1937 }, { "epoch": 0.44, "learning_rate": 1.2548326661661814e-05, "loss": 0.4404, "step": 1938 }, { "epoch": 0.44, "learning_rate": 1.2541287355368908e-05, "loss": 0.4664, "step": 1939 }, { "epoch": 0.44, "learning_rate": 1.2534246702624762e-05, "loss": 0.5035, "step": 1940 }, { "epoch": 0.44, "learning_rate": 1.2527204707159736e-05, "loss": 0.4619, "step": 1941 }, { "epoch": 0.44, "learning_rate": 1.2520161372704882e-05, "loss": 0.4624, "step": 1942 }, { "epoch": 0.44, "learning_rate": 1.2513116702991978e-05, "loss": 0.4422, "step": 1943 }, { "epoch": 0.44, "learning_rate": 1.250607070175351e-05, "loss": 0.4781, "step": 1944 }, { "epoch": 0.44, "learning_rate": 1.2499023372722654e-05, "loss": 0.4977, "step": 1945 }, { "epoch": 0.44, "learning_rate": 1.2491974719633302e-05, "loss": 0.4623, "step": 1946 }, { "epoch": 0.44, "learning_rate": 1.2484924746220043e-05, "loss": 0.4739, "step": 1947 }, { "epoch": 0.44, "learning_rate": 1.2477873456218172e-05, "loss": 0.4643, "step": 1948 }, { "epoch": 0.44, "learning_rate": 1.2470820853363674e-05, "loss": 0.4618, "step": 1949 }, { "epoch": 0.44, "learning_rate": 1.2463766941393232e-05, "loss": 0.4464, "step": 1950 }, { "epoch": 0.44, "learning_rate": 1.2456711724044221e-05, "loss": 0.4538, "step": 1951 }, { "epoch": 0.44, "learning_rate": 1.2449655205054714e-05, "loss": 0.462, "step": 1952 }, { "epoch": 0.44, "learning_rate": 1.2442597388163467e-05, "loss": 0.4826, "step": 1953 }, { "epoch": 0.44, "learning_rate": 1.2435538277109919e-05, "loss": 0.4778, "step": 1954 }, { "epoch": 0.44, "learning_rate": 1.2428477875634214e-05, "loss": 0.5022, "step": 1955 }, { "epoch": 0.44, "learning_rate": 1.242141618747716e-05, "loss": 0.4769, "step": 1956 }, { "epoch": 0.44, "learning_rate": 1.2414353216380252e-05, "loss": 0.4793, "step": 1957 }, { "epoch": 0.44, "learning_rate": 1.240728896608567e-05, "loss": 0.4605, "step": 1958 }, { "epoch": 0.44, "learning_rate": 1.240022344033627e-05, "loss": 0.4835, "step": 1959 }, { "epoch": 0.44, "learning_rate": 1.2393156642875579e-05, "loss": 0.4815, "step": 1960 }, { "epoch": 0.44, "learning_rate": 1.2386088577447804e-05, "loss": 0.4717, "step": 1961 }, { "epoch": 0.44, "learning_rate": 1.2379019247797811e-05, "loss": 0.475, "step": 1962 }, { "epoch": 0.44, "learning_rate": 1.2371948657671162e-05, "loss": 0.4491, "step": 1963 }, { "epoch": 0.44, "learning_rate": 1.2364876810814059e-05, "loss": 0.4722, "step": 1964 }, { "epoch": 0.44, "learning_rate": 1.2357803710973385e-05, "loss": 0.454, "step": 1965 }, { "epoch": 0.44, "learning_rate": 1.2350729361896678e-05, "loss": 0.4602, "step": 1966 }, { "epoch": 0.44, "learning_rate": 1.234365376733215e-05, "loss": 0.4675, "step": 1967 }, { "epoch": 0.44, "learning_rate": 1.2336576931028664e-05, "loss": 0.4967, "step": 1968 }, { "epoch": 0.44, "learning_rate": 1.2329498856735739e-05, "loss": 0.4453, "step": 1969 }, { "epoch": 0.44, "learning_rate": 1.232241954820356e-05, "loss": 0.4752, "step": 1970 }, { "epoch": 0.44, "learning_rate": 1.2315339009182954e-05, "loss": 0.4924, "step": 1971 }, { "epoch": 0.44, "learning_rate": 1.230825724342541e-05, "loss": 0.465, "step": 1972 }, { "epoch": 0.44, "learning_rate": 1.2301174254683058e-05, "loss": 0.4773, "step": 1973 }, { "epoch": 0.44, "learning_rate": 1.2294090046708684e-05, "loss": 0.4637, "step": 1974 }, { "epoch": 0.44, "learning_rate": 1.2287004623255718e-05, "loss": 0.4669, "step": 1975 }, { "epoch": 0.44, "learning_rate": 1.2279917988078232e-05, "loss": 0.4437, "step": 1976 }, { "epoch": 0.44, "learning_rate": 1.2272830144930936e-05, "loss": 0.4545, "step": 1977 }, { "epoch": 0.44, "learning_rate": 1.2265741097569185e-05, "loss": 0.4886, "step": 1978 }, { "epoch": 0.44, "learning_rate": 1.225865084974898e-05, "loss": 0.4685, "step": 1979 }, { "epoch": 0.44, "learning_rate": 1.2251559405226943e-05, "loss": 0.4667, "step": 1980 }, { "epoch": 0.45, "learning_rate": 1.2244466767760336e-05, "loss": 0.4663, "step": 1981 }, { "epoch": 0.45, "learning_rate": 1.2237372941107055e-05, "loss": 0.4358, "step": 1982 }, { "epoch": 0.45, "learning_rate": 1.2230277929025627e-05, "loss": 0.4751, "step": 1983 }, { "epoch": 0.45, "learning_rate": 1.2223181735275203e-05, "loss": 0.4721, "step": 1984 }, { "epoch": 0.45, "learning_rate": 1.2216084363615562e-05, "loss": 0.4603, "step": 1985 }, { "epoch": 0.45, "learning_rate": 1.2208985817807104e-05, "loss": 0.4579, "step": 1986 }, { "epoch": 0.45, "learning_rate": 1.2201886101610863e-05, "loss": 0.5016, "step": 1987 }, { "epoch": 0.45, "learning_rate": 1.2194785218788476e-05, "loss": 0.4662, "step": 1988 }, { "epoch": 0.45, "learning_rate": 1.2187683173102212e-05, "loss": 0.468, "step": 1989 }, { "epoch": 0.45, "learning_rate": 1.2180579968314942e-05, "loss": 0.4781, "step": 1990 }, { "epoch": 0.45, "learning_rate": 1.217347560819017e-05, "loss": 0.459, "step": 1991 }, { "epoch": 0.45, "learning_rate": 1.2166370096491997e-05, "loss": 0.4548, "step": 1992 }, { "epoch": 0.45, "learning_rate": 1.2159263436985139e-05, "loss": 0.4742, "step": 1993 }, { "epoch": 0.45, "learning_rate": 1.2152155633434922e-05, "loss": 0.4687, "step": 1994 }, { "epoch": 0.45, "learning_rate": 1.2145046689607272e-05, "loss": 0.4467, "step": 1995 }, { "epoch": 0.45, "learning_rate": 1.2137936609268728e-05, "loss": 0.4639, "step": 1996 }, { "epoch": 0.45, "learning_rate": 1.2130825396186421e-05, "loss": 0.466, "step": 1997 }, { "epoch": 0.45, "learning_rate": 1.2123713054128095e-05, "loss": 0.4603, "step": 1998 }, { "epoch": 0.45, "learning_rate": 1.2116599586862079e-05, "loss": 0.4352, "step": 1999 }, { "epoch": 0.45, "learning_rate": 1.2109484998157306e-05, "loss": 0.4674, "step": 2000 }, { "epoch": 0.45, "learning_rate": 1.2102369291783302e-05, "loss": 0.4771, "step": 2001 }, { "epoch": 0.45, "learning_rate": 1.2095252471510187e-05, "loss": 0.4852, "step": 2002 }, { "epoch": 0.45, "learning_rate": 1.2088134541108658e-05, "loss": 0.4789, "step": 2003 }, { "epoch": 0.45, "learning_rate": 1.2081015504350025e-05, "loss": 0.4787, "step": 2004 }, { "epoch": 0.45, "learning_rate": 1.207389536500616e-05, "loss": 0.4604, "step": 2005 }, { "epoch": 0.45, "learning_rate": 1.206677412684953e-05, "loss": 0.4709, "step": 2006 }, { "epoch": 0.45, "learning_rate": 1.2059651793653188e-05, "loss": 0.506, "step": 2007 }, { "epoch": 0.45, "learning_rate": 1.2052528369190762e-05, "loss": 0.4485, "step": 2008 }, { "epoch": 0.45, "learning_rate": 1.204540385723645e-05, "loss": 0.489, "step": 2009 }, { "epoch": 0.45, "learning_rate": 1.2038278261565049e-05, "loss": 0.4794, "step": 2010 }, { "epoch": 0.45, "learning_rate": 1.2031151585951904e-05, "loss": 0.4781, "step": 2011 }, { "epoch": 0.45, "learning_rate": 1.202402383417295e-05, "loss": 0.496, "step": 2012 }, { "epoch": 0.45, "learning_rate": 1.2016895010004684e-05, "loss": 0.453, "step": 2013 }, { "epoch": 0.45, "learning_rate": 1.2009765117224177e-05, "loss": 0.4549, "step": 2014 }, { "epoch": 0.45, "learning_rate": 1.2002634159609058e-05, "loss": 0.4652, "step": 2015 }, { "epoch": 0.45, "learning_rate": 1.1995502140937529e-05, "loss": 0.4983, "step": 2016 }, { "epoch": 0.45, "learning_rate": 1.1988369064988348e-05, "loss": 0.4963, "step": 2017 }, { "epoch": 0.45, "learning_rate": 1.1981234935540836e-05, "loss": 0.4385, "step": 2018 }, { "epoch": 0.45, "learning_rate": 1.1974099756374874e-05, "loss": 0.4681, "step": 2019 }, { "epoch": 0.45, "learning_rate": 1.196696353127089e-05, "loss": 0.4522, "step": 2020 }, { "epoch": 0.45, "learning_rate": 1.1959826264009875e-05, "loss": 0.4209, "step": 2021 }, { "epoch": 0.45, "learning_rate": 1.1952687958373376e-05, "loss": 0.4711, "step": 2022 }, { "epoch": 0.45, "learning_rate": 1.1945548618143482e-05, "loss": 0.5002, "step": 2023 }, { "epoch": 0.45, "learning_rate": 1.1938408247102825e-05, "loss": 0.462, "step": 2024 }, { "epoch": 0.46, "learning_rate": 1.1931266849034597e-05, "loss": 0.4263, "step": 2025 }, { "epoch": 0.46, "learning_rate": 1.192412442772253e-05, "loss": 0.5009, "step": 2026 }, { "epoch": 0.46, "learning_rate": 1.1916980986950891e-05, "loss": 0.4629, "step": 2027 }, { "epoch": 0.46, "learning_rate": 1.1909836530504493e-05, "loss": 0.4485, "step": 2028 }, { "epoch": 0.46, "learning_rate": 1.1902691062168684e-05, "loss": 0.4269, "step": 2029 }, { "epoch": 0.46, "learning_rate": 1.1895544585729353e-05, "loss": 0.4566, "step": 2030 }, { "epoch": 0.46, "learning_rate": 1.1888397104972922e-05, "loss": 0.4642, "step": 2031 }, { "epoch": 0.46, "learning_rate": 1.188124862368634e-05, "loss": 0.4579, "step": 2032 }, { "epoch": 0.46, "learning_rate": 1.1874099145657088e-05, "loss": 0.4687, "step": 2033 }, { "epoch": 0.46, "learning_rate": 1.1866948674673182e-05, "loss": 0.4612, "step": 2034 }, { "epoch": 0.46, "learning_rate": 1.1859797214523157e-05, "loss": 0.4538, "step": 2035 }, { "epoch": 0.46, "learning_rate": 1.185264476899607e-05, "loss": 0.4446, "step": 2036 }, { "epoch": 0.46, "learning_rate": 1.1845491341881509e-05, "loss": 0.4588, "step": 2037 }, { "epoch": 0.46, "learning_rate": 1.1838336936969574e-05, "loss": 0.4563, "step": 2038 }, { "epoch": 0.46, "learning_rate": 1.1831181558050889e-05, "loss": 0.4575, "step": 2039 }, { "epoch": 0.46, "learning_rate": 1.1824025208916586e-05, "loss": 0.4567, "step": 2040 }, { "epoch": 0.46, "learning_rate": 1.181686789335832e-05, "loss": 0.4886, "step": 2041 }, { "epoch": 0.46, "learning_rate": 1.1809709615168252e-05, "loss": 0.4615, "step": 2042 }, { "epoch": 0.46, "learning_rate": 1.180255037813906e-05, "loss": 0.4675, "step": 2043 }, { "epoch": 0.46, "learning_rate": 1.1795390186063917e-05, "loss": 0.4733, "step": 2044 }, { "epoch": 0.46, "learning_rate": 1.1788229042736518e-05, "loss": 0.4512, "step": 2045 }, { "epoch": 0.46, "learning_rate": 1.1781066951951053e-05, "loss": 0.4447, "step": 2046 }, { "epoch": 0.46, "learning_rate": 1.1773903917502212e-05, "loss": 0.4642, "step": 2047 }, { "epoch": 0.46, "learning_rate": 1.1766739943185189e-05, "loss": 0.4756, "step": 2048 }, { "epoch": 0.46, "learning_rate": 1.1759575032795674e-05, "loss": 0.4483, "step": 2049 }, { "epoch": 0.46, "learning_rate": 1.175240919012986e-05, "loss": 0.4765, "step": 2050 }, { "epoch": 0.46, "learning_rate": 1.1745242418984424e-05, "loss": 0.4702, "step": 2051 }, { "epoch": 0.46, "learning_rate": 1.1738074723156536e-05, "loss": 0.4782, "step": 2052 }, { "epoch": 0.46, "learning_rate": 1.1730906106443865e-05, "loss": 0.4533, "step": 2053 }, { "epoch": 0.46, "learning_rate": 1.172373657264456e-05, "loss": 0.4411, "step": 2054 }, { "epoch": 0.46, "learning_rate": 1.1716566125557257e-05, "loss": 0.4905, "step": 2055 }, { "epoch": 0.46, "learning_rate": 1.1709394768981072e-05, "loss": 0.4779, "step": 2056 }, { "epoch": 0.46, "learning_rate": 1.1702222506715617e-05, "loss": 0.4776, "step": 2057 }, { "epoch": 0.46, "learning_rate": 1.1695049342560969e-05, "loss": 0.4848, "step": 2058 }, { "epoch": 0.46, "learning_rate": 1.1687875280317689e-05, "loss": 0.4795, "step": 2059 }, { "epoch": 0.46, "learning_rate": 1.1680700323786811e-05, "loss": 0.4527, "step": 2060 }, { "epoch": 0.46, "learning_rate": 1.1673524476769848e-05, "loss": 0.4761, "step": 2061 }, { "epoch": 0.46, "learning_rate": 1.1666347743068783e-05, "loss": 0.4667, "step": 2062 }, { "epoch": 0.46, "learning_rate": 1.1659170126486064e-05, "loss": 0.4537, "step": 2063 }, { "epoch": 0.46, "learning_rate": 1.1651991630824608e-05, "loss": 0.4707, "step": 2064 }, { "epoch": 0.46, "learning_rate": 1.1644812259887808e-05, "loss": 0.4719, "step": 2065 }, { "epoch": 0.46, "learning_rate": 1.1637632017479508e-05, "loss": 0.4629, "step": 2066 }, { "epoch": 0.46, "learning_rate": 1.163045090740402e-05, "loss": 0.4853, "step": 2067 }, { "epoch": 0.46, "learning_rate": 1.1623268933466107e-05, "loss": 0.4555, "step": 2068 }, { "epoch": 0.46, "learning_rate": 1.161608609947101e-05, "loss": 0.4638, "step": 2069 }, { "epoch": 0.47, "learning_rate": 1.1608902409224402e-05, "loss": 0.4894, "step": 2070 }, { "epoch": 0.47, "learning_rate": 1.1601717866532428e-05, "loss": 0.4495, "step": 2071 }, { "epoch": 0.47, "learning_rate": 1.1594532475201668e-05, "loss": 0.4746, "step": 2072 }, { "epoch": 0.47, "learning_rate": 1.1587346239039168e-05, "loss": 0.4869, "step": 2073 }, { "epoch": 0.47, "learning_rate": 1.1580159161852413e-05, "loss": 0.4663, "step": 2074 }, { "epoch": 0.47, "learning_rate": 1.1572971247449334e-05, "loss": 0.465, "step": 2075 }, { "epoch": 0.47, "learning_rate": 1.1565782499638302e-05, "loss": 0.4887, "step": 2076 }, { "epoch": 0.47, "learning_rate": 1.1558592922228143e-05, "loss": 0.4597, "step": 2077 }, { "epoch": 0.47, "learning_rate": 1.1551402519028105e-05, "loss": 0.4779, "step": 2078 }, { "epoch": 0.47, "learning_rate": 1.1544211293847886e-05, "loss": 0.5053, "step": 2079 }, { "epoch": 0.47, "learning_rate": 1.1537019250497616e-05, "loss": 0.4675, "step": 2080 }, { "epoch": 0.47, "learning_rate": 1.152982639278786e-05, "loss": 0.4568, "step": 2081 }, { "epoch": 0.47, "learning_rate": 1.1522632724529602e-05, "loss": 0.4535, "step": 2082 }, { "epoch": 0.47, "learning_rate": 1.151543824953428e-05, "loss": 0.4657, "step": 2083 }, { "epoch": 0.47, "learning_rate": 1.1508242971613741e-05, "loss": 0.4951, "step": 2084 }, { "epoch": 0.47, "learning_rate": 1.1501046894580254e-05, "loss": 0.468, "step": 2085 }, { "epoch": 0.47, "learning_rate": 1.149385002224653e-05, "loss": 0.4707, "step": 2086 }, { "epoch": 0.47, "learning_rate": 1.1486652358425683e-05, "loss": 0.4397, "step": 2087 }, { "epoch": 0.47, "learning_rate": 1.1479453906931255e-05, "loss": 0.4641, "step": 2088 }, { "epoch": 0.47, "learning_rate": 1.147225467157721e-05, "loss": 0.4677, "step": 2089 }, { "epoch": 0.47, "learning_rate": 1.1465054656177917e-05, "loss": 0.4656, "step": 2090 }, { "epoch": 0.47, "learning_rate": 1.1457853864548161e-05, "loss": 0.4647, "step": 2091 }, { "epoch": 0.47, "learning_rate": 1.1450652300503141e-05, "loss": 0.4783, "step": 2092 }, { "epoch": 0.47, "learning_rate": 1.144344996785847e-05, "loss": 0.4605, "step": 2093 }, { "epoch": 0.47, "learning_rate": 1.1436246870430157e-05, "loss": 0.44, "step": 2094 }, { "epoch": 0.47, "learning_rate": 1.1429043012034622e-05, "loss": 0.4654, "step": 2095 }, { "epoch": 0.47, "learning_rate": 1.1421838396488691e-05, "loss": 0.4294, "step": 2096 }, { "epoch": 0.47, "learning_rate": 1.1414633027609585e-05, "loss": 0.476, "step": 2097 }, { "epoch": 0.47, "learning_rate": 1.1407426909214932e-05, "loss": 0.4476, "step": 2098 }, { "epoch": 0.47, "learning_rate": 1.1400220045122746e-05, "loss": 0.4617, "step": 2099 }, { "epoch": 0.47, "learning_rate": 1.1393012439151445e-05, "loss": 0.4737, "step": 2100 }, { "epoch": 0.47, "learning_rate": 1.1385804095119841e-05, "loss": 0.4391, "step": 2101 }, { "epoch": 0.47, "learning_rate": 1.137859501684713e-05, "loss": 0.4453, "step": 2102 }, { "epoch": 0.47, "learning_rate": 1.13713852081529e-05, "loss": 0.4654, "step": 2103 }, { "epoch": 0.47, "learning_rate": 1.1364174672857131e-05, "loss": 0.4388, "step": 2104 }, { "epoch": 0.47, "learning_rate": 1.1356963414780183e-05, "loss": 0.4736, "step": 2105 }, { "epoch": 0.47, "learning_rate": 1.1349751437742794e-05, "loss": 0.468, "step": 2106 }, { "epoch": 0.47, "learning_rate": 1.134253874556609e-05, "loss": 0.4635, "step": 2107 }, { "epoch": 0.47, "learning_rate": 1.1335325342071579e-05, "loss": 0.4601, "step": 2108 }, { "epoch": 0.47, "learning_rate": 1.132811123108114e-05, "loss": 0.4559, "step": 2109 }, { "epoch": 0.47, "learning_rate": 1.1320896416417026e-05, "loss": 0.4552, "step": 2110 }, { "epoch": 0.47, "learning_rate": 1.1313680901901863e-05, "loss": 0.4997, "step": 2111 }, { "epoch": 0.47, "learning_rate": 1.1306464691358657e-05, "loss": 0.4657, "step": 2112 }, { "epoch": 0.47, "learning_rate": 1.1299247788610768e-05, "loss": 0.4329, "step": 2113 }, { "epoch": 0.48, "learning_rate": 1.1292030197481935e-05, "loss": 0.5021, "step": 2114 }, { "epoch": 0.48, "learning_rate": 1.1284811921796252e-05, "loss": 0.4782, "step": 2115 }, { "epoch": 0.48, "learning_rate": 1.127759296537819e-05, "loss": 0.4831, "step": 2116 }, { "epoch": 0.48, "learning_rate": 1.1270373332052563e-05, "loss": 0.4478, "step": 2117 }, { "epoch": 0.48, "learning_rate": 1.1263153025644555e-05, "loss": 0.4441, "step": 2118 }, { "epoch": 0.48, "learning_rate": 1.12559320499797e-05, "loss": 0.4686, "step": 2119 }, { "epoch": 0.48, "learning_rate": 1.1248710408883898e-05, "loss": 0.4788, "step": 2120 }, { "epoch": 0.48, "learning_rate": 1.1241488106183389e-05, "loss": 0.4208, "step": 2121 }, { "epoch": 0.48, "learning_rate": 1.1234265145704766e-05, "loss": 0.5005, "step": 2122 }, { "epoch": 0.48, "learning_rate": 1.1227041531274978e-05, "loss": 0.4776, "step": 2123 }, { "epoch": 0.48, "learning_rate": 1.1219817266721314e-05, "loss": 0.4537, "step": 2124 }, { "epoch": 0.48, "learning_rate": 1.1212592355871406e-05, "loss": 0.4766, "step": 2125 }, { "epoch": 0.48, "learning_rate": 1.1205366802553231e-05, "loss": 0.4593, "step": 2126 }, { "epoch": 0.48, "learning_rate": 1.1198140610595111e-05, "loss": 0.4625, "step": 2127 }, { "epoch": 0.48, "learning_rate": 1.11909137838257e-05, "loss": 0.4813, "step": 2128 }, { "epoch": 0.48, "learning_rate": 1.118368632607399e-05, "loss": 0.4308, "step": 2129 }, { "epoch": 0.48, "learning_rate": 1.1176458241169308e-05, "loss": 0.4533, "step": 2130 }, { "epoch": 0.48, "learning_rate": 1.1169229532941313e-05, "loss": 0.4778, "step": 2131 }, { "epoch": 0.48, "learning_rate": 1.1162000205219992e-05, "loss": 0.4761, "step": 2132 }, { "epoch": 0.48, "learning_rate": 1.1154770261835667e-05, "loss": 0.4747, "step": 2133 }, { "epoch": 0.48, "learning_rate": 1.1147539706618976e-05, "loss": 0.4491, "step": 2134 }, { "epoch": 0.48, "learning_rate": 1.1140308543400891e-05, "loss": 0.4626, "step": 2135 }, { "epoch": 0.48, "learning_rate": 1.1133076776012702e-05, "loss": 0.4504, "step": 2136 }, { "epoch": 0.48, "learning_rate": 1.1125844408286015e-05, "loss": 0.435, "step": 2137 }, { "epoch": 0.48, "learning_rate": 1.1118611444052761e-05, "loss": 0.4752, "step": 2138 }, { "epoch": 0.48, "learning_rate": 1.1111377887145186e-05, "loss": 0.4624, "step": 2139 }, { "epoch": 0.48, "learning_rate": 1.1104143741395845e-05, "loss": 0.4859, "step": 2140 }, { "epoch": 0.48, "learning_rate": 1.109690901063761e-05, "loss": 0.4644, "step": 2141 }, { "epoch": 0.48, "learning_rate": 1.108967369870366e-05, "loss": 0.4724, "step": 2142 }, { "epoch": 0.48, "learning_rate": 1.1082437809427485e-05, "loss": 0.4506, "step": 2143 }, { "epoch": 0.48, "learning_rate": 1.1075201346642875e-05, "loss": 0.4575, "step": 2144 }, { "epoch": 0.48, "learning_rate": 1.1067964314183932e-05, "loss": 0.4401, "step": 2145 }, { "epoch": 0.48, "learning_rate": 1.1060726715885052e-05, "loss": 0.4745, "step": 2146 }, { "epoch": 0.48, "learning_rate": 1.1053488555580939e-05, "loss": 0.4337, "step": 2147 }, { "epoch": 0.48, "learning_rate": 1.1046249837106587e-05, "loss": 0.4714, "step": 2148 }, { "epoch": 0.48, "learning_rate": 1.1039010564297288e-05, "loss": 0.4661, "step": 2149 }, { "epoch": 0.48, "learning_rate": 1.103177074098863e-05, "loss": 0.4722, "step": 2150 }, { "epoch": 0.48, "learning_rate": 1.1024530371016494e-05, "loss": 0.4571, "step": 2151 }, { "epoch": 0.48, "learning_rate": 1.1017289458217042e-05, "loss": 0.45, "step": 2152 }, { "epoch": 0.48, "learning_rate": 1.1010048006426735e-05, "loss": 0.4593, "step": 2153 }, { "epoch": 0.48, "learning_rate": 1.100280601948231e-05, "loss": 0.4635, "step": 2154 }, { "epoch": 0.48, "learning_rate": 1.0995563501220797e-05, "loss": 0.4842, "step": 2155 }, { "epoch": 0.48, "learning_rate": 1.0988320455479498e-05, "loss": 0.4746, "step": 2156 }, { "epoch": 0.48, "learning_rate": 1.0981076886095998e-05, "loss": 0.4256, "step": 2157 }, { "epoch": 0.48, "learning_rate": 1.0973832796908162e-05, "loss": 0.4476, "step": 2158 }, { "epoch": 0.49, "learning_rate": 1.0966588191754129e-05, "loss": 0.4748, "step": 2159 }, { "epoch": 0.49, "learning_rate": 1.095934307447231e-05, "loss": 0.4779, "step": 2160 }, { "epoch": 0.49, "learning_rate": 1.0952097448901387e-05, "loss": 0.4806, "step": 2161 }, { "epoch": 0.49, "learning_rate": 1.0944851318880314e-05, "loss": 0.4924, "step": 2162 }, { "epoch": 0.49, "learning_rate": 1.0937604688248313e-05, "loss": 0.4796, "step": 2163 }, { "epoch": 0.49, "learning_rate": 1.0930357560844862e-05, "loss": 0.437, "step": 2164 }, { "epoch": 0.49, "learning_rate": 1.0923109940509718e-05, "loss": 0.4675, "step": 2165 }, { "epoch": 0.49, "learning_rate": 1.0915861831082887e-05, "loss": 0.4675, "step": 2166 }, { "epoch": 0.49, "learning_rate": 1.0908613236404635e-05, "loss": 0.4695, "step": 2167 }, { "epoch": 0.49, "learning_rate": 1.0901364160315488e-05, "loss": 0.4634, "step": 2168 }, { "epoch": 0.49, "learning_rate": 1.089411460665623e-05, "loss": 0.4632, "step": 2169 }, { "epoch": 0.49, "learning_rate": 1.0886864579267893e-05, "loss": 0.4874, "step": 2170 }, { "epoch": 0.49, "learning_rate": 1.0879614081991761e-05, "loss": 0.4762, "step": 2171 }, { "epoch": 0.49, "learning_rate": 1.0872363118669368e-05, "loss": 0.4353, "step": 2172 }, { "epoch": 0.49, "learning_rate": 1.0865111693142496e-05, "loss": 0.4615, "step": 2173 }, { "epoch": 0.49, "learning_rate": 1.0857859809253168e-05, "loss": 0.4574, "step": 2174 }, { "epoch": 0.49, "learning_rate": 1.0850607470843657e-05, "loss": 0.4707, "step": 2175 }, { "epoch": 0.49, "learning_rate": 1.0843354681756468e-05, "loss": 0.4486, "step": 2176 }, { "epoch": 0.49, "learning_rate": 1.0836101445834349e-05, "loss": 0.4632, "step": 2177 }, { "epoch": 0.49, "learning_rate": 1.0828847766920285e-05, "loss": 0.4727, "step": 2178 }, { "epoch": 0.49, "learning_rate": 1.08215936488575e-05, "loss": 0.4385, "step": 2179 }, { "epoch": 0.49, "learning_rate": 1.0814339095489442e-05, "loss": 0.4565, "step": 2180 }, { "epoch": 0.49, "learning_rate": 1.0807084110659789e-05, "loss": 0.4433, "step": 2181 }, { "epoch": 0.49, "learning_rate": 1.079982869821246e-05, "loss": 0.4497, "step": 2182 }, { "epoch": 0.49, "learning_rate": 1.0792572861991592e-05, "loss": 0.458, "step": 2183 }, { "epoch": 0.49, "learning_rate": 1.0785316605841544e-05, "loss": 0.4804, "step": 2184 }, { "epoch": 0.49, "learning_rate": 1.0778059933606903e-05, "loss": 0.4771, "step": 2185 }, { "epoch": 0.49, "learning_rate": 1.0770802849132475e-05, "loss": 0.4762, "step": 2186 }, { "epoch": 0.49, "learning_rate": 1.0763545356263285e-05, "loss": 0.4724, "step": 2187 }, { "epoch": 0.49, "learning_rate": 1.075628745884457e-05, "loss": 0.4655, "step": 2188 }, { "epoch": 0.49, "learning_rate": 1.0749029160721782e-05, "loss": 0.4613, "step": 2189 }, { "epoch": 0.49, "learning_rate": 1.0741770465740592e-05, "loss": 0.476, "step": 2190 }, { "epoch": 0.49, "learning_rate": 1.0734511377746881e-05, "loss": 0.4496, "step": 2191 }, { "epoch": 0.49, "learning_rate": 1.0727251900586728e-05, "loss": 0.458, "step": 2192 }, { "epoch": 0.49, "learning_rate": 1.0719992038106424e-05, "loss": 0.4572, "step": 2193 }, { "epoch": 0.49, "learning_rate": 1.0712731794152468e-05, "loss": 0.4383, "step": 2194 }, { "epoch": 0.49, "learning_rate": 1.070547117257156e-05, "loss": 0.4491, "step": 2195 }, { "epoch": 0.49, "learning_rate": 1.0698210177210593e-05, "loss": 0.4721, "step": 2196 }, { "epoch": 0.49, "learning_rate": 1.0690948811916661e-05, "loss": 0.4651, "step": 2197 }, { "epoch": 0.49, "learning_rate": 1.0683687080537063e-05, "loss": 0.4393, "step": 2198 }, { "epoch": 0.49, "learning_rate": 1.0676424986919282e-05, "loss": 0.4505, "step": 2199 }, { "epoch": 0.49, "learning_rate": 1.0669162534910995e-05, "loss": 0.4873, "step": 2200 }, { "epoch": 0.49, "learning_rate": 1.0661899728360068e-05, "loss": 0.4865, "step": 2201 }, { "epoch": 0.49, "learning_rate": 1.065463657111456e-05, "loss": 0.4761, "step": 2202 }, { "epoch": 0.5, "learning_rate": 1.0647373067022712e-05, "loss": 0.4693, "step": 2203 }, { "epoch": 0.5, "learning_rate": 1.0640109219932946e-05, "loss": 0.4548, "step": 2204 }, { "epoch": 0.5, "learning_rate": 1.063284503369387e-05, "loss": 0.484, "step": 2205 }, { "epoch": 0.5, "learning_rate": 1.0625580512154273e-05, "loss": 0.4557, "step": 2206 }, { "epoch": 0.5, "learning_rate": 1.0618315659163119e-05, "loss": 0.4286, "step": 2207 }, { "epoch": 0.5, "learning_rate": 1.061105047856954e-05, "loss": 0.4958, "step": 2208 }, { "epoch": 0.5, "learning_rate": 1.0603784974222862e-05, "loss": 0.4427, "step": 2209 }, { "epoch": 0.5, "learning_rate": 1.0596519149972562e-05, "loss": 0.4543, "step": 2210 }, { "epoch": 0.5, "learning_rate": 1.0589253009668295e-05, "loss": 0.443, "step": 2211 }, { "epoch": 0.5, "learning_rate": 1.0581986557159882e-05, "loss": 0.4985, "step": 2212 }, { "epoch": 0.5, "learning_rate": 1.0574719796297314e-05, "loss": 0.4665, "step": 2213 }, { "epoch": 0.5, "learning_rate": 1.0567452730930743e-05, "loss": 0.4269, "step": 2214 }, { "epoch": 0.5, "learning_rate": 1.0560185364910474e-05, "loss": 0.4255, "step": 2215 }, { "epoch": 0.5, "learning_rate": 1.055291770208698e-05, "loss": 0.4507, "step": 2216 }, { "epoch": 0.5, "learning_rate": 1.0545649746310896e-05, "loss": 0.4802, "step": 2217 }, { "epoch": 0.5, "learning_rate": 1.0538381501432998e-05, "loss": 0.4557, "step": 2218 }, { "epoch": 0.5, "learning_rate": 1.053111297130423e-05, "loss": 0.4601, "step": 2219 }, { "epoch": 0.5, "learning_rate": 1.0523844159775672e-05, "loss": 0.459, "step": 2220 }, { "epoch": 0.5, "learning_rate": 1.0516575070698567e-05, "loss": 0.4519, "step": 2221 }, { "epoch": 0.5, "learning_rate": 1.0509305707924298e-05, "loss": 0.4297, "step": 2222 }, { "epoch": 0.5, "learning_rate": 1.0502036075304394e-05, "loss": 0.4713, "step": 2223 }, { "epoch": 0.5, "learning_rate": 1.0494766176690526e-05, "loss": 0.4358, "step": 2224 }, { "epoch": 0.5, "learning_rate": 1.0487496015934509e-05, "loss": 0.447, "step": 2225 }, { "epoch": 0.5, "learning_rate": 1.0480225596888294e-05, "loss": 0.4343, "step": 2226 }, { "epoch": 0.5, "learning_rate": 1.047295492340397e-05, "loss": 0.4787, "step": 2227 }, { "epoch": 0.5, "learning_rate": 1.0465683999333758e-05, "loss": 0.4761, "step": 2228 }, { "epoch": 0.5, "learning_rate": 1.045841282853002e-05, "loss": 0.4435, "step": 2229 }, { "epoch": 0.5, "learning_rate": 1.0451141414845241e-05, "loss": 0.4558, "step": 2230 }, { "epoch": 0.5, "learning_rate": 1.0443869762132035e-05, "loss": 0.48, "step": 2231 }, { "epoch": 0.5, "learning_rate": 1.0436597874243143e-05, "loss": 0.4857, "step": 2232 }, { "epoch": 0.5, "learning_rate": 1.042932575503144e-05, "loss": 0.4626, "step": 2233 }, { "epoch": 0.5, "learning_rate": 1.0422053408349908e-05, "loss": 0.487, "step": 2234 }, { "epoch": 0.5, "learning_rate": 1.041478083805166e-05, "loss": 0.4814, "step": 2235 }, { "epoch": 0.5, "learning_rate": 1.0407508047989922e-05, "loss": 0.4947, "step": 2236 }, { "epoch": 0.5, "learning_rate": 1.0400235042018043e-05, "loss": 0.4652, "step": 2237 }, { "epoch": 0.5, "learning_rate": 1.0392961823989482e-05, "loss": 0.4361, "step": 2238 }, { "epoch": 0.5, "learning_rate": 1.0385688397757809e-05, "loss": 0.4397, "step": 2239 }, { "epoch": 0.5, "learning_rate": 1.0378414767176706e-05, "loss": 0.4624, "step": 2240 }, { "epoch": 0.5, "learning_rate": 1.0371140936099963e-05, "loss": 0.4778, "step": 2241 }, { "epoch": 0.5, "learning_rate": 1.0363866908381481e-05, "loss": 0.4685, "step": 2242 }, { "epoch": 0.5, "learning_rate": 1.0356592687875255e-05, "loss": 0.4506, "step": 2243 }, { "epoch": 0.5, "learning_rate": 1.0349318278435392e-05, "loss": 0.4699, "step": 2244 }, { "epoch": 0.5, "learning_rate": 1.0342043683916094e-05, "loss": 0.4501, "step": 2245 }, { "epoch": 0.5, "learning_rate": 1.0334768908171664e-05, "loss": 0.4622, "step": 2246 }, { "epoch": 0.5, "learning_rate": 1.0327493955056494e-05, "loss": 0.4617, "step": 2247 }, { "epoch": 0.51, "learning_rate": 1.032021882842508e-05, "loss": 0.4536, "step": 2248 }, { "epoch": 0.51, "learning_rate": 1.0312943532132003e-05, "loss": 0.4992, "step": 2249 }, { "epoch": 0.51, "learning_rate": 1.0305668070031933e-05, "loss": 0.4435, "step": 2250 }, { "epoch": 0.51, "learning_rate": 1.029839244597964e-05, "loss": 0.4412, "step": 2251 }, { "epoch": 0.51, "learning_rate": 1.0291116663829962e-05, "loss": 0.4516, "step": 2252 }, { "epoch": 0.51, "learning_rate": 1.0283840727437832e-05, "loss": 0.4779, "step": 2253 }, { "epoch": 0.51, "learning_rate": 1.0276564640658265e-05, "loss": 0.4932, "step": 2254 }, { "epoch": 0.51, "learning_rate": 1.0269288407346348e-05, "loss": 0.4358, "step": 2255 }, { "epoch": 0.51, "learning_rate": 1.0262012031357252e-05, "loss": 0.4846, "step": 2256 }, { "epoch": 0.51, "learning_rate": 1.0254735516546224e-05, "loss": 0.4498, "step": 2257 }, { "epoch": 0.51, "learning_rate": 1.0247458866768581e-05, "loss": 0.4715, "step": 2258 }, { "epoch": 0.51, "learning_rate": 1.0240182085879713e-05, "loss": 0.4369, "step": 2259 }, { "epoch": 0.51, "learning_rate": 1.023290517773508e-05, "loss": 0.4749, "step": 2260 }, { "epoch": 0.51, "learning_rate": 1.022562814619021e-05, "loss": 0.4414, "step": 2261 }, { "epoch": 0.51, "learning_rate": 1.0218350995100695e-05, "loss": 0.4639, "step": 2262 }, { "epoch": 0.51, "learning_rate": 1.0211073728322185e-05, "loss": 0.4698, "step": 2263 }, { "epoch": 0.51, "learning_rate": 1.0203796349710406e-05, "loss": 0.4638, "step": 2264 }, { "epoch": 0.51, "learning_rate": 1.0196518863121129e-05, "loss": 0.4421, "step": 2265 }, { "epoch": 0.51, "learning_rate": 1.0189241272410191e-05, "loss": 0.4326, "step": 2266 }, { "epoch": 0.51, "learning_rate": 1.0181963581433477e-05, "loss": 0.4601, "step": 2267 }, { "epoch": 0.51, "learning_rate": 1.0174685794046928e-05, "loss": 0.4675, "step": 2268 }, { "epoch": 0.51, "learning_rate": 1.0167407914106541e-05, "loss": 0.4377, "step": 2269 }, { "epoch": 0.51, "learning_rate": 1.0160129945468358e-05, "loss": 0.4675, "step": 2270 }, { "epoch": 0.51, "learning_rate": 1.015285189198846e-05, "loss": 0.4666, "step": 2271 }, { "epoch": 0.51, "learning_rate": 1.014557375752299e-05, "loss": 0.49, "step": 2272 }, { "epoch": 0.51, "learning_rate": 1.013829554592812e-05, "loss": 0.4785, "step": 2273 }, { "epoch": 0.51, "learning_rate": 1.0131017261060072e-05, "loss": 0.4794, "step": 2274 }, { "epoch": 0.51, "learning_rate": 1.0123738906775094e-05, "loss": 0.4455, "step": 2275 }, { "epoch": 0.51, "learning_rate": 1.011646048692949e-05, "loss": 0.425, "step": 2276 }, { "epoch": 0.51, "learning_rate": 1.0109182005379578e-05, "loss": 0.4783, "step": 2277 }, { "epoch": 0.51, "learning_rate": 1.0101903465981724e-05, "loss": 0.4036, "step": 2278 }, { "epoch": 0.51, "learning_rate": 1.0094624872592318e-05, "loss": 0.4506, "step": 2279 }, { "epoch": 0.51, "learning_rate": 1.008734622906778e-05, "loss": 0.446, "step": 2280 }, { "epoch": 0.51, "learning_rate": 1.0080067539264558e-05, "loss": 0.4193, "step": 2281 }, { "epoch": 0.51, "learning_rate": 1.0072788807039123e-05, "loss": 0.435, "step": 2282 }, { "epoch": 0.51, "learning_rate": 1.0065510036247961e-05, "loss": 0.4688, "step": 2283 }, { "epoch": 0.51, "learning_rate": 1.0058231230747597e-05, "loss": 0.4369, "step": 2284 }, { "epoch": 0.51, "learning_rate": 1.0050952394394557e-05, "loss": 0.4662, "step": 2285 }, { "epoch": 0.51, "learning_rate": 1.004367353104539e-05, "loss": 0.4541, "step": 2286 }, { "epoch": 0.51, "learning_rate": 1.0036394644556658e-05, "loss": 0.469, "step": 2287 }, { "epoch": 0.51, "learning_rate": 1.002911573878494e-05, "loss": 0.4598, "step": 2288 }, { "epoch": 0.51, "learning_rate": 1.0021836817586819e-05, "loss": 0.4582, "step": 2289 }, { "epoch": 0.51, "learning_rate": 1.001455788481889e-05, "loss": 0.4578, "step": 2290 }, { "epoch": 0.51, "learning_rate": 1.0007278944337746e-05, "loss": 0.4788, "step": 2291 }, { "epoch": 0.52, "learning_rate": 1e-05, "loss": 0.4685, "step": 2292 }, { "epoch": 0.52, "learning_rate": 9.992721055662256e-06, "loss": 0.4309, "step": 2293 }, { "epoch": 0.52, "learning_rate": 9.985442115181117e-06, "loss": 0.4388, "step": 2294 }, { "epoch": 0.52, "learning_rate": 9.978163182413186e-06, "loss": 0.4544, "step": 2295 }, { "epoch": 0.52, "learning_rate": 9.97088426121506e-06, "loss": 0.4331, "step": 2296 }, { "epoch": 0.52, "learning_rate": 9.963605355443342e-06, "loss": 0.4644, "step": 2297 }, { "epoch": 0.52, "learning_rate": 9.956326468954612e-06, "loss": 0.4273, "step": 2298 }, { "epoch": 0.52, "learning_rate": 9.949047605605446e-06, "loss": 0.4345, "step": 2299 }, { "epoch": 0.52, "learning_rate": 9.941768769252406e-06, "loss": 0.437, "step": 2300 }, { "epoch": 0.52, "learning_rate": 9.93448996375204e-06, "loss": 0.4425, "step": 2301 }, { "epoch": 0.52, "learning_rate": 9.927211192960884e-06, "loss": 0.449, "step": 2302 }, { "epoch": 0.52, "learning_rate": 9.919932460735446e-06, "loss": 0.4527, "step": 2303 }, { "epoch": 0.52, "learning_rate": 9.91265377093222e-06, "loss": 0.4661, "step": 2304 }, { "epoch": 0.52, "learning_rate": 9.905375127407682e-06, "loss": 0.4358, "step": 2305 }, { "epoch": 0.52, "learning_rate": 9.898096534018278e-06, "loss": 0.4616, "step": 2306 }, { "epoch": 0.52, "learning_rate": 9.890817994620425e-06, "loss": 0.4533, "step": 2307 }, { "epoch": 0.52, "learning_rate": 9.883539513070515e-06, "loss": 0.4694, "step": 2308 }, { "epoch": 0.52, "learning_rate": 9.876261093224908e-06, "loss": 0.4461, "step": 2309 }, { "epoch": 0.52, "learning_rate": 9.868982738939934e-06, "loss": 0.4341, "step": 2310 }, { "epoch": 0.52, "learning_rate": 9.861704454071883e-06, "loss": 0.4627, "step": 2311 }, { "epoch": 0.52, "learning_rate": 9.85442624247701e-06, "loss": 0.4517, "step": 2312 }, { "epoch": 0.52, "learning_rate": 9.84714810801154e-06, "loss": 0.4618, "step": 2313 }, { "epoch": 0.52, "learning_rate": 9.839870054531646e-06, "loss": 0.4633, "step": 2314 }, { "epoch": 0.52, "learning_rate": 9.83259208589346e-06, "loss": 0.4585, "step": 2315 }, { "epoch": 0.52, "learning_rate": 9.825314205953073e-06, "loss": 0.4524, "step": 2316 }, { "epoch": 0.52, "learning_rate": 9.818036418566528e-06, "loss": 0.4612, "step": 2317 }, { "epoch": 0.52, "learning_rate": 9.810758727589814e-06, "loss": 0.4438, "step": 2318 }, { "epoch": 0.52, "learning_rate": 9.803481136878872e-06, "loss": 0.4521, "step": 2319 }, { "epoch": 0.52, "learning_rate": 9.796203650289594e-06, "loss": 0.4681, "step": 2320 }, { "epoch": 0.52, "learning_rate": 9.788926271677815e-06, "loss": 0.4775, "step": 2321 }, { "epoch": 0.52, "learning_rate": 9.78164900489931e-06, "loss": 0.4566, "step": 2322 }, { "epoch": 0.52, "learning_rate": 9.774371853809793e-06, "loss": 0.4715, "step": 2323 }, { "epoch": 0.52, "learning_rate": 9.767094822264923e-06, "loss": 0.4456, "step": 2324 }, { "epoch": 0.52, "learning_rate": 9.75981791412029e-06, "loss": 0.4645, "step": 2325 }, { "epoch": 0.52, "learning_rate": 9.75254113323142e-06, "loss": 0.4632, "step": 2326 }, { "epoch": 0.52, "learning_rate": 9.74526448345378e-06, "loss": 0.4458, "step": 2327 }, { "epoch": 0.52, "learning_rate": 9.73798796864275e-06, "loss": 0.4507, "step": 2328 }, { "epoch": 0.52, "learning_rate": 9.730711592653656e-06, "loss": 0.457, "step": 2329 }, { "epoch": 0.52, "learning_rate": 9.723435359341739e-06, "loss": 0.4693, "step": 2330 }, { "epoch": 0.52, "learning_rate": 9.716159272562171e-06, "loss": 0.4519, "step": 2331 }, { "epoch": 0.52, "learning_rate": 9.70888333617004e-06, "loss": 0.4461, "step": 2332 }, { "epoch": 0.52, "learning_rate": 9.701607554020364e-06, "loss": 0.4485, "step": 2333 }, { "epoch": 0.52, "learning_rate": 9.694331929968069e-06, "loss": 0.457, "step": 2334 }, { "epoch": 0.52, "learning_rate": 9.687056467868002e-06, "loss": 0.4199, "step": 2335 }, { "epoch": 0.52, "learning_rate": 9.679781171574924e-06, "loss": 0.4471, "step": 2336 }, { "epoch": 0.53, "learning_rate": 9.67250604494351e-06, "loss": 0.485, "step": 2337 }, { "epoch": 0.53, "learning_rate": 9.66523109182834e-06, "loss": 0.482, "step": 2338 }, { "epoch": 0.53, "learning_rate": 9.657956316083908e-06, "loss": 0.4581, "step": 2339 }, { "epoch": 0.53, "learning_rate": 9.650681721564612e-06, "loss": 0.4308, "step": 2340 }, { "epoch": 0.53, "learning_rate": 9.643407312124749e-06, "loss": 0.4602, "step": 2341 }, { "epoch": 0.53, "learning_rate": 9.636133091618525e-06, "loss": 0.4523, "step": 2342 }, { "epoch": 0.53, "learning_rate": 9.628859063900038e-06, "loss": 0.4563, "step": 2343 }, { "epoch": 0.53, "learning_rate": 9.6215852328233e-06, "loss": 0.4511, "step": 2344 }, { "epoch": 0.53, "learning_rate": 9.614311602242195e-06, "loss": 0.4209, "step": 2345 }, { "epoch": 0.53, "learning_rate": 9.607038176010521e-06, "loss": 0.4259, "step": 2346 }, { "epoch": 0.53, "learning_rate": 9.59976495798196e-06, "loss": 0.4659, "step": 2347 }, { "epoch": 0.53, "learning_rate": 9.592491952010081e-06, "loss": 0.444, "step": 2348 }, { "epoch": 0.53, "learning_rate": 9.585219161948345e-06, "loss": 0.4555, "step": 2349 }, { "epoch": 0.53, "learning_rate": 9.577946591650097e-06, "loss": 0.4856, "step": 2350 }, { "epoch": 0.53, "learning_rate": 9.570674244968561e-06, "loss": 0.442, "step": 2351 }, { "epoch": 0.53, "learning_rate": 9.563402125756855e-06, "loss": 0.4263, "step": 2352 }, { "epoch": 0.53, "learning_rate": 9.556130237867967e-06, "loss": 0.4377, "step": 2353 }, { "epoch": 0.53, "learning_rate": 9.54885858515476e-06, "loss": 0.4633, "step": 2354 }, { "epoch": 0.53, "learning_rate": 9.541587171469982e-06, "loss": 0.4873, "step": 2355 }, { "epoch": 0.53, "learning_rate": 9.534316000666244e-06, "loss": 0.4507, "step": 2356 }, { "epoch": 0.53, "learning_rate": 9.527045076596036e-06, "loss": 0.4717, "step": 2357 }, { "epoch": 0.53, "learning_rate": 9.519774403111711e-06, "loss": 0.4326, "step": 2358 }, { "epoch": 0.53, "learning_rate": 9.512503984065493e-06, "loss": 0.4903, "step": 2359 }, { "epoch": 0.53, "learning_rate": 9.505233823309476e-06, "loss": 0.4597, "step": 2360 }, { "epoch": 0.53, "learning_rate": 9.49796392469561e-06, "loss": 0.4615, "step": 2361 }, { "epoch": 0.53, "learning_rate": 9.490694292075706e-06, "loss": 0.4788, "step": 2362 }, { "epoch": 0.53, "learning_rate": 9.483424929301436e-06, "loss": 0.4727, "step": 2363 }, { "epoch": 0.53, "learning_rate": 9.476155840224331e-06, "loss": 0.4457, "step": 2364 }, { "epoch": 0.53, "learning_rate": 9.468887028695775e-06, "loss": 0.4659, "step": 2365 }, { "epoch": 0.53, "learning_rate": 9.461618498567005e-06, "loss": 0.4558, "step": 2366 }, { "epoch": 0.53, "learning_rate": 9.454350253689106e-06, "loss": 0.4225, "step": 2367 }, { "epoch": 0.53, "learning_rate": 9.44708229791302e-06, "loss": 0.4528, "step": 2368 }, { "epoch": 0.53, "learning_rate": 9.43981463508953e-06, "loss": 0.4599, "step": 2369 }, { "epoch": 0.53, "learning_rate": 9.43254726906926e-06, "loss": 0.4682, "step": 2370 }, { "epoch": 0.53, "learning_rate": 9.425280203702689e-06, "loss": 0.4249, "step": 2371 }, { "epoch": 0.53, "learning_rate": 9.41801344284012e-06, "loss": 0.4372, "step": 2372 }, { "epoch": 0.53, "learning_rate": 9.41074699033171e-06, "loss": 0.4677, "step": 2373 }, { "epoch": 0.53, "learning_rate": 9.403480850027443e-06, "loss": 0.4575, "step": 2374 }, { "epoch": 0.53, "learning_rate": 9.39621502577714e-06, "loss": 0.4554, "step": 2375 }, { "epoch": 0.53, "learning_rate": 9.38894952143046e-06, "loss": 0.468, "step": 2376 }, { "epoch": 0.53, "learning_rate": 9.381684340836884e-06, "loss": 0.427, "step": 2377 }, { "epoch": 0.53, "learning_rate": 9.374419487845729e-06, "loss": 0.475, "step": 2378 }, { "epoch": 0.53, "learning_rate": 9.367154966306133e-06, "loss": 0.4432, "step": 2379 }, { "epoch": 0.53, "learning_rate": 9.359890780067059e-06, "loss": 0.4592, "step": 2380 }, { "epoch": 0.54, "learning_rate": 9.352626932977293e-06, "loss": 0.4574, "step": 2381 }, { "epoch": 0.54, "learning_rate": 9.34536342888544e-06, "loss": 0.4556, "step": 2382 }, { "epoch": 0.54, "learning_rate": 9.338100271639932e-06, "loss": 0.4632, "step": 2383 }, { "epoch": 0.54, "learning_rate": 9.330837465089007e-06, "loss": 0.4942, "step": 2384 }, { "epoch": 0.54, "learning_rate": 9.32357501308072e-06, "loss": 0.4579, "step": 2385 }, { "epoch": 0.54, "learning_rate": 9.316312919462938e-06, "loss": 0.4666, "step": 2386 }, { "epoch": 0.54, "learning_rate": 9.309051188083342e-06, "loss": 0.4395, "step": 2387 }, { "epoch": 0.54, "learning_rate": 9.301789822789412e-06, "loss": 0.4623, "step": 2388 }, { "epoch": 0.54, "learning_rate": 9.294528827428445e-06, "loss": 0.4485, "step": 2389 }, { "epoch": 0.54, "learning_rate": 9.287268205847532e-06, "loss": 0.46, "step": 2390 }, { "epoch": 0.54, "learning_rate": 9.280007961893576e-06, "loss": 0.4362, "step": 2391 }, { "epoch": 0.54, "learning_rate": 9.272748099413275e-06, "loss": 0.4547, "step": 2392 }, { "epoch": 0.54, "learning_rate": 9.265488622253122e-06, "loss": 0.4549, "step": 2393 }, { "epoch": 0.54, "learning_rate": 9.25822953425941e-06, "loss": 0.4582, "step": 2394 }, { "epoch": 0.54, "learning_rate": 9.250970839278221e-06, "loss": 0.4473, "step": 2395 }, { "epoch": 0.54, "learning_rate": 9.243712541155437e-06, "loss": 0.4215, "step": 2396 }, { "epoch": 0.54, "learning_rate": 9.236454643736722e-06, "loss": 0.4416, "step": 2397 }, { "epoch": 0.54, "learning_rate": 9.229197150867525e-06, "loss": 0.4454, "step": 2398 }, { "epoch": 0.54, "learning_rate": 9.221940066393097e-06, "loss": 0.4755, "step": 2399 }, { "epoch": 0.54, "learning_rate": 9.214683394158458e-06, "loss": 0.4447, "step": 2400 }, { "epoch": 0.54, "learning_rate": 9.20742713800841e-06, "loss": 0.437, "step": 2401 }, { "epoch": 0.54, "learning_rate": 9.200171301787542e-06, "loss": 0.4403, "step": 2402 }, { "epoch": 0.54, "learning_rate": 9.192915889340214e-06, "loss": 0.4615, "step": 2403 }, { "epoch": 0.54, "learning_rate": 9.185660904510565e-06, "loss": 0.4427, "step": 2404 }, { "epoch": 0.54, "learning_rate": 9.178406351142504e-06, "loss": 0.4631, "step": 2405 }, { "epoch": 0.54, "learning_rate": 9.171152233079714e-06, "loss": 0.4588, "step": 2406 }, { "epoch": 0.54, "learning_rate": 9.163898554165653e-06, "loss": 0.4703, "step": 2407 }, { "epoch": 0.54, "learning_rate": 9.156645318243534e-06, "loss": 0.4685, "step": 2408 }, { "epoch": 0.54, "learning_rate": 9.149392529156347e-06, "loss": 0.4368, "step": 2409 }, { "epoch": 0.54, "learning_rate": 9.142140190746834e-06, "loss": 0.4228, "step": 2410 }, { "epoch": 0.54, "learning_rate": 9.134888306857506e-06, "loss": 0.4459, "step": 2411 }, { "epoch": 0.54, "learning_rate": 9.127636881330633e-06, "loss": 0.4521, "step": 2412 }, { "epoch": 0.54, "learning_rate": 9.120385918008244e-06, "loss": 0.4624, "step": 2413 }, { "epoch": 0.54, "learning_rate": 9.11313542073211e-06, "loss": 0.4475, "step": 2414 }, { "epoch": 0.54, "learning_rate": 9.105885393343774e-06, "loss": 0.4432, "step": 2415 }, { "epoch": 0.54, "learning_rate": 9.098635839684515e-06, "loss": 0.4772, "step": 2416 }, { "epoch": 0.54, "learning_rate": 9.091386763595368e-06, "loss": 0.4644, "step": 2417 }, { "epoch": 0.54, "learning_rate": 9.084138168917117e-06, "loss": 0.4532, "step": 2418 }, { "epoch": 0.54, "learning_rate": 9.076890059490284e-06, "loss": 0.443, "step": 2419 }, { "epoch": 0.54, "learning_rate": 9.06964243915514e-06, "loss": 0.4706, "step": 2420 }, { "epoch": 0.54, "learning_rate": 9.062395311751692e-06, "loss": 0.4596, "step": 2421 }, { "epoch": 0.54, "learning_rate": 9.055148681119688e-06, "loss": 0.4691, "step": 2422 }, { "epoch": 0.54, "learning_rate": 9.047902551098618e-06, "loss": 0.4225, "step": 2423 }, { "epoch": 0.54, "learning_rate": 9.040656925527693e-06, "loss": 0.4652, "step": 2424 }, { "epoch": 0.54, "learning_rate": 9.033411808245874e-06, "loss": 0.4299, "step": 2425 }, { "epoch": 0.55, "learning_rate": 9.026167203091841e-06, "loss": 0.4507, "step": 2426 }, { "epoch": 0.55, "learning_rate": 9.018923113904007e-06, "loss": 0.458, "step": 2427 }, { "epoch": 0.55, "learning_rate": 9.011679544520508e-06, "loss": 0.4557, "step": 2428 }, { "epoch": 0.55, "learning_rate": 9.004436498779207e-06, "loss": 0.4716, "step": 2429 }, { "epoch": 0.55, "learning_rate": 8.99719398051769e-06, "loss": 0.4505, "step": 2430 }, { "epoch": 0.55, "learning_rate": 8.989951993573267e-06, "loss": 0.4326, "step": 2431 }, { "epoch": 0.55, "learning_rate": 8.982710541782961e-06, "loss": 0.4196, "step": 2432 }, { "epoch": 0.55, "learning_rate": 8.975469628983511e-06, "loss": 0.4355, "step": 2433 }, { "epoch": 0.55, "learning_rate": 8.968229259011373e-06, "loss": 0.4331, "step": 2434 }, { "epoch": 0.55, "learning_rate": 8.960989435702717e-06, "loss": 0.4436, "step": 2435 }, { "epoch": 0.55, "learning_rate": 8.953750162893418e-06, "loss": 0.4526, "step": 2436 }, { "epoch": 0.55, "learning_rate": 8.946511444419063e-06, "loss": 0.4606, "step": 2437 }, { "epoch": 0.55, "learning_rate": 8.93927328411495e-06, "loss": 0.4574, "step": 2438 }, { "epoch": 0.55, "learning_rate": 8.932035685816071e-06, "loss": 0.4334, "step": 2439 }, { "epoch": 0.55, "learning_rate": 8.924798653357127e-06, "loss": 0.433, "step": 2440 }, { "epoch": 0.55, "learning_rate": 8.91756219057252e-06, "loss": 0.4535, "step": 2441 }, { "epoch": 0.55, "learning_rate": 8.910326301296344e-06, "loss": 0.4666, "step": 2442 }, { "epoch": 0.55, "learning_rate": 8.903090989362394e-06, "loss": 0.4397, "step": 2443 }, { "epoch": 0.55, "learning_rate": 8.89585625860416e-06, "loss": 0.4328, "step": 2444 }, { "epoch": 0.55, "learning_rate": 8.888622112854814e-06, "loss": 0.4503, "step": 2445 }, { "epoch": 0.55, "learning_rate": 8.88138855594724e-06, "loss": 0.4448, "step": 2446 }, { "epoch": 0.55, "learning_rate": 8.874155591713987e-06, "loss": 0.4684, "step": 2447 }, { "epoch": 0.55, "learning_rate": 8.866923223987303e-06, "loss": 0.4652, "step": 2448 }, { "epoch": 0.55, "learning_rate": 8.859691456599114e-06, "loss": 0.4697, "step": 2449 }, { "epoch": 0.55, "learning_rate": 8.85246029338103e-06, "loss": 0.4374, "step": 2450 }, { "epoch": 0.55, "learning_rate": 8.845229738164338e-06, "loss": 0.4316, "step": 2451 }, { "epoch": 0.55, "learning_rate": 8.837999794780013e-06, "loss": 0.4539, "step": 2452 }, { "epoch": 0.55, "learning_rate": 8.830770467058688e-06, "loss": 0.4512, "step": 2453 }, { "epoch": 0.55, "learning_rate": 8.823541758830693e-06, "loss": 0.4333, "step": 2454 }, { "epoch": 0.55, "learning_rate": 8.816313673926011e-06, "loss": 0.4431, "step": 2455 }, { "epoch": 0.55, "learning_rate": 8.809086216174302e-06, "loss": 0.4258, "step": 2456 }, { "epoch": 0.55, "learning_rate": 8.80185938940489e-06, "loss": 0.495, "step": 2457 }, { "epoch": 0.55, "learning_rate": 8.79463319744677e-06, "loss": 0.4744, "step": 2458 }, { "epoch": 0.55, "learning_rate": 8.7874076441286e-06, "loss": 0.4598, "step": 2459 }, { "epoch": 0.55, "learning_rate": 8.780182733278693e-06, "loss": 0.4397, "step": 2460 }, { "epoch": 0.55, "learning_rate": 8.772958468725023e-06, "loss": 0.47, "step": 2461 }, { "epoch": 0.55, "learning_rate": 8.765734854295234e-06, "loss": 0.4626, "step": 2462 }, { "epoch": 0.55, "learning_rate": 8.758511893816614e-06, "loss": 0.4267, "step": 2463 }, { "epoch": 0.55, "learning_rate": 8.751289591116105e-06, "loss": 0.4475, "step": 2464 }, { "epoch": 0.55, "learning_rate": 8.744067950020302e-06, "loss": 0.4519, "step": 2465 }, { "epoch": 0.55, "learning_rate": 8.73684697435545e-06, "loss": 0.4659, "step": 2466 }, { "epoch": 0.55, "learning_rate": 8.729626667947442e-06, "loss": 0.4445, "step": 2467 }, { "epoch": 0.55, "learning_rate": 8.722407034621812e-06, "loss": 0.443, "step": 2468 }, { "epoch": 0.55, "learning_rate": 8.715188078203746e-06, "loss": 0.4877, "step": 2469 }, { "epoch": 0.56, "learning_rate": 8.707969802518068e-06, "loss": 0.4606, "step": 2470 }, { "epoch": 0.56, "learning_rate": 8.700752211389236e-06, "loss": 0.451, "step": 2471 }, { "epoch": 0.56, "learning_rate": 8.693535308641348e-06, "loss": 0.4268, "step": 2472 }, { "epoch": 0.56, "learning_rate": 8.686319098098139e-06, "loss": 0.4582, "step": 2473 }, { "epoch": 0.56, "learning_rate": 8.67910358358298e-06, "loss": 0.4331, "step": 2474 }, { "epoch": 0.56, "learning_rate": 8.671888768918865e-06, "loss": 0.4293, "step": 2475 }, { "epoch": 0.56, "learning_rate": 8.664674657928421e-06, "loss": 0.439, "step": 2476 }, { "epoch": 0.56, "learning_rate": 8.657461254433911e-06, "loss": 0.4221, "step": 2477 }, { "epoch": 0.56, "learning_rate": 8.65024856225721e-06, "loss": 0.4353, "step": 2478 }, { "epoch": 0.56, "learning_rate": 8.643036585219822e-06, "loss": 0.4354, "step": 2479 }, { "epoch": 0.56, "learning_rate": 8.63582532714287e-06, "loss": 0.4379, "step": 2480 }, { "epoch": 0.56, "learning_rate": 8.628614791847101e-06, "loss": 0.4702, "step": 2481 }, { "epoch": 0.56, "learning_rate": 8.621404983152873e-06, "loss": 0.4483, "step": 2482 }, { "epoch": 0.56, "learning_rate": 8.614195904880164e-06, "loss": 0.4746, "step": 2483 }, { "epoch": 0.56, "learning_rate": 8.606987560848555e-06, "loss": 0.4471, "step": 2484 }, { "epoch": 0.56, "learning_rate": 8.599779954877256e-06, "loss": 0.4348, "step": 2485 }, { "epoch": 0.56, "learning_rate": 8.592573090785072e-06, "loss": 0.4585, "step": 2486 }, { "epoch": 0.56, "learning_rate": 8.585366972390416e-06, "loss": 0.4471, "step": 2487 }, { "epoch": 0.56, "learning_rate": 8.578161603511312e-06, "loss": 0.48, "step": 2488 }, { "epoch": 0.56, "learning_rate": 8.57095698796538e-06, "loss": 0.4493, "step": 2489 }, { "epoch": 0.56, "learning_rate": 8.563753129569845e-06, "loss": 0.424, "step": 2490 }, { "epoch": 0.56, "learning_rate": 8.556550032141533e-06, "loss": 0.4346, "step": 2491 }, { "epoch": 0.56, "learning_rate": 8.549347699496858e-06, "loss": 0.4466, "step": 2492 }, { "epoch": 0.56, "learning_rate": 8.54214613545184e-06, "loss": 0.4571, "step": 2493 }, { "epoch": 0.56, "learning_rate": 8.534945343822088e-06, "loss": 0.4719, "step": 2494 }, { "epoch": 0.56, "learning_rate": 8.527745328422793e-06, "loss": 0.4416, "step": 2495 }, { "epoch": 0.56, "learning_rate": 8.520546093068746e-06, "loss": 0.4725, "step": 2496 }, { "epoch": 0.56, "learning_rate": 8.51334764157432e-06, "loss": 0.4751, "step": 2497 }, { "epoch": 0.56, "learning_rate": 8.506149977753474e-06, "loss": 0.4409, "step": 2498 }, { "epoch": 0.56, "learning_rate": 8.498953105419748e-06, "loss": 0.436, "step": 2499 }, { "epoch": 0.56, "learning_rate": 8.491757028386262e-06, "loss": 0.4329, "step": 2500 }, { "epoch": 0.56, "learning_rate": 8.484561750465721e-06, "loss": 0.4574, "step": 2501 }, { "epoch": 0.56, "learning_rate": 8.4773672754704e-06, "loss": 0.4574, "step": 2502 }, { "epoch": 0.56, "learning_rate": 8.470173607212145e-06, "loss": 0.4642, "step": 2503 }, { "epoch": 0.56, "learning_rate": 8.462980749502386e-06, "loss": 0.4424, "step": 2504 }, { "epoch": 0.56, "learning_rate": 8.455788706152117e-06, "loss": 0.446, "step": 2505 }, { "epoch": 0.56, "learning_rate": 8.4485974809719e-06, "loss": 0.4544, "step": 2506 }, { "epoch": 0.56, "learning_rate": 8.441407077771864e-06, "loss": 0.463, "step": 2507 }, { "epoch": 0.56, "learning_rate": 8.434217500361701e-06, "loss": 0.4489, "step": 2508 }, { "epoch": 0.56, "learning_rate": 8.42702875255067e-06, "loss": 0.4411, "step": 2509 }, { "epoch": 0.56, "learning_rate": 8.41984083814759e-06, "loss": 0.4444, "step": 2510 }, { "epoch": 0.56, "learning_rate": 8.412653760960835e-06, "loss": 0.4812, "step": 2511 }, { "epoch": 0.56, "learning_rate": 8.405467524798335e-06, "loss": 0.4502, "step": 2512 }, { "epoch": 0.56, "learning_rate": 8.398282133467579e-06, "loss": 0.444, "step": 2513 }, { "epoch": 0.56, "learning_rate": 8.391097590775603e-06, "loss": 0.4408, "step": 2514 }, { "epoch": 0.57, "learning_rate": 8.383913900528994e-06, "loss": 0.4247, "step": 2515 }, { "epoch": 0.57, "learning_rate": 8.376731066533893e-06, "loss": 0.448, "step": 2516 }, { "epoch": 0.57, "learning_rate": 8.369549092595984e-06, "loss": 0.4553, "step": 2517 }, { "epoch": 0.57, "learning_rate": 8.362367982520495e-06, "loss": 0.4574, "step": 2518 }, { "epoch": 0.57, "learning_rate": 8.355187740112196e-06, "loss": 0.4681, "step": 2519 }, { "epoch": 0.57, "learning_rate": 8.348008369175394e-06, "loss": 0.4585, "step": 2520 }, { "epoch": 0.57, "learning_rate": 8.34082987351394e-06, "loss": 0.4556, "step": 2521 }, { "epoch": 0.57, "learning_rate": 8.333652256931222e-06, "loss": 0.4536, "step": 2522 }, { "epoch": 0.57, "learning_rate": 8.326475523230152e-06, "loss": 0.4572, "step": 2523 }, { "epoch": 0.57, "learning_rate": 8.31929967621319e-06, "loss": 0.4683, "step": 2524 }, { "epoch": 0.57, "learning_rate": 8.312124719682315e-06, "loss": 0.4253, "step": 2525 }, { "epoch": 0.57, "learning_rate": 8.304950657439034e-06, "loss": 0.4703, "step": 2526 }, { "epoch": 0.57, "learning_rate": 8.297777493284386e-06, "loss": 0.4544, "step": 2527 }, { "epoch": 0.57, "learning_rate": 8.290605231018931e-06, "loss": 0.4468, "step": 2528 }, { "epoch": 0.57, "learning_rate": 8.28343387444275e-06, "loss": 0.4466, "step": 2529 }, { "epoch": 0.57, "learning_rate": 8.276263427355447e-06, "loss": 0.4618, "step": 2530 }, { "epoch": 0.57, "learning_rate": 8.269093893556136e-06, "loss": 0.4543, "step": 2531 }, { "epoch": 0.57, "learning_rate": 8.261925276843465e-06, "loss": 0.4853, "step": 2532 }, { "epoch": 0.57, "learning_rate": 8.25475758101558e-06, "loss": 0.4287, "step": 2533 }, { "epoch": 0.57, "learning_rate": 8.247590809870142e-06, "loss": 0.458, "step": 2534 }, { "epoch": 0.57, "learning_rate": 8.240424967204328e-06, "loss": 0.4581, "step": 2535 }, { "epoch": 0.57, "learning_rate": 8.233260056814816e-06, "loss": 0.4245, "step": 2536 }, { "epoch": 0.57, "learning_rate": 8.226096082497794e-06, "loss": 0.4182, "step": 2537 }, { "epoch": 0.57, "learning_rate": 8.218933048048952e-06, "loss": 0.4617, "step": 2538 }, { "epoch": 0.57, "learning_rate": 8.211770957263482e-06, "loss": 0.4381, "step": 2539 }, { "epoch": 0.57, "learning_rate": 8.204609813936082e-06, "loss": 0.4433, "step": 2540 }, { "epoch": 0.57, "learning_rate": 8.197449621860944e-06, "loss": 0.4612, "step": 2541 }, { "epoch": 0.57, "learning_rate": 8.19029038483175e-06, "loss": 0.4604, "step": 2542 }, { "epoch": 0.57, "learning_rate": 8.183132106641684e-06, "loss": 0.4473, "step": 2543 }, { "epoch": 0.57, "learning_rate": 8.175974791083419e-06, "loss": 0.4625, "step": 2544 }, { "epoch": 0.57, "learning_rate": 8.168818441949116e-06, "loss": 0.4689, "step": 2545 }, { "epoch": 0.57, "learning_rate": 8.16166306303043e-06, "loss": 0.4365, "step": 2546 }, { "epoch": 0.57, "learning_rate": 8.154508658118493e-06, "loss": 0.4562, "step": 2547 }, { "epoch": 0.57, "learning_rate": 8.147355231003931e-06, "loss": 0.4303, "step": 2548 }, { "epoch": 0.57, "learning_rate": 8.140202785476845e-06, "loss": 0.4326, "step": 2549 }, { "epoch": 0.57, "learning_rate": 8.13305132532682e-06, "loss": 0.4364, "step": 2550 }, { "epoch": 0.57, "learning_rate": 8.125900854342914e-06, "loss": 0.4504, "step": 2551 }, { "epoch": 0.57, "learning_rate": 8.118751376313666e-06, "loss": 0.4748, "step": 2552 }, { "epoch": 0.57, "learning_rate": 8.111602895027083e-06, "loss": 0.4726, "step": 2553 }, { "epoch": 0.57, "learning_rate": 8.104455414270647e-06, "loss": 0.4566, "step": 2554 }, { "epoch": 0.57, "learning_rate": 8.097308937831318e-06, "loss": 0.4168, "step": 2555 }, { "epoch": 0.57, "learning_rate": 8.09016346949551e-06, "loss": 0.4561, "step": 2556 }, { "epoch": 0.57, "learning_rate": 8.083019013049112e-06, "loss": 0.4564, "step": 2557 }, { "epoch": 0.57, "learning_rate": 8.075875572277474e-06, "loss": 0.4636, "step": 2558 }, { "epoch": 0.58, "learning_rate": 8.068733150965405e-06, "loss": 0.4434, "step": 2559 }, { "epoch": 0.58, "learning_rate": 8.06159175289718e-06, "loss": 0.4701, "step": 2560 }, { "epoch": 0.58, "learning_rate": 8.054451381856525e-06, "loss": 0.4685, "step": 2561 }, { "epoch": 0.58, "learning_rate": 8.047312041626624e-06, "loss": 0.4619, "step": 2562 }, { "epoch": 0.58, "learning_rate": 8.040173735990124e-06, "loss": 0.4508, "step": 2563 }, { "epoch": 0.58, "learning_rate": 8.033036468729113e-06, "loss": 0.4672, "step": 2564 }, { "epoch": 0.58, "learning_rate": 8.025900243625131e-06, "loss": 0.4491, "step": 2565 }, { "epoch": 0.58, "learning_rate": 8.018765064459166e-06, "loss": 0.4615, "step": 2566 }, { "epoch": 0.58, "learning_rate": 8.011630935011656e-06, "loss": 0.4238, "step": 2567 }, { "epoch": 0.58, "learning_rate": 8.004497859062475e-06, "loss": 0.4488, "step": 2568 }, { "epoch": 0.58, "learning_rate": 7.997365840390943e-06, "loss": 0.4436, "step": 2569 }, { "epoch": 0.58, "learning_rate": 7.990234882775825e-06, "loss": 0.45, "step": 2570 }, { "epoch": 0.58, "learning_rate": 7.983104989995316e-06, "loss": 0.461, "step": 2571 }, { "epoch": 0.58, "learning_rate": 7.975976165827052e-06, "loss": 0.4501, "step": 2572 }, { "epoch": 0.58, "learning_rate": 7.968848414048097e-06, "loss": 0.4476, "step": 2573 }, { "epoch": 0.58, "learning_rate": 7.961721738434956e-06, "loss": 0.4254, "step": 2574 }, { "epoch": 0.58, "learning_rate": 7.954596142763552e-06, "loss": 0.4323, "step": 2575 }, { "epoch": 0.58, "learning_rate": 7.947471630809243e-06, "loss": 0.48, "step": 2576 }, { "epoch": 0.58, "learning_rate": 7.940348206346815e-06, "loss": 0.4384, "step": 2577 }, { "epoch": 0.58, "learning_rate": 7.93322587315047e-06, "loss": 0.4693, "step": 2578 }, { "epoch": 0.58, "learning_rate": 7.926104634993842e-06, "loss": 0.4443, "step": 2579 }, { "epoch": 0.58, "learning_rate": 7.91898449564998e-06, "loss": 0.4528, "step": 2580 }, { "epoch": 0.58, "learning_rate": 7.911865458891345e-06, "loss": 0.433, "step": 2581 }, { "epoch": 0.58, "learning_rate": 7.904747528489818e-06, "loss": 0.45, "step": 2582 }, { "epoch": 0.58, "learning_rate": 7.897630708216701e-06, "loss": 0.4333, "step": 2583 }, { "epoch": 0.58, "learning_rate": 7.890515001842698e-06, "loss": 0.4455, "step": 2584 }, { "epoch": 0.58, "learning_rate": 7.883400413137924e-06, "loss": 0.4766, "step": 2585 }, { "epoch": 0.58, "learning_rate": 7.876286945871908e-06, "loss": 0.4499, "step": 2586 }, { "epoch": 0.58, "learning_rate": 7.869174603813582e-06, "loss": 0.4385, "step": 2587 }, { "epoch": 0.58, "learning_rate": 7.862063390731277e-06, "loss": 0.4368, "step": 2588 }, { "epoch": 0.58, "learning_rate": 7.854953310392731e-06, "loss": 0.4612, "step": 2589 }, { "epoch": 0.58, "learning_rate": 7.847844366565083e-06, "loss": 0.4303, "step": 2590 }, { "epoch": 0.58, "learning_rate": 7.840736563014863e-06, "loss": 0.452, "step": 2591 }, { "epoch": 0.58, "learning_rate": 7.833629903508007e-06, "loss": 0.437, "step": 2592 }, { "epoch": 0.58, "learning_rate": 7.826524391809833e-06, "loss": 0.4794, "step": 2593 }, { "epoch": 0.58, "learning_rate": 7.81942003168506e-06, "loss": 0.4459, "step": 2594 }, { "epoch": 0.58, "learning_rate": 7.812316826897792e-06, "loss": 0.4606, "step": 2595 }, { "epoch": 0.58, "learning_rate": 7.805214781211526e-06, "loss": 0.394, "step": 2596 }, { "epoch": 0.58, "learning_rate": 7.79811389838914e-06, "loss": 0.4338, "step": 2597 }, { "epoch": 0.58, "learning_rate": 7.791014182192898e-06, "loss": 0.4537, "step": 2598 }, { "epoch": 0.58, "learning_rate": 7.783915636384443e-06, "loss": 0.4551, "step": 2599 }, { "epoch": 0.58, "learning_rate": 7.776818264724802e-06, "loss": 0.4423, "step": 2600 }, { "epoch": 0.58, "learning_rate": 7.769722070974374e-06, "loss": 0.436, "step": 2601 }, { "epoch": 0.58, "learning_rate": 7.762627058892947e-06, "loss": 0.416, "step": 2602 }, { "epoch": 0.58, "learning_rate": 7.755533232239667e-06, "loss": 0.4327, "step": 2603 }, { "epoch": 0.59, "learning_rate": 7.74844059477306e-06, "loss": 0.4659, "step": 2604 }, { "epoch": 0.59, "learning_rate": 7.741349150251023e-06, "loss": 0.4054, "step": 2605 }, { "epoch": 0.59, "learning_rate": 7.734258902430816e-06, "loss": 0.4475, "step": 2606 }, { "epoch": 0.59, "learning_rate": 7.727169855069069e-06, "loss": 0.4387, "step": 2607 }, { "epoch": 0.59, "learning_rate": 7.720082011921775e-06, "loss": 0.4492, "step": 2608 }, { "epoch": 0.59, "learning_rate": 7.712995376744282e-06, "loss": 0.4437, "step": 2609 }, { "epoch": 0.59, "learning_rate": 7.705909953291314e-06, "loss": 0.4377, "step": 2610 }, { "epoch": 0.59, "learning_rate": 7.698825745316943e-06, "loss": 0.427, "step": 2611 }, { "epoch": 0.59, "learning_rate": 7.691742756574593e-06, "loss": 0.4497, "step": 2612 }, { "epoch": 0.59, "learning_rate": 7.68466099081705e-06, "loss": 0.4689, "step": 2613 }, { "epoch": 0.59, "learning_rate": 7.677580451796445e-06, "loss": 0.4557, "step": 2614 }, { "epoch": 0.59, "learning_rate": 7.670501143264266e-06, "loss": 0.4092, "step": 2615 }, { "epoch": 0.59, "learning_rate": 7.663423068971343e-06, "loss": 0.4284, "step": 2616 }, { "epoch": 0.59, "learning_rate": 7.65634623266785e-06, "loss": 0.436, "step": 2617 }, { "epoch": 0.59, "learning_rate": 7.649270638103324e-06, "loss": 0.4303, "step": 2618 }, { "epoch": 0.59, "learning_rate": 7.64219628902662e-06, "loss": 0.4735, "step": 2619 }, { "epoch": 0.59, "learning_rate": 7.635123189185944e-06, "loss": 0.4366, "step": 2620 }, { "epoch": 0.59, "learning_rate": 7.628051342328842e-06, "loss": 0.4478, "step": 2621 }, { "epoch": 0.59, "learning_rate": 7.620980752202189e-06, "loss": 0.4391, "step": 2622 }, { "epoch": 0.59, "learning_rate": 7.613911422552203e-06, "loss": 0.4223, "step": 2623 }, { "epoch": 0.59, "learning_rate": 7.606843357124426e-06, "loss": 0.4426, "step": 2624 }, { "epoch": 0.59, "learning_rate": 7.599776559663731e-06, "loss": 0.4288, "step": 2625 }, { "epoch": 0.59, "learning_rate": 7.5927110339143296e-06, "loss": 0.4366, "step": 2626 }, { "epoch": 0.59, "learning_rate": 7.585646783619749e-06, "loss": 0.4388, "step": 2627 }, { "epoch": 0.59, "learning_rate": 7.578583812522844e-06, "loss": 0.4299, "step": 2628 }, { "epoch": 0.59, "learning_rate": 7.571522124365789e-06, "loss": 0.4365, "step": 2629 }, { "epoch": 0.59, "learning_rate": 7.564461722890082e-06, "loss": 0.455, "step": 2630 }, { "epoch": 0.59, "learning_rate": 7.557402611836539e-06, "loss": 0.4766, "step": 2631 }, { "epoch": 0.59, "learning_rate": 7.5503447949452905e-06, "loss": 0.4192, "step": 2632 }, { "epoch": 0.59, "learning_rate": 7.5432882759557795e-06, "loss": 0.4561, "step": 2633 }, { "epoch": 0.59, "learning_rate": 7.53623305860677e-06, "loss": 0.411, "step": 2634 }, { "epoch": 0.59, "learning_rate": 7.529179146636327e-06, "loss": 0.4387, "step": 2635 }, { "epoch": 0.59, "learning_rate": 7.522126543781829e-06, "loss": 0.4663, "step": 2636 }, { "epoch": 0.59, "learning_rate": 7.515075253779959e-06, "loss": 0.4396, "step": 2637 }, { "epoch": 0.59, "learning_rate": 7.508025280366703e-06, "loss": 0.4127, "step": 2638 }, { "epoch": 0.59, "learning_rate": 7.500976627277352e-06, "loss": 0.4291, "step": 2639 }, { "epoch": 0.59, "learning_rate": 7.493929298246491e-06, "loss": 0.4474, "step": 2640 }, { "epoch": 0.59, "learning_rate": 7.486883297008021e-06, "loss": 0.4308, "step": 2641 }, { "epoch": 0.59, "learning_rate": 7.479838627295119e-06, "loss": 0.4304, "step": 2642 }, { "epoch": 0.59, "learning_rate": 7.4727952928402695e-06, "loss": 0.4495, "step": 2643 }, { "epoch": 0.59, "learning_rate": 7.46575329737524e-06, "loss": 0.4522, "step": 2644 }, { "epoch": 0.59, "learning_rate": 7.458712644631096e-06, "loss": 0.4664, "step": 2645 }, { "epoch": 0.59, "learning_rate": 7.451673338338191e-06, "loss": 0.3868, "step": 2646 }, { "epoch": 0.59, "learning_rate": 7.444635382226161e-06, "loss": 0.436, "step": 2647 }, { "epoch": 0.6, "learning_rate": 7.437598780023924e-06, "loss": 0.4168, "step": 2648 }, { "epoch": 0.6, "learning_rate": 7.4305635354596975e-06, "loss": 0.4418, "step": 2649 }, { "epoch": 0.6, "learning_rate": 7.42352965226096e-06, "loss": 0.4156, "step": 2650 }, { "epoch": 0.6, "learning_rate": 7.4164971341544785e-06, "loss": 0.4545, "step": 2651 }, { "epoch": 0.6, "learning_rate": 7.409465984866294e-06, "loss": 0.4454, "step": 2652 }, { "epoch": 0.6, "learning_rate": 7.402436208121723e-06, "loss": 0.4216, "step": 2653 }, { "epoch": 0.6, "learning_rate": 7.395407807645355e-06, "loss": 0.4678, "step": 2654 }, { "epoch": 0.6, "learning_rate": 7.388380787161048e-06, "loss": 0.4595, "step": 2655 }, { "epoch": 0.6, "learning_rate": 7.3813551503919335e-06, "loss": 0.4409, "step": 2656 }, { "epoch": 0.6, "learning_rate": 7.374330901060407e-06, "loss": 0.456, "step": 2657 }, { "epoch": 0.6, "learning_rate": 7.367308042888131e-06, "loss": 0.4244, "step": 2658 }, { "epoch": 0.6, "learning_rate": 7.360286579596024e-06, "loss": 0.4408, "step": 2659 }, { "epoch": 0.6, "learning_rate": 7.353266514904276e-06, "loss": 0.4467, "step": 2660 }, { "epoch": 0.6, "learning_rate": 7.346247852532324e-06, "loss": 0.4205, "step": 2661 }, { "epoch": 0.6, "learning_rate": 7.339230596198876e-06, "loss": 0.4463, "step": 2662 }, { "epoch": 0.6, "learning_rate": 7.332214749621884e-06, "loss": 0.4007, "step": 2663 }, { "epoch": 0.6, "learning_rate": 7.325200316518554e-06, "loss": 0.4324, "step": 2664 }, { "epoch": 0.6, "learning_rate": 7.3181873006053505e-06, "loss": 0.4455, "step": 2665 }, { "epoch": 0.6, "learning_rate": 7.3111757055979805e-06, "loss": 0.4364, "step": 2666 }, { "epoch": 0.6, "learning_rate": 7.3041655352113986e-06, "loss": 0.4447, "step": 2667 }, { "epoch": 0.6, "learning_rate": 7.297156793159808e-06, "loss": 0.461, "step": 2668 }, { "epoch": 0.6, "learning_rate": 7.290149483156652e-06, "loss": 0.5072, "step": 2669 }, { "epoch": 0.6, "learning_rate": 7.283143608914618e-06, "loss": 0.4573, "step": 2670 }, { "epoch": 0.6, "learning_rate": 7.276139174145629e-06, "loss": 0.4443, "step": 2671 }, { "epoch": 0.6, "learning_rate": 7.2691361825608465e-06, "loss": 0.4364, "step": 2672 }, { "epoch": 0.6, "learning_rate": 7.26213463787067e-06, "loss": 0.4309, "step": 2673 }, { "epoch": 0.6, "learning_rate": 7.25513454378473e-06, "loss": 0.4482, "step": 2674 }, { "epoch": 0.6, "learning_rate": 7.2481359040118906e-06, "loss": 0.4585, "step": 2675 }, { "epoch": 0.6, "learning_rate": 7.241138722260244e-06, "loss": 0.4429, "step": 2676 }, { "epoch": 0.6, "learning_rate": 7.234143002237106e-06, "loss": 0.4347, "step": 2677 }, { "epoch": 0.6, "learning_rate": 7.227148747649024e-06, "loss": 0.4516, "step": 2678 }, { "epoch": 0.6, "learning_rate": 7.220155962201766e-06, "loss": 0.4518, "step": 2679 }, { "epoch": 0.6, "learning_rate": 7.213164649600318e-06, "loss": 0.4596, "step": 2680 }, { "epoch": 0.6, "learning_rate": 7.206174813548898e-06, "loss": 0.4634, "step": 2681 }, { "epoch": 0.6, "learning_rate": 7.199186457750931e-06, "loss": 0.4392, "step": 2682 }, { "epoch": 0.6, "learning_rate": 7.192199585909058e-06, "loss": 0.4267, "step": 2683 }, { "epoch": 0.6, "learning_rate": 7.185214201725136e-06, "loss": 0.4744, "step": 2684 }, { "epoch": 0.6, "learning_rate": 7.178230308900235e-06, "loss": 0.436, "step": 2685 }, { "epoch": 0.6, "learning_rate": 7.171247911134633e-06, "loss": 0.4328, "step": 2686 }, { "epoch": 0.6, "learning_rate": 7.1642670121278125e-06, "loss": 0.4353, "step": 2687 }, { "epoch": 0.6, "learning_rate": 7.157287615578472e-06, "loss": 0.4515, "step": 2688 }, { "epoch": 0.6, "learning_rate": 7.150309725184508e-06, "loss": 0.4621, "step": 2689 }, { "epoch": 0.6, "learning_rate": 7.1433333446430154e-06, "loss": 0.4462, "step": 2690 }, { "epoch": 0.6, "learning_rate": 7.1363584776502935e-06, "loss": 0.4509, "step": 2691 }, { "epoch": 0.6, "learning_rate": 7.1293851279018375e-06, "loss": 0.4376, "step": 2692 }, { "epoch": 0.61, "learning_rate": 7.122413299092343e-06, "loss": 0.4383, "step": 2693 }, { "epoch": 0.61, "learning_rate": 7.115442994915693e-06, "loss": 0.4493, "step": 2694 }, { "epoch": 0.61, "learning_rate": 7.108474219064967e-06, "loss": 0.4474, "step": 2695 }, { "epoch": 0.61, "learning_rate": 7.101506975232437e-06, "loss": 0.4438, "step": 2696 }, { "epoch": 0.61, "learning_rate": 7.094541267109559e-06, "loss": 0.455, "step": 2697 }, { "epoch": 0.61, "learning_rate": 7.0875770983869774e-06, "loss": 0.4497, "step": 2698 }, { "epoch": 0.61, "learning_rate": 7.080614472754519e-06, "loss": 0.4246, "step": 2699 }, { "epoch": 0.61, "learning_rate": 7.073653393901194e-06, "loss": 0.4842, "step": 2700 }, { "epoch": 0.61, "learning_rate": 7.066693865515195e-06, "loss": 0.4106, "step": 2701 }, { "epoch": 0.61, "learning_rate": 7.059735891283891e-06, "loss": 0.409, "step": 2702 }, { "epoch": 0.61, "learning_rate": 7.0527794748938225e-06, "loss": 0.4613, "step": 2703 }, { "epoch": 0.61, "learning_rate": 7.045824620030721e-06, "loss": 0.4235, "step": 2704 }, { "epoch": 0.61, "learning_rate": 7.0388713303794755e-06, "loss": 0.4398, "step": 2705 }, { "epoch": 0.61, "learning_rate": 7.031919609624147e-06, "loss": 0.4371, "step": 2706 }, { "epoch": 0.61, "learning_rate": 7.024969461447973e-06, "loss": 0.4609, "step": 2707 }, { "epoch": 0.61, "learning_rate": 7.018020889533348e-06, "loss": 0.4205, "step": 2708 }, { "epoch": 0.61, "learning_rate": 7.01107389756184e-06, "loss": 0.4795, "step": 2709 }, { "epoch": 0.61, "learning_rate": 7.004128489214175e-06, "loss": 0.4467, "step": 2710 }, { "epoch": 0.61, "learning_rate": 6.997184668170237e-06, "loss": 0.4403, "step": 2711 }, { "epoch": 0.61, "learning_rate": 6.990242438109083e-06, "loss": 0.4425, "step": 2712 }, { "epoch": 0.61, "learning_rate": 6.9833018027089125e-06, "loss": 0.3915, "step": 2713 }, { "epoch": 0.61, "learning_rate": 6.976362765647084e-06, "loss": 0.4535, "step": 2714 }, { "epoch": 0.61, "learning_rate": 6.969425330600113e-06, "loss": 0.4528, "step": 2715 }, { "epoch": 0.61, "learning_rate": 6.962489501243659e-06, "loss": 0.4386, "step": 2716 }, { "epoch": 0.61, "learning_rate": 6.955555281252539e-06, "loss": 0.4437, "step": 2717 }, { "epoch": 0.61, "learning_rate": 6.948622674300712e-06, "loss": 0.457, "step": 2718 }, { "epoch": 0.61, "learning_rate": 6.941691684061278e-06, "loss": 0.4517, "step": 2719 }, { "epoch": 0.61, "learning_rate": 6.934762314206496e-06, "loss": 0.4455, "step": 2720 }, { "epoch": 0.61, "learning_rate": 6.927834568407753e-06, "loss": 0.451, "step": 2721 }, { "epoch": 0.61, "learning_rate": 6.920908450335578e-06, "loss": 0.4319, "step": 2722 }, { "epoch": 0.61, "learning_rate": 6.913983963659639e-06, "loss": 0.465, "step": 2723 }, { "epoch": 0.61, "learning_rate": 6.907061112048741e-06, "loss": 0.431, "step": 2724 }, { "epoch": 0.61, "learning_rate": 6.900139899170819e-06, "loss": 0.4405, "step": 2725 }, { "epoch": 0.61, "learning_rate": 6.893220328692938e-06, "loss": 0.4419, "step": 2726 }, { "epoch": 0.61, "learning_rate": 6.886302404281307e-06, "loss": 0.4532, "step": 2727 }, { "epoch": 0.61, "learning_rate": 6.879386129601244e-06, "loss": 0.419, "step": 2728 }, { "epoch": 0.61, "learning_rate": 6.872471508317206e-06, "loss": 0.4557, "step": 2729 }, { "epoch": 0.61, "learning_rate": 6.865558544092767e-06, "loss": 0.4504, "step": 2730 }, { "epoch": 0.61, "learning_rate": 6.858647240590626e-06, "loss": 0.4457, "step": 2731 }, { "epoch": 0.61, "learning_rate": 6.8517376014726015e-06, "loss": 0.4184, "step": 2732 }, { "epoch": 0.61, "learning_rate": 6.8448296303996295e-06, "loss": 0.4433, "step": 2733 }, { "epoch": 0.61, "learning_rate": 6.837923331031761e-06, "loss": 0.4367, "step": 2734 }, { "epoch": 0.61, "learning_rate": 6.831018707028169e-06, "loss": 0.4148, "step": 2735 }, { "epoch": 0.61, "learning_rate": 6.8241157620471296e-06, "loss": 0.44, "step": 2736 }, { "epoch": 0.62, "learning_rate": 6.8172144997460335e-06, "loss": 0.4725, "step": 2737 }, { "epoch": 0.62, "learning_rate": 6.8103149237813784e-06, "loss": 0.4109, "step": 2738 }, { "epoch": 0.62, "learning_rate": 6.80341703780877e-06, "loss": 0.4097, "step": 2739 }, { "epoch": 0.62, "learning_rate": 6.796520845482915e-06, "loss": 0.4488, "step": 2740 }, { "epoch": 0.62, "learning_rate": 6.789626350457628e-06, "loss": 0.4561, "step": 2741 }, { "epoch": 0.62, "learning_rate": 6.782733556385821e-06, "loss": 0.4675, "step": 2742 }, { "epoch": 0.62, "learning_rate": 6.7758424669195086e-06, "loss": 0.4458, "step": 2743 }, { "epoch": 0.62, "learning_rate": 6.768953085709797e-06, "loss": 0.4134, "step": 2744 }, { "epoch": 0.62, "learning_rate": 6.762065416406891e-06, "loss": 0.475, "step": 2745 }, { "epoch": 0.62, "learning_rate": 6.755179462660084e-06, "loss": 0.4346, "step": 2746 }, { "epoch": 0.62, "learning_rate": 6.748295228117765e-06, "loss": 0.404, "step": 2747 }, { "epoch": 0.62, "learning_rate": 6.7414127164274115e-06, "loss": 0.4194, "step": 2748 }, { "epoch": 0.62, "learning_rate": 6.7345319312355865e-06, "loss": 0.4427, "step": 2749 }, { "epoch": 0.62, "learning_rate": 6.727652876187938e-06, "loss": 0.4453, "step": 2750 }, { "epoch": 0.62, "learning_rate": 6.720775554929201e-06, "loss": 0.4295, "step": 2751 }, { "epoch": 0.62, "learning_rate": 6.713899971103188e-06, "loss": 0.4359, "step": 2752 }, { "epoch": 0.62, "learning_rate": 6.7070261283527895e-06, "loss": 0.4013, "step": 2753 }, { "epoch": 0.62, "learning_rate": 6.70015403031998e-06, "loss": 0.4305, "step": 2754 }, { "epoch": 0.62, "learning_rate": 6.693283680645806e-06, "loss": 0.4559, "step": 2755 }, { "epoch": 0.62, "learning_rate": 6.686415082970383e-06, "loss": 0.4552, "step": 2756 }, { "epoch": 0.62, "learning_rate": 6.679548240932908e-06, "loss": 0.4336, "step": 2757 }, { "epoch": 0.62, "learning_rate": 6.6726831581716374e-06, "loss": 0.4655, "step": 2758 }, { "epoch": 0.62, "learning_rate": 6.665819838323904e-06, "loss": 0.4354, "step": 2759 }, { "epoch": 0.62, "learning_rate": 6.6589582850261025e-06, "loss": 0.4562, "step": 2760 }, { "epoch": 0.62, "learning_rate": 6.652098501913693e-06, "loss": 0.4691, "step": 2761 }, { "epoch": 0.62, "learning_rate": 6.645240492621196e-06, "loss": 0.4316, "step": 2762 }, { "epoch": 0.62, "learning_rate": 6.638384260782193e-06, "loss": 0.4528, "step": 2763 }, { "epoch": 0.62, "learning_rate": 6.631529810029325e-06, "loss": 0.4188, "step": 2764 }, { "epoch": 0.62, "learning_rate": 6.6246771439942855e-06, "loss": 0.4541, "step": 2765 }, { "epoch": 0.62, "learning_rate": 6.617826266307824e-06, "loss": 0.4333, "step": 2766 }, { "epoch": 0.62, "learning_rate": 6.6109771805997515e-06, "loss": 0.4257, "step": 2767 }, { "epoch": 0.62, "learning_rate": 6.604129890498915e-06, "loss": 0.4334, "step": 2768 }, { "epoch": 0.62, "learning_rate": 6.597284399633219e-06, "loss": 0.4354, "step": 2769 }, { "epoch": 0.62, "learning_rate": 6.590440711629614e-06, "loss": 0.4253, "step": 2770 }, { "epoch": 0.62, "learning_rate": 6.583598830114089e-06, "loss": 0.4215, "step": 2771 }, { "epoch": 0.62, "learning_rate": 6.576758758711685e-06, "loss": 0.4549, "step": 2772 }, { "epoch": 0.62, "learning_rate": 6.569920501046474e-06, "loss": 0.4672, "step": 2773 }, { "epoch": 0.62, "learning_rate": 6.56308406074158e-06, "loss": 0.4495, "step": 2774 }, { "epoch": 0.62, "learning_rate": 6.5562494414191535e-06, "loss": 0.4513, "step": 2775 }, { "epoch": 0.62, "learning_rate": 6.5494166467003835e-06, "loss": 0.4492, "step": 2776 }, { "epoch": 0.62, "learning_rate": 6.542585680205493e-06, "loss": 0.4367, "step": 2777 }, { "epoch": 0.62, "learning_rate": 6.535756545553734e-06, "loss": 0.4523, "step": 2778 }, { "epoch": 0.62, "learning_rate": 6.5289292463633906e-06, "loss": 0.4352, "step": 2779 }, { "epoch": 0.62, "learning_rate": 6.522103786251772e-06, "loss": 0.434, "step": 2780 }, { "epoch": 0.62, "learning_rate": 6.515280168835212e-06, "loss": 0.4412, "step": 2781 }, { "epoch": 0.63, "learning_rate": 6.508458397729077e-06, "loss": 0.4399, "step": 2782 }, { "epoch": 0.63, "learning_rate": 6.501638476547745e-06, "loss": 0.4697, "step": 2783 }, { "epoch": 0.63, "learning_rate": 6.494820408904619e-06, "loss": 0.4483, "step": 2784 }, { "epoch": 0.63, "learning_rate": 6.488004198412114e-06, "loss": 0.4428, "step": 2785 }, { "epoch": 0.63, "learning_rate": 6.48118984868167e-06, "loss": 0.4449, "step": 2786 }, { "epoch": 0.63, "learning_rate": 6.474377363323736e-06, "loss": 0.422, "step": 2787 }, { "epoch": 0.63, "learning_rate": 6.467566745947771e-06, "loss": 0.4337, "step": 2788 }, { "epoch": 0.63, "learning_rate": 6.460758000162244e-06, "loss": 0.4505, "step": 2789 }, { "epoch": 0.63, "learning_rate": 6.453951129574644e-06, "loss": 0.4511, "step": 2790 }, { "epoch": 0.63, "learning_rate": 6.447146137791454e-06, "loss": 0.4503, "step": 2791 }, { "epoch": 0.63, "learning_rate": 6.440343028418163e-06, "loss": 0.457, "step": 2792 }, { "epoch": 0.63, "learning_rate": 6.433541805059269e-06, "loss": 0.4662, "step": 2793 }, { "epoch": 0.63, "learning_rate": 6.4267424713182636e-06, "loss": 0.3962, "step": 2794 }, { "epoch": 0.63, "learning_rate": 6.419945030797643e-06, "loss": 0.4378, "step": 2795 }, { "epoch": 0.63, "learning_rate": 6.413149487098895e-06, "loss": 0.4124, "step": 2796 }, { "epoch": 0.63, "learning_rate": 6.4063558438225005e-06, "loss": 0.4304, "step": 2797 }, { "epoch": 0.63, "learning_rate": 6.39956410456795e-06, "loss": 0.43, "step": 2798 }, { "epoch": 0.63, "learning_rate": 6.392774272933706e-06, "loss": 0.4129, "step": 2799 }, { "epoch": 0.63, "learning_rate": 6.385986352517228e-06, "loss": 0.4684, "step": 2800 }, { "epoch": 0.63, "learning_rate": 6.379200346914964e-06, "loss": 0.4381, "step": 2801 }, { "epoch": 0.63, "learning_rate": 6.372416259722345e-06, "loss": 0.3892, "step": 2802 }, { "epoch": 0.63, "learning_rate": 6.365634094533786e-06, "loss": 0.4626, "step": 2803 }, { "epoch": 0.63, "learning_rate": 6.358853854942684e-06, "loss": 0.4395, "step": 2804 }, { "epoch": 0.63, "learning_rate": 6.352075544541413e-06, "loss": 0.4227, "step": 2805 }, { "epoch": 0.63, "learning_rate": 6.3452991669213345e-06, "loss": 0.3966, "step": 2806 }, { "epoch": 0.63, "learning_rate": 6.3385247256727764e-06, "loss": 0.4274, "step": 2807 }, { "epoch": 0.63, "learning_rate": 6.331752224385043e-06, "loss": 0.4566, "step": 2808 }, { "epoch": 0.63, "learning_rate": 6.3249816666464104e-06, "loss": 0.4511, "step": 2809 }, { "epoch": 0.63, "learning_rate": 6.318213056044127e-06, "loss": 0.4526, "step": 2810 }, { "epoch": 0.63, "learning_rate": 6.311446396164408e-06, "loss": 0.4209, "step": 2811 }, { "epoch": 0.63, "learning_rate": 6.304681690592431e-06, "loss": 0.441, "step": 2812 }, { "epoch": 0.63, "learning_rate": 6.29791894291235e-06, "loss": 0.429, "step": 2813 }, { "epoch": 0.63, "learning_rate": 6.291158156707271e-06, "loss": 0.4517, "step": 2814 }, { "epoch": 0.63, "learning_rate": 6.284399335559265e-06, "loss": 0.4501, "step": 2815 }, { "epoch": 0.63, "learning_rate": 6.277642483049357e-06, "loss": 0.4314, "step": 2816 }, { "epoch": 0.63, "learning_rate": 6.270887602757538e-06, "loss": 0.4528, "step": 2817 }, { "epoch": 0.63, "learning_rate": 6.264134698262745e-06, "loss": 0.4163, "step": 2818 }, { "epoch": 0.63, "learning_rate": 6.257383773142869e-06, "loss": 0.4297, "step": 2819 }, { "epoch": 0.63, "learning_rate": 6.250634830974761e-06, "loss": 0.4406, "step": 2820 }, { "epoch": 0.63, "learning_rate": 6.2438878753342144e-06, "loss": 0.4636, "step": 2821 }, { "epoch": 0.63, "learning_rate": 6.237142909795973e-06, "loss": 0.4549, "step": 2822 }, { "epoch": 0.63, "learning_rate": 6.230399937933719e-06, "loss": 0.4528, "step": 2823 }, { "epoch": 0.63, "learning_rate": 6.2236589633200875e-06, "loss": 0.4391, "step": 2824 }, { "epoch": 0.63, "learning_rate": 6.2169199895266505e-06, "loss": 0.4339, "step": 2825 }, { "epoch": 0.64, "learning_rate": 6.210183020123921e-06, "loss": 0.4343, "step": 2826 }, { "epoch": 0.64, "learning_rate": 6.203448058681351e-06, "loss": 0.4307, "step": 2827 }, { "epoch": 0.64, "learning_rate": 6.196715108767325e-06, "loss": 0.4038, "step": 2828 }, { "epoch": 0.64, "learning_rate": 6.189984173949168e-06, "loss": 0.4185, "step": 2829 }, { "epoch": 0.64, "learning_rate": 6.183255257793132e-06, "loss": 0.4476, "step": 2830 }, { "epoch": 0.64, "learning_rate": 6.176528363864403e-06, "loss": 0.4607, "step": 2831 }, { "epoch": 0.64, "learning_rate": 6.169803495727089e-06, "loss": 0.4286, "step": 2832 }, { "epoch": 0.64, "learning_rate": 6.163080656944234e-06, "loss": 0.4464, "step": 2833 }, { "epoch": 0.64, "learning_rate": 6.156359851077802e-06, "loss": 0.4503, "step": 2834 }, { "epoch": 0.64, "learning_rate": 6.149641081688677e-06, "loss": 0.4571, "step": 2835 }, { "epoch": 0.64, "learning_rate": 6.142924352336668e-06, "loss": 0.4343, "step": 2836 }, { "epoch": 0.64, "learning_rate": 6.136209666580505e-06, "loss": 0.4434, "step": 2837 }, { "epoch": 0.64, "learning_rate": 6.129497027977829e-06, "loss": 0.4294, "step": 2838 }, { "epoch": 0.64, "learning_rate": 6.122786440085202e-06, "loss": 0.435, "step": 2839 }, { "epoch": 0.64, "learning_rate": 6.116077906458097e-06, "loss": 0.4086, "step": 2840 }, { "epoch": 0.64, "learning_rate": 6.109371430650901e-06, "loss": 0.4516, "step": 2841 }, { "epoch": 0.64, "learning_rate": 6.102667016216905e-06, "loss": 0.4018, "step": 2842 }, { "epoch": 0.64, "learning_rate": 6.095964666708312e-06, "loss": 0.4563, "step": 2843 }, { "epoch": 0.64, "learning_rate": 6.089264385676229e-06, "loss": 0.461, "step": 2844 }, { "epoch": 0.64, "learning_rate": 6.0825661766706715e-06, "loss": 0.4487, "step": 2845 }, { "epoch": 0.64, "learning_rate": 6.075870043240555e-06, "loss": 0.4281, "step": 2846 }, { "epoch": 0.64, "learning_rate": 6.069175988933691e-06, "loss": 0.4549, "step": 2847 }, { "epoch": 0.64, "learning_rate": 6.062484017296796e-06, "loss": 0.4492, "step": 2848 }, { "epoch": 0.64, "learning_rate": 6.055794131875477e-06, "loss": 0.4234, "step": 2849 }, { "epoch": 0.64, "learning_rate": 6.049106336214241e-06, "loss": 0.4403, "step": 2850 }, { "epoch": 0.64, "learning_rate": 6.042420633856483e-06, "loss": 0.4257, "step": 2851 }, { "epoch": 0.64, "learning_rate": 6.035737028344488e-06, "loss": 0.4506, "step": 2852 }, { "epoch": 0.64, "learning_rate": 6.029055523219442e-06, "loss": 0.454, "step": 2853 }, { "epoch": 0.64, "learning_rate": 6.022376122021405e-06, "loss": 0.4354, "step": 2854 }, { "epoch": 0.64, "learning_rate": 6.015698828289325e-06, "loss": 0.4372, "step": 2855 }, { "epoch": 0.64, "learning_rate": 6.009023645561039e-06, "loss": 0.4466, "step": 2856 }, { "epoch": 0.64, "learning_rate": 6.002350577373256e-06, "loss": 0.4391, "step": 2857 }, { "epoch": 0.64, "learning_rate": 5.995679627261575e-06, "loss": 0.4438, "step": 2858 }, { "epoch": 0.64, "learning_rate": 5.989010798760462e-06, "loss": 0.435, "step": 2859 }, { "epoch": 0.64, "learning_rate": 5.982344095403274e-06, "loss": 0.4003, "step": 2860 }, { "epoch": 0.64, "learning_rate": 5.975679520722227e-06, "loss": 0.4324, "step": 2861 }, { "epoch": 0.64, "learning_rate": 5.969017078248417e-06, "loss": 0.4399, "step": 2862 }, { "epoch": 0.64, "learning_rate": 5.962356771511808e-06, "loss": 0.4154, "step": 2863 }, { "epoch": 0.64, "learning_rate": 5.955698604041231e-06, "loss": 0.4436, "step": 2864 }, { "epoch": 0.64, "learning_rate": 5.949042579364389e-06, "loss": 0.4316, "step": 2865 }, { "epoch": 0.64, "learning_rate": 5.942388701007843e-06, "loss": 0.4227, "step": 2866 }, { "epoch": 0.64, "learning_rate": 5.935736972497017e-06, "loss": 0.4621, "step": 2867 }, { "epoch": 0.64, "learning_rate": 5.929087397356206e-06, "loss": 0.4447, "step": 2868 }, { "epoch": 0.64, "learning_rate": 5.922439979108554e-06, "loss": 0.4509, "step": 2869 }, { "epoch": 0.64, "learning_rate": 5.915794721276065e-06, "loss": 0.4401, "step": 2870 }, { "epoch": 0.65, "learning_rate": 5.909151627379599e-06, "loss": 0.4437, "step": 2871 }, { "epoch": 0.65, "learning_rate": 5.902510700938866e-06, "loss": 0.4133, "step": 2872 }, { "epoch": 0.65, "learning_rate": 5.895871945472434e-06, "loss": 0.4394, "step": 2873 }, { "epoch": 0.65, "learning_rate": 5.889235364497716e-06, "loss": 0.4342, "step": 2874 }, { "epoch": 0.65, "learning_rate": 5.882600961530971e-06, "loss": 0.4351, "step": 2875 }, { "epoch": 0.65, "learning_rate": 5.875968740087312e-06, "loss": 0.4474, "step": 2876 }, { "epoch": 0.65, "learning_rate": 5.869338703680691e-06, "loss": 0.4488, "step": 2877 }, { "epoch": 0.65, "learning_rate": 5.862710855823902e-06, "loss": 0.4355, "step": 2878 }, { "epoch": 0.65, "learning_rate": 5.856085200028579e-06, "loss": 0.4467, "step": 2879 }, { "epoch": 0.65, "learning_rate": 5.849461739805198e-06, "loss": 0.4537, "step": 2880 }, { "epoch": 0.65, "learning_rate": 5.842840478663066e-06, "loss": 0.465, "step": 2881 }, { "epoch": 0.65, "learning_rate": 5.836221420110328e-06, "loss": 0.418, "step": 2882 }, { "epoch": 0.65, "learning_rate": 5.829604567653964e-06, "loss": 0.454, "step": 2883 }, { "epoch": 0.65, "learning_rate": 5.822989924799785e-06, "loss": 0.4223, "step": 2884 }, { "epoch": 0.65, "learning_rate": 5.816377495052432e-06, "loss": 0.4015, "step": 2885 }, { "epoch": 0.65, "learning_rate": 5.809767281915363e-06, "loss": 0.4442, "step": 2886 }, { "epoch": 0.65, "learning_rate": 5.803159288890878e-06, "loss": 0.4266, "step": 2887 }, { "epoch": 0.65, "learning_rate": 5.796553519480086e-06, "loss": 0.4331, "step": 2888 }, { "epoch": 0.65, "learning_rate": 5.789949977182932e-06, "loss": 0.4437, "step": 2889 }, { "epoch": 0.65, "learning_rate": 5.78334866549816e-06, "loss": 0.4175, "step": 2890 }, { "epoch": 0.65, "learning_rate": 5.776749587923358e-06, "loss": 0.4318, "step": 2891 }, { "epoch": 0.65, "learning_rate": 5.770152747954913e-06, "loss": 0.4516, "step": 2892 }, { "epoch": 0.65, "learning_rate": 5.763558149088035e-06, "loss": 0.4601, "step": 2893 }, { "epoch": 0.65, "learning_rate": 5.756965794816736e-06, "loss": 0.3964, "step": 2894 }, { "epoch": 0.65, "learning_rate": 5.750375688633852e-06, "loss": 0.4428, "step": 2895 }, { "epoch": 0.65, "learning_rate": 5.743787834031016e-06, "loss": 0.4243, "step": 2896 }, { "epoch": 0.65, "learning_rate": 5.737202234498679e-06, "loss": 0.4273, "step": 2897 }, { "epoch": 0.65, "learning_rate": 5.730618893526081e-06, "loss": 0.4266, "step": 2898 }, { "epoch": 0.65, "learning_rate": 5.724037814601292e-06, "loss": 0.4063, "step": 2899 }, { "epoch": 0.65, "learning_rate": 5.717459001211155e-06, "loss": 0.4437, "step": 2900 }, { "epoch": 0.65, "learning_rate": 5.710882456841334e-06, "loss": 0.4416, "step": 2901 }, { "epoch": 0.65, "learning_rate": 5.704308184976273e-06, "loss": 0.4476, "step": 2902 }, { "epoch": 0.65, "learning_rate": 5.69773618909923e-06, "loss": 0.446, "step": 2903 }, { "epoch": 0.65, "learning_rate": 5.691166472692239e-06, "loss": 0.4182, "step": 2904 }, { "epoch": 0.65, "learning_rate": 5.684599039236144e-06, "loss": 0.4197, "step": 2905 }, { "epoch": 0.65, "learning_rate": 5.678033892210558e-06, "loss": 0.4136, "step": 2906 }, { "epoch": 0.65, "learning_rate": 5.671471035093912e-06, "loss": 0.4491, "step": 2907 }, { "epoch": 0.65, "learning_rate": 5.664910471363395e-06, "loss": 0.403, "step": 2908 }, { "epoch": 0.65, "learning_rate": 5.6583522044950016e-06, "loss": 0.4634, "step": 2909 }, { "epoch": 0.65, "learning_rate": 5.651796237963492e-06, "loss": 0.4226, "step": 2910 }, { "epoch": 0.65, "learning_rate": 5.645242575242422e-06, "loss": 0.4418, "step": 2911 }, { "epoch": 0.65, "learning_rate": 5.6386912198041175e-06, "loss": 0.4468, "step": 2912 }, { "epoch": 0.65, "learning_rate": 5.6321421751196926e-06, "loss": 0.4482, "step": 2913 }, { "epoch": 0.65, "learning_rate": 5.625595444659017e-06, "loss": 0.4229, "step": 2914 }, { "epoch": 0.66, "learning_rate": 5.619051031890763e-06, "loss": 0.4558, "step": 2915 }, { "epoch": 0.66, "learning_rate": 5.612508940282348e-06, "loss": 0.4229, "step": 2916 }, { "epoch": 0.66, "learning_rate": 5.6059691732999785e-06, "loss": 0.4593, "step": 2917 }, { "epoch": 0.66, "learning_rate": 5.599431734408615e-06, "loss": 0.4369, "step": 2918 }, { "epoch": 0.66, "learning_rate": 5.592896627071998e-06, "loss": 0.4276, "step": 2919 }, { "epoch": 0.66, "learning_rate": 5.586363854752617e-06, "loss": 0.4399, "step": 2920 }, { "epoch": 0.66, "learning_rate": 5.579833420911745e-06, "loss": 0.4229, "step": 2921 }, { "epoch": 0.66, "learning_rate": 5.573305329009387e-06, "loss": 0.4213, "step": 2922 }, { "epoch": 0.66, "learning_rate": 5.5667795825043425e-06, "loss": 0.4433, "step": 2923 }, { "epoch": 0.66, "learning_rate": 5.560256184854141e-06, "loss": 0.45, "step": 2924 }, { "epoch": 0.66, "learning_rate": 5.553735139515081e-06, "loss": 0.4291, "step": 2925 }, { "epoch": 0.66, "learning_rate": 5.547216449942203e-06, "loss": 0.4297, "step": 2926 }, { "epoch": 0.66, "learning_rate": 5.5407001195893175e-06, "loss": 0.4268, "step": 2927 }, { "epoch": 0.66, "learning_rate": 5.5341861519089635e-06, "loss": 0.4334, "step": 2928 }, { "epoch": 0.66, "learning_rate": 5.527674550352448e-06, "loss": 0.4603, "step": 2929 }, { "epoch": 0.66, "learning_rate": 5.521165318369803e-06, "loss": 0.4078, "step": 2930 }, { "epoch": 0.66, "learning_rate": 5.5146584594098344e-06, "loss": 0.4184, "step": 2931 }, { "epoch": 0.66, "learning_rate": 5.508153976920059e-06, "loss": 0.4593, "step": 2932 }, { "epoch": 0.66, "learning_rate": 5.501651874346759e-06, "loss": 0.464, "step": 2933 }, { "epoch": 0.66, "learning_rate": 5.49515215513494e-06, "loss": 0.4064, "step": 2934 }, { "epoch": 0.66, "learning_rate": 5.488654822728355e-06, "loss": 0.3987, "step": 2935 }, { "epoch": 0.66, "learning_rate": 5.4821598805694805e-06, "loss": 0.4203, "step": 2936 }, { "epoch": 0.66, "learning_rate": 5.475667332099545e-06, "loss": 0.4206, "step": 2937 }, { "epoch": 0.66, "learning_rate": 5.469177180758482e-06, "loss": 0.4376, "step": 2938 }, { "epoch": 0.66, "learning_rate": 5.462689429984992e-06, "loss": 0.4392, "step": 2939 }, { "epoch": 0.66, "learning_rate": 5.456204083216465e-06, "loss": 0.4608, "step": 2940 }, { "epoch": 0.66, "learning_rate": 5.449721143889047e-06, "loss": 0.4555, "step": 2941 }, { "epoch": 0.66, "learning_rate": 5.443240615437586e-06, "loss": 0.4583, "step": 2942 }, { "epoch": 0.66, "learning_rate": 5.43676250129567e-06, "loss": 0.4465, "step": 2943 }, { "epoch": 0.66, "learning_rate": 5.4302868048955945e-06, "loss": 0.4068, "step": 2944 }, { "epoch": 0.66, "learning_rate": 5.423813529668384e-06, "loss": 0.4352, "step": 2945 }, { "epoch": 0.66, "learning_rate": 5.417342679043776e-06, "loss": 0.453, "step": 2946 }, { "epoch": 0.66, "learning_rate": 5.410874256450226e-06, "loss": 0.4356, "step": 2947 }, { "epoch": 0.66, "learning_rate": 5.404408265314896e-06, "loss": 0.4425, "step": 2948 }, { "epoch": 0.66, "learning_rate": 5.39794470906367e-06, "loss": 0.4338, "step": 2949 }, { "epoch": 0.66, "learning_rate": 5.391483591121129e-06, "loss": 0.4424, "step": 2950 }, { "epoch": 0.66, "learning_rate": 5.385024914910575e-06, "loss": 0.4638, "step": 2951 }, { "epoch": 0.66, "learning_rate": 5.378568683854013e-06, "loss": 0.4223, "step": 2952 }, { "epoch": 0.66, "learning_rate": 5.372114901372143e-06, "loss": 0.4345, "step": 2953 }, { "epoch": 0.66, "learning_rate": 5.3656635708843785e-06, "loss": 0.4492, "step": 2954 }, { "epoch": 0.66, "learning_rate": 5.359214695808835e-06, "loss": 0.4046, "step": 2955 }, { "epoch": 0.66, "learning_rate": 5.352768279562315e-06, "loss": 0.412, "step": 2956 }, { "epoch": 0.66, "learning_rate": 5.346324325560329e-06, "loss": 0.4206, "step": 2957 }, { "epoch": 0.66, "learning_rate": 5.339882837217083e-06, "loss": 0.4397, "step": 2958 }, { "epoch": 0.66, "learning_rate": 5.333443817945467e-06, "loss": 0.436, "step": 2959 }, { "epoch": 0.67, "learning_rate": 5.3270072711570744e-06, "loss": 0.3991, "step": 2960 }, { "epoch": 0.67, "learning_rate": 5.3205732002621776e-06, "loss": 0.4577, "step": 2961 }, { "epoch": 0.67, "learning_rate": 5.314141608669745e-06, "loss": 0.4174, "step": 2962 }, { "epoch": 0.67, "learning_rate": 5.30771249978743e-06, "loss": 0.4494, "step": 2963 }, { "epoch": 0.67, "learning_rate": 5.301285877021575e-06, "loss": 0.4529, "step": 2964 }, { "epoch": 0.67, "learning_rate": 5.294861743777189e-06, "loss": 0.4291, "step": 2965 }, { "epoch": 0.67, "learning_rate": 5.2884401034579835e-06, "loss": 0.4006, "step": 2966 }, { "epoch": 0.67, "learning_rate": 5.28202095946633e-06, "loss": 0.4174, "step": 2967 }, { "epoch": 0.67, "learning_rate": 5.2756043152032934e-06, "loss": 0.437, "step": 2968 }, { "epoch": 0.67, "learning_rate": 5.269190174068599e-06, "loss": 0.4274, "step": 2969 }, { "epoch": 0.67, "learning_rate": 5.262778539460658e-06, "loss": 0.4408, "step": 2970 }, { "epoch": 0.67, "learning_rate": 5.256369414776548e-06, "loss": 0.4528, "step": 2971 }, { "epoch": 0.67, "learning_rate": 5.249962803412024e-06, "loss": 0.4486, "step": 2972 }, { "epoch": 0.67, "learning_rate": 5.243558708761493e-06, "loss": 0.4434, "step": 2973 }, { "epoch": 0.67, "learning_rate": 5.237157134218049e-06, "loss": 0.4259, "step": 2974 }, { "epoch": 0.67, "learning_rate": 5.230758083173433e-06, "loss": 0.4357, "step": 2975 }, { "epoch": 0.67, "learning_rate": 5.224361559018061e-06, "loss": 0.424, "step": 2976 }, { "epoch": 0.67, "learning_rate": 5.217967565140998e-06, "loss": 0.4287, "step": 2977 }, { "epoch": 0.67, "learning_rate": 5.211576104929989e-06, "loss": 0.4414, "step": 2978 }, { "epoch": 0.67, "learning_rate": 5.205187181771413e-06, "loss": 0.4437, "step": 2979 }, { "epoch": 0.67, "learning_rate": 5.198800799050323e-06, "loss": 0.419, "step": 2980 }, { "epoch": 0.67, "learning_rate": 5.192416960150413e-06, "loss": 0.4491, "step": 2981 }, { "epoch": 0.67, "learning_rate": 5.1860356684540395e-06, "loss": 0.4317, "step": 2982 }, { "epoch": 0.67, "learning_rate": 5.1796569273421995e-06, "loss": 0.4162, "step": 2983 }, { "epoch": 0.67, "learning_rate": 5.173280740194546e-06, "loss": 0.4527, "step": 2984 }, { "epoch": 0.67, "learning_rate": 5.166907110389376e-06, "loss": 0.4404, "step": 2985 }, { "epoch": 0.67, "learning_rate": 5.160536041303639e-06, "loss": 0.4208, "step": 2986 }, { "epoch": 0.67, "learning_rate": 5.154167536312911e-06, "loss": 0.4315, "step": 2987 }, { "epoch": 0.67, "learning_rate": 5.147801598791426e-06, "loss": 0.4373, "step": 2988 }, { "epoch": 0.67, "learning_rate": 5.141438232112046e-06, "loss": 0.4489, "step": 2989 }, { "epoch": 0.67, "learning_rate": 5.135077439646283e-06, "loss": 0.4087, "step": 2990 }, { "epoch": 0.67, "learning_rate": 5.128719224764269e-06, "loss": 0.4151, "step": 2991 }, { "epoch": 0.67, "learning_rate": 5.1223635908347846e-06, "loss": 0.4425, "step": 2992 }, { "epoch": 0.67, "learning_rate": 5.116010541225236e-06, "loss": 0.4492, "step": 2993 }, { "epoch": 0.67, "learning_rate": 5.109660079301668e-06, "loss": 0.4307, "step": 2994 }, { "epoch": 0.67, "learning_rate": 5.10331220842874e-06, "loss": 0.4114, "step": 2995 }, { "epoch": 0.67, "learning_rate": 5.096966931969754e-06, "loss": 0.4165, "step": 2996 }, { "epoch": 0.67, "learning_rate": 5.090624253286622e-06, "loss": 0.462, "step": 2997 }, { "epoch": 0.67, "learning_rate": 5.084284175739896e-06, "loss": 0.4365, "step": 2998 }, { "epoch": 0.67, "learning_rate": 5.077946702688734e-06, "loss": 0.4451, "step": 2999 }, { "epoch": 0.67, "learning_rate": 5.071611837490926e-06, "loss": 0.443, "step": 3000 }, { "epoch": 0.67, "learning_rate": 5.065279583502876e-06, "loss": 0.4359, "step": 3001 }, { "epoch": 0.67, "learning_rate": 5.058949944079607e-06, "loss": 0.413, "step": 3002 }, { "epoch": 0.67, "learning_rate": 5.052622922574748e-06, "loss": 0.4593, "step": 3003 }, { "epoch": 0.68, "learning_rate": 5.046298522340554e-06, "loss": 0.4436, "step": 3004 }, { "epoch": 0.68, "learning_rate": 5.0399767467278784e-06, "loss": 0.4598, "step": 3005 }, { "epoch": 0.68, "learning_rate": 5.033657599086195e-06, "loss": 0.4329, "step": 3006 }, { "epoch": 0.68, "learning_rate": 5.027341082763575e-06, "loss": 0.4439, "step": 3007 }, { "epoch": 0.68, "learning_rate": 5.021027201106703e-06, "loss": 0.465, "step": 3008 }, { "epoch": 0.68, "learning_rate": 5.014715957460865e-06, "loss": 0.3814, "step": 3009 }, { "epoch": 0.68, "learning_rate": 5.0084073551699545e-06, "loss": 0.405, "step": 3010 }, { "epoch": 0.68, "learning_rate": 5.002101397576452e-06, "loss": 0.429, "step": 3011 }, { "epoch": 0.68, "learning_rate": 4.995798088021454e-06, "loss": 0.4302, "step": 3012 }, { "epoch": 0.68, "learning_rate": 4.989497429844638e-06, "loss": 0.4444, "step": 3013 }, { "epoch": 0.68, "learning_rate": 4.98319942638429e-06, "loss": 0.419, "step": 3014 }, { "epoch": 0.68, "learning_rate": 4.976904080977277e-06, "loss": 0.4424, "step": 3015 }, { "epoch": 0.68, "learning_rate": 4.970611396959067e-06, "loss": 0.4245, "step": 3016 }, { "epoch": 0.68, "learning_rate": 4.964321377663718e-06, "loss": 0.4474, "step": 3017 }, { "epoch": 0.68, "learning_rate": 4.958034026423875e-06, "loss": 0.413, "step": 3018 }, { "epoch": 0.68, "learning_rate": 4.95174934657076e-06, "loss": 0.4377, "step": 3019 }, { "epoch": 0.68, "learning_rate": 4.9454673414341945e-06, "loss": 0.4462, "step": 3020 }, { "epoch": 0.68, "learning_rate": 4.93918801434257e-06, "loss": 0.4544, "step": 3021 }, { "epoch": 0.68, "learning_rate": 4.93291136862287e-06, "loss": 0.4612, "step": 3022 }, { "epoch": 0.68, "learning_rate": 4.9266374076006466e-06, "loss": 0.4611, "step": 3023 }, { "epoch": 0.68, "learning_rate": 4.920366134600036e-06, "loss": 0.4151, "step": 3024 }, { "epoch": 0.68, "learning_rate": 4.914097552943752e-06, "loss": 0.3946, "step": 3025 }, { "epoch": 0.68, "learning_rate": 4.907831665953082e-06, "loss": 0.4395, "step": 3026 }, { "epoch": 0.68, "learning_rate": 4.901568476947876e-06, "loss": 0.4325, "step": 3027 }, { "epoch": 0.68, "learning_rate": 4.895307989246569e-06, "loss": 0.4221, "step": 3028 }, { "epoch": 0.68, "learning_rate": 4.88905020616615e-06, "loss": 0.4432, "step": 3029 }, { "epoch": 0.68, "learning_rate": 4.8827951310221875e-06, "loss": 0.4174, "step": 3030 }, { "epoch": 0.68, "learning_rate": 4.876542767128809e-06, "loss": 0.4446, "step": 3031 }, { "epoch": 0.68, "learning_rate": 4.8702931177987115e-06, "loss": 0.4169, "step": 3032 }, { "epoch": 0.68, "learning_rate": 4.86404618634314e-06, "loss": 0.4532, "step": 3033 }, { "epoch": 0.68, "learning_rate": 4.857801976071917e-06, "loss": 0.4159, "step": 3034 }, { "epoch": 0.68, "learning_rate": 4.851560490293408e-06, "loss": 0.4198, "step": 3035 }, { "epoch": 0.68, "learning_rate": 4.845321732314544e-06, "loss": 0.4226, "step": 3036 }, { "epoch": 0.68, "learning_rate": 4.839085705440815e-06, "loss": 0.4391, "step": 3037 }, { "epoch": 0.68, "learning_rate": 4.832852412976247e-06, "loss": 0.442, "step": 3038 }, { "epoch": 0.68, "learning_rate": 4.826621858223431e-06, "loss": 0.438, "step": 3039 }, { "epoch": 0.68, "learning_rate": 4.8203940444835114e-06, "loss": 0.443, "step": 3040 }, { "epoch": 0.68, "learning_rate": 4.814168975056164e-06, "loss": 0.4363, "step": 3041 }, { "epoch": 0.68, "learning_rate": 4.807946653239621e-06, "loss": 0.389, "step": 3042 }, { "epoch": 0.68, "learning_rate": 4.801727082330665e-06, "loss": 0.4188, "step": 3043 }, { "epoch": 0.68, "learning_rate": 4.795510265624604e-06, "loss": 0.4281, "step": 3044 }, { "epoch": 0.68, "learning_rate": 4.7892962064153045e-06, "loss": 0.4242, "step": 3045 }, { "epoch": 0.68, "learning_rate": 4.783084907995156e-06, "loss": 0.4389, "step": 3046 }, { "epoch": 0.68, "learning_rate": 4.7768763736550975e-06, "loss": 0.4527, "step": 3047 }, { "epoch": 0.68, "learning_rate": 4.770670606684601e-06, "loss": 0.4358, "step": 3048 }, { "epoch": 0.69, "learning_rate": 4.764467610371664e-06, "loss": 0.4474, "step": 3049 }, { "epoch": 0.69, "learning_rate": 4.758267388002827e-06, "loss": 0.447, "step": 3050 }, { "epoch": 0.69, "learning_rate": 4.75206994286316e-06, "loss": 0.4289, "step": 3051 }, { "epoch": 0.69, "learning_rate": 4.7458752782362486e-06, "loss": 0.4621, "step": 3052 }, { "epoch": 0.69, "learning_rate": 4.739683397404222e-06, "loss": 0.4477, "step": 3053 }, { "epoch": 0.69, "learning_rate": 4.733494303647721e-06, "loss": 0.4002, "step": 3054 }, { "epoch": 0.69, "learning_rate": 4.727308000245917e-06, "loss": 0.4374, "step": 3055 }, { "epoch": 0.69, "learning_rate": 4.721124490476503e-06, "loss": 0.4255, "step": 3056 }, { "epoch": 0.69, "learning_rate": 4.714943777615693e-06, "loss": 0.4313, "step": 3057 }, { "epoch": 0.69, "learning_rate": 4.7087658649382105e-06, "loss": 0.4299, "step": 3058 }, { "epoch": 0.69, "learning_rate": 4.7025907557173074e-06, "loss": 0.4453, "step": 3059 }, { "epoch": 0.69, "learning_rate": 4.696418453224737e-06, "loss": 0.4205, "step": 3060 }, { "epoch": 0.69, "learning_rate": 4.690248960730781e-06, "loss": 0.404, "step": 3061 }, { "epoch": 0.69, "learning_rate": 4.684082281504214e-06, "loss": 0.4226, "step": 3062 }, { "epoch": 0.69, "learning_rate": 4.677918418812336e-06, "loss": 0.4438, "step": 3063 }, { "epoch": 0.69, "learning_rate": 4.671757375920949e-06, "loss": 0.4309, "step": 3064 }, { "epoch": 0.69, "learning_rate": 4.665599156094363e-06, "loss": 0.4151, "step": 3065 }, { "epoch": 0.69, "learning_rate": 4.659443762595383e-06, "loss": 0.4428, "step": 3066 }, { "epoch": 0.69, "learning_rate": 4.653291198685331e-06, "loss": 0.4206, "step": 3067 }, { "epoch": 0.69, "learning_rate": 4.647141467624012e-06, "loss": 0.4295, "step": 3068 }, { "epoch": 0.69, "learning_rate": 4.6409945726697535e-06, "loss": 0.4416, "step": 3069 }, { "epoch": 0.69, "learning_rate": 4.634850517079352e-06, "loss": 0.4238, "step": 3070 }, { "epoch": 0.69, "learning_rate": 4.62870930410813e-06, "loss": 0.4152, "step": 3071 }, { "epoch": 0.69, "learning_rate": 4.622570937009879e-06, "loss": 0.4122, "step": 3072 }, { "epoch": 0.69, "learning_rate": 4.616435419036899e-06, "loss": 0.4291, "step": 3073 }, { "epoch": 0.69, "learning_rate": 4.610302753439966e-06, "loss": 0.4597, "step": 3074 }, { "epoch": 0.69, "learning_rate": 4.60417294346836e-06, "loss": 0.4233, "step": 3075 }, { "epoch": 0.69, "learning_rate": 4.598045992369833e-06, "loss": 0.4163, "step": 3076 }, { "epoch": 0.69, "learning_rate": 4.5919219033906384e-06, "loss": 0.4396, "step": 3077 }, { "epoch": 0.69, "learning_rate": 4.5858006797754915e-06, "loss": 0.3998, "step": 3078 }, { "epoch": 0.69, "learning_rate": 4.57968232476762e-06, "loss": 0.4284, "step": 3079 }, { "epoch": 0.69, "learning_rate": 4.573566841608701e-06, "loss": 0.4261, "step": 3080 }, { "epoch": 0.69, "learning_rate": 4.5674542335389105e-06, "loss": 0.4329, "step": 3081 }, { "epoch": 0.69, "learning_rate": 4.561344503796887e-06, "loss": 0.4206, "step": 3082 }, { "epoch": 0.69, "learning_rate": 4.5552376556197595e-06, "loss": 0.424, "step": 3083 }, { "epoch": 0.69, "learning_rate": 4.549133692243114e-06, "loss": 0.4065, "step": 3084 }, { "epoch": 0.69, "learning_rate": 4.543032616901022e-06, "loss": 0.4452, "step": 3085 }, { "epoch": 0.69, "learning_rate": 4.536934432826008e-06, "loss": 0.4429, "step": 3086 }, { "epoch": 0.69, "learning_rate": 4.530839143249089e-06, "loss": 0.4302, "step": 3087 }, { "epoch": 0.69, "learning_rate": 4.524746751399725e-06, "loss": 0.4413, "step": 3088 }, { "epoch": 0.69, "learning_rate": 4.5186572605058566e-06, "loss": 0.4213, "step": 3089 }, { "epoch": 0.69, "learning_rate": 4.5125706737938745e-06, "loss": 0.4664, "step": 3090 }, { "epoch": 0.69, "learning_rate": 4.506486994488643e-06, "loss": 0.4364, "step": 3091 }, { "epoch": 0.69, "learning_rate": 4.500406225813476e-06, "loss": 0.4386, "step": 3092 }, { "epoch": 0.7, "learning_rate": 4.494328370990154e-06, "loss": 0.4187, "step": 3093 }, { "epoch": 0.7, "learning_rate": 4.4882534332388995e-06, "loss": 0.4385, "step": 3094 }, { "epoch": 0.7, "learning_rate": 4.482181415778412e-06, "loss": 0.3924, "step": 3095 }, { "epoch": 0.7, "learning_rate": 4.476112321825822e-06, "loss": 0.4582, "step": 3096 }, { "epoch": 0.7, "learning_rate": 4.470046154596725e-06, "loss": 0.437, "step": 3097 }, { "epoch": 0.7, "learning_rate": 4.463982917305155e-06, "loss": 0.4266, "step": 3098 }, { "epoch": 0.7, "learning_rate": 4.457922613163607e-06, "loss": 0.412, "step": 3099 }, { "epoch": 0.7, "learning_rate": 4.451865245383006e-06, "loss": 0.4276, "step": 3100 }, { "epoch": 0.7, "learning_rate": 4.445810817172735e-06, "loss": 0.4157, "step": 3101 }, { "epoch": 0.7, "learning_rate": 4.439759331740606e-06, "loss": 0.4332, "step": 3102 }, { "epoch": 0.7, "learning_rate": 4.433710792292894e-06, "loss": 0.426, "step": 3103 }, { "epoch": 0.7, "learning_rate": 4.427665202034286e-06, "loss": 0.4139, "step": 3104 }, { "epoch": 0.7, "learning_rate": 4.421622564167928e-06, "loss": 0.4166, "step": 3105 }, { "epoch": 0.7, "learning_rate": 4.4155828818953864e-06, "loss": 0.4427, "step": 3106 }, { "epoch": 0.7, "learning_rate": 4.409546158416674e-06, "loss": 0.4156, "step": 3107 }, { "epoch": 0.7, "learning_rate": 4.403512396930224e-06, "loss": 0.4244, "step": 3108 }, { "epoch": 0.7, "learning_rate": 4.3974816006329115e-06, "loss": 0.4334, "step": 3109 }, { "epoch": 0.7, "learning_rate": 4.391453772720032e-06, "loss": 0.426, "step": 3110 }, { "epoch": 0.7, "learning_rate": 4.385428916385319e-06, "loss": 0.4314, "step": 3111 }, { "epoch": 0.7, "learning_rate": 4.379407034820915e-06, "loss": 0.4184, "step": 3112 }, { "epoch": 0.7, "learning_rate": 4.373388131217404e-06, "loss": 0.47, "step": 3113 }, { "epoch": 0.7, "learning_rate": 4.367372208763777e-06, "loss": 0.4527, "step": 3114 }, { "epoch": 0.7, "learning_rate": 4.361359270647455e-06, "loss": 0.4252, "step": 3115 }, { "epoch": 0.7, "learning_rate": 4.355349320054279e-06, "loss": 0.4361, "step": 3116 }, { "epoch": 0.7, "learning_rate": 4.349342360168498e-06, "loss": 0.4009, "step": 3117 }, { "epoch": 0.7, "learning_rate": 4.3433383941727826e-06, "loss": 0.4422, "step": 3118 }, { "epoch": 0.7, "learning_rate": 4.337337425248223e-06, "loss": 0.4089, "step": 3119 }, { "epoch": 0.7, "learning_rate": 4.331339456574304e-06, "loss": 0.4322, "step": 3120 }, { "epoch": 0.7, "learning_rate": 4.325344491328942e-06, "loss": 0.4232, "step": 3121 }, { "epoch": 0.7, "learning_rate": 4.319352532688444e-06, "loss": 0.4346, "step": 3122 }, { "epoch": 0.7, "learning_rate": 4.313363583827533e-06, "loss": 0.4467, "step": 3123 }, { "epoch": 0.7, "learning_rate": 4.307377647919343e-06, "loss": 0.4484, "step": 3124 }, { "epoch": 0.7, "learning_rate": 4.301394728135395e-06, "loss": 0.4344, "step": 3125 }, { "epoch": 0.7, "learning_rate": 4.2954148276456255e-06, "loss": 0.4456, "step": 3126 }, { "epoch": 0.7, "learning_rate": 4.2894379496183725e-06, "loss": 0.4463, "step": 3127 }, { "epoch": 0.7, "learning_rate": 4.2834640972203576e-06, "loss": 0.4106, "step": 3128 }, { "epoch": 0.7, "learning_rate": 4.277493273616716e-06, "loss": 0.4309, "step": 3129 }, { "epoch": 0.7, "learning_rate": 4.271525481970974e-06, "loss": 0.4186, "step": 3130 }, { "epoch": 0.7, "learning_rate": 4.265560725445041e-06, "loss": 0.4054, "step": 3131 }, { "epoch": 0.7, "learning_rate": 4.259599007199233e-06, "loss": 0.4388, "step": 3132 }, { "epoch": 0.7, "learning_rate": 4.253640330392245e-06, "loss": 0.451, "step": 3133 }, { "epoch": 0.7, "learning_rate": 4.2476846981811644e-06, "loss": 0.4458, "step": 3134 }, { "epoch": 0.7, "learning_rate": 4.241732113721468e-06, "loss": 0.4272, "step": 3135 }, { "epoch": 0.7, "learning_rate": 4.2357825801670214e-06, "loss": 0.4205, "step": 3136 }, { "epoch": 0.7, "learning_rate": 4.229836100670058e-06, "loss": 0.4574, "step": 3137 }, { "epoch": 0.71, "learning_rate": 4.2238926783812125e-06, "loss": 0.4415, "step": 3138 }, { "epoch": 0.71, "learning_rate": 4.217952316449483e-06, "loss": 0.4447, "step": 3139 }, { "epoch": 0.71, "learning_rate": 4.21201501802226e-06, "loss": 0.4372, "step": 3140 }, { "epoch": 0.71, "learning_rate": 4.206080786245294e-06, "loss": 0.4428, "step": 3141 }, { "epoch": 0.71, "learning_rate": 4.200149624262736e-06, "loss": 0.4138, "step": 3142 }, { "epoch": 0.71, "learning_rate": 4.1942215352170855e-06, "loss": 0.4118, "step": 3143 }, { "epoch": 0.71, "learning_rate": 4.18829652224923e-06, "loss": 0.4279, "step": 3144 }, { "epoch": 0.71, "learning_rate": 4.182374588498416e-06, "loss": 0.4243, "step": 3145 }, { "epoch": 0.71, "learning_rate": 4.176455737102269e-06, "loss": 0.4022, "step": 3146 }, { "epoch": 0.71, "learning_rate": 4.170539971196771e-06, "loss": 0.4336, "step": 3147 }, { "epoch": 0.71, "learning_rate": 4.16462729391628e-06, "loss": 0.418, "step": 3148 }, { "epoch": 0.71, "learning_rate": 4.158717708393502e-06, "loss": 0.3855, "step": 3149 }, { "epoch": 0.71, "learning_rate": 4.152811217759529e-06, "loss": 0.4646, "step": 3150 }, { "epoch": 0.71, "learning_rate": 4.14690782514379e-06, "loss": 0.4158, "step": 3151 }, { "epoch": 0.71, "learning_rate": 4.141007533674087e-06, "loss": 0.4337, "step": 3152 }, { "epoch": 0.71, "learning_rate": 4.1351103464765675e-06, "loss": 0.4183, "step": 3153 }, { "epoch": 0.71, "learning_rate": 4.1292162666757465e-06, "loss": 0.4485, "step": 3154 }, { "epoch": 0.71, "learning_rate": 4.123325297394481e-06, "loss": 0.4193, "step": 3155 }, { "epoch": 0.71, "learning_rate": 4.117437441753987e-06, "loss": 0.4158, "step": 3156 }, { "epoch": 0.71, "learning_rate": 4.11155270287383e-06, "loss": 0.3993, "step": 3157 }, { "epoch": 0.71, "learning_rate": 4.105671083871928e-06, "loss": 0.4394, "step": 3158 }, { "epoch": 0.71, "learning_rate": 4.099792587864534e-06, "loss": 0.4003, "step": 3159 }, { "epoch": 0.71, "learning_rate": 4.0939172179662604e-06, "loss": 0.4178, "step": 3160 }, { "epoch": 0.71, "learning_rate": 4.08804497729005e-06, "loss": 0.435, "step": 3161 }, { "epoch": 0.71, "learning_rate": 4.0821758689472e-06, "loss": 0.439, "step": 3162 }, { "epoch": 0.71, "learning_rate": 4.076309896047337e-06, "loss": 0.4106, "step": 3163 }, { "epoch": 0.71, "learning_rate": 4.070447061698435e-06, "loss": 0.394, "step": 3164 }, { "epoch": 0.71, "learning_rate": 4.064587369006799e-06, "loss": 0.4246, "step": 3165 }, { "epoch": 0.71, "learning_rate": 4.0587308210770805e-06, "loss": 0.416, "step": 3166 }, { "epoch": 0.71, "learning_rate": 4.0528774210122455e-06, "loss": 0.4174, "step": 3167 }, { "epoch": 0.71, "learning_rate": 4.047027171913611e-06, "loss": 0.4235, "step": 3168 }, { "epoch": 0.71, "learning_rate": 4.041180076880811e-06, "loss": 0.4159, "step": 3169 }, { "epoch": 0.71, "learning_rate": 4.03533613901182e-06, "loss": 0.4438, "step": 3170 }, { "epoch": 0.71, "learning_rate": 4.029495361402927e-06, "loss": 0.4273, "step": 3171 }, { "epoch": 0.71, "learning_rate": 4.023657747148757e-06, "loss": 0.4473, "step": 3172 }, { "epoch": 0.71, "learning_rate": 4.017823299342256e-06, "loss": 0.4257, "step": 3173 }, { "epoch": 0.71, "learning_rate": 4.0119920210746935e-06, "loss": 0.4254, "step": 3174 }, { "epoch": 0.71, "learning_rate": 4.006163915435652e-06, "loss": 0.4462, "step": 3175 }, { "epoch": 0.71, "learning_rate": 4.000338985513046e-06, "loss": 0.4151, "step": 3176 }, { "epoch": 0.71, "learning_rate": 3.994517234393093e-06, "loss": 0.4338, "step": 3177 }, { "epoch": 0.71, "learning_rate": 3.988698665160341e-06, "loss": 0.4073, "step": 3178 }, { "epoch": 0.71, "learning_rate": 3.982883280897637e-06, "loss": 0.4253, "step": 3179 }, { "epoch": 0.71, "learning_rate": 3.977071084686153e-06, "loss": 0.4248, "step": 3180 }, { "epoch": 0.71, "learning_rate": 3.971262079605365e-06, "loss": 0.4312, "step": 3181 }, { "epoch": 0.72, "learning_rate": 3.965456268733065e-06, "loss": 0.4327, "step": 3182 }, { "epoch": 0.72, "learning_rate": 3.959653655145341e-06, "loss": 0.4371, "step": 3183 }, { "epoch": 0.72, "learning_rate": 3.953854241916603e-06, "loss": 0.4402, "step": 3184 }, { "epoch": 0.72, "learning_rate": 3.948058032119545e-06, "loss": 0.4333, "step": 3185 }, { "epoch": 0.72, "learning_rate": 3.9422650288251856e-06, "loss": 0.4193, "step": 3186 }, { "epoch": 0.72, "learning_rate": 3.936475235102826e-06, "loss": 0.4522, "step": 3187 }, { "epoch": 0.72, "learning_rate": 3.930688654020076e-06, "loss": 0.4605, "step": 3188 }, { "epoch": 0.72, "learning_rate": 3.9249052886428455e-06, "loss": 0.4392, "step": 3189 }, { "epoch": 0.72, "learning_rate": 3.919125142035339e-06, "loss": 0.4268, "step": 3190 }, { "epoch": 0.72, "learning_rate": 3.913348217260047e-06, "loss": 0.4252, "step": 3191 }, { "epoch": 0.72, "learning_rate": 3.907574517377766e-06, "loss": 0.3834, "step": 3192 }, { "epoch": 0.72, "learning_rate": 3.901804045447571e-06, "loss": 0.4299, "step": 3193 }, { "epoch": 0.72, "learning_rate": 3.896036804526838e-06, "loss": 0.4214, "step": 3194 }, { "epoch": 0.72, "learning_rate": 3.890272797671229e-06, "loss": 0.4236, "step": 3195 }, { "epoch": 0.72, "learning_rate": 3.884512027934682e-06, "loss": 0.4237, "step": 3196 }, { "epoch": 0.72, "learning_rate": 3.8787544983694325e-06, "loss": 0.423, "step": 3197 }, { "epoch": 0.72, "learning_rate": 3.873000212025997e-06, "loss": 0.4074, "step": 3198 }, { "epoch": 0.72, "learning_rate": 3.867249171953166e-06, "loss": 0.4358, "step": 3199 }, { "epoch": 0.72, "learning_rate": 3.86150138119802e-06, "loss": 0.4238, "step": 3200 }, { "epoch": 0.72, "learning_rate": 3.855756842805909e-06, "loss": 0.4368, "step": 3201 }, { "epoch": 0.72, "learning_rate": 3.850015559820465e-06, "loss": 0.4509, "step": 3202 }, { "epoch": 0.72, "learning_rate": 3.844277535283595e-06, "loss": 0.4164, "step": 3203 }, { "epoch": 0.72, "learning_rate": 3.8385427722354806e-06, "loss": 0.4313, "step": 3204 }, { "epoch": 0.72, "learning_rate": 3.832811273714569e-06, "loss": 0.4531, "step": 3205 }, { "epoch": 0.72, "learning_rate": 3.827083042757589e-06, "loss": 0.4588, "step": 3206 }, { "epoch": 0.72, "learning_rate": 3.821358082399522e-06, "loss": 0.3967, "step": 3207 }, { "epoch": 0.72, "learning_rate": 3.815636395673632e-06, "loss": 0.4418, "step": 3208 }, { "epoch": 0.72, "learning_rate": 3.8099179856114456e-06, "loss": 0.4254, "step": 3209 }, { "epoch": 0.72, "learning_rate": 3.8042028552427412e-06, "loss": 0.4305, "step": 3210 }, { "epoch": 0.72, "learning_rate": 3.798491007595573e-06, "loss": 0.4321, "step": 3211 }, { "epoch": 0.72, "learning_rate": 3.7927824456962557e-06, "loss": 0.4357, "step": 3212 }, { "epoch": 0.72, "learning_rate": 3.7870771725693513e-06, "loss": 0.4402, "step": 3213 }, { "epoch": 0.72, "learning_rate": 3.7813751912376895e-06, "loss": 0.3993, "step": 3214 }, { "epoch": 0.72, "learning_rate": 3.7756765047223578e-06, "loss": 0.4324, "step": 3215 }, { "epoch": 0.72, "learning_rate": 3.769981116042686e-06, "loss": 0.4271, "step": 3216 }, { "epoch": 0.72, "learning_rate": 3.7642890282162713e-06, "loss": 0.4444, "step": 3217 }, { "epoch": 0.72, "learning_rate": 3.7586002442589476e-06, "loss": 0.4141, "step": 3218 }, { "epoch": 0.72, "learning_rate": 3.7529147671848086e-06, "loss": 0.4327, "step": 3219 }, { "epoch": 0.72, "learning_rate": 3.747232600006193e-06, "loss": 0.4279, "step": 3220 }, { "epoch": 0.72, "learning_rate": 3.741553745733689e-06, "loss": 0.4467, "step": 3221 }, { "epoch": 0.72, "learning_rate": 3.7358782073761202e-06, "loss": 0.4339, "step": 3222 }, { "epoch": 0.72, "learning_rate": 3.7302059879405637e-06, "loss": 0.3976, "step": 3223 }, { "epoch": 0.72, "learning_rate": 3.7245370904323296e-06, "loss": 0.4584, "step": 3224 }, { "epoch": 0.72, "learning_rate": 3.718871517854976e-06, "loss": 0.4445, "step": 3225 }, { "epoch": 0.72, "learning_rate": 3.713209273210292e-06, "loss": 0.4217, "step": 3226 }, { "epoch": 0.73, "learning_rate": 3.7075503594983064e-06, "loss": 0.4163, "step": 3227 }, { "epoch": 0.73, "learning_rate": 3.7018947797172864e-06, "loss": 0.4454, "step": 3228 }, { "epoch": 0.73, "learning_rate": 3.696242536863732e-06, "loss": 0.4279, "step": 3229 }, { "epoch": 0.73, "learning_rate": 3.6905936339323677e-06, "loss": 0.4442, "step": 3230 }, { "epoch": 0.73, "learning_rate": 3.684948073916159e-06, "loss": 0.4131, "step": 3231 }, { "epoch": 0.73, "learning_rate": 3.6793058598062892e-06, "loss": 0.4414, "step": 3232 }, { "epoch": 0.73, "learning_rate": 3.673666994592181e-06, "loss": 0.4167, "step": 3233 }, { "epoch": 0.73, "learning_rate": 3.66803148126147e-06, "loss": 0.4145, "step": 3234 }, { "epoch": 0.73, "learning_rate": 3.6623993228000266e-06, "loss": 0.4549, "step": 3235 }, { "epoch": 0.73, "learning_rate": 3.656770522191938e-06, "loss": 0.419, "step": 3236 }, { "epoch": 0.73, "learning_rate": 3.6511450824195184e-06, "loss": 0.4226, "step": 3237 }, { "epoch": 0.73, "learning_rate": 3.6455230064632884e-06, "loss": 0.4134, "step": 3238 }, { "epoch": 0.73, "learning_rate": 3.639904297302004e-06, "loss": 0.4225, "step": 3239 }, { "epoch": 0.73, "learning_rate": 3.634288957912617e-06, "loss": 0.4456, "step": 3240 }, { "epoch": 0.73, "learning_rate": 3.628676991270316e-06, "loss": 0.4068, "step": 3241 }, { "epoch": 0.73, "learning_rate": 3.6230684003484785e-06, "loss": 0.4089, "step": 3242 }, { "epoch": 0.73, "learning_rate": 3.6174631881187205e-06, "loss": 0.4415, "step": 3243 }, { "epoch": 0.73, "learning_rate": 3.611861357550844e-06, "loss": 0.4091, "step": 3244 }, { "epoch": 0.73, "learning_rate": 3.6062629116128757e-06, "loss": 0.3914, "step": 3245 }, { "epoch": 0.73, "learning_rate": 3.6006678532710347e-06, "loss": 0.4399, "step": 3246 }, { "epoch": 0.73, "learning_rate": 3.595076185489761e-06, "loss": 0.423, "step": 3247 }, { "epoch": 0.73, "learning_rate": 3.5894879112316826e-06, "loss": 0.4053, "step": 3248 }, { "epoch": 0.73, "learning_rate": 3.5839030334576443e-06, "loss": 0.4427, "step": 3249 }, { "epoch": 0.73, "learning_rate": 3.5783215551266713e-06, "loss": 0.4186, "step": 3250 }, { "epoch": 0.73, "learning_rate": 3.572743479196017e-06, "loss": 0.3978, "step": 3251 }, { "epoch": 0.73, "learning_rate": 3.567168808621104e-06, "loss": 0.4279, "step": 3252 }, { "epoch": 0.73, "learning_rate": 3.5615975463555697e-06, "loss": 0.4203, "step": 3253 }, { "epoch": 0.73, "learning_rate": 3.5560296953512296e-06, "loss": 0.4367, "step": 3254 }, { "epoch": 0.73, "learning_rate": 3.550465258558109e-06, "loss": 0.4146, "step": 3255 }, { "epoch": 0.73, "learning_rate": 3.544904238924407e-06, "loss": 0.4267, "step": 3256 }, { "epoch": 0.73, "learning_rate": 3.539346639396529e-06, "loss": 0.4211, "step": 3257 }, { "epoch": 0.73, "learning_rate": 3.53379246291905e-06, "loss": 0.4368, "step": 3258 }, { "epoch": 0.73, "learning_rate": 3.5282417124347533e-06, "loss": 0.4451, "step": 3259 }, { "epoch": 0.73, "learning_rate": 3.5226943908845857e-06, "loss": 0.4569, "step": 3260 }, { "epoch": 0.73, "learning_rate": 3.5171505012076945e-06, "loss": 0.4443, "step": 3261 }, { "epoch": 0.73, "learning_rate": 3.5116100463413926e-06, "loss": 0.4261, "step": 3262 }, { "epoch": 0.73, "learning_rate": 3.5060730292211888e-06, "loss": 0.4352, "step": 3263 }, { "epoch": 0.73, "learning_rate": 3.5005394527807566e-06, "loss": 0.4009, "step": 3264 }, { "epoch": 0.73, "learning_rate": 3.4950093199519588e-06, "loss": 0.406, "step": 3265 }, { "epoch": 0.73, "learning_rate": 3.489482633664818e-06, "loss": 0.4382, "step": 3266 }, { "epoch": 0.73, "learning_rate": 3.483959396847554e-06, "loss": 0.4356, "step": 3267 }, { "epoch": 0.73, "learning_rate": 3.478439612426535e-06, "loss": 0.429, "step": 3268 }, { "epoch": 0.73, "learning_rate": 3.4729232833263183e-06, "loss": 0.4419, "step": 3269 }, { "epoch": 0.73, "learning_rate": 3.467410412469614e-06, "loss": 0.4267, "step": 3270 }, { "epoch": 0.74, "learning_rate": 3.461901002777317e-06, "loss": 0.417, "step": 3271 }, { "epoch": 0.74, "learning_rate": 3.4563950571684725e-06, "loss": 0.4422, "step": 3272 }, { "epoch": 0.74, "learning_rate": 3.450892578560301e-06, "loss": 0.4197, "step": 3273 }, { "epoch": 0.74, "learning_rate": 3.445393569868183e-06, "loss": 0.4403, "step": 3274 }, { "epoch": 0.74, "learning_rate": 3.4398980340056643e-06, "loss": 0.4303, "step": 3275 }, { "epoch": 0.74, "learning_rate": 3.434405973884438e-06, "loss": 0.4455, "step": 3276 }, { "epoch": 0.74, "learning_rate": 3.428917392414374e-06, "loss": 0.4228, "step": 3277 }, { "epoch": 0.74, "learning_rate": 3.42343229250348e-06, "loss": 0.4242, "step": 3278 }, { "epoch": 0.74, "learning_rate": 3.4179506770579373e-06, "loss": 0.4256, "step": 3279 }, { "epoch": 0.74, "learning_rate": 3.4124725489820643e-06, "loss": 0.4237, "step": 3280 }, { "epoch": 0.74, "learning_rate": 3.4069979111783435e-06, "loss": 0.408, "step": 3281 }, { "epoch": 0.74, "learning_rate": 3.401526766547405e-06, "loss": 0.4075, "step": 3282 }, { "epoch": 0.74, "learning_rate": 3.39605911798803e-06, "loss": 0.4273, "step": 3283 }, { "epoch": 0.74, "learning_rate": 3.3905949683971375e-06, "loss": 0.4344, "step": 3284 }, { "epoch": 0.74, "learning_rate": 3.3851343206698082e-06, "loss": 0.4189, "step": 3285 }, { "epoch": 0.74, "learning_rate": 3.379677177699251e-06, "loss": 0.4471, "step": 3286 }, { "epoch": 0.74, "learning_rate": 3.37422354237683e-06, "loss": 0.4424, "step": 3287 }, { "epoch": 0.74, "learning_rate": 3.3687734175920505e-06, "loss": 0.4442, "step": 3288 }, { "epoch": 0.74, "learning_rate": 3.3633268062325462e-06, "loss": 0.418, "step": 3289 }, { "epoch": 0.74, "learning_rate": 3.357883711184102e-06, "loss": 0.4225, "step": 3290 }, { "epoch": 0.74, "learning_rate": 3.352444135330638e-06, "loss": 0.4057, "step": 3291 }, { "epoch": 0.74, "learning_rate": 3.3470080815542004e-06, "loss": 0.4361, "step": 3292 }, { "epoch": 0.74, "learning_rate": 3.341575552734978e-06, "loss": 0.386, "step": 3293 }, { "epoch": 0.74, "learning_rate": 3.3361465517512938e-06, "loss": 0.4508, "step": 3294 }, { "epoch": 0.74, "learning_rate": 3.33072108147959e-06, "loss": 0.4189, "step": 3295 }, { "epoch": 0.74, "learning_rate": 3.3252991447944517e-06, "loss": 0.4162, "step": 3296 }, { "epoch": 0.74, "learning_rate": 3.319880744568581e-06, "loss": 0.4361, "step": 3297 }, { "epoch": 0.74, "learning_rate": 3.314465883672813e-06, "loss": 0.4289, "step": 3298 }, { "epoch": 0.74, "learning_rate": 3.3090545649761052e-06, "loss": 0.4021, "step": 3299 }, { "epoch": 0.74, "learning_rate": 3.303646791345543e-06, "loss": 0.407, "step": 3300 }, { "epoch": 0.74, "learning_rate": 3.2982425656463215e-06, "loss": 0.4243, "step": 3301 }, { "epoch": 0.74, "learning_rate": 3.2928418907417702e-06, "loss": 0.4066, "step": 3302 }, { "epoch": 0.74, "learning_rate": 3.2874447694933253e-06, "loss": 0.4397, "step": 3303 }, { "epoch": 0.74, "learning_rate": 3.2820512047605512e-06, "loss": 0.4321, "step": 3304 }, { "epoch": 0.74, "learning_rate": 3.2766611994011123e-06, "loss": 0.4296, "step": 3305 }, { "epoch": 0.74, "learning_rate": 3.2712747562708115e-06, "loss": 0.4237, "step": 3306 }, { "epoch": 0.74, "learning_rate": 3.2658918782235383e-06, "loss": 0.4254, "step": 3307 }, { "epoch": 0.74, "learning_rate": 3.2605125681113135e-06, "loss": 0.4169, "step": 3308 }, { "epoch": 0.74, "learning_rate": 3.255136828784251e-06, "loss": 0.412, "step": 3309 }, { "epoch": 0.74, "learning_rate": 3.249764663090589e-06, "loss": 0.4336, "step": 3310 }, { "epoch": 0.74, "learning_rate": 3.2443960738766557e-06, "loss": 0.4241, "step": 3311 }, { "epoch": 0.74, "learning_rate": 3.2390310639868992e-06, "loss": 0.4187, "step": 3312 }, { "epoch": 0.74, "learning_rate": 3.2336696362638563e-06, "loss": 0.4162, "step": 3313 }, { "epoch": 0.74, "learning_rate": 3.228311793548188e-06, "loss": 0.426, "step": 3314 }, { "epoch": 0.74, "learning_rate": 3.2229575386786295e-06, "loss": 0.4381, "step": 3315 }, { "epoch": 0.75, "learning_rate": 3.2176068744920364e-06, "loss": 0.3898, "step": 3316 }, { "epoch": 0.75, "learning_rate": 3.2122598038233466e-06, "loss": 0.4325, "step": 3317 }, { "epoch": 0.75, "learning_rate": 3.2069163295056062e-06, "loss": 0.4173, "step": 3318 }, { "epoch": 0.75, "learning_rate": 3.2015764543699437e-06, "loss": 0.4366, "step": 3319 }, { "epoch": 0.75, "learning_rate": 3.1962401812455933e-06, "loss": 0.3978, "step": 3320 }, { "epoch": 0.75, "learning_rate": 3.1909075129598666e-06, "loss": 0.4511, "step": 3321 }, { "epoch": 0.75, "learning_rate": 3.185578452338185e-06, "loss": 0.4474, "step": 3322 }, { "epoch": 0.75, "learning_rate": 3.180253002204037e-06, "loss": 0.391, "step": 3323 }, { "epoch": 0.75, "learning_rate": 3.174931165379014e-06, "loss": 0.4087, "step": 3324 }, { "epoch": 0.75, "learning_rate": 3.169612944682782e-06, "loss": 0.4224, "step": 3325 }, { "epoch": 0.75, "learning_rate": 3.1642983429330996e-06, "loss": 0.4244, "step": 3326 }, { "epoch": 0.75, "learning_rate": 3.1589873629458002e-06, "loss": 0.4209, "step": 3327 }, { "epoch": 0.75, "learning_rate": 3.153680007534804e-06, "loss": 0.4229, "step": 3328 }, { "epoch": 0.75, "learning_rate": 3.148376279512111e-06, "loss": 0.4108, "step": 3329 }, { "epoch": 0.75, "learning_rate": 3.1430761816877974e-06, "loss": 0.4626, "step": 3330 }, { "epoch": 0.75, "learning_rate": 3.137779716870013e-06, "loss": 0.4036, "step": 3331 }, { "epoch": 0.75, "learning_rate": 3.132486887864992e-06, "loss": 0.4121, "step": 3332 }, { "epoch": 0.75, "learning_rate": 3.1271976974770256e-06, "loss": 0.391, "step": 3333 }, { "epoch": 0.75, "learning_rate": 3.121912148508499e-06, "loss": 0.408, "step": 3334 }, { "epoch": 0.75, "learning_rate": 3.116630243759847e-06, "loss": 0.4287, "step": 3335 }, { "epoch": 0.75, "learning_rate": 3.111351986029587e-06, "loss": 0.4263, "step": 3336 }, { "epoch": 0.75, "learning_rate": 3.1060773781143004e-06, "loss": 0.4186, "step": 3337 }, { "epoch": 0.75, "learning_rate": 3.100806422808639e-06, "loss": 0.4053, "step": 3338 }, { "epoch": 0.75, "learning_rate": 3.0955391229053076e-06, "loss": 0.4085, "step": 3339 }, { "epoch": 0.75, "learning_rate": 3.0902754811950875e-06, "loss": 0.4193, "step": 3340 }, { "epoch": 0.75, "learning_rate": 3.0850155004668105e-06, "loss": 0.4259, "step": 3341 }, { "epoch": 0.75, "learning_rate": 3.0797591835073804e-06, "loss": 0.4071, "step": 3342 }, { "epoch": 0.75, "learning_rate": 3.0745065331017475e-06, "loss": 0.4098, "step": 3343 }, { "epoch": 0.75, "learning_rate": 3.069257552032928e-06, "loss": 0.4284, "step": 3344 }, { "epoch": 0.75, "learning_rate": 3.064012243081992e-06, "loss": 0.4298, "step": 3345 }, { "epoch": 0.75, "learning_rate": 3.0587706090280667e-06, "loss": 0.4114, "step": 3346 }, { "epoch": 0.75, "learning_rate": 3.053532652648323e-06, "loss": 0.4213, "step": 3347 }, { "epoch": 0.75, "learning_rate": 3.0482983767179952e-06, "loss": 0.3863, "step": 3348 }, { "epoch": 0.75, "learning_rate": 3.043067784010354e-06, "loss": 0.4257, "step": 3349 }, { "epoch": 0.75, "learning_rate": 3.037840877296736e-06, "loss": 0.4009, "step": 3350 }, { "epoch": 0.75, "learning_rate": 3.0326176593465053e-06, "loss": 0.395, "step": 3351 }, { "epoch": 0.75, "learning_rate": 3.0273981329270865e-06, "loss": 0.4078, "step": 3352 }, { "epoch": 0.75, "learning_rate": 3.022182300803943e-06, "loss": 0.4126, "step": 3353 }, { "epoch": 0.75, "learning_rate": 3.016970165740585e-06, "loss": 0.4005, "step": 3354 }, { "epoch": 0.75, "learning_rate": 3.0117617304985513e-06, "loss": 0.3752, "step": 3355 }, { "epoch": 0.75, "learning_rate": 3.0065569978374385e-06, "loss": 0.4179, "step": 3356 }, { "epoch": 0.75, "learning_rate": 3.001355970514863e-06, "loss": 0.4046, "step": 3357 }, { "epoch": 0.75, "learning_rate": 2.9961586512864947e-06, "loss": 0.4017, "step": 3358 }, { "epoch": 0.75, "learning_rate": 2.9909650429060257e-06, "loss": 0.4199, "step": 3359 }, { "epoch": 0.76, "learning_rate": 2.9857751481251897e-06, "loss": 0.4304, "step": 3360 }, { "epoch": 0.76, "learning_rate": 2.9805889696937496e-06, "loss": 0.4198, "step": 3361 }, { "epoch": 0.76, "learning_rate": 2.9754065103595054e-06, "loss": 0.4471, "step": 3362 }, { "epoch": 0.76, "learning_rate": 2.970227772868274e-06, "loss": 0.426, "step": 3363 }, { "epoch": 0.76, "learning_rate": 2.9650527599639134e-06, "loss": 0.4427, "step": 3364 }, { "epoch": 0.76, "learning_rate": 2.9598814743882987e-06, "loss": 0.4306, "step": 3365 }, { "epoch": 0.76, "learning_rate": 2.9547139188813345e-06, "loss": 0.4112, "step": 3366 }, { "epoch": 0.76, "learning_rate": 2.949550096180954e-06, "loss": 0.4252, "step": 3367 }, { "epoch": 0.76, "learning_rate": 2.9443900090231005e-06, "loss": 0.424, "step": 3368 }, { "epoch": 0.76, "learning_rate": 2.939233660141747e-06, "loss": 0.4297, "step": 3369 }, { "epoch": 0.76, "learning_rate": 2.9340810522688888e-06, "loss": 0.4014, "step": 3370 }, { "epoch": 0.76, "learning_rate": 2.9289321881345257e-06, "loss": 0.4251, "step": 3371 }, { "epoch": 0.76, "learning_rate": 2.923787070466687e-06, "loss": 0.4243, "step": 3372 }, { "epoch": 0.76, "learning_rate": 2.918645701991414e-06, "loss": 0.4175, "step": 3373 }, { "epoch": 0.76, "learning_rate": 2.9135080854327556e-06, "loss": 0.4123, "step": 3374 }, { "epoch": 0.76, "learning_rate": 2.9083742235127787e-06, "loss": 0.4104, "step": 3375 }, { "epoch": 0.76, "learning_rate": 2.9032441189515625e-06, "loss": 0.43, "step": 3376 }, { "epoch": 0.76, "learning_rate": 2.8981177744671875e-06, "loss": 0.4162, "step": 3377 }, { "epoch": 0.76, "learning_rate": 2.8929951927757484e-06, "loss": 0.4248, "step": 3378 }, { "epoch": 0.76, "learning_rate": 2.8878763765913478e-06, "loss": 0.4203, "step": 3379 }, { "epoch": 0.76, "learning_rate": 2.8827613286260836e-06, "loss": 0.4088, "step": 3380 }, { "epoch": 0.76, "learning_rate": 2.877650051590071e-06, "loss": 0.4348, "step": 3381 }, { "epoch": 0.76, "learning_rate": 2.8725425481914127e-06, "loss": 0.4278, "step": 3382 }, { "epoch": 0.76, "learning_rate": 2.8674388211362223e-06, "loss": 0.4022, "step": 3383 }, { "epoch": 0.76, "learning_rate": 2.8623388731286097e-06, "loss": 0.4078, "step": 3384 }, { "epoch": 0.76, "learning_rate": 2.8572427068706843e-06, "loss": 0.4332, "step": 3385 }, { "epoch": 0.76, "learning_rate": 2.852150325062546e-06, "loss": 0.4371, "step": 3386 }, { "epoch": 0.76, "learning_rate": 2.8470617304022976e-06, "loss": 0.3891, "step": 3387 }, { "epoch": 0.76, "learning_rate": 2.8419769255860254e-06, "loss": 0.402, "step": 3388 }, { "epoch": 0.76, "learning_rate": 2.8368959133078188e-06, "loss": 0.429, "step": 3389 }, { "epoch": 0.76, "learning_rate": 2.8318186962597484e-06, "loss": 0.4317, "step": 3390 }, { "epoch": 0.76, "learning_rate": 2.8267452771318794e-06, "loss": 0.4086, "step": 3391 }, { "epoch": 0.76, "learning_rate": 2.821675658612263e-06, "loss": 0.4236, "step": 3392 }, { "epoch": 0.76, "learning_rate": 2.8166098433869417e-06, "loss": 0.4429, "step": 3393 }, { "epoch": 0.76, "learning_rate": 2.8115478341399317e-06, "loss": 0.4217, "step": 3394 }, { "epoch": 0.76, "learning_rate": 2.8064896335532444e-06, "loss": 0.4333, "step": 3395 }, { "epoch": 0.76, "learning_rate": 2.801435244306864e-06, "loss": 0.4106, "step": 3396 }, { "epoch": 0.76, "learning_rate": 2.7963846690787633e-06, "loss": 0.4342, "step": 3397 }, { "epoch": 0.76, "learning_rate": 2.7913379105448856e-06, "loss": 0.4226, "step": 3398 }, { "epoch": 0.76, "learning_rate": 2.78629497137916e-06, "loss": 0.4253, "step": 3399 }, { "epoch": 0.76, "learning_rate": 2.7812558542534874e-06, "loss": 0.4228, "step": 3400 }, { "epoch": 0.76, "learning_rate": 2.77622056183775e-06, "loss": 0.4266, "step": 3401 }, { "epoch": 0.76, "learning_rate": 2.7711890967997923e-06, "loss": 0.4149, "step": 3402 }, { "epoch": 0.76, "learning_rate": 2.7661614618054434e-06, "loss": 0.4272, "step": 3403 }, { "epoch": 0.76, "learning_rate": 2.76113765951849e-06, "loss": 0.4259, "step": 3404 }, { "epoch": 0.77, "learning_rate": 2.7561176926007018e-06, "loss": 0.432, "step": 3405 }, { "epoch": 0.77, "learning_rate": 2.7511015637118034e-06, "loss": 0.3966, "step": 3406 }, { "epoch": 0.77, "learning_rate": 2.746089275509496e-06, "loss": 0.4445, "step": 3407 }, { "epoch": 0.77, "learning_rate": 2.7410808306494418e-06, "loss": 0.4229, "step": 3408 }, { "epoch": 0.77, "learning_rate": 2.7360762317852696e-06, "loss": 0.4043, "step": 3409 }, { "epoch": 0.77, "learning_rate": 2.7310754815685627e-06, "loss": 0.4026, "step": 3410 }, { "epoch": 0.77, "learning_rate": 2.726078582648877e-06, "loss": 0.4425, "step": 3411 }, { "epoch": 0.77, "learning_rate": 2.7210855376737123e-06, "loss": 0.4114, "step": 3412 }, { "epoch": 0.77, "learning_rate": 2.716096349288545e-06, "loss": 0.4227, "step": 3413 }, { "epoch": 0.77, "learning_rate": 2.7111110201367907e-06, "loss": 0.3967, "step": 3414 }, { "epoch": 0.77, "learning_rate": 2.706129552859832e-06, "loss": 0.4349, "step": 3415 }, { "epoch": 0.77, "learning_rate": 2.701151950097002e-06, "loss": 0.4522, "step": 3416 }, { "epoch": 0.77, "learning_rate": 2.6961782144855876e-06, "loss": 0.3981, "step": 3417 }, { "epoch": 0.77, "learning_rate": 2.6912083486608186e-06, "loss": 0.3946, "step": 3418 }, { "epoch": 0.77, "learning_rate": 2.6862423552558893e-06, "loss": 0.3842, "step": 3419 }, { "epoch": 0.77, "learning_rate": 2.6812802369019266e-06, "loss": 0.4223, "step": 3420 }, { "epoch": 0.77, "learning_rate": 2.676321996228016e-06, "loss": 0.4142, "step": 3421 }, { "epoch": 0.77, "learning_rate": 2.6713676358611775e-06, "loss": 0.4159, "step": 3422 }, { "epoch": 0.77, "learning_rate": 2.666417158426393e-06, "loss": 0.4079, "step": 3423 }, { "epoch": 0.77, "learning_rate": 2.661470566546566e-06, "loss": 0.3865, "step": 3424 }, { "epoch": 0.77, "learning_rate": 2.656527862842557e-06, "loss": 0.4126, "step": 3425 }, { "epoch": 0.77, "learning_rate": 2.6515890499331564e-06, "loss": 0.3933, "step": 3426 }, { "epoch": 0.77, "learning_rate": 2.646654130435101e-06, "loss": 0.414, "step": 3427 }, { "epoch": 0.77, "learning_rate": 2.6417231069630568e-06, "loss": 0.4068, "step": 3428 }, { "epoch": 0.77, "learning_rate": 2.6367959821296354e-06, "loss": 0.4258, "step": 3429 }, { "epoch": 0.77, "learning_rate": 2.6318727585453675e-06, "loss": 0.421, "step": 3430 }, { "epoch": 0.77, "learning_rate": 2.626953438818739e-06, "loss": 0.4228, "step": 3431 }, { "epoch": 0.77, "learning_rate": 2.622038025556145e-06, "loss": 0.4192, "step": 3432 }, { "epoch": 0.77, "learning_rate": 2.617126521361929e-06, "loss": 0.4125, "step": 3433 }, { "epoch": 0.77, "learning_rate": 2.612218928838348e-06, "loss": 0.4141, "step": 3434 }, { "epoch": 0.77, "learning_rate": 2.607315250585598e-06, "loss": 0.4216, "step": 3435 }, { "epoch": 0.77, "learning_rate": 2.6024154892017938e-06, "loss": 0.4216, "step": 3436 }, { "epoch": 0.77, "learning_rate": 2.597519647282981e-06, "loss": 0.4307, "step": 3437 }, { "epoch": 0.77, "learning_rate": 2.592627727423117e-06, "loss": 0.4181, "step": 3438 }, { "epoch": 0.77, "learning_rate": 2.5877397322141028e-06, "loss": 0.4068, "step": 3439 }, { "epoch": 0.77, "learning_rate": 2.582855664245737e-06, "loss": 0.4156, "step": 3440 }, { "epoch": 0.77, "learning_rate": 2.577975526105754e-06, "loss": 0.4279, "step": 3441 }, { "epoch": 0.77, "learning_rate": 2.5730993203797906e-06, "loss": 0.4104, "step": 3442 }, { "epoch": 0.77, "learning_rate": 2.568227049651417e-06, "loss": 0.4005, "step": 3443 }, { "epoch": 0.77, "learning_rate": 2.5633587165021047e-06, "loss": 0.4234, "step": 3444 }, { "epoch": 0.77, "learning_rate": 2.5584943235112458e-06, "loss": 0.4284, "step": 3445 }, { "epoch": 0.77, "learning_rate": 2.553633873256144e-06, "loss": 0.4209, "step": 3446 }, { "epoch": 0.77, "learning_rate": 2.5487773683120166e-06, "loss": 0.4291, "step": 3447 }, { "epoch": 0.77, "learning_rate": 2.543924811251982e-06, "loss": 0.4145, "step": 3448 }, { "epoch": 0.78, "learning_rate": 2.5390762046470773e-06, "loss": 0.3947, "step": 3449 }, { "epoch": 0.78, "learning_rate": 2.5342315510662363e-06, "loss": 0.3889, "step": 3450 }, { "epoch": 0.78, "learning_rate": 2.5293908530763067e-06, "loss": 0.4, "step": 3451 }, { "epoch": 0.78, "learning_rate": 2.5245541132420403e-06, "loss": 0.4082, "step": 3452 }, { "epoch": 0.78, "learning_rate": 2.5197213341260816e-06, "loss": 0.4334, "step": 3453 }, { "epoch": 0.78, "learning_rate": 2.514892518288988e-06, "loss": 0.4291, "step": 3454 }, { "epoch": 0.78, "learning_rate": 2.510067668289217e-06, "loss": 0.4126, "step": 3455 }, { "epoch": 0.78, "learning_rate": 2.505246786683112e-06, "loss": 0.4224, "step": 3456 }, { "epoch": 0.78, "learning_rate": 2.5004298760249267e-06, "loss": 0.4392, "step": 3457 }, { "epoch": 0.78, "learning_rate": 2.4956169388668104e-06, "loss": 0.4125, "step": 3458 }, { "epoch": 0.78, "learning_rate": 2.4908079777587966e-06, "loss": 0.4129, "step": 3459 }, { "epoch": 0.78, "learning_rate": 2.486002995248825e-06, "loss": 0.4372, "step": 3460 }, { "epoch": 0.78, "learning_rate": 2.4812019938827146e-06, "loss": 0.4235, "step": 3461 }, { "epoch": 0.78, "learning_rate": 2.4764049762041874e-06, "loss": 0.424, "step": 3462 }, { "epoch": 0.78, "learning_rate": 2.471611944754846e-06, "loss": 0.409, "step": 3463 }, { "epoch": 0.78, "learning_rate": 2.4668229020741883e-06, "loss": 0.4225, "step": 3464 }, { "epoch": 0.78, "learning_rate": 2.46203785069959e-06, "loss": 0.3954, "step": 3465 }, { "epoch": 0.78, "learning_rate": 2.4572567931663205e-06, "loss": 0.438, "step": 3466 }, { "epoch": 0.78, "learning_rate": 2.4524797320075233e-06, "loss": 0.4244, "step": 3467 }, { "epoch": 0.78, "learning_rate": 2.4477066697542386e-06, "loss": 0.4084, "step": 3468 }, { "epoch": 0.78, "learning_rate": 2.4429376089353717e-06, "loss": 0.4159, "step": 3469 }, { "epoch": 0.78, "learning_rate": 2.43817255207772e-06, "loss": 0.4359, "step": 3470 }, { "epoch": 0.78, "learning_rate": 2.4334115017059558e-06, "loss": 0.4566, "step": 3471 }, { "epoch": 0.78, "learning_rate": 2.42865446034263e-06, "loss": 0.4375, "step": 3472 }, { "epoch": 0.78, "learning_rate": 2.423901430508161e-06, "loss": 0.4071, "step": 3473 }, { "epoch": 0.78, "learning_rate": 2.419152414720857e-06, "loss": 0.4393, "step": 3474 }, { "epoch": 0.78, "learning_rate": 2.414407415496883e-06, "loss": 0.4296, "step": 3475 }, { "epoch": 0.78, "learning_rate": 2.4096664353502895e-06, "loss": 0.4519, "step": 3476 }, { "epoch": 0.78, "learning_rate": 2.4049294767929844e-06, "loss": 0.4293, "step": 3477 }, { "epoch": 0.78, "learning_rate": 2.4001965423347617e-06, "loss": 0.4255, "step": 3478 }, { "epoch": 0.78, "learning_rate": 2.3954676344832673e-06, "loss": 0.4453, "step": 3479 }, { "epoch": 0.78, "learning_rate": 2.3907427557440253e-06, "loss": 0.385, "step": 3480 }, { "epoch": 0.78, "learning_rate": 2.386021908620414e-06, "loss": 0.4027, "step": 3481 }, { "epoch": 0.78, "learning_rate": 2.3813050956136876e-06, "loss": 0.4112, "step": 3482 }, { "epoch": 0.78, "learning_rate": 2.37659231922295e-06, "loss": 0.4224, "step": 3483 }, { "epoch": 0.78, "learning_rate": 2.37188358194518e-06, "loss": 0.4188, "step": 3484 }, { "epoch": 0.78, "learning_rate": 2.367178886275202e-06, "loss": 0.4161, "step": 3485 }, { "epoch": 0.78, "learning_rate": 2.3624782347057172e-06, "loss": 0.4262, "step": 3486 }, { "epoch": 0.78, "learning_rate": 2.357781629727265e-06, "loss": 0.4254, "step": 3487 }, { "epoch": 0.78, "learning_rate": 2.353089073828255e-06, "loss": 0.4024, "step": 3488 }, { "epoch": 0.78, "learning_rate": 2.348400569494941e-06, "loss": 0.4222, "step": 3489 }, { "epoch": 0.78, "learning_rate": 2.3437161192114387e-06, "loss": 0.4366, "step": 3490 }, { "epoch": 0.78, "learning_rate": 2.3390357254597084e-06, "loss": 0.3824, "step": 3491 }, { "epoch": 0.78, "learning_rate": 2.3343593907195692e-06, "loss": 0.398, "step": 3492 }, { "epoch": 0.78, "learning_rate": 2.3296871174686787e-06, "loss": 0.4129, "step": 3493 }, { "epoch": 0.79, "learning_rate": 2.325018908182559e-06, "loss": 0.4215, "step": 3494 }, { "epoch": 0.79, "learning_rate": 2.32035476533456e-06, "loss": 0.4066, "step": 3495 }, { "epoch": 0.79, "learning_rate": 2.3156946913958943e-06, "loss": 0.4275, "step": 3496 }, { "epoch": 0.79, "learning_rate": 2.311038688835604e-06, "loss": 0.4233, "step": 3497 }, { "epoch": 0.79, "learning_rate": 2.3063867601205848e-06, "loss": 0.4087, "step": 3498 }, { "epoch": 0.79, "learning_rate": 2.301738907715566e-06, "loss": 0.411, "step": 3499 }, { "epoch": 0.79, "learning_rate": 2.297095134083126e-06, "loss": 0.411, "step": 3500 }, { "epoch": 0.79, "learning_rate": 2.2924554416836675e-06, "loss": 0.4085, "step": 3501 }, { "epoch": 0.79, "learning_rate": 2.287819832975454e-06, "loss": 0.4305, "step": 3502 }, { "epoch": 0.79, "learning_rate": 2.2831883104145627e-06, "loss": 0.4166, "step": 3503 }, { "epoch": 0.79, "learning_rate": 2.2785608764549194e-06, "loss": 0.4372, "step": 3504 }, { "epoch": 0.79, "learning_rate": 2.273937533548275e-06, "loss": 0.4076, "step": 3505 }, { "epoch": 0.79, "learning_rate": 2.269318284144222e-06, "loss": 0.4556, "step": 3506 }, { "epoch": 0.79, "learning_rate": 2.2647031306901724e-06, "loss": 0.4386, "step": 3507 }, { "epoch": 0.79, "learning_rate": 2.2600920756313795e-06, "loss": 0.401, "step": 3508 }, { "epoch": 0.79, "learning_rate": 2.2554851214109185e-06, "loss": 0.4161, "step": 3509 }, { "epoch": 0.79, "learning_rate": 2.2508822704696977e-06, "loss": 0.4142, "step": 3510 }, { "epoch": 0.79, "learning_rate": 2.246283525246442e-06, "loss": 0.3922, "step": 3511 }, { "epoch": 0.79, "learning_rate": 2.241688888177711e-06, "loss": 0.4244, "step": 3512 }, { "epoch": 0.79, "learning_rate": 2.237098361697877e-06, "loss": 0.4111, "step": 3513 }, { "epoch": 0.79, "learning_rate": 2.2325119482391466e-06, "loss": 0.4264, "step": 3514 }, { "epoch": 0.79, "learning_rate": 2.227929650231536e-06, "loss": 0.3924, "step": 3515 }, { "epoch": 0.79, "learning_rate": 2.223351470102888e-06, "loss": 0.4198, "step": 3516 }, { "epoch": 0.79, "learning_rate": 2.2187774102788617e-06, "loss": 0.4025, "step": 3517 }, { "epoch": 0.79, "learning_rate": 2.2142074731829365e-06, "loss": 0.4087, "step": 3518 }, { "epoch": 0.79, "learning_rate": 2.2096416612363957e-06, "loss": 0.425, "step": 3519 }, { "epoch": 0.79, "learning_rate": 2.205079976858354e-06, "loss": 0.414, "step": 3520 }, { "epoch": 0.79, "learning_rate": 2.200522422465723e-06, "loss": 0.4162, "step": 3521 }, { "epoch": 0.79, "learning_rate": 2.195969000473238e-06, "loss": 0.4168, "step": 3522 }, { "epoch": 0.79, "learning_rate": 2.1914197132934378e-06, "loss": 0.4044, "step": 3523 }, { "epoch": 0.79, "learning_rate": 2.1868745633366726e-06, "loss": 0.4216, "step": 3524 }, { "epoch": 0.79, "learning_rate": 2.1823335530111032e-06, "loss": 0.4277, "step": 3525 }, { "epoch": 0.79, "learning_rate": 2.177796684722696e-06, "loss": 0.3865, "step": 3526 }, { "epoch": 0.79, "learning_rate": 2.1732639608752173e-06, "loss": 0.4275, "step": 3527 }, { "epoch": 0.79, "learning_rate": 2.168735383870246e-06, "loss": 0.4193, "step": 3528 }, { "epoch": 0.79, "learning_rate": 2.1642109561071568e-06, "loss": 0.4107, "step": 3529 }, { "epoch": 0.79, "learning_rate": 2.1596906799831287e-06, "loss": 0.4435, "step": 3530 }, { "epoch": 0.79, "learning_rate": 2.155174557893146e-06, "loss": 0.4102, "step": 3531 }, { "epoch": 0.79, "learning_rate": 2.1506625922299807e-06, "loss": 0.3997, "step": 3532 }, { "epoch": 0.79, "learning_rate": 2.1461547853842125e-06, "loss": 0.4295, "step": 3533 }, { "epoch": 0.79, "learning_rate": 2.1416511397442176e-06, "loss": 0.3891, "step": 3534 }, { "epoch": 0.79, "learning_rate": 2.137151657696158e-06, "loss": 0.3928, "step": 3535 }, { "epoch": 0.79, "learning_rate": 2.1326563416239997e-06, "loss": 0.4148, "step": 3536 }, { "epoch": 0.79, "learning_rate": 2.1281651939094996e-06, "loss": 0.4032, "step": 3537 }, { "epoch": 0.8, "learning_rate": 2.123678216932199e-06, "loss": 0.3975, "step": 3538 }, { "epoch": 0.8, "learning_rate": 2.1191954130694404e-06, "loss": 0.3921, "step": 3539 }, { "epoch": 0.8, "learning_rate": 2.114716784696342e-06, "loss": 0.3819, "step": 3540 }, { "epoch": 0.8, "learning_rate": 2.1102423341858235e-06, "loss": 0.4166, "step": 3541 }, { "epoch": 0.8, "learning_rate": 2.105772063908582e-06, "loss": 0.399, "step": 3542 }, { "epoch": 0.8, "learning_rate": 2.1013059762331058e-06, "loss": 0.418, "step": 3543 }, { "epoch": 0.8, "learning_rate": 2.096844073525659e-06, "loss": 0.4406, "step": 3544 }, { "epoch": 0.8, "learning_rate": 2.092386358150298e-06, "loss": 0.4365, "step": 3545 }, { "epoch": 0.8, "learning_rate": 2.0879328324688497e-06, "loss": 0.4127, "step": 3546 }, { "epoch": 0.8, "learning_rate": 2.0834834988409315e-06, "loss": 0.4171, "step": 3547 }, { "epoch": 0.8, "learning_rate": 2.079038359623937e-06, "loss": 0.3993, "step": 3548 }, { "epoch": 0.8, "learning_rate": 2.074597417173032e-06, "loss": 0.3987, "step": 3549 }, { "epoch": 0.8, "learning_rate": 2.070160673841165e-06, "loss": 0.4273, "step": 3550 }, { "epoch": 0.8, "learning_rate": 2.065728131979058e-06, "loss": 0.4288, "step": 3551 }, { "epoch": 0.8, "learning_rate": 2.061299793935204e-06, "loss": 0.4383, "step": 3552 }, { "epoch": 0.8, "learning_rate": 2.056875662055874e-06, "loss": 0.4135, "step": 3553 }, { "epoch": 0.8, "learning_rate": 2.052455738685103e-06, "loss": 0.4183, "step": 3554 }, { "epoch": 0.8, "learning_rate": 2.0480400261647037e-06, "loss": 0.4252, "step": 3555 }, { "epoch": 0.8, "learning_rate": 2.0436285268342548e-06, "loss": 0.4195, "step": 3556 }, { "epoch": 0.8, "learning_rate": 2.0392212430311056e-06, "loss": 0.4238, "step": 3557 }, { "epoch": 0.8, "learning_rate": 2.034818177090362e-06, "loss": 0.3821, "step": 3558 }, { "epoch": 0.8, "learning_rate": 2.0304193313449084e-06, "loss": 0.4174, "step": 3559 }, { "epoch": 0.8, "learning_rate": 2.0260247081253816e-06, "loss": 0.4165, "step": 3560 }, { "epoch": 0.8, "learning_rate": 2.021634309760191e-06, "loss": 0.4201, "step": 3561 }, { "epoch": 0.8, "learning_rate": 2.0172481385754994e-06, "loss": 0.383, "step": 3562 }, { "epoch": 0.8, "learning_rate": 2.0128661968952346e-06, "loss": 0.4233, "step": 3563 }, { "epoch": 0.8, "learning_rate": 2.008488487041084e-06, "loss": 0.4533, "step": 3564 }, { "epoch": 0.8, "learning_rate": 2.0041150113324925e-06, "loss": 0.4273, "step": 3565 }, { "epoch": 0.8, "learning_rate": 1.9997457720866554e-06, "loss": 0.4028, "step": 3566 }, { "epoch": 0.8, "learning_rate": 1.9953807716185337e-06, "loss": 0.433, "step": 3567 }, { "epoch": 0.8, "learning_rate": 1.991020012240832e-06, "loss": 0.4162, "step": 3568 }, { "epoch": 0.8, "learning_rate": 1.9866634962640186e-06, "loss": 0.442, "step": 3569 }, { "epoch": 0.8, "learning_rate": 1.982311225996303e-06, "loss": 0.3992, "step": 3570 }, { "epoch": 0.8, "learning_rate": 1.9779632037436513e-06, "loss": 0.4059, "step": 3571 }, { "epoch": 0.8, "learning_rate": 1.97361943180978e-06, "loss": 0.4091, "step": 3572 }, { "epoch": 0.8, "learning_rate": 1.9692799124961524e-06, "loss": 0.3913, "step": 3573 }, { "epoch": 0.8, "learning_rate": 1.964944648101973e-06, "loss": 0.3923, "step": 3574 }, { "epoch": 0.8, "learning_rate": 1.9606136409242026e-06, "loss": 0.421, "step": 3575 }, { "epoch": 0.8, "learning_rate": 1.9562868932575328e-06, "loss": 0.4298, "step": 3576 }, { "epoch": 0.8, "learning_rate": 1.951964407394413e-06, "loss": 0.4305, "step": 3577 }, { "epoch": 0.8, "learning_rate": 1.947646185625023e-06, "loss": 0.4219, "step": 3578 }, { "epoch": 0.8, "learning_rate": 1.943332230237288e-06, "loss": 0.4075, "step": 3579 }, { "epoch": 0.8, "learning_rate": 1.9390225435168753e-06, "loss": 0.4177, "step": 3580 }, { "epoch": 0.8, "learning_rate": 1.9347171277471875e-06, "loss": 0.4268, "step": 3581 }, { "epoch": 0.8, "learning_rate": 1.930415985209363e-06, "loss": 0.4468, "step": 3582 }, { "epoch": 0.81, "learning_rate": 1.9261191181822804e-06, "loss": 0.4063, "step": 3583 }, { "epoch": 0.81, "learning_rate": 1.9218265289425453e-06, "loss": 0.4435, "step": 3584 }, { "epoch": 0.81, "learning_rate": 1.9175382197645075e-06, "loss": 0.4064, "step": 3585 }, { "epoch": 0.81, "learning_rate": 1.9132541929202384e-06, "loss": 0.3972, "step": 3586 }, { "epoch": 0.81, "learning_rate": 1.9089744506795493e-06, "loss": 0.41, "step": 3587 }, { "epoch": 0.81, "learning_rate": 1.904698995309976e-06, "loss": 0.4359, "step": 3588 }, { "epoch": 0.81, "learning_rate": 1.9004278290767874e-06, "loss": 0.407, "step": 3589 }, { "epoch": 0.81, "learning_rate": 1.8961609542429726e-06, "loss": 0.4283, "step": 3590 }, { "epoch": 0.81, "learning_rate": 1.8918983730692563e-06, "loss": 0.4117, "step": 3591 }, { "epoch": 0.81, "learning_rate": 1.8876400878140776e-06, "loss": 0.3949, "step": 3592 }, { "epoch": 0.81, "learning_rate": 1.8833861007336119e-06, "loss": 0.4207, "step": 3593 }, { "epoch": 0.81, "learning_rate": 1.8791364140817426e-06, "loss": 0.4236, "step": 3594 }, { "epoch": 0.81, "learning_rate": 1.874891030110093e-06, "loss": 0.4259, "step": 3595 }, { "epoch": 0.81, "learning_rate": 1.8706499510679888e-06, "loss": 0.3982, "step": 3596 }, { "epoch": 0.81, "learning_rate": 1.8664131792024865e-06, "loss": 0.4126, "step": 3597 }, { "epoch": 0.81, "learning_rate": 1.862180716758354e-06, "loss": 0.3909, "step": 3598 }, { "epoch": 0.81, "learning_rate": 1.8579525659780806e-06, "loss": 0.4513, "step": 3599 }, { "epoch": 0.81, "learning_rate": 1.8537287291018657e-06, "loss": 0.416, "step": 3600 }, { "epoch": 0.81, "learning_rate": 1.8495092083676324e-06, "loss": 0.4379, "step": 3601 }, { "epoch": 0.81, "learning_rate": 1.8452940060110002e-06, "loss": 0.4107, "step": 3602 }, { "epoch": 0.81, "learning_rate": 1.8410831242653248e-06, "loss": 0.4267, "step": 3603 }, { "epoch": 0.81, "learning_rate": 1.8368765653616493e-06, "loss": 0.421, "step": 3604 }, { "epoch": 0.81, "learning_rate": 1.8326743315287432e-06, "loss": 0.3944, "step": 3605 }, { "epoch": 0.81, "learning_rate": 1.828476424993071e-06, "loss": 0.411, "step": 3606 }, { "epoch": 0.81, "learning_rate": 1.8242828479788177e-06, "loss": 0.4306, "step": 3607 }, { "epoch": 0.81, "learning_rate": 1.8200936027078619e-06, "loss": 0.4261, "step": 3608 }, { "epoch": 0.81, "learning_rate": 1.8159086913997948e-06, "loss": 0.4183, "step": 3609 }, { "epoch": 0.81, "learning_rate": 1.8117281162719102e-06, "loss": 0.4233, "step": 3610 }, { "epoch": 0.81, "learning_rate": 1.8075518795392077e-06, "loss": 0.4069, "step": 3611 }, { "epoch": 0.81, "learning_rate": 1.8033799834143773e-06, "loss": 0.4338, "step": 3612 }, { "epoch": 0.81, "learning_rate": 1.7992124301078218e-06, "loss": 0.4339, "step": 3613 }, { "epoch": 0.81, "learning_rate": 1.7950492218276339e-06, "loss": 0.4411, "step": 3614 }, { "epoch": 0.81, "learning_rate": 1.790890360779608e-06, "loss": 0.4215, "step": 3615 }, { "epoch": 0.81, "learning_rate": 1.7867358491672394e-06, "loss": 0.4047, "step": 3616 }, { "epoch": 0.81, "learning_rate": 1.7825856891917093e-06, "loss": 0.431, "step": 3617 }, { "epoch": 0.81, "learning_rate": 1.7784398830519002e-06, "loss": 0.4355, "step": 3618 }, { "epoch": 0.81, "learning_rate": 1.7742984329443879e-06, "loss": 0.432, "step": 3619 }, { "epoch": 0.81, "learning_rate": 1.7701613410634367e-06, "loss": 0.4234, "step": 3620 }, { "epoch": 0.81, "learning_rate": 1.7660286096010027e-06, "loss": 0.4346, "step": 3621 }, { "epoch": 0.81, "learning_rate": 1.7619002407467367e-06, "loss": 0.4001, "step": 3622 }, { "epoch": 0.81, "learning_rate": 1.7577762366879692e-06, "loss": 0.4338, "step": 3623 }, { "epoch": 0.81, "learning_rate": 1.7536565996097265e-06, "loss": 0.4005, "step": 3624 }, { "epoch": 0.81, "learning_rate": 1.749541331694713e-06, "loss": 0.3764, "step": 3625 }, { "epoch": 0.81, "learning_rate": 1.7454304351233253e-06, "loss": 0.3889, "step": 3626 }, { "epoch": 0.82, "learning_rate": 1.7413239120736447e-06, "loss": 0.4341, "step": 3627 }, { "epoch": 0.82, "learning_rate": 1.7372217647214252e-06, "loss": 0.4233, "step": 3628 }, { "epoch": 0.82, "learning_rate": 1.7331239952401123e-06, "loss": 0.437, "step": 3629 }, { "epoch": 0.82, "learning_rate": 1.7290306058008322e-06, "loss": 0.3911, "step": 3630 }, { "epoch": 0.82, "learning_rate": 1.7249415985723795e-06, "loss": 0.4313, "step": 3631 }, { "epoch": 0.82, "learning_rate": 1.7208569757212424e-06, "loss": 0.3976, "step": 3632 }, { "epoch": 0.82, "learning_rate": 1.7167767394115708e-06, "loss": 0.4066, "step": 3633 }, { "epoch": 0.82, "learning_rate": 1.7127008918052023e-06, "loss": 0.4205, "step": 3634 }, { "epoch": 0.82, "learning_rate": 1.7086294350616428e-06, "loss": 0.4082, "step": 3635 }, { "epoch": 0.82, "learning_rate": 1.7045623713380777e-06, "loss": 0.3925, "step": 3636 }, { "epoch": 0.82, "learning_rate": 1.7004997027893556e-06, "loss": 0.3991, "step": 3637 }, { "epoch": 0.82, "learning_rate": 1.6964414315680068e-06, "loss": 0.4183, "step": 3638 }, { "epoch": 0.82, "learning_rate": 1.6923875598242202e-06, "loss": 0.4094, "step": 3639 }, { "epoch": 0.82, "learning_rate": 1.6883380897058677e-06, "loss": 0.4125, "step": 3640 }, { "epoch": 0.82, "learning_rate": 1.684293023358472e-06, "loss": 0.4209, "step": 3641 }, { "epoch": 0.82, "learning_rate": 1.6802523629252431e-06, "loss": 0.4091, "step": 3642 }, { "epoch": 0.82, "learning_rate": 1.6762161105470388e-06, "loss": 0.4179, "step": 3643 }, { "epoch": 0.82, "learning_rate": 1.6721842683623911e-06, "loss": 0.4364, "step": 3644 }, { "epoch": 0.82, "learning_rate": 1.6681568385074897e-06, "loss": 0.4442, "step": 3645 }, { "epoch": 0.82, "learning_rate": 1.664133823116193e-06, "loss": 0.4256, "step": 3646 }, { "epoch": 0.82, "learning_rate": 1.6601152243200113e-06, "loss": 0.4332, "step": 3647 }, { "epoch": 0.82, "learning_rate": 1.6561010442481274e-06, "loss": 0.3925, "step": 3648 }, { "epoch": 0.82, "learning_rate": 1.6520912850273662e-06, "loss": 0.4228, "step": 3649 }, { "epoch": 0.82, "learning_rate": 1.648085948782231e-06, "loss": 0.4276, "step": 3650 }, { "epoch": 0.82, "learning_rate": 1.6440850376348627e-06, "loss": 0.4091, "step": 3651 }, { "epoch": 0.82, "learning_rate": 1.6400885537050716e-06, "loss": 0.4006, "step": 3652 }, { "epoch": 0.82, "learning_rate": 1.6360964991103102e-06, "loss": 0.4154, "step": 3653 }, { "epoch": 0.82, "learning_rate": 1.6321088759656946e-06, "loss": 0.4199, "step": 3654 }, { "epoch": 0.82, "learning_rate": 1.6281256863839856e-06, "loss": 0.4041, "step": 3655 }, { "epoch": 0.82, "learning_rate": 1.624146932475601e-06, "loss": 0.3859, "step": 3656 }, { "epoch": 0.82, "learning_rate": 1.6201726163485997e-06, "loss": 0.3968, "step": 3657 }, { "epoch": 0.82, "learning_rate": 1.6162027401087045e-06, "loss": 0.3987, "step": 3658 }, { "epoch": 0.82, "learning_rate": 1.6122373058592689e-06, "loss": 0.417, "step": 3659 }, { "epoch": 0.82, "learning_rate": 1.6082763157013059e-06, "loss": 0.4286, "step": 3660 }, { "epoch": 0.82, "learning_rate": 1.6043197717334614e-06, "loss": 0.4283, "step": 3661 }, { "epoch": 0.82, "learning_rate": 1.6003676760520405e-06, "loss": 0.4097, "step": 3662 }, { "epoch": 0.82, "learning_rate": 1.596420030750976e-06, "loss": 0.4176, "step": 3663 }, { "epoch": 0.82, "learning_rate": 1.592476837921857e-06, "loss": 0.4221, "step": 3664 }, { "epoch": 0.82, "learning_rate": 1.588538099653899e-06, "loss": 0.4314, "step": 3665 }, { "epoch": 0.82, "learning_rate": 1.584603818033975e-06, "loss": 0.4332, "step": 3666 }, { "epoch": 0.82, "learning_rate": 1.5806739951465788e-06, "loss": 0.4226, "step": 3667 }, { "epoch": 0.82, "learning_rate": 1.5767486330738558e-06, "loss": 0.4169, "step": 3668 }, { "epoch": 0.82, "learning_rate": 1.5728277338955767e-06, "loss": 0.4027, "step": 3669 }, { "epoch": 0.82, "learning_rate": 1.5689112996891576e-06, "loss": 0.4079, "step": 3670 }, { "epoch": 0.82, "learning_rate": 1.5649993325296408e-06, "loss": 0.4118, "step": 3671 }, { "epoch": 0.83, "learning_rate": 1.5610918344897085e-06, "loss": 0.4299, "step": 3672 }, { "epoch": 0.83, "learning_rate": 1.5571888076396658e-06, "loss": 0.4067, "step": 3673 }, { "epoch": 0.83, "learning_rate": 1.5532902540474637e-06, "loss": 0.4046, "step": 3674 }, { "epoch": 0.83, "learning_rate": 1.549396175778667e-06, "loss": 0.4162, "step": 3675 }, { "epoch": 0.83, "learning_rate": 1.5455065748964825e-06, "loss": 0.4432, "step": 3676 }, { "epoch": 0.83, "learning_rate": 1.5416214534617335e-06, "loss": 0.3895, "step": 3677 }, { "epoch": 0.83, "learning_rate": 1.5377408135328797e-06, "loss": 0.4293, "step": 3678 }, { "epoch": 0.83, "learning_rate": 1.5338646571659988e-06, "loss": 0.4199, "step": 3679 }, { "epoch": 0.83, "learning_rate": 1.5299929864147966e-06, "loss": 0.3846, "step": 3680 }, { "epoch": 0.83, "learning_rate": 1.5261258033306027e-06, "loss": 0.4084, "step": 3681 }, { "epoch": 0.83, "learning_rate": 1.5222631099623708e-06, "loss": 0.4388, "step": 3682 }, { "epoch": 0.83, "learning_rate": 1.5184049083566688e-06, "loss": 0.4224, "step": 3683 }, { "epoch": 0.83, "learning_rate": 1.5145512005576923e-06, "loss": 0.4293, "step": 3684 }, { "epoch": 0.83, "learning_rate": 1.5107019886072494e-06, "loss": 0.3997, "step": 3685 }, { "epoch": 0.83, "learning_rate": 1.506857274544774e-06, "loss": 0.4162, "step": 3686 }, { "epoch": 0.83, "learning_rate": 1.503017060407308e-06, "loss": 0.4302, "step": 3687 }, { "epoch": 0.83, "learning_rate": 1.4991813482295149e-06, "loss": 0.4264, "step": 3688 }, { "epoch": 0.83, "learning_rate": 1.495350140043672e-06, "loss": 0.4339, "step": 3689 }, { "epoch": 0.83, "learning_rate": 1.4915234378796717e-06, "loss": 0.4233, "step": 3690 }, { "epoch": 0.83, "learning_rate": 1.487701243765013e-06, "loss": 0.4266, "step": 3691 }, { "epoch": 0.83, "learning_rate": 1.4838835597248157e-06, "loss": 0.3969, "step": 3692 }, { "epoch": 0.83, "learning_rate": 1.480070387781799e-06, "loss": 0.3996, "step": 3693 }, { "epoch": 0.83, "learning_rate": 1.4762617299562997e-06, "loss": 0.4038, "step": 3694 }, { "epoch": 0.83, "learning_rate": 1.4724575882662629e-06, "loss": 0.3874, "step": 3695 }, { "epoch": 0.83, "learning_rate": 1.4686579647272337e-06, "loss": 0.4039, "step": 3696 }, { "epoch": 0.83, "learning_rate": 1.4648628613523708e-06, "loss": 0.4092, "step": 3697 }, { "epoch": 0.83, "learning_rate": 1.4610722801524368e-06, "loss": 0.4287, "step": 3698 }, { "epoch": 0.83, "learning_rate": 1.4572862231357933e-06, "loss": 0.4445, "step": 3699 }, { "epoch": 0.83, "learning_rate": 1.4535046923084117e-06, "loss": 0.4012, "step": 3700 }, { "epoch": 0.83, "learning_rate": 1.4497276896738588e-06, "loss": 0.3924, "step": 3701 }, { "epoch": 0.83, "learning_rate": 1.4459552172333058e-06, "loss": 0.4134, "step": 3702 }, { "epoch": 0.83, "learning_rate": 1.4421872769855262e-06, "loss": 0.4113, "step": 3703 }, { "epoch": 0.83, "learning_rate": 1.438423870926885e-06, "loss": 0.4185, "step": 3704 }, { "epoch": 0.83, "learning_rate": 1.4346650010513518e-06, "loss": 0.4143, "step": 3705 }, { "epoch": 0.83, "learning_rate": 1.4309106693504914e-06, "loss": 0.4131, "step": 3706 }, { "epoch": 0.83, "learning_rate": 1.4271608778134582e-06, "loss": 0.4017, "step": 3707 }, { "epoch": 0.83, "learning_rate": 1.4234156284270085e-06, "loss": 0.4257, "step": 3708 }, { "epoch": 0.83, "learning_rate": 1.4196749231754903e-06, "loss": 0.4193, "step": 3709 }, { "epoch": 0.83, "learning_rate": 1.4159387640408396e-06, "loss": 0.4322, "step": 3710 }, { "epoch": 0.83, "learning_rate": 1.4122071530025915e-06, "loss": 0.4148, "step": 3711 }, { "epoch": 0.83, "learning_rate": 1.4084800920378617e-06, "loss": 0.4236, "step": 3712 }, { "epoch": 0.83, "learning_rate": 1.4047575831213634e-06, "loss": 0.4172, "step": 3713 }, { "epoch": 0.83, "learning_rate": 1.4010396282253935e-06, "loss": 0.3954, "step": 3714 }, { "epoch": 0.83, "learning_rate": 1.3973262293198419e-06, "loss": 0.4215, "step": 3715 }, { "epoch": 0.84, "learning_rate": 1.3936173883721726e-06, "loss": 0.419, "step": 3716 }, { "epoch": 0.84, "learning_rate": 1.3899131073474504e-06, "loss": 0.4081, "step": 3717 }, { "epoch": 0.84, "learning_rate": 1.3862133882083072e-06, "loss": 0.423, "step": 3718 }, { "epoch": 0.84, "learning_rate": 1.3825182329149734e-06, "loss": 0.4207, "step": 3719 }, { "epoch": 0.84, "learning_rate": 1.3788276434252502e-06, "loss": 0.4145, "step": 3720 }, { "epoch": 0.84, "learning_rate": 1.375141621694529e-06, "loss": 0.4267, "step": 3721 }, { "epoch": 0.84, "learning_rate": 1.3714601696757713e-06, "loss": 0.4124, "step": 3722 }, { "epoch": 0.84, "learning_rate": 1.3677832893195253e-06, "loss": 0.3907, "step": 3723 }, { "epoch": 0.84, "learning_rate": 1.3641109825739119e-06, "loss": 0.413, "step": 3724 }, { "epoch": 0.84, "learning_rate": 1.3604432513846322e-06, "loss": 0.4166, "step": 3725 }, { "epoch": 0.84, "learning_rate": 1.3567800976949585e-06, "loss": 0.4143, "step": 3726 }, { "epoch": 0.84, "learning_rate": 1.353121523445744e-06, "loss": 0.4175, "step": 3727 }, { "epoch": 0.84, "learning_rate": 1.3494675305754123e-06, "loss": 0.4047, "step": 3728 }, { "epoch": 0.84, "learning_rate": 1.3458181210199606e-06, "loss": 0.3992, "step": 3729 }, { "epoch": 0.84, "learning_rate": 1.3421732967129541e-06, "loss": 0.4194, "step": 3730 }, { "epoch": 0.84, "learning_rate": 1.338533059585534e-06, "loss": 0.4307, "step": 3731 }, { "epoch": 0.84, "learning_rate": 1.334897411566406e-06, "loss": 0.4475, "step": 3732 }, { "epoch": 0.84, "learning_rate": 1.3312663545818504e-06, "loss": 0.4022, "step": 3733 }, { "epoch": 0.84, "learning_rate": 1.3276398905557075e-06, "loss": 0.4174, "step": 3734 }, { "epoch": 0.84, "learning_rate": 1.32401802140939e-06, "loss": 0.427, "step": 3735 }, { "epoch": 0.84, "learning_rate": 1.3204007490618742e-06, "loss": 0.4111, "step": 3736 }, { "epoch": 0.84, "learning_rate": 1.3167880754297024e-06, "loss": 0.4429, "step": 3737 }, { "epoch": 0.84, "learning_rate": 1.3131800024269758e-06, "loss": 0.4278, "step": 3738 }, { "epoch": 0.84, "learning_rate": 1.309576531965364e-06, "loss": 0.4194, "step": 3739 }, { "epoch": 0.84, "learning_rate": 1.3059776659540925e-06, "loss": 0.4261, "step": 3740 }, { "epoch": 0.84, "learning_rate": 1.302383406299952e-06, "loss": 0.4321, "step": 3741 }, { "epoch": 0.84, "learning_rate": 1.2987937549072883e-06, "loss": 0.4069, "step": 3742 }, { "epoch": 0.84, "learning_rate": 1.2952087136780089e-06, "loss": 0.4122, "step": 3743 }, { "epoch": 0.84, "learning_rate": 1.291628284511577e-06, "loss": 0.4194, "step": 3744 }, { "epoch": 0.84, "learning_rate": 1.2880524693050156e-06, "loss": 0.4307, "step": 3745 }, { "epoch": 0.84, "learning_rate": 1.2844812699528963e-06, "loss": 0.4365, "step": 3746 }, { "epoch": 0.84, "learning_rate": 1.2809146883473521e-06, "loss": 0.4369, "step": 3747 }, { "epoch": 0.84, "learning_rate": 1.2773527263780626e-06, "loss": 0.4271, "step": 3748 }, { "epoch": 0.84, "learning_rate": 1.2737953859322683e-06, "loss": 0.3841, "step": 3749 }, { "epoch": 0.84, "learning_rate": 1.270242668894751e-06, "loss": 0.4243, "step": 3750 }, { "epoch": 0.84, "learning_rate": 1.266694577147851e-06, "loss": 0.4079, "step": 3751 }, { "epoch": 0.84, "learning_rate": 1.2631511125714545e-06, "loss": 0.3961, "step": 3752 }, { "epoch": 0.84, "learning_rate": 1.2596122770429998e-06, "loss": 0.4221, "step": 3753 }, { "epoch": 0.84, "learning_rate": 1.2560780724374633e-06, "loss": 0.4019, "step": 3754 }, { "epoch": 0.84, "learning_rate": 1.2525485006273808e-06, "loss": 0.4164, "step": 3755 }, { "epoch": 0.84, "learning_rate": 1.2490235634828196e-06, "loss": 0.4183, "step": 3756 }, { "epoch": 0.84, "learning_rate": 1.2455032628714048e-06, "loss": 0.4174, "step": 3757 }, { "epoch": 0.84, "learning_rate": 1.2419876006582944e-06, "loss": 0.4387, "step": 3758 }, { "epoch": 0.84, "learning_rate": 1.2384765787061936e-06, "loss": 0.4015, "step": 3759 }, { "epoch": 0.84, "learning_rate": 1.2349701988753494e-06, "loss": 0.4069, "step": 3760 }, { "epoch": 0.85, "learning_rate": 1.2314684630235507e-06, "loss": 0.4228, "step": 3761 }, { "epoch": 0.85, "learning_rate": 1.2279713730061183e-06, "loss": 0.4032, "step": 3762 }, { "epoch": 0.85, "learning_rate": 1.224478930675922e-06, "loss": 0.4084, "step": 3763 }, { "epoch": 0.85, "learning_rate": 1.2209911378833594e-06, "loss": 0.4022, "step": 3764 }, { "epoch": 0.85, "learning_rate": 1.2175079964763726e-06, "loss": 0.4509, "step": 3765 }, { "epoch": 0.85, "learning_rate": 1.2140295083004306e-06, "loss": 0.4124, "step": 3766 }, { "epoch": 0.85, "learning_rate": 1.2105556751985491e-06, "loss": 0.411, "step": 3767 }, { "epoch": 0.85, "learning_rate": 1.2070864990112663e-06, "loss": 0.4142, "step": 3768 }, { "epoch": 0.85, "learning_rate": 1.2036219815766581e-06, "loss": 0.438, "step": 3769 }, { "epoch": 0.85, "learning_rate": 1.2001621247303296e-06, "loss": 0.4117, "step": 3770 }, { "epoch": 0.85, "learning_rate": 1.1967069303054213e-06, "loss": 0.3898, "step": 3771 }, { "epoch": 0.85, "learning_rate": 1.1932564001325963e-06, "loss": 0.3999, "step": 3772 }, { "epoch": 0.85, "learning_rate": 1.1898105360400514e-06, "loss": 0.3943, "step": 3773 }, { "epoch": 0.85, "learning_rate": 1.1863693398535115e-06, "loss": 0.4111, "step": 3774 }, { "epoch": 0.85, "learning_rate": 1.182932813396227e-06, "loss": 0.4124, "step": 3775 }, { "epoch": 0.85, "learning_rate": 1.1795009584889716e-06, "loss": 0.4127, "step": 3776 }, { "epoch": 0.85, "learning_rate": 1.1760737769500508e-06, "loss": 0.4334, "step": 3777 }, { "epoch": 0.85, "learning_rate": 1.172651270595283e-06, "loss": 0.3985, "step": 3778 }, { "epoch": 0.85, "learning_rate": 1.1692334412380224e-06, "loss": 0.4353, "step": 3779 }, { "epoch": 0.85, "learning_rate": 1.1658202906891336e-06, "loss": 0.4007, "step": 3780 }, { "epoch": 0.85, "learning_rate": 1.16241182075701e-06, "loss": 0.4097, "step": 3781 }, { "epoch": 0.85, "learning_rate": 1.1590080332475628e-06, "loss": 0.4164, "step": 3782 }, { "epoch": 0.85, "learning_rate": 1.1556089299642226e-06, "loss": 0.3958, "step": 3783 }, { "epoch": 0.85, "learning_rate": 1.1522145127079354e-06, "loss": 0.4283, "step": 3784 }, { "epoch": 0.85, "learning_rate": 1.1488247832771692e-06, "loss": 0.4111, "step": 3785 }, { "epoch": 0.85, "learning_rate": 1.1454397434679022e-06, "loss": 0.4257, "step": 3786 }, { "epoch": 0.85, "learning_rate": 1.1420593950736326e-06, "loss": 0.4008, "step": 3787 }, { "epoch": 0.85, "learning_rate": 1.1386837398853745e-06, "loss": 0.4248, "step": 3788 }, { "epoch": 0.85, "learning_rate": 1.1353127796916486e-06, "loss": 0.4079, "step": 3789 }, { "epoch": 0.85, "learning_rate": 1.1319465162784937e-06, "loss": 0.4176, "step": 3790 }, { "epoch": 0.85, "learning_rate": 1.12858495142946e-06, "loss": 0.3945, "step": 3791 }, { "epoch": 0.85, "learning_rate": 1.1252280869256038e-06, "loss": 0.4279, "step": 3792 }, { "epoch": 0.85, "learning_rate": 1.1218759245454946e-06, "loss": 0.4285, "step": 3793 }, { "epoch": 0.85, "learning_rate": 1.118528466065213e-06, "loss": 0.4194, "step": 3794 }, { "epoch": 0.85, "learning_rate": 1.1151857132583398e-06, "loss": 0.3864, "step": 3795 }, { "epoch": 0.85, "learning_rate": 1.111847667895971e-06, "loss": 0.4247, "step": 3796 }, { "epoch": 0.85, "learning_rate": 1.1085143317466996e-06, "loss": 0.3997, "step": 3797 }, { "epoch": 0.85, "learning_rate": 1.1051857065766313e-06, "loss": 0.406, "step": 3798 }, { "epoch": 0.85, "learning_rate": 1.1018617941493725e-06, "loss": 0.4252, "step": 3799 }, { "epoch": 0.85, "learning_rate": 1.0985425962260342e-06, "loss": 0.4056, "step": 3800 }, { "epoch": 0.85, "learning_rate": 1.0952281145652266e-06, "loss": 0.3984, "step": 3801 }, { "epoch": 0.85, "learning_rate": 1.0919183509230636e-06, "loss": 0.4169, "step": 3802 }, { "epoch": 0.85, "learning_rate": 1.088613307053158e-06, "loss": 0.4282, "step": 3803 }, { "epoch": 0.85, "learning_rate": 1.0853129847066236e-06, "loss": 0.4012, "step": 3804 }, { "epoch": 0.86, "learning_rate": 1.082017385632066e-06, "loss": 0.4022, "step": 3805 }, { "epoch": 0.86, "learning_rate": 1.078726511575603e-06, "loss": 0.4076, "step": 3806 }, { "epoch": 0.86, "learning_rate": 1.075440364280832e-06, "loss": 0.4038, "step": 3807 }, { "epoch": 0.86, "learning_rate": 1.0721589454888593e-06, "loss": 0.401, "step": 3808 }, { "epoch": 0.86, "learning_rate": 1.068882256938275e-06, "loss": 0.4402, "step": 3809 }, { "epoch": 0.86, "learning_rate": 1.065610300365173e-06, "loss": 0.3913, "step": 3810 }, { "epoch": 0.86, "learning_rate": 1.0623430775031306e-06, "loss": 0.4018, "step": 3811 }, { "epoch": 0.86, "learning_rate": 1.0590805900832257e-06, "loss": 0.4113, "step": 3812 }, { "epoch": 0.86, "learning_rate": 1.0558228398340188e-06, "loss": 0.4246, "step": 3813 }, { "epoch": 0.86, "learning_rate": 1.0525698284815712e-06, "loss": 0.4257, "step": 3814 }, { "epoch": 0.86, "learning_rate": 1.0493215577494209e-06, "loss": 0.4012, "step": 3815 }, { "epoch": 0.86, "learning_rate": 1.0460780293586059e-06, "loss": 0.4115, "step": 3816 }, { "epoch": 0.86, "learning_rate": 1.0428392450276414e-06, "loss": 0.4181, "step": 3817 }, { "epoch": 0.86, "learning_rate": 1.039605206472537e-06, "loss": 0.4079, "step": 3818 }, { "epoch": 0.86, "learning_rate": 1.036375915406782e-06, "loss": 0.423, "step": 3819 }, { "epoch": 0.86, "learning_rate": 1.0331513735413557e-06, "loss": 0.4009, "step": 3820 }, { "epoch": 0.86, "learning_rate": 1.0299315825847122e-06, "loss": 0.4191, "step": 3821 }, { "epoch": 0.86, "learning_rate": 1.026716544242804e-06, "loss": 0.416, "step": 3822 }, { "epoch": 0.86, "learning_rate": 1.0235062602190494e-06, "loss": 0.4245, "step": 3823 }, { "epoch": 0.86, "learning_rate": 1.0203007322143577e-06, "loss": 0.4088, "step": 3824 }, { "epoch": 0.86, "learning_rate": 1.0170999619271116e-06, "loss": 0.3949, "step": 3825 }, { "epoch": 0.86, "learning_rate": 1.01390395105318e-06, "loss": 0.4095, "step": 3826 }, { "epoch": 0.86, "learning_rate": 1.010712701285903e-06, "loss": 0.4242, "step": 3827 }, { "epoch": 0.86, "learning_rate": 1.0075262143161058e-06, "loss": 0.3832, "step": 3828 }, { "epoch": 0.86, "learning_rate": 1.0043444918320788e-06, "loss": 0.4056, "step": 3829 }, { "epoch": 0.86, "learning_rate": 1.001167535519606e-06, "loss": 0.431, "step": 3830 }, { "epoch": 0.86, "learning_rate": 9.979953470619263e-07, "loss": 0.4443, "step": 3831 }, { "epoch": 0.86, "learning_rate": 9.948279281397667e-07, "loss": 0.4088, "step": 3832 }, { "epoch": 0.86, "learning_rate": 9.916652804313177e-07, "loss": 0.4023, "step": 3833 }, { "epoch": 0.86, "learning_rate": 9.885074056122513e-07, "loss": 0.421, "step": 3834 }, { "epoch": 0.86, "learning_rate": 9.853543053557001e-07, "loss": 0.4228, "step": 3835 }, { "epoch": 0.86, "learning_rate": 9.822059813322771e-07, "loss": 0.4023, "step": 3836 }, { "epoch": 0.86, "learning_rate": 9.79062435210054e-07, "loss": 0.4171, "step": 3837 }, { "epoch": 0.86, "learning_rate": 9.759236686545847e-07, "loss": 0.3899, "step": 3838 }, { "epoch": 0.86, "learning_rate": 9.727896833288764e-07, "loss": 0.3811, "step": 3839 }, { "epoch": 0.86, "learning_rate": 9.696604808934152e-07, "loss": 0.4203, "step": 3840 }, { "epoch": 0.86, "learning_rate": 9.665360630061438e-07, "loss": 0.4287, "step": 3841 }, { "epoch": 0.86, "learning_rate": 9.634164313224758e-07, "loss": 0.4104, "step": 3842 }, { "epoch": 0.86, "learning_rate": 9.603015874952838e-07, "loss": 0.3993, "step": 3843 }, { "epoch": 0.86, "learning_rate": 9.571915331749115e-07, "loss": 0.4253, "step": 3844 }, { "epoch": 0.86, "learning_rate": 9.540862700091525e-07, "loss": 0.4166, "step": 3845 }, { "epoch": 0.86, "learning_rate": 9.509857996432792e-07, "loss": 0.4208, "step": 3846 }, { "epoch": 0.86, "learning_rate": 9.478901237200078e-07, "loss": 0.4168, "step": 3847 }, { "epoch": 0.86, "learning_rate": 9.447992438795261e-07, "loss": 0.4144, "step": 3848 }, { "epoch": 0.86, "learning_rate": 9.417131617594721e-07, "loss": 0.4156, "step": 3849 }, { "epoch": 0.87, "learning_rate": 9.386318789949488e-07, "loss": 0.4027, "step": 3850 }, { "epoch": 0.87, "learning_rate": 9.355553972185116e-07, "loss": 0.4164, "step": 3851 }, { "epoch": 0.87, "learning_rate": 9.324837180601743e-07, "loss": 0.4044, "step": 3852 }, { "epoch": 0.87, "learning_rate": 9.29416843147406e-07, "loss": 0.3774, "step": 3853 }, { "epoch": 0.87, "learning_rate": 9.263547741051315e-07, "loss": 0.433, "step": 3854 }, { "epoch": 0.87, "learning_rate": 9.232975125557264e-07, "loss": 0.3789, "step": 3855 }, { "epoch": 0.87, "learning_rate": 9.202450601190227e-07, "loss": 0.4113, "step": 3856 }, { "epoch": 0.87, "learning_rate": 9.171974184122989e-07, "loss": 0.4065, "step": 3857 }, { "epoch": 0.87, "learning_rate": 9.141545890502924e-07, "loss": 0.4227, "step": 3858 }, { "epoch": 0.87, "learning_rate": 9.111165736451822e-07, "loss": 0.3913, "step": 3859 }, { "epoch": 0.87, "learning_rate": 9.080833738066031e-07, "loss": 0.4284, "step": 3860 }, { "epoch": 0.87, "learning_rate": 9.050549911416373e-07, "loss": 0.4132, "step": 3861 }, { "epoch": 0.87, "learning_rate": 9.020314272548147e-07, "loss": 0.3864, "step": 3862 }, { "epoch": 0.87, "learning_rate": 8.990126837481083e-07, "loss": 0.4128, "step": 3863 }, { "epoch": 0.87, "learning_rate": 8.959987622209442e-07, "loss": 0.4293, "step": 3864 }, { "epoch": 0.87, "learning_rate": 8.929896642701851e-07, "loss": 0.4122, "step": 3865 }, { "epoch": 0.87, "learning_rate": 8.899853914901446e-07, "loss": 0.4154, "step": 3866 }, { "epoch": 0.87, "learning_rate": 8.869859454725794e-07, "loss": 0.4068, "step": 3867 }, { "epoch": 0.87, "learning_rate": 8.839913278066848e-07, "loss": 0.424, "step": 3868 }, { "epoch": 0.87, "learning_rate": 8.810015400790994e-07, "loss": 0.3972, "step": 3869 }, { "epoch": 0.87, "learning_rate": 8.780165838739074e-07, "loss": 0.3899, "step": 3870 }, { "epoch": 0.87, "learning_rate": 8.750364607726247e-07, "loss": 0.4043, "step": 3871 }, { "epoch": 0.87, "learning_rate": 8.720611723542127e-07, "loss": 0.4098, "step": 3872 }, { "epoch": 0.87, "learning_rate": 8.690907201950727e-07, "loss": 0.3881, "step": 3873 }, { "epoch": 0.87, "learning_rate": 8.661251058690357e-07, "loss": 0.3835, "step": 3874 }, { "epoch": 0.87, "learning_rate": 8.63164330947378e-07, "loss": 0.4197, "step": 3875 }, { "epoch": 0.87, "learning_rate": 8.602083969988051e-07, "loss": 0.3928, "step": 3876 }, { "epoch": 0.87, "learning_rate": 8.572573055894629e-07, "loss": 0.4201, "step": 3877 }, { "epoch": 0.87, "learning_rate": 8.543110582829272e-07, "loss": 0.4479, "step": 3878 }, { "epoch": 0.87, "learning_rate": 8.513696566402119e-07, "loss": 0.4051, "step": 3879 }, { "epoch": 0.87, "learning_rate": 8.484331022197589e-07, "loss": 0.4047, "step": 3880 }, { "epoch": 0.87, "learning_rate": 8.455013965774462e-07, "loss": 0.3883, "step": 3881 }, { "epoch": 0.87, "learning_rate": 8.425745412665765e-07, "loss": 0.4219, "step": 3882 }, { "epoch": 0.87, "learning_rate": 8.396525378378906e-07, "loss": 0.4043, "step": 3883 }, { "epoch": 0.87, "learning_rate": 8.367353878395501e-07, "loss": 0.4124, "step": 3884 }, { "epoch": 0.87, "learning_rate": 8.338230928171565e-07, "loss": 0.3931, "step": 3885 }, { "epoch": 0.87, "learning_rate": 8.309156543137265e-07, "loss": 0.3856, "step": 3886 }, { "epoch": 0.87, "learning_rate": 8.280130738697123e-07, "loss": 0.415, "step": 3887 }, { "epoch": 0.87, "learning_rate": 8.251153530229871e-07, "loss": 0.4, "step": 3888 }, { "epoch": 0.87, "learning_rate": 8.22222493308853e-07, "loss": 0.4321, "step": 3889 }, { "epoch": 0.87, "learning_rate": 8.193344962600336e-07, "loss": 0.3974, "step": 3890 }, { "epoch": 0.87, "learning_rate": 8.164513634066784e-07, "loss": 0.4107, "step": 3891 }, { "epoch": 0.87, "learning_rate": 8.135730962763589e-07, "loss": 0.4059, "step": 3892 }, { "epoch": 0.87, "learning_rate": 8.106996963940683e-07, "loss": 0.3996, "step": 3893 }, { "epoch": 0.88, "learning_rate": 8.078311652822213e-07, "loss": 0.4128, "step": 3894 }, { "epoch": 0.88, "learning_rate": 8.04967504460653e-07, "loss": 0.4123, "step": 3895 }, { "epoch": 0.88, "learning_rate": 8.021087154466156e-07, "loss": 0.4148, "step": 3896 }, { "epoch": 0.88, "learning_rate": 7.992547997547861e-07, "loss": 0.4281, "step": 3897 }, { "epoch": 0.88, "learning_rate": 7.964057588972529e-07, "loss": 0.3867, "step": 3898 }, { "epoch": 0.88, "learning_rate": 7.93561594383524e-07, "loss": 0.4253, "step": 3899 }, { "epoch": 0.88, "learning_rate": 7.907223077205262e-07, "loss": 0.4173, "step": 3900 }, { "epoch": 0.88, "learning_rate": 7.878879004126005e-07, "loss": 0.4202, "step": 3901 }, { "epoch": 0.88, "learning_rate": 7.850583739614992e-07, "loss": 0.4016, "step": 3902 }, { "epoch": 0.88, "learning_rate": 7.82233729866394e-07, "loss": 0.4029, "step": 3903 }, { "epoch": 0.88, "learning_rate": 7.794139696238645e-07, "loss": 0.4213, "step": 3904 }, { "epoch": 0.88, "learning_rate": 7.76599094727909e-07, "loss": 0.3953, "step": 3905 }, { "epoch": 0.88, "learning_rate": 7.737891066699288e-07, "loss": 0.3876, "step": 3906 }, { "epoch": 0.88, "learning_rate": 7.709840069387442e-07, "loss": 0.4471, "step": 3907 }, { "epoch": 0.88, "learning_rate": 7.681837970205808e-07, "loss": 0.405, "step": 3908 }, { "epoch": 0.88, "learning_rate": 7.653884783990783e-07, "loss": 0.4227, "step": 3909 }, { "epoch": 0.88, "learning_rate": 7.625980525552767e-07, "loss": 0.403, "step": 3910 }, { "epoch": 0.88, "learning_rate": 7.598125209676321e-07, "loss": 0.405, "step": 3911 }, { "epoch": 0.88, "learning_rate": 7.570318851120007e-07, "loss": 0.3891, "step": 3912 }, { "epoch": 0.88, "learning_rate": 7.542561464616505e-07, "loss": 0.3986, "step": 3913 }, { "epoch": 0.88, "learning_rate": 7.514853064872496e-07, "loss": 0.3925, "step": 3914 }, { "epoch": 0.88, "learning_rate": 7.487193666568749e-07, "loss": 0.407, "step": 3915 }, { "epoch": 0.88, "learning_rate": 7.459583284360039e-07, "loss": 0.4498, "step": 3916 }, { "epoch": 0.88, "learning_rate": 7.432021932875222e-07, "loss": 0.3934, "step": 3917 }, { "epoch": 0.88, "learning_rate": 7.404509626717093e-07, "loss": 0.3884, "step": 3918 }, { "epoch": 0.88, "learning_rate": 7.377046380462549e-07, "loss": 0.4435, "step": 3919 }, { "epoch": 0.88, "learning_rate": 7.349632208662416e-07, "loss": 0.4186, "step": 3920 }, { "epoch": 0.88, "learning_rate": 7.322267125841575e-07, "loss": 0.4246, "step": 3921 }, { "epoch": 0.88, "learning_rate": 7.294951146498863e-07, "loss": 0.4245, "step": 3922 }, { "epoch": 0.88, "learning_rate": 7.267684285107124e-07, "loss": 0.4216, "step": 3923 }, { "epoch": 0.88, "learning_rate": 7.240466556113179e-07, "loss": 0.3907, "step": 3924 }, { "epoch": 0.88, "learning_rate": 7.213297973937805e-07, "loss": 0.3992, "step": 3925 }, { "epoch": 0.88, "learning_rate": 7.18617855297572e-07, "loss": 0.4331, "step": 3926 }, { "epoch": 0.88, "learning_rate": 7.159108307595663e-07, "loss": 0.4203, "step": 3927 }, { "epoch": 0.88, "learning_rate": 7.132087252140218e-07, "loss": 0.4045, "step": 3928 }, { "epoch": 0.88, "learning_rate": 7.105115400925999e-07, "loss": 0.4182, "step": 3929 }, { "epoch": 0.88, "learning_rate": 7.078192768243486e-07, "loss": 0.4295, "step": 3930 }, { "epoch": 0.88, "learning_rate": 7.051319368357124e-07, "loss": 0.4228, "step": 3931 }, { "epoch": 0.88, "learning_rate": 7.024495215505256e-07, "loss": 0.4061, "step": 3932 }, { "epoch": 0.88, "learning_rate": 6.997720323900137e-07, "loss": 0.3978, "step": 3933 }, { "epoch": 0.88, "learning_rate": 6.970994707727896e-07, "loss": 0.4094, "step": 3934 }, { "epoch": 0.88, "learning_rate": 6.944318381148619e-07, "loss": 0.3946, "step": 3935 }, { "epoch": 0.88, "learning_rate": 6.917691358296185e-07, "loss": 0.4281, "step": 3936 }, { "epoch": 0.88, "learning_rate": 6.891113653278436e-07, "loss": 0.4431, "step": 3937 }, { "epoch": 0.88, "learning_rate": 6.864585280176994e-07, "loss": 0.3996, "step": 3938 }, { "epoch": 0.89, "learning_rate": 6.838106253047483e-07, "loss": 0.4259, "step": 3939 }, { "epoch": 0.89, "learning_rate": 6.811676585919224e-07, "loss": 0.3869, "step": 3940 }, { "epoch": 0.89, "learning_rate": 6.785296292795496e-07, "loss": 0.3882, "step": 3941 }, { "epoch": 0.89, "learning_rate": 6.758965387653349e-07, "loss": 0.416, "step": 3942 }, { "epoch": 0.89, "learning_rate": 6.732683884443736e-07, "loss": 0.3864, "step": 3943 }, { "epoch": 0.89, "learning_rate": 6.706451797091351e-07, "loss": 0.4395, "step": 3944 }, { "epoch": 0.89, "learning_rate": 6.680269139494777e-07, "loss": 0.418, "step": 3945 }, { "epoch": 0.89, "learning_rate": 6.654135925526373e-07, "loss": 0.4377, "step": 3946 }, { "epoch": 0.89, "learning_rate": 6.628052169032328e-07, "loss": 0.4232, "step": 3947 }, { "epoch": 0.89, "learning_rate": 6.60201788383259e-07, "loss": 0.4058, "step": 3948 }, { "epoch": 0.89, "learning_rate": 6.576033083720923e-07, "loss": 0.4052, "step": 3949 }, { "epoch": 0.89, "learning_rate": 6.550097782464849e-07, "loss": 0.3914, "step": 3950 }, { "epoch": 0.89, "learning_rate": 6.524211993805684e-07, "loss": 0.4209, "step": 3951 }, { "epoch": 0.89, "learning_rate": 6.498375731458529e-07, "loss": 0.4211, "step": 3952 }, { "epoch": 0.89, "learning_rate": 6.472589009112185e-07, "loss": 0.4243, "step": 3953 }, { "epoch": 0.89, "learning_rate": 6.446851840429246e-07, "loss": 0.3987, "step": 3954 }, { "epoch": 0.89, "learning_rate": 6.421164239046085e-07, "loss": 0.4059, "step": 3955 }, { "epoch": 0.89, "learning_rate": 6.395526218572723e-07, "loss": 0.4158, "step": 3956 }, { "epoch": 0.89, "learning_rate": 6.369937792592984e-07, "loss": 0.4034, "step": 3957 }, { "epoch": 0.89, "learning_rate": 6.344398974664411e-07, "loss": 0.4068, "step": 3958 }, { "epoch": 0.89, "learning_rate": 6.318909778318216e-07, "loss": 0.415, "step": 3959 }, { "epoch": 0.89, "learning_rate": 6.293470217059372e-07, "loss": 0.4111, "step": 3960 }, { "epoch": 0.89, "learning_rate": 6.268080304366509e-07, "loss": 0.4055, "step": 3961 }, { "epoch": 0.89, "learning_rate": 6.242740053691987e-07, "loss": 0.3903, "step": 3962 }, { "epoch": 0.89, "learning_rate": 6.217449478461835e-07, "loss": 0.4089, "step": 3963 }, { "epoch": 0.89, "learning_rate": 6.192208592075788e-07, "loss": 0.3886, "step": 3964 }, { "epoch": 0.89, "learning_rate": 6.167017407907205e-07, "loss": 0.4047, "step": 3965 }, { "epoch": 0.89, "learning_rate": 6.141875939303176e-07, "loss": 0.3975, "step": 3966 }, { "epoch": 0.89, "learning_rate": 6.116784199584369e-07, "loss": 0.4031, "step": 3967 }, { "epoch": 0.89, "learning_rate": 6.091742202045192e-07, "loss": 0.437, "step": 3968 }, { "epoch": 0.89, "learning_rate": 6.066749959953622e-07, "loss": 0.4234, "step": 3969 }, { "epoch": 0.89, "learning_rate": 6.041807486551332e-07, "loss": 0.4145, "step": 3970 }, { "epoch": 0.89, "learning_rate": 6.016914795053586e-07, "loss": 0.4037, "step": 3971 }, { "epoch": 0.89, "learning_rate": 5.99207189864931e-07, "loss": 0.3653, "step": 3972 }, { "epoch": 0.89, "learning_rate": 5.967278810500998e-07, "loss": 0.4195, "step": 3973 }, { "epoch": 0.89, "learning_rate": 5.942535543744821e-07, "loss": 0.398, "step": 3974 }, { "epoch": 0.89, "learning_rate": 5.917842111490457e-07, "loss": 0.4314, "step": 3975 }, { "epoch": 0.89, "learning_rate": 5.893198526821287e-07, "loss": 0.3981, "step": 3976 }, { "epoch": 0.89, "learning_rate": 5.868604802794186e-07, "loss": 0.3865, "step": 3977 }, { "epoch": 0.89, "learning_rate": 5.844060952439701e-07, "loss": 0.3995, "step": 3978 }, { "epoch": 0.89, "learning_rate": 5.819566988761882e-07, "loss": 0.4158, "step": 3979 }, { "epoch": 0.89, "learning_rate": 5.795122924738395e-07, "loss": 0.4018, "step": 3980 }, { "epoch": 0.89, "learning_rate": 5.770728773320411e-07, "loss": 0.4226, "step": 3981 }, { "epoch": 0.89, "learning_rate": 5.746384547432738e-07, "loss": 0.388, "step": 3982 }, { "epoch": 0.9, "learning_rate": 5.722090259973645e-07, "loss": 0.4001, "step": 3983 }, { "epoch": 0.9, "learning_rate": 5.697845923815026e-07, "loss": 0.3974, "step": 3984 }, { "epoch": 0.9, "learning_rate": 5.673651551802206e-07, "loss": 0.3945, "step": 3985 }, { "epoch": 0.9, "learning_rate": 5.649507156754174e-07, "loss": 0.4086, "step": 3986 }, { "epoch": 0.9, "learning_rate": 5.625412751463311e-07, "loss": 0.4384, "step": 3987 }, { "epoch": 0.9, "learning_rate": 5.601368348695591e-07, "loss": 0.4095, "step": 3988 }, { "epoch": 0.9, "learning_rate": 5.577373961190435e-07, "loss": 0.4287, "step": 3989 }, { "epoch": 0.9, "learning_rate": 5.553429601660842e-07, "loss": 0.4028, "step": 3990 }, { "epoch": 0.9, "learning_rate": 5.52953528279323e-07, "loss": 0.396, "step": 3991 }, { "epoch": 0.9, "learning_rate": 5.50569101724755e-07, "loss": 0.4287, "step": 3992 }, { "epoch": 0.9, "learning_rate": 5.481896817657195e-07, "loss": 0.4294, "step": 3993 }, { "epoch": 0.9, "learning_rate": 5.458152696629093e-07, "loss": 0.3768, "step": 3994 }, { "epoch": 0.9, "learning_rate": 5.434458666743569e-07, "loss": 0.406, "step": 3995 }, { "epoch": 0.9, "learning_rate": 5.410814740554471e-07, "loss": 0.4033, "step": 3996 }, { "epoch": 0.9, "learning_rate": 5.387220930589032e-07, "loss": 0.423, "step": 3997 }, { "epoch": 0.9, "learning_rate": 5.363677249347998e-07, "loss": 0.3987, "step": 3998 }, { "epoch": 0.9, "learning_rate": 5.340183709305502e-07, "loss": 0.4027, "step": 3999 }, { "epoch": 0.9, "learning_rate": 5.316740322909164e-07, "loss": 0.4222, "step": 4000 }, { "epoch": 0.9, "learning_rate": 5.293347102579959e-07, "loss": 0.3991, "step": 4001 }, { "epoch": 0.9, "learning_rate": 5.270004060712386e-07, "loss": 0.4062, "step": 4002 }, { "epoch": 0.9, "learning_rate": 5.246711209674249e-07, "loss": 0.436, "step": 4003 }, { "epoch": 0.9, "learning_rate": 5.223468561806832e-07, "loss": 0.4154, "step": 4004 }, { "epoch": 0.9, "learning_rate": 5.200276129424775e-07, "loss": 0.4171, "step": 4005 }, { "epoch": 0.9, "learning_rate": 5.177133924816169e-07, "loss": 0.3895, "step": 4006 }, { "epoch": 0.9, "learning_rate": 5.154041960242417e-07, "loss": 0.4015, "step": 4007 }, { "epoch": 0.9, "learning_rate": 5.131000247938367e-07, "loss": 0.397, "step": 4008 }, { "epoch": 0.9, "learning_rate": 5.108008800112197e-07, "loss": 0.3951, "step": 4009 }, { "epoch": 0.9, "learning_rate": 5.085067628945506e-07, "loss": 0.4006, "step": 4010 }, { "epoch": 0.9, "learning_rate": 5.062176746593195e-07, "loss": 0.3926, "step": 4011 }, { "epoch": 0.9, "learning_rate": 5.039336165183573e-07, "loss": 0.4093, "step": 4012 }, { "epoch": 0.9, "learning_rate": 5.016545896818259e-07, "loss": 0.4082, "step": 4013 }, { "epoch": 0.9, "learning_rate": 4.993805953572229e-07, "loss": 0.38, "step": 4014 }, { "epoch": 0.9, "learning_rate": 4.971116347493798e-07, "loss": 0.387, "step": 4015 }, { "epoch": 0.9, "learning_rate": 4.94847709060462e-07, "loss": 0.4203, "step": 4016 }, { "epoch": 0.9, "learning_rate": 4.925888194899619e-07, "loss": 0.392, "step": 4017 }, { "epoch": 0.9, "learning_rate": 4.903349672347136e-07, "loss": 0.415, "step": 4018 }, { "epoch": 0.9, "learning_rate": 4.880861534888726e-07, "loss": 0.3883, "step": 4019 }, { "epoch": 0.9, "learning_rate": 4.858423794439305e-07, "loss": 0.4046, "step": 4020 }, { "epoch": 0.9, "learning_rate": 4.836036462887061e-07, "loss": 0.4247, "step": 4021 }, { "epoch": 0.9, "learning_rate": 4.813699552093498e-07, "loss": 0.4038, "step": 4022 }, { "epoch": 0.9, "learning_rate": 4.791413073893359e-07, "loss": 0.4199, "step": 4023 }, { "epoch": 0.9, "learning_rate": 4.769177040094719e-07, "loss": 0.4038, "step": 4024 }, { "epoch": 0.9, "learning_rate": 4.7469914624789e-07, "loss": 0.3993, "step": 4025 }, { "epoch": 0.9, "learning_rate": 4.724856352800511e-07, "loss": 0.3876, "step": 4026 }, { "epoch": 0.9, "learning_rate": 4.702771722787369e-07, "loss": 0.4364, "step": 4027 }, { "epoch": 0.91, "learning_rate": 4.6807375841406153e-07, "loss": 0.4123, "step": 4028 }, { "epoch": 0.91, "learning_rate": 4.658753948534589e-07, "loss": 0.3914, "step": 4029 }, { "epoch": 0.91, "learning_rate": 4.6368208276168836e-07, "loss": 0.4242, "step": 4030 }, { "epoch": 0.91, "learning_rate": 4.614938233008359e-07, "loss": 0.4044, "step": 4031 }, { "epoch": 0.91, "learning_rate": 4.593106176303053e-07, "loss": 0.3876, "step": 4032 }, { "epoch": 0.91, "learning_rate": 4.5713246690682555e-07, "loss": 0.3995, "step": 4033 }, { "epoch": 0.91, "learning_rate": 4.549593722844492e-07, "loss": 0.4311, "step": 4034 }, { "epoch": 0.91, "learning_rate": 4.5279133491454406e-07, "loss": 0.4243, "step": 4035 }, { "epoch": 0.91, "learning_rate": 4.506283559458047e-07, "loss": 0.4152, "step": 4036 }, { "epoch": 0.91, "learning_rate": 4.484704365242443e-07, "loss": 0.415, "step": 4037 }, { "epoch": 0.91, "learning_rate": 4.463175777931905e-07, "loss": 0.387, "step": 4038 }, { "epoch": 0.91, "learning_rate": 4.4416978089329655e-07, "loss": 0.416, "step": 4039 }, { "epoch": 0.91, "learning_rate": 4.420270469625287e-07, "loss": 0.3776, "step": 4040 }, { "epoch": 0.91, "learning_rate": 4.398893771361723e-07, "loss": 0.4169, "step": 4041 }, { "epoch": 0.91, "learning_rate": 4.3775677254683125e-07, "loss": 0.4288, "step": 4042 }, { "epoch": 0.91, "learning_rate": 4.3562923432442396e-07, "loss": 0.4134, "step": 4043 }, { "epoch": 0.91, "learning_rate": 4.3350676359618317e-07, "loss": 0.4007, "step": 4044 }, { "epoch": 0.91, "learning_rate": 4.3138936148666046e-07, "loss": 0.3962, "step": 4045 }, { "epoch": 0.91, "learning_rate": 4.292770291177173e-07, "loss": 0.388, "step": 4046 }, { "epoch": 0.91, "learning_rate": 4.2716976760853513e-07, "loss": 0.4056, "step": 4047 }, { "epoch": 0.91, "learning_rate": 4.2506757807560085e-07, "loss": 0.4122, "step": 4048 }, { "epoch": 0.91, "learning_rate": 4.2297046163272124e-07, "loss": 0.4083, "step": 4049 }, { "epoch": 0.91, "learning_rate": 4.208784193910109e-07, "loss": 0.3978, "step": 4050 }, { "epoch": 0.91, "learning_rate": 4.187914524588998e-07, "loss": 0.3855, "step": 4051 }, { "epoch": 0.91, "learning_rate": 4.1670956194212354e-07, "loss": 0.4232, "step": 4052 }, { "epoch": 0.91, "learning_rate": 4.1463274894373183e-07, "loss": 0.4382, "step": 4053 }, { "epoch": 0.91, "learning_rate": 4.125610145640835e-07, "loss": 0.3782, "step": 4054 }, { "epoch": 0.91, "learning_rate": 4.1049435990084707e-07, "loss": 0.4135, "step": 4055 }, { "epoch": 0.91, "learning_rate": 4.0843278604899673e-07, "loss": 0.4288, "step": 4056 }, { "epoch": 0.91, "learning_rate": 4.063762941008198e-07, "loss": 0.4157, "step": 4057 }, { "epoch": 0.91, "learning_rate": 4.0432488514590587e-07, "loss": 0.4002, "step": 4058 }, { "epoch": 0.91, "learning_rate": 4.022785602711554e-07, "loss": 0.4183, "step": 4059 }, { "epoch": 0.91, "learning_rate": 4.0023732056077235e-07, "loss": 0.3872, "step": 4060 }, { "epoch": 0.91, "learning_rate": 3.982011670962682e-07, "loss": 0.4292, "step": 4061 }, { "epoch": 0.91, "learning_rate": 3.9617010095645894e-07, "loss": 0.4427, "step": 4062 }, { "epoch": 0.91, "learning_rate": 3.941441232174637e-07, "loss": 0.3892, "step": 4063 }, { "epoch": 0.91, "learning_rate": 3.921232349527082e-07, "loss": 0.4026, "step": 4064 }, { "epoch": 0.91, "learning_rate": 3.901074372329217e-07, "loss": 0.4172, "step": 4065 }, { "epoch": 0.91, "learning_rate": 3.880967311261319e-07, "loss": 0.4154, "step": 4066 }, { "epoch": 0.91, "learning_rate": 3.860911176976756e-07, "loss": 0.3836, "step": 4067 }, { "epoch": 0.91, "learning_rate": 3.8409059801018387e-07, "loss": 0.4286, "step": 4068 }, { "epoch": 0.91, "learning_rate": 3.820951731235967e-07, "loss": 0.3777, "step": 4069 }, { "epoch": 0.91, "learning_rate": 3.8010484409514823e-07, "loss": 0.4183, "step": 4070 }, { "epoch": 0.91, "learning_rate": 3.781196119793751e-07, "loss": 0.3915, "step": 4071 }, { "epoch": 0.92, "learning_rate": 3.7613947782811467e-07, "loss": 0.3933, "step": 4072 }, { "epoch": 0.92, "learning_rate": 3.7416444269050335e-07, "loss": 0.3837, "step": 4073 }, { "epoch": 0.92, "learning_rate": 3.7219450761297185e-07, "loss": 0.4036, "step": 4074 }, { "epoch": 0.92, "learning_rate": 3.702296736392541e-07, "loss": 0.3899, "step": 4075 }, { "epoch": 0.92, "learning_rate": 3.6826994181037744e-07, "loss": 0.3971, "step": 4076 }, { "epoch": 0.92, "learning_rate": 3.663153131646691e-07, "loss": 0.3853, "step": 4077 }, { "epoch": 0.92, "learning_rate": 3.643657887377472e-07, "loss": 0.4036, "step": 4078 }, { "epoch": 0.92, "learning_rate": 3.624213695625312e-07, "loss": 0.4044, "step": 4079 }, { "epoch": 0.92, "learning_rate": 3.6048205666923353e-07, "loss": 0.4009, "step": 4080 }, { "epoch": 0.92, "learning_rate": 3.5854785108536237e-07, "loss": 0.4253, "step": 4081 }, { "epoch": 0.92, "learning_rate": 3.5661875383571574e-07, "loss": 0.4132, "step": 4082 }, { "epoch": 0.92, "learning_rate": 3.5469476594239047e-07, "loss": 0.3998, "step": 4083 }, { "epoch": 0.92, "learning_rate": 3.5277588842477117e-07, "loss": 0.3893, "step": 4084 }, { "epoch": 0.92, "learning_rate": 3.508621222995412e-07, "loss": 0.4194, "step": 4085 }, { "epoch": 0.92, "learning_rate": 3.4895346858066723e-07, "loss": 0.4319, "step": 4086 }, { "epoch": 0.92, "learning_rate": 3.4704992827941487e-07, "loss": 0.4193, "step": 4087 }, { "epoch": 0.92, "learning_rate": 3.4515150240433836e-07, "loss": 0.3849, "step": 4088 }, { "epoch": 0.92, "learning_rate": 3.4325819196127987e-07, "loss": 0.4224, "step": 4089 }, { "epoch": 0.92, "learning_rate": 3.4136999795337246e-07, "loss": 0.3971, "step": 4090 }, { "epoch": 0.92, "learning_rate": 3.3948692138104146e-07, "loss": 0.4081, "step": 4091 }, { "epoch": 0.92, "learning_rate": 3.376089632419932e-07, "loss": 0.4195, "step": 4092 }, { "epoch": 0.92, "learning_rate": 3.3573612453123184e-07, "loss": 0.3901, "step": 4093 }, { "epoch": 0.92, "learning_rate": 3.3386840624104133e-07, "loss": 0.39, "step": 4094 }, { "epoch": 0.92, "learning_rate": 3.320058093609957e-07, "loss": 0.3837, "step": 4095 }, { "epoch": 0.92, "learning_rate": 3.301483348779555e-07, "loss": 0.4076, "step": 4096 }, { "epoch": 0.92, "learning_rate": 3.282959837760702e-07, "loss": 0.4024, "step": 4097 }, { "epoch": 0.92, "learning_rate": 3.2644875703676695e-07, "loss": 0.3838, "step": 4098 }, { "epoch": 0.92, "learning_rate": 3.2460665563876505e-07, "loss": 0.422, "step": 4099 }, { "epoch": 0.92, "learning_rate": 3.22769680558066e-07, "loss": 0.4176, "step": 4100 }, { "epoch": 0.92, "learning_rate": 3.209378327679546e-07, "loss": 0.4232, "step": 4101 }, { "epoch": 0.92, "learning_rate": 3.191111132389979e-07, "loss": 0.417, "step": 4102 }, { "epoch": 0.92, "learning_rate": 3.1728952293904933e-07, "loss": 0.3969, "step": 4103 }, { "epoch": 0.92, "learning_rate": 3.154730628332425e-07, "loss": 0.3915, "step": 4104 }, { "epoch": 0.92, "learning_rate": 3.1366173388399313e-07, "loss": 0.4231, "step": 4105 }, { "epoch": 0.92, "learning_rate": 3.118555370509979e-07, "loss": 0.4126, "step": 4106 }, { "epoch": 0.92, "learning_rate": 3.1005447329123584e-07, "loss": 0.4209, "step": 4107 }, { "epoch": 0.92, "learning_rate": 3.0825854355896243e-07, "loss": 0.4192, "step": 4108 }, { "epoch": 0.92, "learning_rate": 3.064677488057188e-07, "loss": 0.415, "step": 4109 }, { "epoch": 0.92, "learning_rate": 3.0468208998032047e-07, "loss": 0.4418, "step": 4110 }, { "epoch": 0.92, "learning_rate": 3.0290156802886605e-07, "loss": 0.4177, "step": 4111 }, { "epoch": 0.92, "learning_rate": 3.011261838947277e-07, "loss": 0.3776, "step": 4112 }, { "epoch": 0.92, "learning_rate": 2.993559385185596e-07, "loss": 0.4268, "step": 4113 }, { "epoch": 0.92, "learning_rate": 2.9759083283829036e-07, "loss": 0.4065, "step": 4114 }, { "epoch": 0.92, "learning_rate": 2.958308677891264e-07, "loss": 0.4055, "step": 4115 }, { "epoch": 0.92, "learning_rate": 2.9407604430355283e-07, "loss": 0.3961, "step": 4116 }, { "epoch": 0.93, "learning_rate": 2.923263633113249e-07, "loss": 0.4134, "step": 4117 }, { "epoch": 0.93, "learning_rate": 2.905818257394799e-07, "loss": 0.4087, "step": 4118 }, { "epoch": 0.93, "learning_rate": 2.8884243251232626e-07, "loss": 0.4018, "step": 4119 }, { "epoch": 0.93, "learning_rate": 2.871081845514445e-07, "loss": 0.3928, "step": 4120 }, { "epoch": 0.93, "learning_rate": 2.8537908277569523e-07, "loss": 0.4207, "step": 4121 }, { "epoch": 0.93, "learning_rate": 2.83655128101209e-07, "loss": 0.4138, "step": 4122 }, { "epoch": 0.93, "learning_rate": 2.8193632144138727e-07, "loss": 0.3987, "step": 4123 }, { "epoch": 0.93, "learning_rate": 2.802226637069083e-07, "loss": 0.3976, "step": 4124 }, { "epoch": 0.93, "learning_rate": 2.7851415580571696e-07, "loss": 0.3982, "step": 4125 }, { "epoch": 0.93, "learning_rate": 2.7681079864303574e-07, "loss": 0.4462, "step": 4126 }, { "epoch": 0.93, "learning_rate": 2.7511259312135384e-07, "loss": 0.4159, "step": 4127 }, { "epoch": 0.93, "learning_rate": 2.734195401404316e-07, "loss": 0.4264, "step": 4128 }, { "epoch": 0.93, "learning_rate": 2.717316405972992e-07, "loss": 0.4384, "step": 4129 }, { "epoch": 0.93, "learning_rate": 2.7004889538626013e-07, "loss": 0.4253, "step": 4130 }, { "epoch": 0.93, "learning_rate": 2.6837130539888124e-07, "loss": 0.4129, "step": 4131 }, { "epoch": 0.93, "learning_rate": 2.6669887152400265e-07, "loss": 0.4069, "step": 4132 }, { "epoch": 0.93, "learning_rate": 2.650315946477278e-07, "loss": 0.4051, "step": 4133 }, { "epoch": 0.93, "learning_rate": 2.633694756534322e-07, "loss": 0.4223, "step": 4134 }, { "epoch": 0.93, "learning_rate": 2.61712515421757e-07, "loss": 0.4134, "step": 4135 }, { "epoch": 0.93, "learning_rate": 2.600607148306111e-07, "loss": 0.418, "step": 4136 }, { "epoch": 0.93, "learning_rate": 2.584140747551678e-07, "loss": 0.429, "step": 4137 }, { "epoch": 0.93, "learning_rate": 2.5677259606786686e-07, "loss": 0.4079, "step": 4138 }, { "epoch": 0.93, "learning_rate": 2.551362796384127e-07, "loss": 0.4254, "step": 4139 }, { "epoch": 0.93, "learning_rate": 2.535051263337773e-07, "loss": 0.4022, "step": 4140 }, { "epoch": 0.93, "learning_rate": 2.518791370181928e-07, "loss": 0.4092, "step": 4141 }, { "epoch": 0.93, "learning_rate": 2.5025831255315993e-07, "loss": 0.4057, "step": 4142 }, { "epoch": 0.93, "learning_rate": 2.4864265379743956e-07, "loss": 0.442, "step": 4143 }, { "epoch": 0.93, "learning_rate": 2.470321616070592e-07, "loss": 0.4165, "step": 4144 }, { "epoch": 0.93, "learning_rate": 2.45426836835303e-07, "loss": 0.4098, "step": 4145 }, { "epoch": 0.93, "learning_rate": 2.438266803327227e-07, "loss": 0.3989, "step": 4146 }, { "epoch": 0.93, "learning_rate": 2.422316929471291e-07, "loss": 0.4097, "step": 4147 }, { "epoch": 0.93, "learning_rate": 2.406418755235962e-07, "loss": 0.4181, "step": 4148 }, { "epoch": 0.93, "learning_rate": 2.390572289044546e-07, "loss": 0.4129, "step": 4149 }, { "epoch": 0.93, "learning_rate": 2.3747775392930272e-07, "loss": 0.3784, "step": 4150 }, { "epoch": 0.93, "learning_rate": 2.3590345143499005e-07, "loss": 0.4056, "step": 4151 }, { "epoch": 0.93, "learning_rate": 2.343343222556338e-07, "loss": 0.4067, "step": 4152 }, { "epoch": 0.93, "learning_rate": 2.3277036722260226e-07, "loss": 0.4391, "step": 4153 }, { "epoch": 0.93, "learning_rate": 2.312115871645304e-07, "loss": 0.3795, "step": 4154 }, { "epoch": 0.93, "learning_rate": 2.2965798290730312e-07, "loss": 0.4127, "step": 4155 }, { "epoch": 0.93, "learning_rate": 2.2810955527407085e-07, "loss": 0.3998, "step": 4156 }, { "epoch": 0.93, "learning_rate": 2.2656630508523403e-07, "loss": 0.4082, "step": 4157 }, { "epoch": 0.93, "learning_rate": 2.2502823315845746e-07, "loss": 0.4148, "step": 4158 }, { "epoch": 0.93, "learning_rate": 2.2349534030865483e-07, "loss": 0.4111, "step": 4159 }, { "epoch": 0.93, "learning_rate": 2.2196762734800205e-07, "loss": 0.4369, "step": 4160 }, { "epoch": 0.94, "learning_rate": 2.2044509508592605e-07, "loss": 0.4019, "step": 4161 }, { "epoch": 0.94, "learning_rate": 2.1892774432911157e-07, "loss": 0.4001, "step": 4162 }, { "epoch": 0.94, "learning_rate": 2.1741557588149553e-07, "loss": 0.3968, "step": 4163 }, { "epoch": 0.94, "learning_rate": 2.159085905442737e-07, "loss": 0.4138, "step": 4164 }, { "epoch": 0.94, "learning_rate": 2.1440678911588853e-07, "loss": 0.3982, "step": 4165 }, { "epoch": 0.94, "learning_rate": 2.129101723920446e-07, "loss": 0.4123, "step": 4166 }, { "epoch": 0.94, "learning_rate": 2.1141874116569204e-07, "loss": 0.4042, "step": 4167 }, { "epoch": 0.94, "learning_rate": 2.099324962270377e-07, "loss": 0.4097, "step": 4168 }, { "epoch": 0.94, "learning_rate": 2.0845143836353833e-07, "loss": 0.406, "step": 4169 }, { "epoch": 0.94, "learning_rate": 2.0697556835990396e-07, "loss": 0.4224, "step": 4170 }, { "epoch": 0.94, "learning_rate": 2.0550488699809468e-07, "loss": 0.4001, "step": 4171 }, { "epoch": 0.94, "learning_rate": 2.0403939505732385e-07, "loss": 0.4153, "step": 4172 }, { "epoch": 0.94, "learning_rate": 2.0257909331405035e-07, "loss": 0.4242, "step": 4173 }, { "epoch": 0.94, "learning_rate": 2.0112398254199085e-07, "loss": 0.4095, "step": 4174 }, { "epoch": 0.94, "learning_rate": 1.9967406351210305e-07, "loss": 0.4245, "step": 4175 }, { "epoch": 0.94, "learning_rate": 1.9822933699260138e-07, "loss": 0.3864, "step": 4176 }, { "epoch": 0.94, "learning_rate": 1.9678980374894352e-07, "loss": 0.4182, "step": 4177 }, { "epoch": 0.94, "learning_rate": 1.9535546454383936e-07, "loss": 0.4146, "step": 4178 }, { "epoch": 0.94, "learning_rate": 1.9392632013724433e-07, "loss": 0.4046, "step": 4179 }, { "epoch": 0.94, "learning_rate": 1.9250237128636385e-07, "loss": 0.4119, "step": 4180 }, { "epoch": 0.94, "learning_rate": 1.9108361874564663e-07, "loss": 0.4282, "step": 4181 }, { "epoch": 0.94, "learning_rate": 1.8967006326679361e-07, "loss": 0.4005, "step": 4182 }, { "epoch": 0.94, "learning_rate": 1.8826170559874902e-07, "loss": 0.41, "step": 4183 }, { "epoch": 0.94, "learning_rate": 1.8685854648770263e-07, "loss": 0.4028, "step": 4184 }, { "epoch": 0.94, "learning_rate": 1.8546058667709088e-07, "loss": 0.3949, "step": 4185 }, { "epoch": 0.94, "learning_rate": 1.8406782690759684e-07, "loss": 0.4053, "step": 4186 }, { "epoch": 0.94, "learning_rate": 1.8268026791714354e-07, "loss": 0.4098, "step": 4187 }, { "epoch": 0.94, "learning_rate": 1.8129791044090516e-07, "loss": 0.4071, "step": 4188 }, { "epoch": 0.94, "learning_rate": 1.799207552112947e-07, "loss": 0.4069, "step": 4189 }, { "epoch": 0.94, "learning_rate": 1.7854880295797406e-07, "loss": 0.3916, "step": 4190 }, { "epoch": 0.94, "learning_rate": 1.7718205440784064e-07, "loss": 0.4161, "step": 4191 }, { "epoch": 0.94, "learning_rate": 1.75820510285043e-07, "loss": 0.4124, "step": 4192 }, { "epoch": 0.94, "learning_rate": 1.7446417131096626e-07, "loss": 0.4315, "step": 4193 }, { "epoch": 0.94, "learning_rate": 1.7311303820424008e-07, "loss": 0.4012, "step": 4194 }, { "epoch": 0.94, "learning_rate": 1.7176711168073845e-07, "loss": 0.3925, "step": 4195 }, { "epoch": 0.94, "learning_rate": 1.7042639245357096e-07, "loss": 0.4192, "step": 4196 }, { "epoch": 0.94, "learning_rate": 1.690908812330927e-07, "loss": 0.3971, "step": 4197 }, { "epoch": 0.94, "learning_rate": 1.6776057872689876e-07, "loss": 0.4076, "step": 4198 }, { "epoch": 0.94, "learning_rate": 1.6643548563982315e-07, "loss": 0.4093, "step": 4199 }, { "epoch": 0.94, "learning_rate": 1.6511560267394088e-07, "loss": 0.4233, "step": 4200 }, { "epoch": 0.94, "learning_rate": 1.6380093052856482e-07, "loss": 0.4045, "step": 4201 }, { "epoch": 0.94, "learning_rate": 1.6249146990024888e-07, "loss": 0.3849, "step": 4202 }, { "epoch": 0.94, "learning_rate": 1.6118722148278586e-07, "loss": 0.4165, "step": 4203 }, { "epoch": 0.94, "learning_rate": 1.5988818596720524e-07, "loss": 0.4101, "step": 4204 }, { "epoch": 0.94, "learning_rate": 1.5859436404177532e-07, "loss": 0.4236, "step": 4205 }, { "epoch": 0.95, "learning_rate": 1.5730575639200328e-07, "loss": 0.3773, "step": 4206 }, { "epoch": 0.95, "learning_rate": 1.5602236370063194e-07, "loss": 0.4037, "step": 4207 }, { "epoch": 0.95, "learning_rate": 1.547441866476407e-07, "loss": 0.3815, "step": 4208 }, { "epoch": 0.95, "learning_rate": 1.5347122591025e-07, "loss": 0.4325, "step": 4209 }, { "epoch": 0.95, "learning_rate": 1.5220348216290924e-07, "loss": 0.4069, "step": 4210 }, { "epoch": 0.95, "learning_rate": 1.5094095607731007e-07, "loss": 0.4127, "step": 4211 }, { "epoch": 0.95, "learning_rate": 1.4968364832237515e-07, "loss": 0.3921, "step": 4212 }, { "epoch": 0.95, "learning_rate": 1.4843155956426604e-07, "loss": 0.3948, "step": 4213 }, { "epoch": 0.95, "learning_rate": 1.4718469046637652e-07, "loss": 0.4221, "step": 4214 }, { "epoch": 0.95, "learning_rate": 1.4594304168933703e-07, "loss": 0.3982, "step": 4215 }, { "epoch": 0.95, "learning_rate": 1.4470661389100804e-07, "loss": 0.4077, "step": 4216 }, { "epoch": 0.95, "learning_rate": 1.4347540772648994e-07, "loss": 0.3839, "step": 4217 }, { "epoch": 0.95, "learning_rate": 1.4224942384811092e-07, "loss": 0.4257, "step": 4218 }, { "epoch": 0.95, "learning_rate": 1.410286629054347e-07, "loss": 0.4213, "step": 4219 }, { "epoch": 0.95, "learning_rate": 1.3981312554525728e-07, "loss": 0.3989, "step": 4220 }, { "epoch": 0.95, "learning_rate": 1.38602812411609e-07, "loss": 0.4179, "step": 4221 }, { "epoch": 0.95, "learning_rate": 1.3739772414574915e-07, "loss": 0.4283, "step": 4222 }, { "epoch": 0.95, "learning_rate": 1.361978613861703e-07, "loss": 0.4293, "step": 4223 }, { "epoch": 0.95, "learning_rate": 1.350032247685962e-07, "loss": 0.3846, "step": 4224 }, { "epoch": 0.95, "learning_rate": 1.3381381492598155e-07, "loss": 0.3977, "step": 4225 }, { "epoch": 0.95, "learning_rate": 1.3262963248851125e-07, "loss": 0.4153, "step": 4226 }, { "epoch": 0.95, "learning_rate": 1.3145067808360334e-07, "loss": 0.3829, "step": 4227 }, { "epoch": 0.95, "learning_rate": 1.3027695233590042e-07, "loss": 0.4219, "step": 4228 }, { "epoch": 0.95, "learning_rate": 1.291084558672806e-07, "loss": 0.4082, "step": 4229 }, { "epoch": 0.95, "learning_rate": 1.279451892968475e-07, "loss": 0.4185, "step": 4230 }, { "epoch": 0.95, "learning_rate": 1.2678715324093705e-07, "loss": 0.4128, "step": 4231 }, { "epoch": 0.95, "learning_rate": 1.256343483131084e-07, "loss": 0.3984, "step": 4232 }, { "epoch": 0.95, "learning_rate": 1.244867751241563e-07, "loss": 0.4126, "step": 4233 }, { "epoch": 0.95, "learning_rate": 1.2334443428209663e-07, "loss": 0.4138, "step": 4234 }, { "epoch": 0.95, "learning_rate": 1.2220732639217858e-07, "loss": 0.4002, "step": 4235 }, { "epoch": 0.95, "learning_rate": 1.2107545205687576e-07, "loss": 0.424, "step": 4236 }, { "epoch": 0.95, "learning_rate": 1.1994881187588957e-07, "loss": 0.4071, "step": 4237 }, { "epoch": 0.95, "learning_rate": 1.188274064461481e-07, "loss": 0.4404, "step": 4238 }, { "epoch": 0.95, "learning_rate": 1.1771123636180714e-07, "loss": 0.4314, "step": 4239 }, { "epoch": 0.95, "learning_rate": 1.1660030221424479e-07, "loss": 0.4241, "step": 4240 }, { "epoch": 0.95, "learning_rate": 1.1549460459207018e-07, "loss": 0.4178, "step": 4241 }, { "epoch": 0.95, "learning_rate": 1.1439414408111471e-07, "loss": 0.4026, "step": 4242 }, { "epoch": 0.95, "learning_rate": 1.1329892126443531e-07, "loss": 0.3857, "step": 4243 }, { "epoch": 0.95, "learning_rate": 1.1220893672231559e-07, "loss": 0.4403, "step": 4244 }, { "epoch": 0.95, "learning_rate": 1.1112419103226136e-07, "loss": 0.4203, "step": 4245 }, { "epoch": 0.95, "learning_rate": 1.1004468476900288e-07, "loss": 0.412, "step": 4246 }, { "epoch": 0.95, "learning_rate": 1.0897041850449819e-07, "loss": 0.4114, "step": 4247 }, { "epoch": 0.95, "learning_rate": 1.0790139280792422e-07, "loss": 0.4104, "step": 4248 }, { "epoch": 0.95, "learning_rate": 1.0683760824568346e-07, "loss": 0.3928, "step": 4249 }, { "epoch": 0.96, "learning_rate": 1.057790653814017e-07, "loss": 0.407, "step": 4250 }, { "epoch": 0.96, "learning_rate": 1.0472576477592589e-07, "loss": 0.3791, "step": 4251 }, { "epoch": 0.96, "learning_rate": 1.0367770698732849e-07, "loss": 0.4159, "step": 4252 }, { "epoch": 0.96, "learning_rate": 1.0263489257090198e-07, "loss": 0.4081, "step": 4253 }, { "epoch": 0.96, "learning_rate": 1.0159732207915884e-07, "loss": 0.4064, "step": 4254 }, { "epoch": 0.96, "learning_rate": 1.0056499606183933e-07, "loss": 0.4033, "step": 4255 }, { "epoch": 0.96, "learning_rate": 9.953791506589815e-08, "loss": 0.4146, "step": 4256 }, { "epoch": 0.96, "learning_rate": 9.851607963551557e-08, "loss": 0.416, "step": 4257 }, { "epoch": 0.96, "learning_rate": 9.74994903120896e-08, "loss": 0.3921, "step": 4258 }, { "epoch": 0.96, "learning_rate": 9.648814763424052e-08, "loss": 0.3984, "step": 4259 }, { "epoch": 0.96, "learning_rate": 9.548205213780859e-08, "loss": 0.415, "step": 4260 }, { "epoch": 0.96, "learning_rate": 9.448120435585629e-08, "loss": 0.4119, "step": 4261 }, { "epoch": 0.96, "learning_rate": 9.348560481865831e-08, "loss": 0.3949, "step": 4262 }, { "epoch": 0.96, "learning_rate": 9.249525405371829e-08, "loss": 0.4249, "step": 4263 }, { "epoch": 0.96, "learning_rate": 9.15101525857498e-08, "loss": 0.4117, "step": 4264 }, { "epoch": 0.96, "learning_rate": 9.053030093669313e-08, "loss": 0.4279, "step": 4265 }, { "epoch": 0.96, "learning_rate": 8.955569962570188e-08, "loss": 0.4068, "step": 4266 }, { "epoch": 0.96, "learning_rate": 8.858634916914854e-08, "loss": 0.4025, "step": 4267 }, { "epoch": 0.96, "learning_rate": 8.762225008062675e-08, "loss": 0.4093, "step": 4268 }, { "epoch": 0.96, "learning_rate": 8.666340287094343e-08, "loss": 0.3933, "step": 4269 }, { "epoch": 0.96, "learning_rate": 8.570980804812556e-08, "loss": 0.4057, "step": 4270 }, { "epoch": 0.96, "learning_rate": 8.476146611741786e-08, "loss": 0.4126, "step": 4271 }, { "epoch": 0.96, "learning_rate": 8.381837758127842e-08, "loss": 0.4241, "step": 4272 }, { "epoch": 0.96, "learning_rate": 8.28805429393853e-08, "loss": 0.4444, "step": 4273 }, { "epoch": 0.96, "learning_rate": 8.194796268863325e-08, "loss": 0.3962, "step": 4274 }, { "epoch": 0.96, "learning_rate": 8.102063732312925e-08, "loss": 0.411, "step": 4275 }, { "epoch": 0.96, "learning_rate": 8.009856733419918e-08, "loss": 0.409, "step": 4276 }, { "epoch": 0.96, "learning_rate": 7.918175321038446e-08, "loss": 0.3867, "step": 4277 }, { "epoch": 0.96, "learning_rate": 7.827019543743986e-08, "loss": 0.4015, "step": 4278 }, { "epoch": 0.96, "learning_rate": 7.736389449833903e-08, "loss": 0.3799, "step": 4279 }, { "epoch": 0.96, "learning_rate": 7.646285087326344e-08, "loss": 0.4157, "step": 4280 }, { "epoch": 0.96, "learning_rate": 7.556706503961675e-08, "loss": 0.3982, "step": 4281 }, { "epoch": 0.96, "learning_rate": 7.467653747201265e-08, "loss": 0.4297, "step": 4282 }, { "epoch": 0.96, "learning_rate": 7.379126864228037e-08, "loss": 0.4117, "step": 4283 }, { "epoch": 0.96, "learning_rate": 7.291125901946027e-08, "loss": 0.4052, "step": 4284 }, { "epoch": 0.96, "learning_rate": 7.203650906980942e-08, "loss": 0.4122, "step": 4285 }, { "epoch": 0.96, "learning_rate": 7.116701925679704e-08, "loss": 0.3966, "step": 4286 }, { "epoch": 0.96, "learning_rate": 7.030279004110463e-08, "loss": 0.4143, "step": 4287 }, { "epoch": 0.96, "learning_rate": 6.944382188062815e-08, "loss": 0.4084, "step": 4288 }, { "epoch": 0.96, "learning_rate": 6.859011523047465e-08, "loss": 0.4073, "step": 4289 }, { "epoch": 0.96, "learning_rate": 6.774167054296233e-08, "loss": 0.4079, "step": 4290 }, { "epoch": 0.96, "learning_rate": 6.689848826762491e-08, "loss": 0.3728, "step": 4291 }, { "epoch": 0.96, "learning_rate": 6.606056885120504e-08, "loss": 0.4144, "step": 4292 }, { "epoch": 0.96, "learning_rate": 6.522791273765871e-08, "loss": 0.4041, "step": 4293 }, { "epoch": 0.96, "learning_rate": 6.440052036815081e-08, "loss": 0.4124, "step": 4294 }, { "epoch": 0.97, "learning_rate": 6.357839218106066e-08, "loss": 0.3856, "step": 4295 }, { "epoch": 0.97, "learning_rate": 6.276152861197649e-08, "loss": 0.4105, "step": 4296 }, { "epoch": 0.97, "learning_rate": 6.194993009369543e-08, "loss": 0.4306, "step": 4297 }, { "epoch": 0.97, "learning_rate": 6.114359705622908e-08, "loss": 0.4027, "step": 4298 }, { "epoch": 0.97, "learning_rate": 6.03425299267968e-08, "loss": 0.4237, "step": 4299 }, { "epoch": 0.97, "learning_rate": 5.954672912982906e-08, "loss": 0.3959, "step": 4300 }, { "epoch": 0.97, "learning_rate": 5.8756195086963066e-08, "loss": 0.4271, "step": 4301 }, { "epoch": 0.97, "learning_rate": 5.797092821705041e-08, "loss": 0.4003, "step": 4302 }, { "epoch": 0.97, "learning_rate": 5.7190928936146083e-08, "loss": 0.3836, "step": 4303 }, { "epoch": 0.97, "learning_rate": 5.641619765752171e-08, "loss": 0.3913, "step": 4304 }, { "epoch": 0.97, "learning_rate": 5.564673479164895e-08, "loss": 0.414, "step": 4305 }, { "epoch": 0.97, "learning_rate": 5.488254074621613e-08, "loss": 0.414, "step": 4306 }, { "epoch": 0.97, "learning_rate": 5.412361592611382e-08, "loss": 0.3904, "step": 4307 }, { "epoch": 0.97, "learning_rate": 5.3369960733445916e-08, "loss": 0.4266, "step": 4308 }, { "epoch": 0.97, "learning_rate": 5.262157556751968e-08, "loss": 0.3832, "step": 4309 }, { "epoch": 0.97, "learning_rate": 5.187846082485348e-08, "loss": 0.4193, "step": 4310 }, { "epoch": 0.97, "learning_rate": 5.114061689917238e-08, "loss": 0.4087, "step": 4311 }, { "epoch": 0.97, "learning_rate": 5.04080441814081e-08, "loss": 0.4282, "step": 4312 }, { "epoch": 0.97, "learning_rate": 4.9680743059699055e-08, "loss": 0.3873, "step": 4313 }, { "epoch": 0.97, "learning_rate": 4.8958713919391445e-08, "loss": 0.4074, "step": 4314 }, { "epoch": 0.97, "learning_rate": 4.8241957143040365e-08, "loss": 0.4156, "step": 4315 }, { "epoch": 0.97, "learning_rate": 4.753047311040315e-08, "loss": 0.383, "step": 4316 }, { "epoch": 0.97, "learning_rate": 4.6824262198447154e-08, "loss": 0.4205, "step": 4317 }, { "epoch": 0.97, "learning_rate": 4.6123324781343073e-08, "loss": 0.3948, "step": 4318 }, { "epoch": 0.97, "learning_rate": 4.5427661230469376e-08, "loss": 0.4185, "step": 4319 }, { "epoch": 0.97, "learning_rate": 4.473727191441124e-08, "loss": 0.3977, "step": 4320 }, { "epoch": 0.97, "learning_rate": 4.4052157198953835e-08, "loss": 0.4186, "step": 4321 }, { "epoch": 0.97, "learning_rate": 4.3372317447096803e-08, "loss": 0.42, "step": 4322 }, { "epoch": 0.97, "learning_rate": 4.269775301903645e-08, "loss": 0.389, "step": 4323 }, { "epoch": 0.97, "learning_rate": 4.20284642721791e-08, "loss": 0.4175, "step": 4324 }, { "epoch": 0.97, "learning_rate": 4.136445156113222e-08, "loss": 0.4162, "step": 4325 }, { "epoch": 0.97, "learning_rate": 4.070571523771327e-08, "loss": 0.4039, "step": 4326 }, { "epoch": 0.97, "learning_rate": 4.005225565093751e-08, "loss": 0.3892, "step": 4327 }, { "epoch": 0.97, "learning_rate": 3.940407314703021e-08, "loss": 0.4068, "step": 4328 }, { "epoch": 0.97, "learning_rate": 3.876116806941555e-08, "loss": 0.4204, "step": 4329 }, { "epoch": 0.97, "learning_rate": 3.8123540758726596e-08, "loss": 0.404, "step": 4330 }, { "epoch": 0.97, "learning_rate": 3.749119155279646e-08, "loss": 0.4048, "step": 4331 }, { "epoch": 0.97, "learning_rate": 3.686412078666268e-08, "loss": 0.4125, "step": 4332 }, { "epoch": 0.97, "learning_rate": 3.6242328792567286e-08, "loss": 0.376, "step": 4333 }, { "epoch": 0.97, "learning_rate": 3.562581589995451e-08, "loss": 0.4052, "step": 4334 }, { "epoch": 0.97, "learning_rate": 3.501458243547085e-08, "loss": 0.3968, "step": 4335 }, { "epoch": 0.97, "learning_rate": 3.440862872296724e-08, "loss": 0.4154, "step": 4336 }, { "epoch": 0.97, "learning_rate": 3.380795508349466e-08, "loss": 0.4119, "step": 4337 }, { "epoch": 0.97, "learning_rate": 3.321256183531074e-08, "loss": 0.4193, "step": 4338 }, { "epoch": 0.98, "learning_rate": 3.262244929387204e-08, "loss": 0.4019, "step": 4339 }, { "epoch": 0.98, "learning_rate": 3.203761777183734e-08, "loss": 0.4015, "step": 4340 }, { "epoch": 0.98, "learning_rate": 3.145806757906766e-08, "loss": 0.3855, "step": 4341 }, { "epoch": 0.98, "learning_rate": 3.0883799022628504e-08, "loss": 0.3926, "step": 4342 }, { "epoch": 0.98, "learning_rate": 3.0314812406782024e-08, "loss": 0.4046, "step": 4343 }, { "epoch": 0.98, "learning_rate": 2.975110803299708e-08, "loss": 0.4055, "step": 4344 }, { "epoch": 0.98, "learning_rate": 2.9192686199939204e-08, "loss": 0.3815, "step": 4345 }, { "epoch": 0.98, "learning_rate": 2.86395472034795e-08, "loss": 0.4192, "step": 4346 }, { "epoch": 0.98, "learning_rate": 2.8091691336686877e-08, "loss": 0.4151, "step": 4347 }, { "epoch": 0.98, "learning_rate": 2.754911888983136e-08, "loss": 0.408, "step": 4348 }, { "epoch": 0.98, "learning_rate": 2.701183015038522e-08, "loss": 0.4084, "step": 4349 }, { "epoch": 0.98, "learning_rate": 2.6479825403019633e-08, "loss": 0.4083, "step": 4350 }, { "epoch": 0.98, "learning_rate": 2.5953104929606898e-08, "loss": 0.4014, "step": 4351 }, { "epoch": 0.98, "learning_rate": 2.543166900921934e-08, "loss": 0.4215, "step": 4352 }, { "epoch": 0.98, "learning_rate": 2.4915517918130406e-08, "loss": 0.4049, "step": 4353 }, { "epoch": 0.98, "learning_rate": 2.440465192981356e-08, "loss": 0.3926, "step": 4354 }, { "epoch": 0.98, "learning_rate": 2.389907131493785e-08, "loss": 0.4128, "step": 4355 }, { "epoch": 0.98, "learning_rate": 2.3398776341378992e-08, "loss": 0.4348, "step": 4356 }, { "epoch": 0.98, "learning_rate": 2.2903767274204956e-08, "loss": 0.4104, "step": 4357 }, { "epoch": 0.98, "learning_rate": 2.241404437568928e-08, "loss": 0.4095, "step": 4358 }, { "epoch": 0.98, "learning_rate": 2.1929607905301077e-08, "loss": 0.4086, "step": 4359 }, { "epoch": 0.98, "learning_rate": 2.145045811970836e-08, "loss": 0.4086, "step": 4360 }, { "epoch": 0.98, "learning_rate": 2.09765952727814e-08, "loss": 0.3942, "step": 4361 }, { "epoch": 0.98, "learning_rate": 2.0508019615586015e-08, "loss": 0.3854, "step": 4362 }, { "epoch": 0.98, "learning_rate": 2.0044731396386963e-08, "loss": 0.396, "step": 4363 }, { "epoch": 0.98, "learning_rate": 1.9586730860650106e-08, "loss": 0.4117, "step": 4364 }, { "epoch": 0.98, "learning_rate": 1.9134018251038e-08, "loss": 0.4133, "step": 4365 }, { "epoch": 0.98, "learning_rate": 1.868659380740989e-08, "loss": 0.381, "step": 4366 }, { "epoch": 0.98, "learning_rate": 1.824445776682504e-08, "loss": 0.3953, "step": 4367 }, { "epoch": 0.98, "learning_rate": 1.7807610363542727e-08, "loss": 0.3866, "step": 4368 }, { "epoch": 0.98, "learning_rate": 1.7376051829015583e-08, "loss": 0.3969, "step": 4369 }, { "epoch": 0.98, "learning_rate": 1.6949782391897375e-08, "loss": 0.4166, "step": 4370 }, { "epoch": 0.98, "learning_rate": 1.6528802278038547e-08, "loss": 0.3993, "step": 4371 }, { "epoch": 0.98, "learning_rate": 1.611311171048735e-08, "loss": 0.4164, "step": 4372 }, { "epoch": 0.98, "learning_rate": 1.5702710909489827e-08, "loss": 0.3788, "step": 4373 }, { "epoch": 0.98, "learning_rate": 1.5297600092486486e-08, "loss": 0.4111, "step": 4374 }, { "epoch": 0.98, "learning_rate": 1.4897779474120078e-08, "loss": 0.4312, "step": 4375 }, { "epoch": 0.98, "learning_rate": 1.4503249266226705e-08, "loss": 0.3924, "step": 4376 }, { "epoch": 0.98, "learning_rate": 1.4114009677839158e-08, "loss": 0.4217, "step": 4377 }, { "epoch": 0.98, "learning_rate": 1.3730060915190246e-08, "loss": 0.4151, "step": 4378 }, { "epoch": 0.98, "learning_rate": 1.3351403181707246e-08, "loss": 0.4056, "step": 4379 }, { "epoch": 0.98, "learning_rate": 1.2978036678014117e-08, "loss": 0.3976, "step": 4380 }, { "epoch": 0.98, "learning_rate": 1.260996160193262e-08, "loss": 0.4107, "step": 4381 }, { "epoch": 0.98, "learning_rate": 1.2247178148477867e-08, "loss": 0.3957, "step": 4382 }, { "epoch": 0.98, "learning_rate": 1.1889686509866104e-08, "loss": 0.4085, "step": 4383 }, { "epoch": 0.99, "learning_rate": 1.1537486875505821e-08, "loss": 0.4252, "step": 4384 }, { "epoch": 0.99, "learning_rate": 1.1190579432003301e-08, "loss": 0.3986, "step": 4385 }, { "epoch": 0.99, "learning_rate": 1.0848964363161518e-08, "loss": 0.4097, "step": 4386 }, { "epoch": 0.99, "learning_rate": 1.0512641849977911e-08, "loss": 0.3842, "step": 4387 }, { "epoch": 0.99, "learning_rate": 1.0181612070646608e-08, "loss": 0.4053, "step": 4388 }, { "epoch": 0.99, "learning_rate": 9.855875200556197e-09, "loss": 0.4088, "step": 4389 }, { "epoch": 0.99, "learning_rate": 9.535431412293073e-09, "loss": 0.426, "step": 4390 }, { "epoch": 0.99, "learning_rate": 9.220280875636978e-09, "loss": 0.4089, "step": 4391 }, { "epoch": 0.99, "learning_rate": 8.910423757565455e-09, "loss": 0.3937, "step": 4392 }, { "epoch": 0.99, "learning_rate": 8.605860222250518e-09, "loss": 0.413, "step": 4393 }, { "epoch": 0.99, "learning_rate": 8.306590431057526e-09, "loss": 0.3998, "step": 4394 }, { "epoch": 0.99, "learning_rate": 8.012614542549646e-09, "loss": 0.3997, "step": 4395 }, { "epoch": 0.99, "learning_rate": 7.723932712484505e-09, "loss": 0.3996, "step": 4396 }, { "epoch": 0.99, "learning_rate": 7.4405450938142e-09, "loss": 0.4299, "step": 4397 }, { "epoch": 0.99, "learning_rate": 7.162451836685291e-09, "loss": 0.3906, "step": 4398 }, { "epoch": 0.99, "learning_rate": 6.889653088442139e-09, "loss": 0.416, "step": 4399 }, { "epoch": 0.99, "learning_rate": 6.622148993619126e-09, "loss": 0.4178, "step": 4400 }, { "epoch": 0.99, "learning_rate": 6.359939693950656e-09, "loss": 0.4119, "step": 4401 }, { "epoch": 0.99, "learning_rate": 6.103025328361156e-09, "loss": 0.4091, "step": 4402 }, { "epoch": 0.99, "learning_rate": 5.85140603297285e-09, "loss": 0.3768, "step": 4403 }, { "epoch": 0.99, "learning_rate": 5.60508194110021e-09, "loss": 0.418, "step": 4404 }, { "epoch": 0.99, "learning_rate": 5.3640531832543916e-09, "loss": 0.3742, "step": 4405 }, { "epoch": 0.99, "learning_rate": 5.12831988713991e-09, "loss": 0.4043, "step": 4406 }, { "epoch": 0.99, "learning_rate": 4.897882177653524e-09, "loss": 0.3926, "step": 4407 }, { "epoch": 0.99, "learning_rate": 4.6727401768897894e-09, "loss": 0.4089, "step": 4408 }, { "epoch": 0.99, "learning_rate": 4.452894004136621e-09, "loss": 0.3858, "step": 4409 }, { "epoch": 0.99, "learning_rate": 4.2383437758719555e-09, "loss": 0.4028, "step": 4410 }, { "epoch": 0.99, "learning_rate": 4.029089605774861e-09, "loss": 0.4308, "step": 4411 }, { "epoch": 0.99, "learning_rate": 3.8251316047110964e-09, "loss": 0.4314, "step": 4412 }, { "epoch": 0.99, "learning_rate": 3.6264698807464417e-09, "loss": 0.4032, "step": 4413 }, { "epoch": 0.99, "learning_rate": 3.4331045391367e-09, "loss": 0.3986, "step": 4414 }, { "epoch": 0.99, "learning_rate": 3.2450356823321427e-09, "loss": 0.4174, "step": 4415 }, { "epoch": 0.99, "learning_rate": 3.0622634099786164e-09, "loss": 0.4184, "step": 4416 }, { "epoch": 0.99, "learning_rate": 2.8847878189142143e-09, "loss": 0.4119, "step": 4417 }, { "epoch": 0.99, "learning_rate": 2.7126090031703855e-09, "loss": 0.3796, "step": 4418 }, { "epoch": 0.99, "learning_rate": 2.545727053973046e-09, "loss": 0.3916, "step": 4419 }, { "epoch": 0.99, "learning_rate": 2.3841420597414677e-09, "loss": 0.3709, "step": 4420 }, { "epoch": 0.99, "learning_rate": 2.2278541060871683e-09, "loss": 0.4175, "step": 4421 }, { "epoch": 0.99, "learning_rate": 2.0768632758172425e-09, "loss": 0.4153, "step": 4422 }, { "epoch": 0.99, "learning_rate": 1.931169648932141e-09, "loss": 0.4163, "step": 4423 }, { "epoch": 0.99, "learning_rate": 1.7907733026223396e-09, "loss": 0.419, "step": 4424 }, { "epoch": 0.99, "learning_rate": 1.655674311276112e-09, "loss": 0.4303, "step": 4425 }, { "epoch": 0.99, "learning_rate": 1.5258727464717571e-09, "loss": 0.4352, "step": 4426 }, { "epoch": 0.99, "learning_rate": 1.4013686769831503e-09, "loss": 0.4137, "step": 4427 }, { "epoch": 1.0, "learning_rate": 1.282162168776413e-09, "loss": 0.3924, "step": 4428 }, { "epoch": 1.0, "learning_rate": 1.1682532850099125e-09, "loss": 0.424, "step": 4429 }, { "epoch": 1.0, "learning_rate": 1.0596420860353728e-09, "loss": 0.4077, "step": 4430 }, { "epoch": 1.0, "learning_rate": 9.56328629400094e-10, "loss": 0.4112, "step": 4431 }, { "epoch": 1.0, "learning_rate": 8.583129698414017e-10, "loss": 0.4193, "step": 4432 }, { "epoch": 1.0, "learning_rate": 7.655951592921984e-10, "loss": 0.4224, "step": 4433 }, { "epoch": 1.0, "learning_rate": 6.781752468754122e-10, "loss": 0.3846, "step": 4434 }, { "epoch": 1.0, "learning_rate": 5.960532789106577e-10, "loss": 0.3971, "step": 4435 }, { "epoch": 1.0, "learning_rate": 5.192292989075753e-10, "loss": 0.4127, "step": 4436 }, { "epoch": 1.0, "learning_rate": 4.477033475702719e-10, "loss": 0.4204, "step": 4437 }, { "epoch": 1.0, "learning_rate": 3.814754627939898e-10, "loss": 0.4123, "step": 4438 }, { "epoch": 1.0, "learning_rate": 3.2054567967065854e-10, "loss": 0.4014, "step": 4439 }, { "epoch": 1.0, "learning_rate": 2.6491403048112266e-10, "loss": 0.4085, "step": 4440 }, { "epoch": 1.0, "learning_rate": 2.1458054470069322e-10, "loss": 0.4012, "step": 4441 }, { "epoch": 1.0, "learning_rate": 1.6954524899803758e-10, "loss": 0.4116, "step": 4442 }, { "epoch": 1.0, "learning_rate": 1.298081672351792e-10, "loss": 0.3999, "step": 4443 }, { "epoch": 1.0, "learning_rate": 9.536932046416702e-11, "loss": 0.4351, "step": 4444 }, { "epoch": 1.0, "learning_rate": 6.622872693262672e-11, "loss": 0.3972, "step": 4445 }, { "epoch": 1.0, "learning_rate": 4.238640208042988e-11, "loss": 0.4103, "step": 4446 }, { "epoch": 1.0, "learning_rate": 2.3842358538583856e-11, "loss": 0.3977, "step": 4447 }, { "epoch": 1.0, "learning_rate": 1.059660613478286e-11, "loss": 0.4013, "step": 4448 }, { "epoch": 1.0, "learning_rate": 2.649151884526191e-12, "loss": 0.4014, "step": 4449 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.4396, "step": 4450 }, { "epoch": 1.0, "step": 4450, "total_flos": 2783327872417792.0, "train_loss": 0.4793249399675412, "train_runtime": 45248.1247, "train_samples_per_second": 12.587, "train_steps_per_second": 0.098 } ], "max_steps": 4450, "num_train_epochs": 1, "total_flos": 2783327872417792.0, "trial_name": null, "trial_params": null }