{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 11666, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.714285714285715e-08, "loss": 0.5529, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.142857142857143e-07, "loss": 0.5725, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.7142857142857146e-07, "loss": 0.4912, "step": 3 }, { "epoch": 0.0, "learning_rate": 2.285714285714286e-07, "loss": 0.5575, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.8571428571428575e-07, "loss": 0.5187, "step": 5 }, { "epoch": 0.0, "learning_rate": 3.428571428571429e-07, "loss": 0.6355, "step": 6 }, { "epoch": 0.0, "learning_rate": 4.0000000000000003e-07, "loss": 0.5984, "step": 7 }, { "epoch": 0.0, "learning_rate": 4.571428571428572e-07, "loss": 0.6348, "step": 8 }, { "epoch": 0.0, "learning_rate": 5.142857142857143e-07, "loss": 0.5369, "step": 9 }, { "epoch": 0.0, "learning_rate": 5.714285714285715e-07, "loss": 0.5762, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.285714285714287e-07, "loss": 0.5363, "step": 11 }, { "epoch": 0.0, "learning_rate": 6.857142857142858e-07, "loss": 0.554, "step": 12 }, { "epoch": 0.0, "learning_rate": 7.428571428571429e-07, "loss": 0.6045, "step": 13 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 0.6339, "step": 14 }, { "epoch": 0.0, "learning_rate": 8.571428571428572e-07, "loss": 0.5054, "step": 15 }, { "epoch": 0.0, "learning_rate": 9.142857142857144e-07, "loss": 0.5938, "step": 16 }, { "epoch": 0.0, "learning_rate": 9.714285714285715e-07, "loss": 0.5323, "step": 17 }, { "epoch": 0.0, "learning_rate": 1.0285714285714286e-06, "loss": 0.5791, "step": 18 }, { "epoch": 0.0, "learning_rate": 1.0857142857142858e-06, "loss": 0.5463, "step": 19 }, { "epoch": 0.0, "learning_rate": 1.142857142857143e-06, "loss": 0.5763, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-06, "loss": 0.5176, "step": 21 }, { "epoch": 0.0, "learning_rate": 1.2571428571428573e-06, "loss": 0.5574, "step": 22 }, { "epoch": 0.0, "learning_rate": 1.3142857142857143e-06, "loss": 0.5089, "step": 23 }, { "epoch": 0.0, "learning_rate": 1.3714285714285717e-06, "loss": 0.5031, "step": 24 }, { "epoch": 0.0, "learning_rate": 1.4285714285714286e-06, "loss": 0.5227, "step": 25 }, { "epoch": 0.0, "learning_rate": 1.4857142857142858e-06, "loss": 0.4615, "step": 26 }, { "epoch": 0.0, "learning_rate": 1.542857142857143e-06, "loss": 0.4647, "step": 27 }, { "epoch": 0.0, "learning_rate": 1.6000000000000001e-06, "loss": 0.4697, "step": 28 }, { "epoch": 0.0, "learning_rate": 1.657142857142857e-06, "loss": 0.4701, "step": 29 }, { "epoch": 0.0, "learning_rate": 1.7142857142857145e-06, "loss": 0.4543, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.7714285714285714e-06, "loss": 0.4435, "step": 31 }, { "epoch": 0.0, "learning_rate": 1.8285714285714288e-06, "loss": 0.447, "step": 32 }, { "epoch": 0.0, "learning_rate": 1.885714285714286e-06, "loss": 0.4642, "step": 33 }, { "epoch": 0.0, "learning_rate": 1.942857142857143e-06, "loss": 0.4464, "step": 34 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.5004, "step": 35 }, { "epoch": 0.0, "learning_rate": 2.0571428571428573e-06, "loss": 0.442, "step": 36 }, { "epoch": 0.0, "learning_rate": 2.1142857142857147e-06, "loss": 0.4708, "step": 37 }, { "epoch": 0.0, "learning_rate": 2.1714285714285716e-06, "loss": 0.4746, "step": 38 }, { "epoch": 0.0, "learning_rate": 2.228571428571429e-06, "loss": 0.4735, "step": 39 }, { "epoch": 0.0, "learning_rate": 2.285714285714286e-06, "loss": 0.469, "step": 40 }, { "epoch": 0.0, "learning_rate": 2.342857142857143e-06, "loss": 0.6212, "step": 41 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-06, "loss": 0.4497, "step": 42 }, { "epoch": 0.0, "learning_rate": 2.4571428571428573e-06, "loss": 0.4344, "step": 43 }, { "epoch": 0.0, "learning_rate": 2.5142857142857147e-06, "loss": 0.3882, "step": 44 }, { "epoch": 0.0, "learning_rate": 2.571428571428571e-06, "loss": 0.4233, "step": 45 }, { "epoch": 0.0, "learning_rate": 2.6285714285714286e-06, "loss": 0.4608, "step": 46 }, { "epoch": 0.0, "learning_rate": 2.685714285714286e-06, "loss": 0.4189, "step": 47 }, { "epoch": 0.0, "learning_rate": 2.7428571428571433e-06, "loss": 0.4578, "step": 48 }, { "epoch": 0.0, "learning_rate": 2.8000000000000003e-06, "loss": 0.3942, "step": 49 }, { "epoch": 0.0, "learning_rate": 2.8571428571428573e-06, "loss": 0.3894, "step": 50 }, { "epoch": 0.0, "learning_rate": 2.9142857142857146e-06, "loss": 0.4807, "step": 51 }, { "epoch": 0.0, "learning_rate": 2.9714285714285716e-06, "loss": 0.5984, "step": 52 }, { "epoch": 0.0, "learning_rate": 3.028571428571429e-06, "loss": 0.4355, "step": 53 }, { "epoch": 0.0, "learning_rate": 3.085714285714286e-06, "loss": 0.4977, "step": 54 }, { "epoch": 0.0, "learning_rate": 3.142857142857143e-06, "loss": 0.4062, "step": 55 }, { "epoch": 0.0, "learning_rate": 3.2000000000000003e-06, "loss": 0.4528, "step": 56 }, { "epoch": 0.0, "learning_rate": 3.2571428571428577e-06, "loss": 0.4639, "step": 57 }, { "epoch": 0.0, "learning_rate": 3.314285714285714e-06, "loss": 0.3911, "step": 58 }, { "epoch": 0.01, "learning_rate": 3.3714285714285716e-06, "loss": 0.4202, "step": 59 }, { "epoch": 0.01, "learning_rate": 3.428571428571429e-06, "loss": 0.3906, "step": 60 }, { "epoch": 0.01, "learning_rate": 3.4857142857142863e-06, "loss": 0.4504, "step": 61 }, { "epoch": 0.01, "learning_rate": 3.542857142857143e-06, "loss": 0.4298, "step": 62 }, { "epoch": 0.01, "learning_rate": 3.6000000000000003e-06, "loss": 0.3845, "step": 63 }, { "epoch": 0.01, "learning_rate": 3.6571428571428576e-06, "loss": 0.418, "step": 64 }, { "epoch": 0.01, "learning_rate": 3.7142857142857146e-06, "loss": 0.4547, "step": 65 }, { "epoch": 0.01, "learning_rate": 3.771428571428572e-06, "loss": 0.4591, "step": 66 }, { "epoch": 0.01, "learning_rate": 3.828571428571429e-06, "loss": 0.3833, "step": 67 }, { "epoch": 0.01, "learning_rate": 3.885714285714286e-06, "loss": 0.421, "step": 68 }, { "epoch": 0.01, "learning_rate": 3.942857142857143e-06, "loss": 0.4047, "step": 69 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "loss": 0.448, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.057142857142858e-06, "loss": 0.4734, "step": 71 }, { "epoch": 0.01, "learning_rate": 4.114285714285715e-06, "loss": 0.4569, "step": 72 }, { "epoch": 0.01, "learning_rate": 4.1714285714285715e-06, "loss": 0.4249, "step": 73 }, { "epoch": 0.01, "learning_rate": 4.228571428571429e-06, "loss": 0.3678, "step": 74 }, { "epoch": 0.01, "learning_rate": 4.2857142857142855e-06, "loss": 0.437, "step": 75 }, { "epoch": 0.01, "learning_rate": 4.342857142857143e-06, "loss": 0.3923, "step": 76 }, { "epoch": 0.01, "learning_rate": 4.4e-06, "loss": 0.4132, "step": 77 }, { "epoch": 0.01, "learning_rate": 4.457142857142858e-06, "loss": 0.4335, "step": 78 }, { "epoch": 0.01, "learning_rate": 4.514285714285714e-06, "loss": 0.4343, "step": 79 }, { "epoch": 0.01, "learning_rate": 4.571428571428572e-06, "loss": 0.423, "step": 80 }, { "epoch": 0.01, "learning_rate": 4.628571428571429e-06, "loss": 0.4707, "step": 81 }, { "epoch": 0.01, "learning_rate": 4.685714285714286e-06, "loss": 0.4405, "step": 82 }, { "epoch": 0.01, "learning_rate": 4.742857142857144e-06, "loss": 0.3959, "step": 83 }, { "epoch": 0.01, "learning_rate": 4.800000000000001e-06, "loss": 0.4052, "step": 84 }, { "epoch": 0.01, "learning_rate": 4.857142857142858e-06, "loss": 0.4122, "step": 85 }, { "epoch": 0.01, "learning_rate": 4.9142857142857145e-06, "loss": 0.4609, "step": 86 }, { "epoch": 0.01, "learning_rate": 4.971428571428572e-06, "loss": 0.3727, "step": 87 }, { "epoch": 0.01, "learning_rate": 5.028571428571429e-06, "loss": 0.366, "step": 88 }, { "epoch": 0.01, "learning_rate": 5.085714285714286e-06, "loss": 0.3898, "step": 89 }, { "epoch": 0.01, "learning_rate": 5.142857142857142e-06, "loss": 0.4215, "step": 90 }, { "epoch": 0.01, "learning_rate": 5.2e-06, "loss": 0.3989, "step": 91 }, { "epoch": 0.01, "learning_rate": 5.257142857142857e-06, "loss": 0.4144, "step": 92 }, { "epoch": 0.01, "learning_rate": 5.314285714285715e-06, "loss": 0.4241, "step": 93 }, { "epoch": 0.01, "learning_rate": 5.371428571428572e-06, "loss": 0.3887, "step": 94 }, { "epoch": 0.01, "learning_rate": 5.428571428571429e-06, "loss": 0.3746, "step": 95 }, { "epoch": 0.01, "learning_rate": 5.485714285714287e-06, "loss": 0.4066, "step": 96 }, { "epoch": 0.01, "learning_rate": 5.542857142857143e-06, "loss": 0.4607, "step": 97 }, { "epoch": 0.01, "learning_rate": 5.600000000000001e-06, "loss": 0.3784, "step": 98 }, { "epoch": 0.01, "learning_rate": 5.6571428571428576e-06, "loss": 0.4544, "step": 99 }, { "epoch": 0.01, "learning_rate": 5.7142857142857145e-06, "loss": 0.4081, "step": 100 }, { "epoch": 0.01, "learning_rate": 5.771428571428572e-06, "loss": 0.3824, "step": 101 }, { "epoch": 0.01, "learning_rate": 5.828571428571429e-06, "loss": 0.3564, "step": 102 }, { "epoch": 0.01, "learning_rate": 5.885714285714285e-06, "loss": 0.4146, "step": 103 }, { "epoch": 0.01, "learning_rate": 5.942857142857143e-06, "loss": 0.4246, "step": 104 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 0.3865, "step": 105 }, { "epoch": 0.01, "learning_rate": 6.057142857142858e-06, "loss": 0.4225, "step": 106 }, { "epoch": 0.01, "learning_rate": 6.114285714285715e-06, "loss": 0.3798, "step": 107 }, { "epoch": 0.01, "learning_rate": 6.171428571428572e-06, "loss": 0.4398, "step": 108 }, { "epoch": 0.01, "learning_rate": 6.22857142857143e-06, "loss": 0.4353, "step": 109 }, { "epoch": 0.01, "learning_rate": 6.285714285714286e-06, "loss": 0.4125, "step": 110 }, { "epoch": 0.01, "learning_rate": 6.342857142857143e-06, "loss": 0.3684, "step": 111 }, { "epoch": 0.01, "learning_rate": 6.4000000000000006e-06, "loss": 0.3872, "step": 112 }, { "epoch": 0.01, "learning_rate": 6.4571428571428575e-06, "loss": 0.4212, "step": 113 }, { "epoch": 0.01, "learning_rate": 6.514285714285715e-06, "loss": 0.3522, "step": 114 }, { "epoch": 0.01, "learning_rate": 6.571428571428572e-06, "loss": 0.351, "step": 115 }, { "epoch": 0.01, "learning_rate": 6.628571428571428e-06, "loss": 0.3942, "step": 116 }, { "epoch": 0.01, "learning_rate": 6.685714285714286e-06, "loss": 0.3751, "step": 117 }, { "epoch": 0.01, "learning_rate": 6.742857142857143e-06, "loss": 0.3584, "step": 118 }, { "epoch": 0.01, "learning_rate": 6.800000000000001e-06, "loss": 0.4096, "step": 119 }, { "epoch": 0.01, "learning_rate": 6.857142857142858e-06, "loss": 0.4347, "step": 120 }, { "epoch": 0.01, "learning_rate": 6.914285714285715e-06, "loss": 0.3983, "step": 121 }, { "epoch": 0.01, "learning_rate": 6.971428571428573e-06, "loss": 0.397, "step": 122 }, { "epoch": 0.01, "learning_rate": 7.028571428571429e-06, "loss": 0.376, "step": 123 }, { "epoch": 0.01, "learning_rate": 7.085714285714286e-06, "loss": 0.3793, "step": 124 }, { "epoch": 0.01, "learning_rate": 7.1428571428571436e-06, "loss": 0.3593, "step": 125 }, { "epoch": 0.01, "learning_rate": 7.2000000000000005e-06, "loss": 0.399, "step": 126 }, { "epoch": 0.01, "learning_rate": 7.257142857142858e-06, "loss": 0.4334, "step": 127 }, { "epoch": 0.01, "learning_rate": 7.314285714285715e-06, "loss": 0.3678, "step": 128 }, { "epoch": 0.01, "learning_rate": 7.371428571428571e-06, "loss": 0.389, "step": 129 }, { "epoch": 0.01, "learning_rate": 7.428571428571429e-06, "loss": 0.4114, "step": 130 }, { "epoch": 0.01, "learning_rate": 7.485714285714286e-06, "loss": 0.3561, "step": 131 }, { "epoch": 0.01, "learning_rate": 7.542857142857144e-06, "loss": 0.379, "step": 132 }, { "epoch": 0.01, "learning_rate": 7.600000000000001e-06, "loss": 0.41, "step": 133 }, { "epoch": 0.01, "learning_rate": 7.657142857142858e-06, "loss": 0.3839, "step": 134 }, { "epoch": 0.01, "learning_rate": 7.714285714285716e-06, "loss": 0.4163, "step": 135 }, { "epoch": 0.01, "learning_rate": 7.771428571428572e-06, "loss": 0.3829, "step": 136 }, { "epoch": 0.01, "learning_rate": 7.828571428571428e-06, "loss": 0.3513, "step": 137 }, { "epoch": 0.01, "learning_rate": 7.885714285714286e-06, "loss": 0.4048, "step": 138 }, { "epoch": 0.01, "learning_rate": 7.942857142857144e-06, "loss": 0.3633, "step": 139 }, { "epoch": 0.01, "learning_rate": 8.000000000000001e-06, "loss": 0.5511, "step": 140 }, { "epoch": 0.01, "learning_rate": 8.057142857142857e-06, "loss": 0.4023, "step": 141 }, { "epoch": 0.01, "learning_rate": 8.114285714285715e-06, "loss": 0.4149, "step": 142 }, { "epoch": 0.01, "learning_rate": 8.171428571428573e-06, "loss": 0.4139, "step": 143 }, { "epoch": 0.01, "learning_rate": 8.22857142857143e-06, "loss": 0.3245, "step": 144 }, { "epoch": 0.01, "learning_rate": 8.285714285714287e-06, "loss": 0.3558, "step": 145 }, { "epoch": 0.01, "learning_rate": 8.342857142857143e-06, "loss": 0.384, "step": 146 }, { "epoch": 0.01, "learning_rate": 8.400000000000001e-06, "loss": 0.3575, "step": 147 }, { "epoch": 0.01, "learning_rate": 8.457142857142859e-06, "loss": 0.3829, "step": 148 }, { "epoch": 0.01, "learning_rate": 8.514285714285715e-06, "loss": 0.3997, "step": 149 }, { "epoch": 0.01, "learning_rate": 8.571428571428571e-06, "loss": 0.3687, "step": 150 }, { "epoch": 0.01, "learning_rate": 8.628571428571429e-06, "loss": 0.3605, "step": 151 }, { "epoch": 0.01, "learning_rate": 8.685714285714287e-06, "loss": 0.4476, "step": 152 }, { "epoch": 0.01, "learning_rate": 8.742857142857144e-06, "loss": 0.3475, "step": 153 }, { "epoch": 0.01, "learning_rate": 8.8e-06, "loss": 0.3788, "step": 154 }, { "epoch": 0.01, "learning_rate": 8.857142857142858e-06, "loss": 0.4537, "step": 155 }, { "epoch": 0.01, "learning_rate": 8.914285714285716e-06, "loss": 0.4049, "step": 156 }, { "epoch": 0.01, "learning_rate": 8.971428571428572e-06, "loss": 0.4028, "step": 157 }, { "epoch": 0.01, "learning_rate": 9.028571428571428e-06, "loss": 0.3348, "step": 158 }, { "epoch": 0.01, "learning_rate": 9.085714285714286e-06, "loss": 0.3737, "step": 159 }, { "epoch": 0.01, "learning_rate": 9.142857142857144e-06, "loss": 0.381, "step": 160 }, { "epoch": 0.01, "learning_rate": 9.200000000000002e-06, "loss": 0.411, "step": 161 }, { "epoch": 0.01, "learning_rate": 9.257142857142858e-06, "loss": 0.3879, "step": 162 }, { "epoch": 0.01, "learning_rate": 9.314285714285714e-06, "loss": 0.3915, "step": 163 }, { "epoch": 0.01, "learning_rate": 9.371428571428572e-06, "loss": 0.3442, "step": 164 }, { "epoch": 0.01, "learning_rate": 9.42857142857143e-06, "loss": 0.4283, "step": 165 }, { "epoch": 0.01, "learning_rate": 9.485714285714287e-06, "loss": 0.3656, "step": 166 }, { "epoch": 0.01, "learning_rate": 9.542857142857143e-06, "loss": 0.3682, "step": 167 }, { "epoch": 0.01, "learning_rate": 9.600000000000001e-06, "loss": 0.3803, "step": 168 }, { "epoch": 0.01, "learning_rate": 9.657142857142859e-06, "loss": 0.3868, "step": 169 }, { "epoch": 0.01, "learning_rate": 9.714285714285715e-06, "loss": 0.3719, "step": 170 }, { "epoch": 0.01, "learning_rate": 9.771428571428571e-06, "loss": 0.3582, "step": 171 }, { "epoch": 0.01, "learning_rate": 9.828571428571429e-06, "loss": 0.3861, "step": 172 }, { "epoch": 0.01, "learning_rate": 9.885714285714287e-06, "loss": 0.3943, "step": 173 }, { "epoch": 0.01, "learning_rate": 9.942857142857145e-06, "loss": 0.4069, "step": 174 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 0.3989, "step": 175 }, { "epoch": 0.02, "learning_rate": 1.0057142857142859e-05, "loss": 0.3892, "step": 176 }, { "epoch": 0.02, "learning_rate": 1.0114285714285715e-05, "loss": 0.3906, "step": 177 }, { "epoch": 0.02, "learning_rate": 1.0171428571428573e-05, "loss": 0.3853, "step": 178 }, { "epoch": 0.02, "learning_rate": 1.022857142857143e-05, "loss": 0.4231, "step": 179 }, { "epoch": 0.02, "learning_rate": 1.0285714285714285e-05, "loss": 0.3798, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.0342857142857143e-05, "loss": 0.3433, "step": 181 }, { "epoch": 0.02, "learning_rate": 1.04e-05, "loss": 0.3929, "step": 182 }, { "epoch": 0.02, "learning_rate": 1.045714285714286e-05, "loss": 0.3617, "step": 183 }, { "epoch": 0.02, "learning_rate": 1.0514285714285714e-05, "loss": 0.3925, "step": 184 }, { "epoch": 0.02, "learning_rate": 1.0571428571428572e-05, "loss": 0.3739, "step": 185 }, { "epoch": 0.02, "learning_rate": 1.062857142857143e-05, "loss": 0.3937, "step": 186 }, { "epoch": 0.02, "learning_rate": 1.0685714285714286e-05, "loss": 0.3399, "step": 187 }, { "epoch": 0.02, "learning_rate": 1.0742857142857144e-05, "loss": 0.3892, "step": 188 }, { "epoch": 0.02, "learning_rate": 1.0800000000000002e-05, "loss": 0.3416, "step": 189 }, { "epoch": 0.02, "learning_rate": 1.0857142857142858e-05, "loss": 0.3815, "step": 190 }, { "epoch": 0.02, "learning_rate": 1.0914285714285716e-05, "loss": 0.3743, "step": 191 }, { "epoch": 0.02, "learning_rate": 1.0971428571428573e-05, "loss": 0.3636, "step": 192 }, { "epoch": 0.02, "learning_rate": 1.1028571428571428e-05, "loss": 0.4591, "step": 193 }, { "epoch": 0.02, "learning_rate": 1.1085714285714286e-05, "loss": 0.3463, "step": 194 }, { "epoch": 0.02, "learning_rate": 1.1142857142857143e-05, "loss": 0.3828, "step": 195 }, { "epoch": 0.02, "learning_rate": 1.1200000000000001e-05, "loss": 0.3506, "step": 196 }, { "epoch": 0.02, "learning_rate": 1.1257142857142857e-05, "loss": 0.4203, "step": 197 }, { "epoch": 0.02, "learning_rate": 1.1314285714285715e-05, "loss": 0.3699, "step": 198 }, { "epoch": 0.02, "learning_rate": 1.1371428571428573e-05, "loss": 0.618, "step": 199 }, { "epoch": 0.02, "learning_rate": 1.1428571428571429e-05, "loss": 0.3959, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.1485714285714287e-05, "loss": 0.3685, "step": 201 }, { "epoch": 0.02, "learning_rate": 1.1542857142857145e-05, "loss": 0.3714, "step": 202 }, { "epoch": 0.02, "learning_rate": 1.16e-05, "loss": 0.3918, "step": 203 }, { "epoch": 0.02, "learning_rate": 1.1657142857142859e-05, "loss": 0.3181, "step": 204 }, { "epoch": 0.02, "learning_rate": 1.1714285714285716e-05, "loss": 0.3566, "step": 205 }, { "epoch": 0.02, "learning_rate": 1.177142857142857e-05, "loss": 0.33, "step": 206 }, { "epoch": 0.02, "learning_rate": 1.1828571428571429e-05, "loss": 0.3928, "step": 207 }, { "epoch": 0.02, "learning_rate": 1.1885714285714286e-05, "loss": 0.3738, "step": 208 }, { "epoch": 0.02, "learning_rate": 1.1942857142857144e-05, "loss": 0.3224, "step": 209 }, { "epoch": 0.02, "learning_rate": 1.2e-05, "loss": 0.3654, "step": 210 }, { "epoch": 0.02, "learning_rate": 1.2057142857142858e-05, "loss": 0.3367, "step": 211 }, { "epoch": 0.02, "learning_rate": 1.2114285714285716e-05, "loss": 0.3665, "step": 212 }, { "epoch": 0.02, "learning_rate": 1.2171428571428572e-05, "loss": 0.3373, "step": 213 }, { "epoch": 0.02, "learning_rate": 1.222857142857143e-05, "loss": 0.3722, "step": 214 }, { "epoch": 0.02, "learning_rate": 1.2285714285714288e-05, "loss": 0.5845, "step": 215 }, { "epoch": 0.02, "learning_rate": 1.2342857142857144e-05, "loss": 0.3638, "step": 216 }, { "epoch": 0.02, "learning_rate": 1.2400000000000002e-05, "loss": 0.3444, "step": 217 }, { "epoch": 0.02, "learning_rate": 1.245714285714286e-05, "loss": 0.4137, "step": 218 }, { "epoch": 0.02, "learning_rate": 1.2514285714285714e-05, "loss": 0.3753, "step": 219 }, { "epoch": 0.02, "learning_rate": 1.2571428571428572e-05, "loss": 0.3772, "step": 220 }, { "epoch": 0.02, "learning_rate": 1.262857142857143e-05, "loss": 0.4005, "step": 221 }, { "epoch": 0.02, "learning_rate": 1.2685714285714286e-05, "loss": 0.405, "step": 222 }, { "epoch": 0.02, "learning_rate": 1.2742857142857143e-05, "loss": 0.3678, "step": 223 }, { "epoch": 0.02, "learning_rate": 1.2800000000000001e-05, "loss": 0.3552, "step": 224 }, { "epoch": 0.02, "learning_rate": 1.2857142857142859e-05, "loss": 0.3738, "step": 225 }, { "epoch": 0.02, "learning_rate": 1.2914285714285715e-05, "loss": 0.388, "step": 226 }, { "epoch": 0.02, "learning_rate": 1.2971428571428573e-05, "loss": 0.3687, "step": 227 }, { "epoch": 0.02, "learning_rate": 1.302857142857143e-05, "loss": 0.368, "step": 228 }, { "epoch": 0.02, "learning_rate": 1.3085714285714287e-05, "loss": 0.3291, "step": 229 }, { "epoch": 0.02, "learning_rate": 1.3142857142857145e-05, "loss": 0.4016, "step": 230 }, { "epoch": 0.02, "learning_rate": 1.3200000000000002e-05, "loss": 0.3512, "step": 231 }, { "epoch": 0.02, "learning_rate": 1.3257142857142857e-05, "loss": 0.3287, "step": 232 }, { "epoch": 0.02, "learning_rate": 1.3314285714285715e-05, "loss": 0.3435, "step": 233 }, { "epoch": 0.02, "learning_rate": 1.3371428571428572e-05, "loss": 0.3379, "step": 234 }, { "epoch": 0.02, "learning_rate": 1.3428571428571429e-05, "loss": 0.3394, "step": 235 }, { "epoch": 0.02, "learning_rate": 1.3485714285714286e-05, "loss": 0.3784, "step": 236 }, { "epoch": 0.02, "learning_rate": 1.3542857142857144e-05, "loss": 0.3846, "step": 237 }, { "epoch": 0.02, "learning_rate": 1.3600000000000002e-05, "loss": 0.3494, "step": 238 }, { "epoch": 0.02, "learning_rate": 1.3657142857142858e-05, "loss": 0.3472, "step": 239 }, { "epoch": 0.02, "learning_rate": 1.3714285714285716e-05, "loss": 0.3765, "step": 240 }, { "epoch": 0.02, "learning_rate": 1.3771428571428574e-05, "loss": 0.431, "step": 241 }, { "epoch": 0.02, "learning_rate": 1.382857142857143e-05, "loss": 0.3636, "step": 242 }, { "epoch": 0.02, "learning_rate": 1.3885714285714288e-05, "loss": 0.3846, "step": 243 }, { "epoch": 0.02, "learning_rate": 1.3942857142857145e-05, "loss": 0.3311, "step": 244 }, { "epoch": 0.02, "learning_rate": 1.4e-05, "loss": 0.6819, "step": 245 }, { "epoch": 0.02, "learning_rate": 1.4057142857142858e-05, "loss": 0.3956, "step": 246 }, { "epoch": 0.02, "learning_rate": 1.4114285714285715e-05, "loss": 0.4042, "step": 247 }, { "epoch": 0.02, "learning_rate": 1.4171428571428572e-05, "loss": 0.5765, "step": 248 }, { "epoch": 0.02, "learning_rate": 1.422857142857143e-05, "loss": 0.3557, "step": 249 }, { "epoch": 0.02, "learning_rate": 1.4285714285714287e-05, "loss": 0.3547, "step": 250 }, { "epoch": 0.02, "learning_rate": 1.4342857142857145e-05, "loss": 0.3671, "step": 251 }, { "epoch": 0.02, "learning_rate": 1.4400000000000001e-05, "loss": 0.3599, "step": 252 }, { "epoch": 0.02, "learning_rate": 1.4457142857142859e-05, "loss": 0.3845, "step": 253 }, { "epoch": 0.02, "learning_rate": 1.4514285714285717e-05, "loss": 0.3499, "step": 254 }, { "epoch": 0.02, "learning_rate": 1.4571428571428573e-05, "loss": 0.3463, "step": 255 }, { "epoch": 0.02, "learning_rate": 1.462857142857143e-05, "loss": 0.4027, "step": 256 }, { "epoch": 0.02, "learning_rate": 1.4685714285714288e-05, "loss": 0.3233, "step": 257 }, { "epoch": 0.02, "learning_rate": 1.4742857142857143e-05, "loss": 0.3505, "step": 258 }, { "epoch": 0.02, "learning_rate": 1.48e-05, "loss": 0.3649, "step": 259 }, { "epoch": 0.02, "learning_rate": 1.4857142857142858e-05, "loss": 0.3671, "step": 260 }, { "epoch": 0.02, "learning_rate": 1.4914285714285715e-05, "loss": 0.3244, "step": 261 }, { "epoch": 0.02, "learning_rate": 1.4971428571428572e-05, "loss": 0.3764, "step": 262 }, { "epoch": 0.02, "learning_rate": 1.502857142857143e-05, "loss": 0.4233, "step": 263 }, { "epoch": 0.02, "learning_rate": 1.5085714285714288e-05, "loss": 0.3888, "step": 264 }, { "epoch": 0.02, "learning_rate": 1.5142857142857144e-05, "loss": 0.3811, "step": 265 }, { "epoch": 0.02, "learning_rate": 1.5200000000000002e-05, "loss": 0.3345, "step": 266 }, { "epoch": 0.02, "learning_rate": 1.525714285714286e-05, "loss": 0.6316, "step": 267 }, { "epoch": 0.02, "learning_rate": 1.5314285714285716e-05, "loss": 0.3324, "step": 268 }, { "epoch": 0.02, "learning_rate": 1.5371428571428572e-05, "loss": 0.3956, "step": 269 }, { "epoch": 0.02, "learning_rate": 1.542857142857143e-05, "loss": 0.4175, "step": 270 }, { "epoch": 0.02, "learning_rate": 1.5485714285714287e-05, "loss": 0.3496, "step": 271 }, { "epoch": 0.02, "learning_rate": 1.5542857142857144e-05, "loss": 0.3684, "step": 272 }, { "epoch": 0.02, "learning_rate": 1.5600000000000003e-05, "loss": 0.3895, "step": 273 }, { "epoch": 0.02, "learning_rate": 1.5657142857142856e-05, "loss": 0.3804, "step": 274 }, { "epoch": 0.02, "learning_rate": 1.5714285714285715e-05, "loss": 0.3207, "step": 275 }, { "epoch": 0.02, "learning_rate": 1.577142857142857e-05, "loss": 0.3481, "step": 276 }, { "epoch": 0.02, "learning_rate": 1.582857142857143e-05, "loss": 0.3585, "step": 277 }, { "epoch": 0.02, "learning_rate": 1.5885714285714287e-05, "loss": 0.382, "step": 278 }, { "epoch": 0.02, "learning_rate": 1.5942857142857143e-05, "loss": 0.3727, "step": 279 }, { "epoch": 0.02, "learning_rate": 1.6000000000000003e-05, "loss": 0.4006, "step": 280 }, { "epoch": 0.02, "learning_rate": 1.605714285714286e-05, "loss": 0.3492, "step": 281 }, { "epoch": 0.02, "learning_rate": 1.6114285714285715e-05, "loss": 0.38, "step": 282 }, { "epoch": 0.02, "learning_rate": 1.6171428571428574e-05, "loss": 0.3618, "step": 283 }, { "epoch": 0.02, "learning_rate": 1.622857142857143e-05, "loss": 0.3693, "step": 284 }, { "epoch": 0.02, "learning_rate": 1.6285714285714287e-05, "loss": 0.6064, "step": 285 }, { "epoch": 0.02, "learning_rate": 1.6342857142857146e-05, "loss": 0.3769, "step": 286 }, { "epoch": 0.02, "learning_rate": 1.64e-05, "loss": 0.4042, "step": 287 }, { "epoch": 0.02, "learning_rate": 1.645714285714286e-05, "loss": 0.3466, "step": 288 }, { "epoch": 0.02, "learning_rate": 1.6514285714285714e-05, "loss": 0.3666, "step": 289 }, { "epoch": 0.02, "learning_rate": 1.6571428571428574e-05, "loss": 0.3208, "step": 290 }, { "epoch": 0.02, "learning_rate": 1.662857142857143e-05, "loss": 0.3625, "step": 291 }, { "epoch": 0.03, "learning_rate": 1.6685714285714286e-05, "loss": 0.3184, "step": 292 }, { "epoch": 0.03, "learning_rate": 1.6742857142857146e-05, "loss": 0.3839, "step": 293 }, { "epoch": 0.03, "learning_rate": 1.6800000000000002e-05, "loss": 0.3312, "step": 294 }, { "epoch": 0.03, "learning_rate": 1.6857142857142858e-05, "loss": 0.3323, "step": 295 }, { "epoch": 0.03, "learning_rate": 1.6914285714285717e-05, "loss": 0.3436, "step": 296 }, { "epoch": 0.03, "learning_rate": 1.6971428571428574e-05, "loss": 0.345, "step": 297 }, { "epoch": 0.03, "learning_rate": 1.702857142857143e-05, "loss": 0.3371, "step": 298 }, { "epoch": 0.03, "learning_rate": 1.708571428571429e-05, "loss": 0.3867, "step": 299 }, { "epoch": 0.03, "learning_rate": 1.7142857142857142e-05, "loss": 0.3937, "step": 300 }, { "epoch": 0.03, "learning_rate": 1.72e-05, "loss": 0.3862, "step": 301 }, { "epoch": 0.03, "learning_rate": 1.7257142857142857e-05, "loss": 0.366, "step": 302 }, { "epoch": 0.03, "learning_rate": 1.7314285714285717e-05, "loss": 0.397, "step": 303 }, { "epoch": 0.03, "learning_rate": 1.7371428571428573e-05, "loss": 0.3698, "step": 304 }, { "epoch": 0.03, "learning_rate": 1.742857142857143e-05, "loss": 0.3694, "step": 305 }, { "epoch": 0.03, "learning_rate": 1.748571428571429e-05, "loss": 0.5981, "step": 306 }, { "epoch": 0.03, "learning_rate": 1.7542857142857145e-05, "loss": 0.3619, "step": 307 }, { "epoch": 0.03, "learning_rate": 1.76e-05, "loss": 0.326, "step": 308 }, { "epoch": 0.03, "learning_rate": 1.765714285714286e-05, "loss": 0.3652, "step": 309 }, { "epoch": 0.03, "learning_rate": 1.7714285714285717e-05, "loss": 0.3784, "step": 310 }, { "epoch": 0.03, "learning_rate": 1.7771428571428573e-05, "loss": 0.3403, "step": 311 }, { "epoch": 0.03, "learning_rate": 1.7828571428571432e-05, "loss": 0.3816, "step": 312 }, { "epoch": 0.03, "learning_rate": 1.7885714285714285e-05, "loss": 0.3508, "step": 313 }, { "epoch": 0.03, "learning_rate": 1.7942857142857144e-05, "loss": 0.389, "step": 314 }, { "epoch": 0.03, "learning_rate": 1.8e-05, "loss": 0.3719, "step": 315 }, { "epoch": 0.03, "learning_rate": 1.8057142857142857e-05, "loss": 0.3411, "step": 316 }, { "epoch": 0.03, "learning_rate": 1.8114285714285716e-05, "loss": 0.3459, "step": 317 }, { "epoch": 0.03, "learning_rate": 1.8171428571428572e-05, "loss": 0.606, "step": 318 }, { "epoch": 0.03, "learning_rate": 1.822857142857143e-05, "loss": 0.3605, "step": 319 }, { "epoch": 0.03, "learning_rate": 1.8285714285714288e-05, "loss": 0.3584, "step": 320 }, { "epoch": 0.03, "learning_rate": 1.8342857142857144e-05, "loss": 0.3464, "step": 321 }, { "epoch": 0.03, "learning_rate": 1.8400000000000003e-05, "loss": 0.3539, "step": 322 }, { "epoch": 0.03, "learning_rate": 1.845714285714286e-05, "loss": 0.3964, "step": 323 }, { "epoch": 0.03, "learning_rate": 1.8514285714285716e-05, "loss": 0.3508, "step": 324 }, { "epoch": 0.03, "learning_rate": 1.8571428571428575e-05, "loss": 0.3266, "step": 325 }, { "epoch": 0.03, "learning_rate": 1.8628571428571428e-05, "loss": 0.3758, "step": 326 }, { "epoch": 0.03, "learning_rate": 1.8685714285714287e-05, "loss": 0.3062, "step": 327 }, { "epoch": 0.03, "learning_rate": 1.8742857142857143e-05, "loss": 0.4012, "step": 328 }, { "epoch": 0.03, "learning_rate": 1.88e-05, "loss": 0.3679, "step": 329 }, { "epoch": 0.03, "learning_rate": 1.885714285714286e-05, "loss": 0.337, "step": 330 }, { "epoch": 0.03, "learning_rate": 1.8914285714285715e-05, "loss": 0.4494, "step": 331 }, { "epoch": 0.03, "learning_rate": 1.8971428571428575e-05, "loss": 0.3542, "step": 332 }, { "epoch": 0.03, "learning_rate": 1.902857142857143e-05, "loss": 0.3492, "step": 333 }, { "epoch": 0.03, "learning_rate": 1.9085714285714287e-05, "loss": 0.3257, "step": 334 }, { "epoch": 0.03, "learning_rate": 1.9142857142857146e-05, "loss": 0.3489, "step": 335 }, { "epoch": 0.03, "learning_rate": 1.9200000000000003e-05, "loss": 0.317, "step": 336 }, { "epoch": 0.03, "learning_rate": 1.925714285714286e-05, "loss": 0.3724, "step": 337 }, { "epoch": 0.03, "learning_rate": 1.9314285714285718e-05, "loss": 0.3696, "step": 338 }, { "epoch": 0.03, "learning_rate": 1.937142857142857e-05, "loss": 0.3848, "step": 339 }, { "epoch": 0.03, "learning_rate": 1.942857142857143e-05, "loss": 0.3625, "step": 340 }, { "epoch": 0.03, "learning_rate": 1.9485714285714286e-05, "loss": 0.2758, "step": 341 }, { "epoch": 0.03, "learning_rate": 1.9542857142857143e-05, "loss": 0.4115, "step": 342 }, { "epoch": 0.03, "learning_rate": 1.9600000000000002e-05, "loss": 0.3625, "step": 343 }, { "epoch": 0.03, "learning_rate": 1.9657142857142858e-05, "loss": 0.3627, "step": 344 }, { "epoch": 0.03, "learning_rate": 1.9714285714285718e-05, "loss": 0.3516, "step": 345 }, { "epoch": 0.03, "learning_rate": 1.9771428571428574e-05, "loss": 0.2944, "step": 346 }, { "epoch": 0.03, "learning_rate": 1.982857142857143e-05, "loss": 0.41, "step": 347 }, { "epoch": 0.03, "learning_rate": 1.988571428571429e-05, "loss": 0.376, "step": 348 }, { "epoch": 0.03, "learning_rate": 1.9942857142857142e-05, "loss": 0.3127, "step": 349 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.3829, "step": 350 }, { "epoch": 0.03, "learning_rate": 1.9999999614624707e-05, "loss": 0.3218, "step": 351 }, { "epoch": 0.03, "learning_rate": 1.9999998458498852e-05, "loss": 0.3351, "step": 352 }, { "epoch": 0.03, "learning_rate": 1.999999653162253e-05, "loss": 0.3874, "step": 353 }, { "epoch": 0.03, "learning_rate": 1.9999993833995886e-05, "loss": 0.361, "step": 354 }, { "epoch": 0.03, "learning_rate": 1.999999036561913e-05, "loss": 0.3157, "step": 355 }, { "epoch": 0.03, "learning_rate": 1.9999986126492526e-05, "loss": 0.3591, "step": 356 }, { "epoch": 0.03, "learning_rate": 1.9999981116616402e-05, "loss": 0.3319, "step": 357 }, { "epoch": 0.03, "learning_rate": 1.999997533599115e-05, "loss": 0.3223, "step": 358 }, { "epoch": 0.03, "learning_rate": 1.9999968784617204e-05, "loss": 0.323, "step": 359 }, { "epoch": 0.03, "learning_rate": 1.9999961462495078e-05, "loss": 0.3703, "step": 360 }, { "epoch": 0.03, "learning_rate": 1.9999953369625334e-05, "loss": 0.3814, "step": 361 }, { "epoch": 0.03, "learning_rate": 1.9999944506008594e-05, "loss": 0.3477, "step": 362 }, { "epoch": 0.03, "learning_rate": 1.9999934871645544e-05, "loss": 0.3254, "step": 363 }, { "epoch": 0.03, "learning_rate": 1.9999924466536925e-05, "loss": 0.4109, "step": 364 }, { "epoch": 0.03, "learning_rate": 1.999991329068354e-05, "loss": 0.3309, "step": 365 }, { "epoch": 0.03, "learning_rate": 1.999990134408625e-05, "loss": 0.3484, "step": 366 }, { "epoch": 0.03, "learning_rate": 1.9999888626745975e-05, "loss": 0.3314, "step": 367 }, { "epoch": 0.03, "learning_rate": 1.9999875138663694e-05, "loss": 0.3262, "step": 368 }, { "epoch": 0.03, "learning_rate": 1.999986087984045e-05, "loss": 0.3336, "step": 369 }, { "epoch": 0.03, "learning_rate": 1.999984585027734e-05, "loss": 0.3501, "step": 370 }, { "epoch": 0.03, "learning_rate": 1.9999830049975523e-05, "loss": 0.3049, "step": 371 }, { "epoch": 0.03, "learning_rate": 1.9999813478936213e-05, "loss": 0.3162, "step": 372 }, { "epoch": 0.03, "learning_rate": 1.9999796137160693e-05, "loss": 0.359, "step": 373 }, { "epoch": 0.03, "learning_rate": 1.9999778024650296e-05, "loss": 0.3531, "step": 374 }, { "epoch": 0.03, "learning_rate": 1.999975914140642e-05, "loss": 0.6349, "step": 375 }, { "epoch": 0.03, "learning_rate": 1.999973948743052e-05, "loss": 0.3192, "step": 376 }, { "epoch": 0.03, "learning_rate": 1.999971906272411e-05, "loss": 0.3427, "step": 377 }, { "epoch": 0.03, "learning_rate": 1.9999697867288764e-05, "loss": 0.3779, "step": 378 }, { "epoch": 0.03, "learning_rate": 1.9999675901126117e-05, "loss": 0.326, "step": 379 }, { "epoch": 0.03, "learning_rate": 1.999965316423786e-05, "loss": 0.3265, "step": 380 }, { "epoch": 0.03, "learning_rate": 1.9999629656625748e-05, "loss": 0.3572, "step": 381 }, { "epoch": 0.03, "learning_rate": 1.9999605378291593e-05, "loss": 0.4268, "step": 382 }, { "epoch": 0.03, "learning_rate": 1.9999580329237264e-05, "loss": 0.3033, "step": 383 }, { "epoch": 0.03, "learning_rate": 1.9999554509464695e-05, "loss": 0.3462, "step": 384 }, { "epoch": 0.03, "learning_rate": 1.999952791897587e-05, "loss": 0.3683, "step": 385 }, { "epoch": 0.03, "learning_rate": 1.9999500557772843e-05, "loss": 0.3035, "step": 386 }, { "epoch": 0.03, "learning_rate": 1.999947242585772e-05, "loss": 0.3604, "step": 387 }, { "epoch": 0.03, "learning_rate": 1.9999443523232676e-05, "loss": 0.3183, "step": 388 }, { "epoch": 0.03, "learning_rate": 1.9999413849899933e-05, "loss": 0.3355, "step": 389 }, { "epoch": 0.03, "learning_rate": 1.999938340586178e-05, "loss": 0.3467, "step": 390 }, { "epoch": 0.03, "learning_rate": 1.9999352191120556e-05, "loss": 0.3902, "step": 391 }, { "epoch": 0.03, "learning_rate": 1.999932020567868e-05, "loss": 0.3678, "step": 392 }, { "epoch": 0.03, "learning_rate": 1.9999287449538608e-05, "loss": 0.3354, "step": 393 }, { "epoch": 0.03, "learning_rate": 1.9999253922702868e-05, "loss": 0.3428, "step": 394 }, { "epoch": 0.03, "learning_rate": 1.999921962517404e-05, "loss": 0.3271, "step": 395 }, { "epoch": 0.03, "learning_rate": 1.9999184556954777e-05, "loss": 0.3287, "step": 396 }, { "epoch": 0.03, "learning_rate": 1.999914871804777e-05, "loss": 0.3804, "step": 397 }, { "epoch": 0.03, "learning_rate": 1.999911210845579e-05, "loss": 0.3475, "step": 398 }, { "epoch": 0.03, "learning_rate": 1.9999074728181657e-05, "loss": 0.3671, "step": 399 }, { "epoch": 0.03, "learning_rate": 1.9999036577228245e-05, "loss": 0.2861, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.9998997655598505e-05, "loss": 0.3315, "step": 401 }, { "epoch": 0.03, "learning_rate": 1.9998957963295434e-05, "loss": 0.3436, "step": 402 }, { "epoch": 0.03, "learning_rate": 1.9998917500322086e-05, "loss": 0.4235, "step": 403 }, { "epoch": 0.03, "learning_rate": 1.9998876266681585e-05, "loss": 0.3639, "step": 404 }, { "epoch": 0.03, "learning_rate": 1.9998834262377107e-05, "loss": 0.3844, "step": 405 }, { "epoch": 0.03, "learning_rate": 1.9998791487411887e-05, "loss": 0.3854, "step": 406 }, { "epoch": 0.03, "learning_rate": 1.999874794178923e-05, "loss": 0.3645, "step": 407 }, { "epoch": 0.03, "learning_rate": 1.999870362551248e-05, "loss": 0.3372, "step": 408 }, { "epoch": 0.04, "learning_rate": 1.9998658538585067e-05, "loss": 0.3628, "step": 409 }, { "epoch": 0.04, "learning_rate": 1.9998612681010452e-05, "loss": 0.3358, "step": 410 }, { "epoch": 0.04, "learning_rate": 1.9998566052792178e-05, "loss": 0.38, "step": 411 }, { "epoch": 0.04, "learning_rate": 1.999851865393384e-05, "loss": 0.3282, "step": 412 }, { "epoch": 0.04, "learning_rate": 1.9998470484439084e-05, "loss": 0.3574, "step": 413 }, { "epoch": 0.04, "learning_rate": 1.999842154431163e-05, "loss": 0.381, "step": 414 }, { "epoch": 0.04, "learning_rate": 1.999837183355525e-05, "loss": 0.3918, "step": 415 }, { "epoch": 0.04, "learning_rate": 1.9998321352173767e-05, "loss": 0.343, "step": 416 }, { "epoch": 0.04, "learning_rate": 1.999827010017108e-05, "loss": 0.3431, "step": 417 }, { "epoch": 0.04, "learning_rate": 1.9998218077551135e-05, "loss": 0.3518, "step": 418 }, { "epoch": 0.04, "learning_rate": 1.9998165284317944e-05, "loss": 0.3424, "step": 419 }, { "epoch": 0.04, "learning_rate": 1.9998111720475574e-05, "loss": 0.3839, "step": 420 }, { "epoch": 0.04, "learning_rate": 1.9998057386028157e-05, "loss": 0.3334, "step": 421 }, { "epoch": 0.04, "learning_rate": 1.999800228097988e-05, "loss": 0.3163, "step": 422 }, { "epoch": 0.04, "learning_rate": 1.9997946405334986e-05, "loss": 0.3484, "step": 423 }, { "epoch": 0.04, "learning_rate": 1.9997889759097785e-05, "loss": 0.3439, "step": 424 }, { "epoch": 0.04, "learning_rate": 1.9997832342272642e-05, "loss": 0.3445, "step": 425 }, { "epoch": 0.04, "learning_rate": 1.999777415486398e-05, "loss": 0.3461, "step": 426 }, { "epoch": 0.04, "learning_rate": 1.999771519687629e-05, "loss": 0.3525, "step": 427 }, { "epoch": 0.04, "learning_rate": 1.9997655468314115e-05, "loss": 0.3342, "step": 428 }, { "epoch": 0.04, "learning_rate": 1.9997594969182054e-05, "loss": 0.3387, "step": 429 }, { "epoch": 0.04, "learning_rate": 1.999753369948477e-05, "loss": 0.6073, "step": 430 }, { "epoch": 0.04, "learning_rate": 1.999747165922699e-05, "loss": 0.3946, "step": 431 }, { "epoch": 0.04, "learning_rate": 1.9997408848413494e-05, "loss": 0.3114, "step": 432 }, { "epoch": 0.04, "learning_rate": 1.999734526704912e-05, "loss": 0.3873, "step": 433 }, { "epoch": 0.04, "learning_rate": 1.999728091513877e-05, "loss": 0.3724, "step": 434 }, { "epoch": 0.04, "learning_rate": 1.999721579268741e-05, "loss": 0.3638, "step": 435 }, { "epoch": 0.04, "learning_rate": 1.9997149899700056e-05, "loss": 0.3469, "step": 436 }, { "epoch": 0.04, "learning_rate": 1.999708323618178e-05, "loss": 0.3553, "step": 437 }, { "epoch": 0.04, "learning_rate": 1.9997015802137727e-05, "loss": 0.3351, "step": 438 }, { "epoch": 0.04, "learning_rate": 1.999694759757309e-05, "loss": 0.3518, "step": 439 }, { "epoch": 0.04, "learning_rate": 1.9996878622493134e-05, "loss": 0.3533, "step": 440 }, { "epoch": 0.04, "learning_rate": 1.9996808876903168e-05, "loss": 0.3652, "step": 441 }, { "epoch": 0.04, "learning_rate": 1.9996738360808566e-05, "loss": 0.3635, "step": 442 }, { "epoch": 0.04, "learning_rate": 1.9996667074214768e-05, "loss": 0.309, "step": 443 }, { "epoch": 0.04, "learning_rate": 1.9996595017127268e-05, "loss": 0.3436, "step": 444 }, { "epoch": 0.04, "learning_rate": 1.999652218955162e-05, "loss": 0.4006, "step": 445 }, { "epoch": 0.04, "learning_rate": 1.9996448591493433e-05, "loss": 0.3715, "step": 446 }, { "epoch": 0.04, "learning_rate": 1.9996374222958383e-05, "loss": 0.2994, "step": 447 }, { "epoch": 0.04, "learning_rate": 1.99962990839522e-05, "loss": 0.3869, "step": 448 }, { "epoch": 0.04, "learning_rate": 1.999622317448068e-05, "loss": 0.3598, "step": 449 }, { "epoch": 0.04, "learning_rate": 1.9996146494549672e-05, "loss": 0.3042, "step": 450 }, { "epoch": 0.04, "learning_rate": 1.9996069044165082e-05, "loss": 0.3992, "step": 451 }, { "epoch": 0.04, "learning_rate": 1.999599082333288e-05, "loss": 0.3358, "step": 452 }, { "epoch": 0.04, "learning_rate": 1.99959118320591e-05, "loss": 0.3698, "step": 453 }, { "epoch": 0.04, "learning_rate": 1.9995832070349827e-05, "loss": 0.3474, "step": 454 }, { "epoch": 0.04, "learning_rate": 1.9995751538211205e-05, "loss": 0.3728, "step": 455 }, { "epoch": 0.04, "learning_rate": 1.999567023564945e-05, "loss": 0.3209, "step": 456 }, { "epoch": 0.04, "learning_rate": 1.999558816267082e-05, "loss": 0.6414, "step": 457 }, { "epoch": 0.04, "learning_rate": 1.9995505319281645e-05, "loss": 0.3498, "step": 458 }, { "epoch": 0.04, "learning_rate": 1.9995421705488313e-05, "loss": 0.3851, "step": 459 }, { "epoch": 0.04, "learning_rate": 1.999533732129726e-05, "loss": 0.3364, "step": 460 }, { "epoch": 0.04, "learning_rate": 1.9995252166714993e-05, "loss": 0.355, "step": 461 }, { "epoch": 0.04, "learning_rate": 1.9995166241748084e-05, "loss": 0.3567, "step": 462 }, { "epoch": 0.04, "learning_rate": 1.9995079546403143e-05, "loss": 0.3937, "step": 463 }, { "epoch": 0.04, "learning_rate": 1.999499208068686e-05, "loss": 0.3322, "step": 464 }, { "epoch": 0.04, "learning_rate": 1.9994903844605973e-05, "loss": 0.3702, "step": 465 }, { "epoch": 0.04, "learning_rate": 1.9994814838167286e-05, "loss": 0.3237, "step": 466 }, { "epoch": 0.04, "learning_rate": 1.9994725061377653e-05, "loss": 0.3216, "step": 467 }, { "epoch": 0.04, "learning_rate": 1.9994634514244002e-05, "loss": 0.3365, "step": 468 }, { "epoch": 0.04, "learning_rate": 1.9994543196773307e-05, "loss": 0.3491, "step": 469 }, { "epoch": 0.04, "learning_rate": 1.9994451108972604e-05, "loss": 0.3711, "step": 470 }, { "epoch": 0.04, "learning_rate": 1.999435825084899e-05, "loss": 0.3419, "step": 471 }, { "epoch": 0.04, "learning_rate": 1.9994264622409636e-05, "loss": 0.3483, "step": 472 }, { "epoch": 0.04, "learning_rate": 1.999417022366174e-05, "loss": 0.609, "step": 473 }, { "epoch": 0.04, "learning_rate": 1.999407505461259e-05, "loss": 0.3381, "step": 474 }, { "epoch": 0.04, "learning_rate": 1.9993979115269517e-05, "loss": 0.3595, "step": 475 }, { "epoch": 0.04, "learning_rate": 1.9993882405639914e-05, "loss": 0.3449, "step": 476 }, { "epoch": 0.04, "learning_rate": 1.9993784925731234e-05, "loss": 0.353, "step": 477 }, { "epoch": 0.04, "learning_rate": 1.9993686675550998e-05, "loss": 0.3359, "step": 478 }, { "epoch": 0.04, "learning_rate": 1.9993587655106766e-05, "loss": 0.2803, "step": 479 }, { "epoch": 0.04, "learning_rate": 1.9993487864406185e-05, "loss": 0.3397, "step": 480 }, { "epoch": 0.04, "learning_rate": 1.9993387303456938e-05, "loss": 0.3652, "step": 481 }, { "epoch": 0.04, "learning_rate": 1.999328597226677e-05, "loss": 0.2666, "step": 482 }, { "epoch": 0.04, "learning_rate": 1.99931838708435e-05, "loss": 0.4054, "step": 483 }, { "epoch": 0.04, "learning_rate": 1.9993080999195e-05, "loss": 0.3444, "step": 484 }, { "epoch": 0.04, "learning_rate": 1.999297735732919e-05, "loss": 0.3139, "step": 485 }, { "epoch": 0.04, "learning_rate": 1.9992872945254064e-05, "loss": 0.3209, "step": 486 }, { "epoch": 0.04, "learning_rate": 1.9992767762977662e-05, "loss": 0.3729, "step": 487 }, { "epoch": 0.04, "learning_rate": 1.99926618105081e-05, "loss": 0.3699, "step": 488 }, { "epoch": 0.04, "learning_rate": 1.999255508785354e-05, "loss": 0.3394, "step": 489 }, { "epoch": 0.04, "learning_rate": 1.9992447595022214e-05, "loss": 0.3563, "step": 490 }, { "epoch": 0.04, "learning_rate": 1.9992339332022396e-05, "loss": 0.2916, "step": 491 }, { "epoch": 0.04, "learning_rate": 1.9992230298862436e-05, "loss": 0.3495, "step": 492 }, { "epoch": 0.04, "learning_rate": 1.999212049555074e-05, "loss": 0.3378, "step": 493 }, { "epoch": 0.04, "learning_rate": 1.9992009922095766e-05, "loss": 0.3357, "step": 494 }, { "epoch": 0.04, "learning_rate": 1.9991898578506043e-05, "loss": 0.3236, "step": 495 }, { "epoch": 0.04, "learning_rate": 1.9991786464790145e-05, "loss": 0.3782, "step": 496 }, { "epoch": 0.04, "learning_rate": 1.999167358095672e-05, "loss": 0.366, "step": 497 }, { "epoch": 0.04, "learning_rate": 1.9991559927014465e-05, "loss": 0.3559, "step": 498 }, { "epoch": 0.04, "learning_rate": 1.999144550297214e-05, "loss": 0.3563, "step": 499 }, { "epoch": 0.04, "learning_rate": 1.9991330308838565e-05, "loss": 0.5999, "step": 500 }, { "epoch": 0.04, "learning_rate": 1.9991214344622616e-05, "loss": 0.3724, "step": 501 }, { "epoch": 0.04, "learning_rate": 1.999109761033324e-05, "loss": 0.3094, "step": 502 }, { "epoch": 0.04, "learning_rate": 1.999098010597942e-05, "loss": 0.3472, "step": 503 }, { "epoch": 0.04, "learning_rate": 1.9990861831570224e-05, "loss": 0.3942, "step": 504 }, { "epoch": 0.04, "learning_rate": 1.9990742787114765e-05, "loss": 0.333, "step": 505 }, { "epoch": 0.04, "learning_rate": 1.9990622972622216e-05, "loss": 0.332, "step": 506 }, { "epoch": 0.04, "learning_rate": 1.9990502388101813e-05, "loss": 0.4093, "step": 507 }, { "epoch": 0.04, "learning_rate": 1.9990381033562853e-05, "loss": 0.4125, "step": 508 }, { "epoch": 0.04, "learning_rate": 1.9990258909014684e-05, "loss": 0.3181, "step": 509 }, { "epoch": 0.04, "learning_rate": 1.9990136014466722e-05, "loss": 0.3375, "step": 510 }, { "epoch": 0.04, "learning_rate": 1.999001234992844e-05, "loss": 0.3879, "step": 511 }, { "epoch": 0.04, "learning_rate": 1.9989887915409368e-05, "loss": 0.3282, "step": 512 }, { "epoch": 0.04, "learning_rate": 1.99897627109191e-05, "loss": 0.3341, "step": 513 }, { "epoch": 0.04, "learning_rate": 1.9989636736467278e-05, "loss": 0.3152, "step": 514 }, { "epoch": 0.04, "learning_rate": 1.998950999206362e-05, "loss": 0.3618, "step": 515 }, { "epoch": 0.04, "learning_rate": 1.9989382477717888e-05, "loss": 0.5778, "step": 516 }, { "epoch": 0.04, "learning_rate": 1.9989254193439915e-05, "loss": 0.4129, "step": 517 }, { "epoch": 0.04, "learning_rate": 1.998912513923959e-05, "loss": 0.3804, "step": 518 }, { "epoch": 0.04, "learning_rate": 1.9988995315126852e-05, "loss": 0.3449, "step": 519 }, { "epoch": 0.04, "learning_rate": 1.9988864721111714e-05, "loss": 0.3252, "step": 520 }, { "epoch": 0.04, "learning_rate": 1.998873335720424e-05, "loss": 0.3777, "step": 521 }, { "epoch": 0.04, "learning_rate": 1.9988601223414555e-05, "loss": 0.3055, "step": 522 }, { "epoch": 0.04, "learning_rate": 1.9988468319752846e-05, "loss": 0.3441, "step": 523 }, { "epoch": 0.04, "learning_rate": 1.998833464622935e-05, "loss": 0.3198, "step": 524 }, { "epoch": 0.05, "learning_rate": 1.998820020285437e-05, "loss": 0.3326, "step": 525 }, { "epoch": 0.05, "learning_rate": 1.998806498963828e-05, "loss": 0.5486, "step": 526 }, { "epoch": 0.05, "learning_rate": 1.9987929006591487e-05, "loss": 0.3384, "step": 527 }, { "epoch": 0.05, "learning_rate": 1.9987792253724477e-05, "loss": 0.3862, "step": 528 }, { "epoch": 0.05, "learning_rate": 1.9987654731047793e-05, "loss": 0.3315, "step": 529 }, { "epoch": 0.05, "learning_rate": 1.9987516438572035e-05, "loss": 0.303, "step": 530 }, { "epoch": 0.05, "learning_rate": 1.9987377376307856e-05, "loss": 0.363, "step": 531 }, { "epoch": 0.05, "learning_rate": 1.9987237544265982e-05, "loss": 0.3525, "step": 532 }, { "epoch": 0.05, "learning_rate": 1.998709694245718e-05, "loss": 0.3682, "step": 533 }, { "epoch": 0.05, "learning_rate": 1.9986955570892302e-05, "loss": 0.3137, "step": 534 }, { "epoch": 0.05, "learning_rate": 1.998681342958223e-05, "loss": 0.3818, "step": 535 }, { "epoch": 0.05, "learning_rate": 1.9986670518537928e-05, "loss": 0.3285, "step": 536 }, { "epoch": 0.05, "learning_rate": 1.9986526837770405e-05, "loss": 0.3517, "step": 537 }, { "epoch": 0.05, "learning_rate": 1.9986382387290738e-05, "loss": 0.3886, "step": 538 }, { "epoch": 0.05, "learning_rate": 1.9986237167110066e-05, "loss": 0.3514, "step": 539 }, { "epoch": 0.05, "learning_rate": 1.998609117723957e-05, "loss": 0.3494, "step": 540 }, { "epoch": 0.05, "learning_rate": 1.998594441769051e-05, "loss": 0.6116, "step": 541 }, { "epoch": 0.05, "learning_rate": 1.99857968884742e-05, "loss": 0.3469, "step": 542 }, { "epoch": 0.05, "learning_rate": 1.9985648589602005e-05, "loss": 0.3199, "step": 543 }, { "epoch": 0.05, "learning_rate": 1.998549952108536e-05, "loss": 0.3323, "step": 544 }, { "epoch": 0.05, "learning_rate": 1.9985349682935747e-05, "loss": 0.3555, "step": 545 }, { "epoch": 0.05, "learning_rate": 1.998519907516472e-05, "loss": 0.38, "step": 546 }, { "epoch": 0.05, "learning_rate": 1.9985047697783886e-05, "loss": 0.3052, "step": 547 }, { "epoch": 0.05, "learning_rate": 1.9984895550804918e-05, "loss": 0.2993, "step": 548 }, { "epoch": 0.05, "learning_rate": 1.9984742634239535e-05, "loss": 0.3571, "step": 549 }, { "epoch": 0.05, "learning_rate": 1.9984588948099528e-05, "loss": 0.3547, "step": 550 }, { "epoch": 0.05, "learning_rate": 1.9984434492396736e-05, "loss": 0.3859, "step": 551 }, { "epoch": 0.05, "learning_rate": 1.9984279267143072e-05, "loss": 0.3196, "step": 552 }, { "epoch": 0.05, "learning_rate": 1.998412327235049e-05, "loss": 0.323, "step": 553 }, { "epoch": 0.05, "learning_rate": 1.9983966508031026e-05, "loss": 0.3586, "step": 554 }, { "epoch": 0.05, "learning_rate": 1.9983808974196752e-05, "loss": 0.3491, "step": 555 }, { "epoch": 0.05, "learning_rate": 1.9983650670859814e-05, "loss": 0.3204, "step": 556 }, { "epoch": 0.05, "learning_rate": 1.998349159803241e-05, "loss": 0.3123, "step": 557 }, { "epoch": 0.05, "learning_rate": 1.998333175572681e-05, "loss": 0.3387, "step": 558 }, { "epoch": 0.05, "learning_rate": 1.9983171143955326e-05, "loss": 0.3529, "step": 559 }, { "epoch": 0.05, "learning_rate": 1.9983009762730336e-05, "loss": 0.3638, "step": 560 }, { "epoch": 0.05, "learning_rate": 1.998284761206428e-05, "loss": 0.3256, "step": 561 }, { "epoch": 0.05, "learning_rate": 1.998268469196966e-05, "loss": 0.3117, "step": 562 }, { "epoch": 0.05, "learning_rate": 1.9982521002459026e-05, "loss": 0.3577, "step": 563 }, { "epoch": 0.05, "learning_rate": 1.9982356543545003e-05, "loss": 0.3594, "step": 564 }, { "epoch": 0.05, "learning_rate": 1.9982191315240257e-05, "loss": 0.3307, "step": 565 }, { "epoch": 0.05, "learning_rate": 1.998202531755753e-05, "loss": 0.3172, "step": 566 }, { "epoch": 0.05, "learning_rate": 1.9981858550509617e-05, "loss": 0.3244, "step": 567 }, { "epoch": 0.05, "learning_rate": 1.9981691014109364e-05, "loss": 0.355, "step": 568 }, { "epoch": 0.05, "learning_rate": 1.998152270836969e-05, "loss": 0.3032, "step": 569 }, { "epoch": 0.05, "learning_rate": 1.998135363330357e-05, "loss": 0.3289, "step": 570 }, { "epoch": 0.05, "learning_rate": 1.9981183788924025e-05, "loss": 0.303, "step": 571 }, { "epoch": 0.05, "learning_rate": 1.9981013175244154e-05, "loss": 0.3134, "step": 572 }, { "epoch": 0.05, "learning_rate": 1.9980841792277104e-05, "loss": 0.3936, "step": 573 }, { "epoch": 0.05, "learning_rate": 1.998066964003609e-05, "loss": 0.4129, "step": 574 }, { "epoch": 0.05, "learning_rate": 1.9980496718534375e-05, "loss": 0.3412, "step": 575 }, { "epoch": 0.05, "learning_rate": 1.9980323027785285e-05, "loss": 0.3352, "step": 576 }, { "epoch": 0.05, "learning_rate": 1.998014856780221e-05, "loss": 0.3317, "step": 577 }, { "epoch": 0.05, "learning_rate": 1.9979973338598603e-05, "loss": 0.3583, "step": 578 }, { "epoch": 0.05, "learning_rate": 1.9979797340187957e-05, "loss": 0.3521, "step": 579 }, { "epoch": 0.05, "learning_rate": 1.9979620572583846e-05, "loss": 0.3589, "step": 580 }, { "epoch": 0.05, "learning_rate": 1.9979443035799893e-05, "loss": 0.3391, "step": 581 }, { "epoch": 0.05, "learning_rate": 1.9979264729849776e-05, "loss": 0.3552, "step": 582 }, { "epoch": 0.05, "learning_rate": 1.9979085654747248e-05, "loss": 0.3248, "step": 583 }, { "epoch": 0.05, "learning_rate": 1.9978905810506105e-05, "loss": 0.3448, "step": 584 }, { "epoch": 0.05, "learning_rate": 1.9978725197140204e-05, "loss": 0.3793, "step": 585 }, { "epoch": 0.05, "learning_rate": 1.9978543814663478e-05, "loss": 0.2988, "step": 586 }, { "epoch": 0.05, "learning_rate": 1.99783616630899e-05, "loss": 0.3255, "step": 587 }, { "epoch": 0.05, "learning_rate": 1.9978178742433504e-05, "loss": 0.3887, "step": 588 }, { "epoch": 0.05, "learning_rate": 1.9977995052708398e-05, "loss": 0.3858, "step": 589 }, { "epoch": 0.05, "learning_rate": 1.9977810593928736e-05, "loss": 0.326, "step": 590 }, { "epoch": 0.05, "learning_rate": 1.9977625366108733e-05, "loss": 0.3425, "step": 591 }, { "epoch": 0.05, "learning_rate": 1.9977439369262668e-05, "loss": 0.3419, "step": 592 }, { "epoch": 0.05, "learning_rate": 1.997725260340488e-05, "loss": 0.3846, "step": 593 }, { "epoch": 0.05, "learning_rate": 1.9977065068549756e-05, "loss": 0.3597, "step": 594 }, { "epoch": 0.05, "learning_rate": 1.9976876764711756e-05, "loss": 0.3577, "step": 595 }, { "epoch": 0.05, "learning_rate": 1.9976687691905394e-05, "loss": 0.4155, "step": 596 }, { "epoch": 0.05, "learning_rate": 1.9976497850145237e-05, "loss": 0.337, "step": 597 }, { "epoch": 0.05, "learning_rate": 1.9976307239445924e-05, "loss": 0.3729, "step": 598 }, { "epoch": 0.05, "learning_rate": 1.9976115859822146e-05, "loss": 0.3073, "step": 599 }, { "epoch": 0.05, "learning_rate": 1.9975923711288646e-05, "loss": 0.3835, "step": 600 }, { "epoch": 0.05, "learning_rate": 1.9975730793860242e-05, "loss": 0.343, "step": 601 }, { "epoch": 0.05, "learning_rate": 1.99755371075518e-05, "loss": 0.3936, "step": 602 }, { "epoch": 0.05, "learning_rate": 1.9975342652378247e-05, "loss": 0.3302, "step": 603 }, { "epoch": 0.05, "learning_rate": 1.997514742835457e-05, "loss": 0.3183, "step": 604 }, { "epoch": 0.05, "learning_rate": 1.997495143549582e-05, "loss": 0.3001, "step": 605 }, { "epoch": 0.05, "learning_rate": 1.99747546738171e-05, "loss": 0.3329, "step": 606 }, { "epoch": 0.05, "learning_rate": 1.997455714333358e-05, "loss": 0.6206, "step": 607 }, { "epoch": 0.05, "learning_rate": 1.9974358844060476e-05, "loss": 0.3369, "step": 608 }, { "epoch": 0.05, "learning_rate": 1.997415977601308e-05, "loss": 0.3627, "step": 609 }, { "epoch": 0.05, "learning_rate": 1.9973959939206734e-05, "loss": 0.276, "step": 610 }, { "epoch": 0.05, "learning_rate": 1.9973759333656835e-05, "loss": 0.3221, "step": 611 }, { "epoch": 0.05, "learning_rate": 1.997355795937885e-05, "loss": 0.3703, "step": 612 }, { "epoch": 0.05, "learning_rate": 1.99733558163883e-05, "loss": 0.3236, "step": 613 }, { "epoch": 0.05, "learning_rate": 1.9973152904700762e-05, "loss": 0.3277, "step": 614 }, { "epoch": 0.05, "learning_rate": 1.9972949224331876e-05, "loss": 0.3821, "step": 615 }, { "epoch": 0.05, "learning_rate": 1.997274477529734e-05, "loss": 0.3165, "step": 616 }, { "epoch": 0.05, "learning_rate": 1.9972539557612918e-05, "loss": 0.351, "step": 617 }, { "epoch": 0.05, "learning_rate": 1.9972333571294418e-05, "loss": 0.333, "step": 618 }, { "epoch": 0.05, "learning_rate": 1.9972126816357723e-05, "loss": 0.3143, "step": 619 }, { "epoch": 0.05, "learning_rate": 1.9971919292818768e-05, "loss": 0.3568, "step": 620 }, { "epoch": 0.05, "learning_rate": 1.9971711000693544e-05, "loss": 0.3401, "step": 621 }, { "epoch": 0.05, "learning_rate": 1.997150193999811e-05, "loss": 0.3361, "step": 622 }, { "epoch": 0.05, "learning_rate": 1.997129211074858e-05, "loss": 0.3162, "step": 623 }, { "epoch": 0.05, "learning_rate": 1.9971081512961117e-05, "loss": 0.3279, "step": 624 }, { "epoch": 0.05, "learning_rate": 1.9970870146651964e-05, "loss": 0.3678, "step": 625 }, { "epoch": 0.05, "learning_rate": 1.9970658011837404e-05, "loss": 0.3188, "step": 626 }, { "epoch": 0.05, "learning_rate": 1.9970445108533795e-05, "loss": 0.3673, "step": 627 }, { "epoch": 0.05, "learning_rate": 1.9970231436757542e-05, "loss": 0.3157, "step": 628 }, { "epoch": 0.05, "learning_rate": 1.9970016996525112e-05, "loss": 0.3331, "step": 629 }, { "epoch": 0.05, "learning_rate": 1.9969801787853035e-05, "loss": 0.3539, "step": 630 }, { "epoch": 0.05, "learning_rate": 1.9969585810757902e-05, "loss": 0.3256, "step": 631 }, { "epoch": 0.05, "learning_rate": 1.996936906525635e-05, "loss": 0.3576, "step": 632 }, { "epoch": 0.05, "learning_rate": 1.9969151551365097e-05, "loss": 0.3077, "step": 633 }, { "epoch": 0.05, "learning_rate": 1.99689332691009e-05, "loss": 0.3336, "step": 634 }, { "epoch": 0.05, "learning_rate": 1.996871421848058e-05, "loss": 0.3175, "step": 635 }, { "epoch": 0.05, "learning_rate": 1.996849439952103e-05, "loss": 0.3411, "step": 636 }, { "epoch": 0.05, "learning_rate": 1.9968273812239185e-05, "loss": 0.3721, "step": 637 }, { "epoch": 0.05, "learning_rate": 1.9968052456652048e-05, "loss": 0.3252, "step": 638 }, { "epoch": 0.05, "learning_rate": 1.9967830332776684e-05, "loss": 0.2779, "step": 639 }, { "epoch": 0.05, "learning_rate": 1.996760744063021e-05, "loss": 0.2823, "step": 640 }, { "epoch": 0.05, "learning_rate": 1.9967383780229805e-05, "loss": 0.3191, "step": 641 }, { "epoch": 0.06, "learning_rate": 1.9967159351592706e-05, "loss": 0.3174, "step": 642 }, { "epoch": 0.06, "learning_rate": 1.9966934154736216e-05, "loss": 0.3321, "step": 643 }, { "epoch": 0.06, "learning_rate": 1.996670818967769e-05, "loss": 0.302, "step": 644 }, { "epoch": 0.06, "learning_rate": 1.9966481456434543e-05, "loss": 0.5845, "step": 645 }, { "epoch": 0.06, "learning_rate": 1.996625395502425e-05, "loss": 0.3726, "step": 646 }, { "epoch": 0.06, "learning_rate": 1.9966025685464353e-05, "loss": 0.3862, "step": 647 }, { "epoch": 0.06, "learning_rate": 1.9965796647772434e-05, "loss": 0.3524, "step": 648 }, { "epoch": 0.06, "learning_rate": 1.996556684196615e-05, "loss": 0.3419, "step": 649 }, { "epoch": 0.06, "learning_rate": 1.996533626806322e-05, "loss": 0.3483, "step": 650 }, { "epoch": 0.06, "learning_rate": 1.996510492608141e-05, "loss": 0.3784, "step": 651 }, { "epoch": 0.06, "learning_rate": 1.9964872816038547e-05, "loss": 0.3185, "step": 652 }, { "epoch": 0.06, "learning_rate": 1.9964639937952527e-05, "loss": 0.3842, "step": 653 }, { "epoch": 0.06, "learning_rate": 1.99644062918413e-05, "loss": 0.3243, "step": 654 }, { "epoch": 0.06, "learning_rate": 1.996417187772287e-05, "loss": 0.3759, "step": 655 }, { "epoch": 0.06, "learning_rate": 1.9963936695615307e-05, "loss": 0.3246, "step": 656 }, { "epoch": 0.06, "learning_rate": 1.9963700745536733e-05, "loss": 0.3589, "step": 657 }, { "epoch": 0.06, "learning_rate": 1.9963464027505343e-05, "loss": 0.3464, "step": 658 }, { "epoch": 0.06, "learning_rate": 1.996322654153937e-05, "loss": 0.3057, "step": 659 }, { "epoch": 0.06, "learning_rate": 1.996298828765713e-05, "loss": 0.3293, "step": 660 }, { "epoch": 0.06, "learning_rate": 1.9962749265876983e-05, "loss": 0.3138, "step": 661 }, { "epoch": 0.06, "learning_rate": 1.9962509476217348e-05, "loss": 0.3162, "step": 662 }, { "epoch": 0.06, "learning_rate": 1.9962268918696708e-05, "loss": 0.3716, "step": 663 }, { "epoch": 0.06, "learning_rate": 1.9962027593333603e-05, "loss": 0.363, "step": 664 }, { "epoch": 0.06, "learning_rate": 1.9961785500146638e-05, "loss": 0.3375, "step": 665 }, { "epoch": 0.06, "learning_rate": 1.9961542639154467e-05, "loss": 0.3518, "step": 666 }, { "epoch": 0.06, "learning_rate": 1.9961299010375813e-05, "loss": 0.3, "step": 667 }, { "epoch": 0.06, "learning_rate": 1.996105461382945e-05, "loss": 0.3693, "step": 668 }, { "epoch": 0.06, "learning_rate": 1.9960809449534214e-05, "loss": 0.3063, "step": 669 }, { "epoch": 0.06, "learning_rate": 1.9960563517509008e-05, "loss": 0.304, "step": 670 }, { "epoch": 0.06, "learning_rate": 1.9960316817772783e-05, "loss": 0.3294, "step": 671 }, { "epoch": 0.06, "learning_rate": 1.9960069350344547e-05, "loss": 0.3421, "step": 672 }, { "epoch": 0.06, "learning_rate": 1.9959821115243385e-05, "loss": 0.3419, "step": 673 }, { "epoch": 0.06, "learning_rate": 1.9959572112488423e-05, "loss": 0.3864, "step": 674 }, { "epoch": 0.06, "learning_rate": 1.9959322342098854e-05, "loss": 0.3907, "step": 675 }, { "epoch": 0.06, "learning_rate": 1.995907180409393e-05, "loss": 0.3643, "step": 676 }, { "epoch": 0.06, "learning_rate": 1.9958820498492958e-05, "loss": 0.3456, "step": 677 }, { "epoch": 0.06, "learning_rate": 1.9958568425315316e-05, "loss": 0.279, "step": 678 }, { "epoch": 0.06, "learning_rate": 1.995831558458042e-05, "loss": 0.3097, "step": 679 }, { "epoch": 0.06, "learning_rate": 1.9958061976307767e-05, "loss": 0.3608, "step": 680 }, { "epoch": 0.06, "learning_rate": 1.99578076005169e-05, "loss": 0.3563, "step": 681 }, { "epoch": 0.06, "learning_rate": 1.9957552457227428e-05, "loss": 0.3356, "step": 682 }, { "epoch": 0.06, "learning_rate": 1.995729654645901e-05, "loss": 0.3179, "step": 683 }, { "epoch": 0.06, "learning_rate": 1.9957039868231382e-05, "loss": 0.3613, "step": 684 }, { "epoch": 0.06, "learning_rate": 1.9956782422564313e-05, "loss": 0.3726, "step": 685 }, { "epoch": 0.06, "learning_rate": 1.9956524209477658e-05, "loss": 0.296, "step": 686 }, { "epoch": 0.06, "learning_rate": 1.995626522899131e-05, "loss": 0.3303, "step": 687 }, { "epoch": 0.06, "learning_rate": 1.9956005481125235e-05, "loss": 0.3466, "step": 688 }, { "epoch": 0.06, "learning_rate": 1.9955744965899452e-05, "loss": 0.3399, "step": 689 }, { "epoch": 0.06, "learning_rate": 1.995548368333404e-05, "loss": 0.3265, "step": 690 }, { "epoch": 0.06, "learning_rate": 1.9955221633449137e-05, "loss": 0.361, "step": 691 }, { "epoch": 0.06, "learning_rate": 1.995495881626494e-05, "loss": 0.3459, "step": 692 }, { "epoch": 0.06, "learning_rate": 1.9954695231801706e-05, "loss": 0.3618, "step": 693 }, { "epoch": 0.06, "learning_rate": 1.995443088007975e-05, "loss": 0.3339, "step": 694 }, { "epoch": 0.06, "learning_rate": 1.995416576111945e-05, "loss": 0.3064, "step": 695 }, { "epoch": 0.06, "learning_rate": 1.995389987494124e-05, "loss": 0.3008, "step": 696 }, { "epoch": 0.06, "learning_rate": 1.995363322156561e-05, "loss": 0.3214, "step": 697 }, { "epoch": 0.06, "learning_rate": 1.995336580101311e-05, "loss": 0.2971, "step": 698 }, { "epoch": 0.06, "learning_rate": 1.995309761330436e-05, "loss": 0.6136, "step": 699 }, { "epoch": 0.06, "learning_rate": 1.995282865846002e-05, "loss": 0.3083, "step": 700 }, { "epoch": 0.06, "learning_rate": 1.995255893650083e-05, "loss": 0.3008, "step": 701 }, { "epoch": 0.06, "learning_rate": 1.9952288447447573e-05, "loss": 0.3506, "step": 702 }, { "epoch": 0.06, "learning_rate": 1.9952017191321098e-05, "loss": 0.3716, "step": 703 }, { "epoch": 0.06, "learning_rate": 1.9951745168142312e-05, "loss": 0.357, "step": 704 }, { "epoch": 0.06, "learning_rate": 1.9951472377932183e-05, "loss": 0.3171, "step": 705 }, { "epoch": 0.06, "learning_rate": 1.9951198820711735e-05, "loss": 0.356, "step": 706 }, { "epoch": 0.06, "learning_rate": 1.9950924496502048e-05, "loss": 0.3558, "step": 707 }, { "epoch": 0.06, "learning_rate": 1.9950649405324275e-05, "loss": 0.3903, "step": 708 }, { "epoch": 0.06, "learning_rate": 1.9950373547199612e-05, "loss": 0.417, "step": 709 }, { "epoch": 0.06, "learning_rate": 1.995009692214932e-05, "loss": 0.314, "step": 710 }, { "epoch": 0.06, "learning_rate": 1.9949819530194722e-05, "loss": 0.3521, "step": 711 }, { "epoch": 0.06, "learning_rate": 1.99495413713572e-05, "loss": 0.3274, "step": 712 }, { "epoch": 0.06, "learning_rate": 1.994926244565819e-05, "loss": 0.619, "step": 713 }, { "epoch": 0.06, "learning_rate": 1.994898275311919e-05, "loss": 0.3841, "step": 714 }, { "epoch": 0.06, "learning_rate": 1.9948702293761763e-05, "loss": 0.3521, "step": 715 }, { "epoch": 0.06, "learning_rate": 1.9948421067607518e-05, "loss": 0.3113, "step": 716 }, { "epoch": 0.06, "learning_rate": 1.9948139074678138e-05, "loss": 0.3591, "step": 717 }, { "epoch": 0.06, "learning_rate": 1.994785631499535e-05, "loss": 0.3658, "step": 718 }, { "epoch": 0.06, "learning_rate": 1.994757278858095e-05, "loss": 0.4016, "step": 719 }, { "epoch": 0.06, "learning_rate": 1.9947288495456793e-05, "loss": 0.3768, "step": 720 }, { "epoch": 0.06, "learning_rate": 1.9947003435644788e-05, "loss": 0.4028, "step": 721 }, { "epoch": 0.06, "learning_rate": 1.994671760916691e-05, "loss": 0.3552, "step": 722 }, { "epoch": 0.06, "learning_rate": 1.9946431016045187e-05, "loss": 0.348, "step": 723 }, { "epoch": 0.06, "learning_rate": 1.994614365630171e-05, "loss": 0.3878, "step": 724 }, { "epoch": 0.06, "learning_rate": 1.994585552995862e-05, "loss": 0.3807, "step": 725 }, { "epoch": 0.06, "learning_rate": 1.9945566637038133e-05, "loss": 0.3594, "step": 726 }, { "epoch": 0.06, "learning_rate": 1.9945276977562515e-05, "loss": 0.3416, "step": 727 }, { "epoch": 0.06, "learning_rate": 1.994498655155408e-05, "loss": 0.31, "step": 728 }, { "epoch": 0.06, "learning_rate": 1.994469535903523e-05, "loss": 0.3784, "step": 729 }, { "epoch": 0.06, "learning_rate": 1.9944403400028392e-05, "loss": 0.3105, "step": 730 }, { "epoch": 0.06, "learning_rate": 1.9944110674556082e-05, "loss": 0.5735, "step": 731 }, { "epoch": 0.06, "learning_rate": 1.9943817182640856e-05, "loss": 0.3445, "step": 732 }, { "epoch": 0.06, "learning_rate": 1.9943522924305337e-05, "loss": 0.347, "step": 733 }, { "epoch": 0.06, "learning_rate": 1.9943227899572198e-05, "loss": 0.2965, "step": 734 }, { "epoch": 0.06, "learning_rate": 1.994293210846419e-05, "loss": 0.3818, "step": 735 }, { "epoch": 0.06, "learning_rate": 1.99426355510041e-05, "loss": 0.3463, "step": 736 }, { "epoch": 0.06, "learning_rate": 1.994233822721479e-05, "loss": 0.3334, "step": 737 }, { "epoch": 0.06, "learning_rate": 1.994204013711918e-05, "loss": 0.3172, "step": 738 }, { "epoch": 0.06, "learning_rate": 1.9941741280740235e-05, "loss": 0.3109, "step": 739 }, { "epoch": 0.06, "learning_rate": 1.9941441658101e-05, "loss": 0.3264, "step": 740 }, { "epoch": 0.06, "learning_rate": 1.9941141269224564e-05, "loss": 0.3964, "step": 741 }, { "epoch": 0.06, "learning_rate": 1.9940840114134078e-05, "loss": 0.348, "step": 742 }, { "epoch": 0.06, "learning_rate": 1.9940538192852753e-05, "loss": 0.3367, "step": 743 }, { "epoch": 0.06, "learning_rate": 1.9940235505403867e-05, "loss": 0.3307, "step": 744 }, { "epoch": 0.06, "learning_rate": 1.993993205181074e-05, "loss": 0.2921, "step": 745 }, { "epoch": 0.06, "learning_rate": 1.993962783209677e-05, "loss": 0.3784, "step": 746 }, { "epoch": 0.06, "learning_rate": 1.9939322846285397e-05, "loss": 0.3198, "step": 747 }, { "epoch": 0.06, "learning_rate": 1.9939017094400128e-05, "loss": 0.3177, "step": 748 }, { "epoch": 0.06, "learning_rate": 1.9938710576464535e-05, "loss": 0.6008, "step": 749 }, { "epoch": 0.06, "learning_rate": 1.993840329250224e-05, "loss": 0.3267, "step": 750 }, { "epoch": 0.06, "learning_rate": 1.9938095242536925e-05, "loss": 0.3933, "step": 751 }, { "epoch": 0.06, "learning_rate": 1.993778642659233e-05, "loss": 0.3732, "step": 752 }, { "epoch": 0.06, "learning_rate": 1.9937476844692268e-05, "loss": 0.306, "step": 753 }, { "epoch": 0.06, "learning_rate": 1.993716649686059e-05, "loss": 0.3562, "step": 754 }, { "epoch": 0.06, "learning_rate": 1.9936855383121217e-05, "loss": 0.2776, "step": 755 }, { "epoch": 0.06, "learning_rate": 1.9936543503498135e-05, "loss": 0.3516, "step": 756 }, { "epoch": 0.06, "learning_rate": 1.9936230858015376e-05, "loss": 0.3804, "step": 757 }, { "epoch": 0.06, "learning_rate": 1.9935917446697038e-05, "loss": 0.3049, "step": 758 }, { "epoch": 0.07, "learning_rate": 1.993560326956728e-05, "loss": 0.3068, "step": 759 }, { "epoch": 0.07, "learning_rate": 1.9935288326650314e-05, "loss": 0.3506, "step": 760 }, { "epoch": 0.07, "learning_rate": 1.9934972617970415e-05, "loss": 0.3657, "step": 761 }, { "epoch": 0.07, "learning_rate": 1.993465614355192e-05, "loss": 0.311, "step": 762 }, { "epoch": 0.07, "learning_rate": 1.9934338903419213e-05, "loss": 0.3981, "step": 763 }, { "epoch": 0.07, "learning_rate": 1.9934020897596752e-05, "loss": 0.5864, "step": 764 }, { "epoch": 0.07, "learning_rate": 1.9933702126109048e-05, "loss": 0.3585, "step": 765 }, { "epoch": 0.07, "learning_rate": 1.9933382588980665e-05, "loss": 0.5985, "step": 766 }, { "epoch": 0.07, "learning_rate": 1.9933062286236235e-05, "loss": 0.3079, "step": 767 }, { "epoch": 0.07, "learning_rate": 1.9932741217900444e-05, "loss": 0.3301, "step": 768 }, { "epoch": 0.07, "learning_rate": 1.993241938399804e-05, "loss": 0.372, "step": 769 }, { "epoch": 0.07, "learning_rate": 1.993209678455383e-05, "loss": 0.3864, "step": 770 }, { "epoch": 0.07, "learning_rate": 1.9931773419592675e-05, "loss": 0.3069, "step": 771 }, { "epoch": 0.07, "learning_rate": 1.9931449289139495e-05, "loss": 0.2963, "step": 772 }, { "epoch": 0.07, "learning_rate": 1.993112439321928e-05, "loss": 0.3364, "step": 773 }, { "epoch": 0.07, "learning_rate": 1.993079873185707e-05, "loss": 0.3281, "step": 774 }, { "epoch": 0.07, "learning_rate": 1.993047230507796e-05, "loss": 0.3059, "step": 775 }, { "epoch": 0.07, "learning_rate": 1.993014511290711e-05, "loss": 0.3986, "step": 776 }, { "epoch": 0.07, "learning_rate": 1.9929817155369746e-05, "loss": 0.3258, "step": 777 }, { "epoch": 0.07, "learning_rate": 1.9929488432491137e-05, "loss": 0.3033, "step": 778 }, { "epoch": 0.07, "learning_rate": 1.9929158944296627e-05, "loss": 0.3777, "step": 779 }, { "epoch": 0.07, "learning_rate": 1.9928828690811603e-05, "loss": 0.3599, "step": 780 }, { "epoch": 0.07, "learning_rate": 1.9928497672061523e-05, "loss": 0.3278, "step": 781 }, { "epoch": 0.07, "learning_rate": 1.99281658880719e-05, "loss": 0.3645, "step": 782 }, { "epoch": 0.07, "learning_rate": 1.992783333886831e-05, "loss": 0.3481, "step": 783 }, { "epoch": 0.07, "learning_rate": 1.9927500024476378e-05, "loss": 0.3496, "step": 784 }, { "epoch": 0.07, "learning_rate": 1.9927165944921803e-05, "loss": 0.2755, "step": 785 }, { "epoch": 0.07, "learning_rate": 1.9926831100230322e-05, "loss": 0.3297, "step": 786 }, { "epoch": 0.07, "learning_rate": 1.9926495490427753e-05, "loss": 0.3339, "step": 787 }, { "epoch": 0.07, "learning_rate": 1.992615911553996e-05, "loss": 0.2912, "step": 788 }, { "epoch": 0.07, "learning_rate": 1.9925821975592866e-05, "loss": 0.3555, "step": 789 }, { "epoch": 0.07, "learning_rate": 1.9925484070612465e-05, "loss": 0.3691, "step": 790 }, { "epoch": 0.07, "learning_rate": 1.9925145400624788e-05, "loss": 0.316, "step": 791 }, { "epoch": 0.07, "learning_rate": 1.992480596565595e-05, "loss": 0.3272, "step": 792 }, { "epoch": 0.07, "learning_rate": 1.9924465765732106e-05, "loss": 0.3127, "step": 793 }, { "epoch": 0.07, "learning_rate": 1.992412480087948e-05, "loss": 0.3422, "step": 794 }, { "epoch": 0.07, "learning_rate": 1.992378307112435e-05, "loss": 0.35, "step": 795 }, { "epoch": 0.07, "learning_rate": 1.9923440576493056e-05, "loss": 0.3304, "step": 796 }, { "epoch": 0.07, "learning_rate": 1.9923097317011995e-05, "loss": 0.3016, "step": 797 }, { "epoch": 0.07, "learning_rate": 1.9922753292707627e-05, "loss": 0.3372, "step": 798 }, { "epoch": 0.07, "learning_rate": 1.992240850360646e-05, "loss": 0.324, "step": 799 }, { "epoch": 0.07, "learning_rate": 1.992206294973508e-05, "loss": 0.3347, "step": 800 }, { "epoch": 0.07, "learning_rate": 1.992171663112011e-05, "loss": 0.3676, "step": 801 }, { "epoch": 0.07, "learning_rate": 1.9921369547788246e-05, "loss": 0.261, "step": 802 }, { "epoch": 0.07, "learning_rate": 1.9921021699766243e-05, "loss": 0.2889, "step": 803 }, { "epoch": 0.07, "learning_rate": 1.9920673087080903e-05, "loss": 0.3527, "step": 804 }, { "epoch": 0.07, "learning_rate": 1.9920323709759108e-05, "loss": 0.3105, "step": 805 }, { "epoch": 0.07, "learning_rate": 1.9919973567827776e-05, "loss": 0.2941, "step": 806 }, { "epoch": 0.07, "learning_rate": 1.9919622661313897e-05, "loss": 0.6091, "step": 807 }, { "epoch": 0.07, "learning_rate": 1.991927099024452e-05, "loss": 0.3502, "step": 808 }, { "epoch": 0.07, "learning_rate": 1.9918918554646745e-05, "loss": 0.3297, "step": 809 }, { "epoch": 0.07, "learning_rate": 1.9918565354547738e-05, "loss": 0.3663, "step": 810 }, { "epoch": 0.07, "learning_rate": 1.9918211389974726e-05, "loss": 0.3846, "step": 811 }, { "epoch": 0.07, "learning_rate": 1.9917856660954985e-05, "loss": 0.3278, "step": 812 }, { "epoch": 0.07, "learning_rate": 1.991750116751586e-05, "loss": 0.2994, "step": 813 }, { "epoch": 0.07, "learning_rate": 1.9917144909684745e-05, "loss": 0.3167, "step": 814 }, { "epoch": 0.07, "learning_rate": 1.9916787887489108e-05, "loss": 0.3237, "step": 815 }, { "epoch": 0.07, "learning_rate": 1.9916430100956458e-05, "loss": 0.2934, "step": 816 }, { "epoch": 0.07, "learning_rate": 1.991607155011437e-05, "loss": 0.3155, "step": 817 }, { "epoch": 0.07, "learning_rate": 1.9915712234990486e-05, "loss": 0.5964, "step": 818 }, { "epoch": 0.07, "learning_rate": 1.9915352155612503e-05, "loss": 0.3163, "step": 819 }, { "epoch": 0.07, "learning_rate": 1.9914991312008164e-05, "loss": 0.3191, "step": 820 }, { "epoch": 0.07, "learning_rate": 1.991462970420529e-05, "loss": 0.3099, "step": 821 }, { "epoch": 0.07, "learning_rate": 1.9914267332231746e-05, "loss": 0.3243, "step": 822 }, { "epoch": 0.07, "learning_rate": 1.991390419611546e-05, "loss": 0.3366, "step": 823 }, { "epoch": 0.07, "learning_rate": 1.991354029588443e-05, "loss": 0.3292, "step": 824 }, { "epoch": 0.07, "learning_rate": 1.9913175631566698e-05, "loss": 0.3362, "step": 825 }, { "epoch": 0.07, "learning_rate": 1.9912810203190367e-05, "loss": 0.3617, "step": 826 }, { "epoch": 0.07, "learning_rate": 1.991244401078361e-05, "loss": 0.3519, "step": 827 }, { "epoch": 0.07, "learning_rate": 1.9912077054374646e-05, "loss": 0.3312, "step": 828 }, { "epoch": 0.07, "learning_rate": 1.9911709333991758e-05, "loss": 0.4035, "step": 829 }, { "epoch": 0.07, "learning_rate": 1.9911340849663293e-05, "loss": 0.3193, "step": 830 }, { "epoch": 0.07, "learning_rate": 1.9910971601417645e-05, "loss": 0.3417, "step": 831 }, { "epoch": 0.07, "learning_rate": 1.991060158928328e-05, "loss": 0.3412, "step": 832 }, { "epoch": 0.07, "learning_rate": 1.9910230813288713e-05, "loss": 0.3275, "step": 833 }, { "epoch": 0.07, "learning_rate": 1.9909859273462525e-05, "loss": 0.295, "step": 834 }, { "epoch": 0.07, "learning_rate": 1.9909486969833346e-05, "loss": 0.3278, "step": 835 }, { "epoch": 0.07, "learning_rate": 1.990911390242988e-05, "loss": 0.3846, "step": 836 }, { "epoch": 0.07, "learning_rate": 1.9908740071280873e-05, "loss": 0.3689, "step": 837 }, { "epoch": 0.07, "learning_rate": 1.9908365476415146e-05, "loss": 0.3255, "step": 838 }, { "epoch": 0.07, "learning_rate": 1.9907990117861564e-05, "loss": 0.3314, "step": 839 }, { "epoch": 0.07, "learning_rate": 1.9907613995649063e-05, "loss": 0.3344, "step": 840 }, { "epoch": 0.07, "learning_rate": 1.9907237109806627e-05, "loss": 0.3314, "step": 841 }, { "epoch": 0.07, "learning_rate": 1.9906859460363307e-05, "loss": 0.3077, "step": 842 }, { "epoch": 0.07, "learning_rate": 1.9906481047348215e-05, "loss": 0.33, "step": 843 }, { "epoch": 0.07, "learning_rate": 1.9906101870790512e-05, "loss": 0.3832, "step": 844 }, { "epoch": 0.07, "learning_rate": 1.9905721930719425e-05, "loss": 0.3558, "step": 845 }, { "epoch": 0.07, "learning_rate": 1.990534122716423e-05, "loss": 0.3477, "step": 846 }, { "epoch": 0.07, "learning_rate": 1.9904959760154287e-05, "loss": 0.335, "step": 847 }, { "epoch": 0.07, "learning_rate": 1.9904577529718982e-05, "loss": 0.3232, "step": 848 }, { "epoch": 0.07, "learning_rate": 1.9904194535887783e-05, "loss": 0.3464, "step": 849 }, { "epoch": 0.07, "learning_rate": 1.9903810778690204e-05, "loss": 0.3001, "step": 850 }, { "epoch": 0.07, "learning_rate": 1.9903426258155833e-05, "loss": 0.3398, "step": 851 }, { "epoch": 0.07, "learning_rate": 1.9903040974314293e-05, "loss": 0.3693, "step": 852 }, { "epoch": 0.07, "learning_rate": 1.990265492719529e-05, "loss": 0.3096, "step": 853 }, { "epoch": 0.07, "learning_rate": 1.9902268116828578e-05, "loss": 0.3546, "step": 854 }, { "epoch": 0.07, "learning_rate": 1.9901880543243966e-05, "loss": 0.3275, "step": 855 }, { "epoch": 0.07, "learning_rate": 1.9901492206471325e-05, "loss": 0.3226, "step": 856 }, { "epoch": 0.07, "learning_rate": 1.9901103106540597e-05, "loss": 0.2867, "step": 857 }, { "epoch": 0.07, "learning_rate": 1.9900713243481758e-05, "loss": 0.3155, "step": 858 }, { "epoch": 0.07, "learning_rate": 1.9900322617324863e-05, "loss": 0.3881, "step": 859 }, { "epoch": 0.07, "learning_rate": 1.9899931228100024e-05, "loss": 0.3416, "step": 860 }, { "epoch": 0.07, "learning_rate": 1.98995390758374e-05, "loss": 0.3362, "step": 861 }, { "epoch": 0.07, "learning_rate": 1.989914616056722e-05, "loss": 0.3507, "step": 862 }, { "epoch": 0.07, "learning_rate": 1.9898752482319766e-05, "loss": 0.3228, "step": 863 }, { "epoch": 0.07, "learning_rate": 1.9898358041125382e-05, "loss": 0.3431, "step": 864 }, { "epoch": 0.07, "learning_rate": 1.989796283701447e-05, "loss": 0.3011, "step": 865 }, { "epoch": 0.07, "learning_rate": 1.989756687001749e-05, "loss": 0.3501, "step": 866 }, { "epoch": 0.07, "learning_rate": 1.989717014016496e-05, "loss": 0.5685, "step": 867 }, { "epoch": 0.07, "learning_rate": 1.989677264748746e-05, "loss": 0.3452, "step": 868 }, { "epoch": 0.07, "learning_rate": 1.9896374392015624e-05, "loss": 0.3271, "step": 869 }, { "epoch": 0.07, "learning_rate": 1.989597537378015e-05, "loss": 0.3281, "step": 870 }, { "epoch": 0.07, "learning_rate": 1.9895575592811795e-05, "loss": 0.3235, "step": 871 }, { "epoch": 0.07, "learning_rate": 1.9895175049141366e-05, "loss": 0.5752, "step": 872 }, { "epoch": 0.07, "learning_rate": 1.9894773742799737e-05, "loss": 0.3206, "step": 873 }, { "epoch": 0.07, "learning_rate": 1.989437167381784e-05, "loss": 0.3124, "step": 874 }, { "epoch": 0.08, "learning_rate": 1.9893968842226664e-05, "loss": 0.3116, "step": 875 }, { "epoch": 0.08, "learning_rate": 1.9893565248057257e-05, "loss": 0.3371, "step": 876 }, { "epoch": 0.08, "learning_rate": 1.9893160891340728e-05, "loss": 0.2988, "step": 877 }, { "epoch": 0.08, "learning_rate": 1.989275577210824e-05, "loss": 0.3743, "step": 878 }, { "epoch": 0.08, "learning_rate": 1.9892349890391015e-05, "loss": 0.3319, "step": 879 }, { "epoch": 0.08, "learning_rate": 1.9891943246220344e-05, "loss": 0.2971, "step": 880 }, { "epoch": 0.08, "learning_rate": 1.9891535839627565e-05, "loss": 0.3267, "step": 881 }, { "epoch": 0.08, "learning_rate": 1.9891127670644076e-05, "loss": 0.37, "step": 882 }, { "epoch": 0.08, "learning_rate": 1.9890718739301346e-05, "loss": 0.3203, "step": 883 }, { "epoch": 0.08, "learning_rate": 1.989030904563088e-05, "loss": 0.3338, "step": 884 }, { "epoch": 0.08, "learning_rate": 1.988989858966427e-05, "loss": 0.2997, "step": 885 }, { "epoch": 0.08, "learning_rate": 1.9889487371433134e-05, "loss": 0.3385, "step": 886 }, { "epoch": 0.08, "learning_rate": 1.9889075390969182e-05, "loss": 0.3393, "step": 887 }, { "epoch": 0.08, "learning_rate": 1.9888662648304162e-05, "loss": 0.3691, "step": 888 }, { "epoch": 0.08, "learning_rate": 1.988824914346989e-05, "loss": 0.2976, "step": 889 }, { "epoch": 0.08, "learning_rate": 1.9887834876498228e-05, "loss": 0.3438, "step": 890 }, { "epoch": 0.08, "learning_rate": 1.9887419847421113e-05, "loss": 0.3074, "step": 891 }, { "epoch": 0.08, "learning_rate": 1.9887004056270532e-05, "loss": 0.3702, "step": 892 }, { "epoch": 0.08, "learning_rate": 1.988658750307853e-05, "loss": 0.3333, "step": 893 }, { "epoch": 0.08, "learning_rate": 1.9886170187877214e-05, "loss": 0.3124, "step": 894 }, { "epoch": 0.08, "learning_rate": 1.988575211069875e-05, "loss": 0.3116, "step": 895 }, { "epoch": 0.08, "learning_rate": 1.9885333271575362e-05, "loss": 0.3234, "step": 896 }, { "epoch": 0.08, "learning_rate": 1.9884913670539327e-05, "loss": 0.2993, "step": 897 }, { "epoch": 0.08, "learning_rate": 1.9884493307622993e-05, "loss": 0.3008, "step": 898 }, { "epoch": 0.08, "learning_rate": 1.988407218285875e-05, "loss": 0.6298, "step": 899 }, { "epoch": 0.08, "learning_rate": 1.9883650296279068e-05, "loss": 0.2564, "step": 900 }, { "epoch": 0.08, "learning_rate": 1.9883227647916454e-05, "loss": 0.3343, "step": 901 }, { "epoch": 0.08, "learning_rate": 1.9882804237803487e-05, "loss": 0.316, "step": 902 }, { "epoch": 0.08, "learning_rate": 1.98823800659728e-05, "loss": 0.3342, "step": 903 }, { "epoch": 0.08, "learning_rate": 1.9881955132457095e-05, "loss": 0.3773, "step": 904 }, { "epoch": 0.08, "learning_rate": 1.988152943728911e-05, "loss": 0.3944, "step": 905 }, { "epoch": 0.08, "learning_rate": 1.9881102980501664e-05, "loss": 0.3303, "step": 906 }, { "epoch": 0.08, "learning_rate": 1.9880675762127624e-05, "loss": 0.3203, "step": 907 }, { "epoch": 0.08, "learning_rate": 1.988024778219992e-05, "loss": 0.3059, "step": 908 }, { "epoch": 0.08, "learning_rate": 1.9879819040751532e-05, "loss": 0.3581, "step": 909 }, { "epoch": 0.08, "learning_rate": 1.9879389537815514e-05, "loss": 0.3987, "step": 910 }, { "epoch": 0.08, "learning_rate": 1.9878959273424968e-05, "loss": 0.3455, "step": 911 }, { "epoch": 0.08, "learning_rate": 1.987852824761305e-05, "loss": 0.3065, "step": 912 }, { "epoch": 0.08, "learning_rate": 1.987809646041299e-05, "loss": 0.2888, "step": 913 }, { "epoch": 0.08, "learning_rate": 1.987766391185806e-05, "loss": 0.3319, "step": 914 }, { "epoch": 0.08, "learning_rate": 1.98772306019816e-05, "loss": 0.3539, "step": 915 }, { "epoch": 0.08, "learning_rate": 1.9876796530817017e-05, "loss": 0.3716, "step": 916 }, { "epoch": 0.08, "learning_rate": 1.9876361698397755e-05, "loss": 0.3692, "step": 917 }, { "epoch": 0.08, "learning_rate": 1.9875926104757337e-05, "loss": 0.3367, "step": 918 }, { "epoch": 0.08, "learning_rate": 1.9875489749929334e-05, "loss": 0.5887, "step": 919 }, { "epoch": 0.08, "learning_rate": 1.9875052633947373e-05, "loss": 0.3486, "step": 920 }, { "epoch": 0.08, "learning_rate": 1.987461475684515e-05, "loss": 0.3786, "step": 921 }, { "epoch": 0.08, "learning_rate": 1.9874176118656415e-05, "loss": 0.3452, "step": 922 }, { "epoch": 0.08, "learning_rate": 1.9873736719414977e-05, "loss": 0.364, "step": 923 }, { "epoch": 0.08, "learning_rate": 1.98732965591547e-05, "loss": 0.308, "step": 924 }, { "epoch": 0.08, "learning_rate": 1.9872855637909506e-05, "loss": 0.3063, "step": 925 }, { "epoch": 0.08, "learning_rate": 1.9872413955713382e-05, "loss": 0.3671, "step": 926 }, { "epoch": 0.08, "learning_rate": 1.9871971512600375e-05, "loss": 0.3909, "step": 927 }, { "epoch": 0.08, "learning_rate": 1.987152830860458e-05, "loss": 0.36, "step": 928 }, { "epoch": 0.08, "learning_rate": 1.987108434376016e-05, "loss": 0.3239, "step": 929 }, { "epoch": 0.08, "learning_rate": 1.9870639618101333e-05, "loss": 0.3331, "step": 930 }, { "epoch": 0.08, "learning_rate": 1.987019413166238e-05, "loss": 0.35, "step": 931 }, { "epoch": 0.08, "learning_rate": 1.986974788447763e-05, "loss": 0.3496, "step": 932 }, { "epoch": 0.08, "learning_rate": 1.986930087658148e-05, "loss": 0.3754, "step": 933 }, { "epoch": 0.08, "learning_rate": 1.9868853108008387e-05, "loss": 0.3561, "step": 934 }, { "epoch": 0.08, "learning_rate": 1.9868404578792858e-05, "loss": 0.3477, "step": 935 }, { "epoch": 0.08, "learning_rate": 1.9867955288969468e-05, "loss": 0.3076, "step": 936 }, { "epoch": 0.08, "learning_rate": 1.986750523857284e-05, "loss": 0.3112, "step": 937 }, { "epoch": 0.08, "learning_rate": 1.9867054427637667e-05, "loss": 0.3149, "step": 938 }, { "epoch": 0.08, "learning_rate": 1.986660285619869e-05, "loss": 0.3094, "step": 939 }, { "epoch": 0.08, "learning_rate": 1.986615052429072e-05, "loss": 0.3588, "step": 940 }, { "epoch": 0.08, "learning_rate": 1.986569743194862e-05, "loss": 0.3519, "step": 941 }, { "epoch": 0.08, "learning_rate": 1.9865243579207304e-05, "loss": 0.3169, "step": 942 }, { "epoch": 0.08, "learning_rate": 1.986478896610176e-05, "loss": 0.3291, "step": 943 }, { "epoch": 0.08, "learning_rate": 1.986433359266703e-05, "loss": 0.3159, "step": 944 }, { "epoch": 0.08, "learning_rate": 1.9863877458938204e-05, "loss": 0.3229, "step": 945 }, { "epoch": 0.08, "learning_rate": 1.9863420564950445e-05, "loss": 0.3204, "step": 946 }, { "epoch": 0.08, "learning_rate": 1.9862962910738965e-05, "loss": 0.2886, "step": 947 }, { "epoch": 0.08, "learning_rate": 1.9862504496339036e-05, "loss": 0.3209, "step": 948 }, { "epoch": 0.08, "learning_rate": 1.9862045321785994e-05, "loss": 0.3069, "step": 949 }, { "epoch": 0.08, "learning_rate": 1.9861585387115228e-05, "loss": 0.3577, "step": 950 }, { "epoch": 0.08, "learning_rate": 1.9861124692362188e-05, "loss": 0.3427, "step": 951 }, { "epoch": 0.08, "learning_rate": 1.986066323756238e-05, "loss": 0.3471, "step": 952 }, { "epoch": 0.08, "learning_rate": 1.9860201022751376e-05, "loss": 0.3679, "step": 953 }, { "epoch": 0.08, "learning_rate": 1.9859738047964795e-05, "loss": 0.3129, "step": 954 }, { "epoch": 0.08, "learning_rate": 1.9859274313238327e-05, "loss": 0.338, "step": 955 }, { "epoch": 0.08, "learning_rate": 1.985880981860771e-05, "loss": 0.3145, "step": 956 }, { "epoch": 0.08, "learning_rate": 1.9858344564108743e-05, "loss": 0.3157, "step": 957 }, { "epoch": 0.08, "learning_rate": 1.985787854977729e-05, "loss": 0.3309, "step": 958 }, { "epoch": 0.08, "learning_rate": 1.985741177564927e-05, "loss": 0.3246, "step": 959 }, { "epoch": 0.08, "learning_rate": 1.9856944241760655e-05, "loss": 0.3215, "step": 960 }, { "epoch": 0.08, "learning_rate": 1.985647594814748e-05, "loss": 0.3479, "step": 961 }, { "epoch": 0.08, "learning_rate": 1.9856006894845844e-05, "loss": 0.3265, "step": 962 }, { "epoch": 0.08, "learning_rate": 1.98555370818919e-05, "loss": 0.3215, "step": 963 }, { "epoch": 0.08, "learning_rate": 1.985506650932185e-05, "loss": 0.3504, "step": 964 }, { "epoch": 0.08, "learning_rate": 1.9854595177171968e-05, "loss": 0.3011, "step": 965 }, { "epoch": 0.08, "learning_rate": 1.9854123085478587e-05, "loss": 0.3987, "step": 966 }, { "epoch": 0.08, "learning_rate": 1.9853650234278088e-05, "loss": 0.3414, "step": 967 }, { "epoch": 0.08, "learning_rate": 1.9853176623606916e-05, "loss": 0.2996, "step": 968 }, { "epoch": 0.08, "learning_rate": 1.9852702253501578e-05, "loss": 0.3721, "step": 969 }, { "epoch": 0.08, "learning_rate": 1.985222712399863e-05, "loss": 0.2773, "step": 970 }, { "epoch": 0.08, "learning_rate": 1.98517512351347e-05, "loss": 0.3486, "step": 971 }, { "epoch": 0.08, "learning_rate": 1.9851274586946463e-05, "loss": 0.3467, "step": 972 }, { "epoch": 0.08, "learning_rate": 1.9850797179470657e-05, "loss": 0.3059, "step": 973 }, { "epoch": 0.08, "learning_rate": 1.985031901274408e-05, "loss": 0.2716, "step": 974 }, { "epoch": 0.08, "learning_rate": 1.9849840086803584e-05, "loss": 0.3488, "step": 975 }, { "epoch": 0.08, "learning_rate": 1.9849360401686084e-05, "loss": 0.371, "step": 976 }, { "epoch": 0.08, "learning_rate": 1.9848879957428552e-05, "loss": 0.5894, "step": 977 }, { "epoch": 0.08, "learning_rate": 1.9848398754068018e-05, "loss": 0.6475, "step": 978 }, { "epoch": 0.08, "learning_rate": 1.9847916791641567e-05, "loss": 0.3305, "step": 979 }, { "epoch": 0.08, "learning_rate": 1.9847434070186355e-05, "loss": 0.2983, "step": 980 }, { "epoch": 0.08, "learning_rate": 1.9846950589739576e-05, "loss": 0.3073, "step": 981 }, { "epoch": 0.08, "learning_rate": 1.9846466350338506e-05, "loss": 0.2871, "step": 982 }, { "epoch": 0.08, "learning_rate": 1.984598135202046e-05, "loss": 0.347, "step": 983 }, { "epoch": 0.08, "learning_rate": 1.9845495594822824e-05, "loss": 0.3453, "step": 984 }, { "epoch": 0.08, "learning_rate": 1.984500907878303e-05, "loss": 0.3809, "step": 985 }, { "epoch": 0.08, "learning_rate": 1.9844521803938588e-05, "loss": 0.3064, "step": 986 }, { "epoch": 0.08, "learning_rate": 1.9844033770327048e-05, "loss": 0.3229, "step": 987 }, { "epoch": 0.08, "learning_rate": 1.984354497798602e-05, "loss": 0.3281, "step": 988 }, { "epoch": 0.08, "learning_rate": 1.984305542695319e-05, "loss": 0.3356, "step": 989 }, { "epoch": 0.08, "learning_rate": 1.984256511726628e-05, "loss": 0.3574, "step": 990 }, { "epoch": 0.08, "learning_rate": 1.984207404896309e-05, "loss": 0.3558, "step": 991 }, { "epoch": 0.09, "learning_rate": 1.984158222208146e-05, "loss": 0.3175, "step": 992 }, { "epoch": 0.09, "learning_rate": 1.9841089636659296e-05, "loss": 0.3337, "step": 993 }, { "epoch": 0.09, "learning_rate": 1.9840596292734573e-05, "loss": 0.3245, "step": 994 }, { "epoch": 0.09, "learning_rate": 1.984010219034531e-05, "loss": 0.3109, "step": 995 }, { "epoch": 0.09, "learning_rate": 1.9839607329529594e-05, "loss": 0.3693, "step": 996 }, { "epoch": 0.09, "learning_rate": 1.983911171032556e-05, "loss": 0.3055, "step": 997 }, { "epoch": 0.09, "learning_rate": 1.983861533277142e-05, "loss": 0.3206, "step": 998 }, { "epoch": 0.09, "learning_rate": 1.9838118196905417e-05, "loss": 0.3652, "step": 999 }, { "epoch": 0.09, "learning_rate": 1.983762030276588e-05, "loss": 0.3318, "step": 1000 }, { "epoch": 0.09, "learning_rate": 1.9837121650391173e-05, "loss": 0.3504, "step": 1001 }, { "epoch": 0.09, "learning_rate": 1.9836622239819743e-05, "loss": 0.3115, "step": 1002 }, { "epoch": 0.09, "learning_rate": 1.983612207109007e-05, "loss": 0.36, "step": 1003 }, { "epoch": 0.09, "learning_rate": 1.983562114424071e-05, "loss": 0.332, "step": 1004 }, { "epoch": 0.09, "learning_rate": 1.983511945931027e-05, "loss": 0.3432, "step": 1005 }, { "epoch": 0.09, "learning_rate": 1.9834617016337424e-05, "loss": 0.36, "step": 1006 }, { "epoch": 0.09, "learning_rate": 1.983411381536089e-05, "loss": 0.3352, "step": 1007 }, { "epoch": 0.09, "learning_rate": 1.9833609856419452e-05, "loss": 0.3488, "step": 1008 }, { "epoch": 0.09, "learning_rate": 1.983310513955196e-05, "loss": 0.3171, "step": 1009 }, { "epoch": 0.09, "learning_rate": 1.9832599664797306e-05, "loss": 0.3448, "step": 1010 }, { "epoch": 0.09, "learning_rate": 1.983209343219446e-05, "loss": 0.3558, "step": 1011 }, { "epoch": 0.09, "learning_rate": 1.9831586441782427e-05, "loss": 0.3386, "step": 1012 }, { "epoch": 0.09, "learning_rate": 1.9831078693600295e-05, "loss": 0.3693, "step": 1013 }, { "epoch": 0.09, "learning_rate": 1.983057018768719e-05, "loss": 0.3177, "step": 1014 }, { "epoch": 0.09, "learning_rate": 1.9830060924082316e-05, "loss": 0.3682, "step": 1015 }, { "epoch": 0.09, "learning_rate": 1.9829550902824914e-05, "loss": 0.3482, "step": 1016 }, { "epoch": 0.09, "learning_rate": 1.98290401239543e-05, "loss": 0.3864, "step": 1017 }, { "epoch": 0.09, "learning_rate": 1.9828528587509836e-05, "loss": 0.299, "step": 1018 }, { "epoch": 0.09, "learning_rate": 1.9828016293530954e-05, "loss": 0.3115, "step": 1019 }, { "epoch": 0.09, "learning_rate": 1.982750324205714e-05, "loss": 0.329, "step": 1020 }, { "epoch": 0.09, "learning_rate": 1.982698943312793e-05, "loss": 0.3254, "step": 1021 }, { "epoch": 0.09, "learning_rate": 1.9826474866782933e-05, "loss": 0.3503, "step": 1022 }, { "epoch": 0.09, "learning_rate": 1.9825959543061812e-05, "loss": 0.3382, "step": 1023 }, { "epoch": 0.09, "learning_rate": 1.9825443462004278e-05, "loss": 0.3307, "step": 1024 }, { "epoch": 0.09, "learning_rate": 1.982492662365011e-05, "loss": 0.2749, "step": 1025 }, { "epoch": 0.09, "learning_rate": 1.9824409028039143e-05, "loss": 0.3265, "step": 1026 }, { "epoch": 0.09, "learning_rate": 1.9823890675211275e-05, "loss": 0.3478, "step": 1027 }, { "epoch": 0.09, "learning_rate": 1.9823371565206452e-05, "loss": 0.3239, "step": 1028 }, { "epoch": 0.09, "learning_rate": 1.9822851698064692e-05, "loss": 0.2896, "step": 1029 }, { "epoch": 0.09, "learning_rate": 1.9822331073826056e-05, "loss": 0.2916, "step": 1030 }, { "epoch": 0.09, "learning_rate": 1.9821809692530673e-05, "loss": 0.623, "step": 1031 }, { "epoch": 0.09, "learning_rate": 1.9821287554218733e-05, "loss": 0.3153, "step": 1032 }, { "epoch": 0.09, "learning_rate": 1.9820764658930477e-05, "loss": 0.3569, "step": 1033 }, { "epoch": 0.09, "learning_rate": 1.9820241006706203e-05, "loss": 0.3831, "step": 1034 }, { "epoch": 0.09, "learning_rate": 1.9819716597586277e-05, "loss": 0.6156, "step": 1035 }, { "epoch": 0.09, "learning_rate": 1.981919143161112e-05, "loss": 0.301, "step": 1036 }, { "epoch": 0.09, "learning_rate": 1.98186655088212e-05, "loss": 0.3411, "step": 1037 }, { "epoch": 0.09, "learning_rate": 1.9818138829257063e-05, "loss": 0.3174, "step": 1038 }, { "epoch": 0.09, "learning_rate": 1.9817611392959294e-05, "loss": 0.3257, "step": 1039 }, { "epoch": 0.09, "learning_rate": 1.9817083199968552e-05, "loss": 0.3298, "step": 1040 }, { "epoch": 0.09, "learning_rate": 1.981655425032554e-05, "loss": 0.3502, "step": 1041 }, { "epoch": 0.09, "learning_rate": 1.9816024544071038e-05, "loss": 0.3313, "step": 1042 }, { "epoch": 0.09, "learning_rate": 1.981549408124586e-05, "loss": 0.3438, "step": 1043 }, { "epoch": 0.09, "learning_rate": 1.9814962861890903e-05, "loss": 0.3137, "step": 1044 }, { "epoch": 0.09, "learning_rate": 1.9814430886047105e-05, "loss": 0.3065, "step": 1045 }, { "epoch": 0.09, "learning_rate": 1.9813898153755465e-05, "loss": 0.3173, "step": 1046 }, { "epoch": 0.09, "learning_rate": 1.981336466505705e-05, "loss": 0.2928, "step": 1047 }, { "epoch": 0.09, "learning_rate": 1.9812830419992976e-05, "loss": 0.5908, "step": 1048 }, { "epoch": 0.09, "learning_rate": 1.981229541860442e-05, "loss": 0.2985, "step": 1049 }, { "epoch": 0.09, "learning_rate": 1.981175966093262e-05, "loss": 0.3712, "step": 1050 }, { "epoch": 0.09, "learning_rate": 1.9811223147018862e-05, "loss": 0.3634, "step": 1051 }, { "epoch": 0.09, "learning_rate": 1.98106858769045e-05, "loss": 0.3686, "step": 1052 }, { "epoch": 0.09, "learning_rate": 1.981014785063095e-05, "loss": 0.3173, "step": 1053 }, { "epoch": 0.09, "learning_rate": 1.980960906823968e-05, "loss": 0.355, "step": 1054 }, { "epoch": 0.09, "learning_rate": 1.9809069529772215e-05, "loss": 0.3669, "step": 1055 }, { "epoch": 0.09, "learning_rate": 1.9808529235270134e-05, "loss": 0.2851, "step": 1056 }, { "epoch": 0.09, "learning_rate": 1.9807988184775085e-05, "loss": 0.3068, "step": 1057 }, { "epoch": 0.09, "learning_rate": 1.980744637832877e-05, "loss": 0.3116, "step": 1058 }, { "epoch": 0.09, "learning_rate": 1.980690381597295e-05, "loss": 0.3416, "step": 1059 }, { "epoch": 0.09, "learning_rate": 1.9806360497749436e-05, "loss": 0.3684, "step": 1060 }, { "epoch": 0.09, "learning_rate": 1.9805816423700114e-05, "loss": 0.2925, "step": 1061 }, { "epoch": 0.09, "learning_rate": 1.9805271593866914e-05, "loss": 0.3238, "step": 1062 }, { "epoch": 0.09, "learning_rate": 1.9804726008291827e-05, "loss": 0.3391, "step": 1063 }, { "epoch": 0.09, "learning_rate": 1.9804179667016906e-05, "loss": 0.3081, "step": 1064 }, { "epoch": 0.09, "learning_rate": 1.9803632570084265e-05, "loss": 0.2841, "step": 1065 }, { "epoch": 0.09, "learning_rate": 1.980308471753606e-05, "loss": 0.3983, "step": 1066 }, { "epoch": 0.09, "learning_rate": 1.9802536109414526e-05, "loss": 0.2763, "step": 1067 }, { "epoch": 0.09, "learning_rate": 1.980198674576194e-05, "loss": 0.3582, "step": 1068 }, { "epoch": 0.09, "learning_rate": 1.9801436626620658e-05, "loss": 0.3123, "step": 1069 }, { "epoch": 0.09, "learning_rate": 1.9800885752033067e-05, "loss": 0.3397, "step": 1070 }, { "epoch": 0.09, "learning_rate": 1.9800334122041626e-05, "loss": 0.6584, "step": 1071 }, { "epoch": 0.09, "learning_rate": 1.9799781736688862e-05, "loss": 0.3849, "step": 1072 }, { "epoch": 0.09, "learning_rate": 1.979922859601734e-05, "loss": 0.2826, "step": 1073 }, { "epoch": 0.09, "learning_rate": 1.9798674700069698e-05, "loss": 0.3252, "step": 1074 }, { "epoch": 0.09, "learning_rate": 1.9798120048888628e-05, "loss": 0.3536, "step": 1075 }, { "epoch": 0.09, "learning_rate": 1.9797564642516876e-05, "loss": 0.2996, "step": 1076 }, { "epoch": 0.09, "learning_rate": 1.9797008480997253e-05, "loss": 0.3048, "step": 1077 }, { "epoch": 0.09, "learning_rate": 1.9796451564372624e-05, "loss": 0.3671, "step": 1078 }, { "epoch": 0.09, "learning_rate": 1.9795893892685918e-05, "loss": 0.3156, "step": 1079 }, { "epoch": 0.09, "learning_rate": 1.979533546598011e-05, "loss": 0.3223, "step": 1080 }, { "epoch": 0.09, "learning_rate": 1.9794776284298247e-05, "loss": 0.2913, "step": 1081 }, { "epoch": 0.09, "learning_rate": 1.9794216347683425e-05, "loss": 0.3216, "step": 1082 }, { "epoch": 0.09, "learning_rate": 1.97936556561788e-05, "loss": 0.3663, "step": 1083 }, { "epoch": 0.09, "learning_rate": 1.979309420982759e-05, "loss": 0.3406, "step": 1084 }, { "epoch": 0.09, "learning_rate": 1.9792532008673067e-05, "loss": 0.3624, "step": 1085 }, { "epoch": 0.09, "learning_rate": 1.9791969052758563e-05, "loss": 0.3109, "step": 1086 }, { "epoch": 0.09, "learning_rate": 1.979140534212747e-05, "loss": 0.3622, "step": 1087 }, { "epoch": 0.09, "learning_rate": 1.979084087682323e-05, "loss": 0.2924, "step": 1088 }, { "epoch": 0.09, "learning_rate": 1.9790275656889356e-05, "loss": 0.3307, "step": 1089 }, { "epoch": 0.09, "learning_rate": 1.978970968236941e-05, "loss": 0.3334, "step": 1090 }, { "epoch": 0.09, "learning_rate": 1.978914295330701e-05, "loss": 0.312, "step": 1091 }, { "epoch": 0.09, "learning_rate": 1.9788575469745845e-05, "loss": 0.3537, "step": 1092 }, { "epoch": 0.09, "learning_rate": 1.9788007231729647e-05, "loss": 0.324, "step": 1093 }, { "epoch": 0.09, "learning_rate": 1.9787438239302217e-05, "loss": 0.3004, "step": 1094 }, { "epoch": 0.09, "learning_rate": 1.978686849250741e-05, "loss": 0.3788, "step": 1095 }, { "epoch": 0.09, "learning_rate": 1.9786297991389136e-05, "loss": 0.2927, "step": 1096 }, { "epoch": 0.09, "learning_rate": 1.978572673599137e-05, "loss": 0.3024, "step": 1097 }, { "epoch": 0.09, "learning_rate": 1.9785154726358134e-05, "loss": 0.363, "step": 1098 }, { "epoch": 0.09, "learning_rate": 1.9784581962533528e-05, "loss": 0.3209, "step": 1099 }, { "epoch": 0.09, "learning_rate": 1.9784008444561692e-05, "loss": 0.5852, "step": 1100 }, { "epoch": 0.09, "learning_rate": 1.9783434172486825e-05, "loss": 0.3198, "step": 1101 }, { "epoch": 0.09, "learning_rate": 1.9782859146353196e-05, "loss": 0.357, "step": 1102 }, { "epoch": 0.09, "learning_rate": 1.9782283366205122e-05, "loss": 0.3658, "step": 1103 }, { "epoch": 0.09, "learning_rate": 1.9781706832086984e-05, "loss": 0.3181, "step": 1104 }, { "epoch": 0.09, "learning_rate": 1.978112954404321e-05, "loss": 0.3444, "step": 1105 }, { "epoch": 0.09, "learning_rate": 1.9780551502118306e-05, "loss": 0.3747, "step": 1106 }, { "epoch": 0.09, "learning_rate": 1.9779972706356818e-05, "loss": 0.3378, "step": 1107 }, { "epoch": 0.09, "learning_rate": 1.9779393156803356e-05, "loss": 0.2866, "step": 1108 }, { "epoch": 0.1, "learning_rate": 1.9778812853502592e-05, "loss": 0.3322, "step": 1109 }, { "epoch": 0.1, "learning_rate": 1.9778231796499254e-05, "loss": 0.3243, "step": 1110 }, { "epoch": 0.1, "learning_rate": 1.9777649985838123e-05, "loss": 0.3409, "step": 1111 }, { "epoch": 0.1, "learning_rate": 1.9777067421564046e-05, "loss": 0.3838, "step": 1112 }, { "epoch": 0.1, "learning_rate": 1.9776484103721918e-05, "loss": 0.3428, "step": 1113 }, { "epoch": 0.1, "learning_rate": 1.9775900032356704e-05, "loss": 0.2975, "step": 1114 }, { "epoch": 0.1, "learning_rate": 1.977531520751342e-05, "loss": 0.332, "step": 1115 }, { "epoch": 0.1, "learning_rate": 1.9774729629237143e-05, "loss": 0.3278, "step": 1116 }, { "epoch": 0.1, "learning_rate": 1.9774143297573003e-05, "loss": 0.3423, "step": 1117 }, { "epoch": 0.1, "learning_rate": 1.977355621256619e-05, "loss": 0.3135, "step": 1118 }, { "epoch": 0.1, "learning_rate": 1.977296837426196e-05, "loss": 0.3113, "step": 1119 }, { "epoch": 0.1, "learning_rate": 1.9772379782705616e-05, "loss": 0.2944, "step": 1120 }, { "epoch": 0.1, "learning_rate": 1.9771790437942528e-05, "loss": 0.3194, "step": 1121 }, { "epoch": 0.1, "learning_rate": 1.9771200340018115e-05, "loss": 0.3669, "step": 1122 }, { "epoch": 0.1, "learning_rate": 1.977060948897786e-05, "loss": 0.3427, "step": 1123 }, { "epoch": 0.1, "learning_rate": 1.9770017884867306e-05, "loss": 0.2729, "step": 1124 }, { "epoch": 0.1, "learning_rate": 1.9769425527732046e-05, "loss": 0.3396, "step": 1125 }, { "epoch": 0.1, "learning_rate": 1.9768832417617737e-05, "loss": 0.3331, "step": 1126 }, { "epoch": 0.1, "learning_rate": 1.9768238554570098e-05, "loss": 0.2742, "step": 1127 }, { "epoch": 0.1, "learning_rate": 1.9767643938634896e-05, "loss": 0.3248, "step": 1128 }, { "epoch": 0.1, "learning_rate": 1.9767048569857963e-05, "loss": 0.2985, "step": 1129 }, { "epoch": 0.1, "learning_rate": 1.9766452448285184e-05, "loss": 0.3088, "step": 1130 }, { "epoch": 0.1, "learning_rate": 1.9765855573962512e-05, "loss": 0.3185, "step": 1131 }, { "epoch": 0.1, "learning_rate": 1.9765257946935944e-05, "loss": 0.3276, "step": 1132 }, { "epoch": 0.1, "learning_rate": 1.9764659567251546e-05, "loss": 0.3474, "step": 1133 }, { "epoch": 0.1, "learning_rate": 1.9764060434955437e-05, "loss": 0.3538, "step": 1134 }, { "epoch": 0.1, "learning_rate": 1.97634605500938e-05, "loss": 0.301, "step": 1135 }, { "epoch": 0.1, "learning_rate": 1.976285991271286e-05, "loss": 0.3145, "step": 1136 }, { "epoch": 0.1, "learning_rate": 1.9762258522858917e-05, "loss": 0.3041, "step": 1137 }, { "epoch": 0.1, "learning_rate": 1.9761656380578328e-05, "loss": 0.3067, "step": 1138 }, { "epoch": 0.1, "learning_rate": 1.9761053485917497e-05, "loss": 0.5691, "step": 1139 }, { "epoch": 0.1, "learning_rate": 1.9760449838922894e-05, "loss": 0.3526, "step": 1140 }, { "epoch": 0.1, "learning_rate": 1.9759845439641047e-05, "loss": 0.3307, "step": 1141 }, { "epoch": 0.1, "learning_rate": 1.9759240288118536e-05, "loss": 0.3066, "step": 1142 }, { "epoch": 0.1, "learning_rate": 1.9758634384402007e-05, "loss": 0.3082, "step": 1143 }, { "epoch": 0.1, "learning_rate": 1.9758027728538157e-05, "loss": 0.3555, "step": 1144 }, { "epoch": 0.1, "learning_rate": 1.9757420320573747e-05, "loss": 0.3967, "step": 1145 }, { "epoch": 0.1, "learning_rate": 1.9756812160555586e-05, "loss": 0.3606, "step": 1146 }, { "epoch": 0.1, "learning_rate": 1.975620324853056e-05, "loss": 0.269, "step": 1147 }, { "epoch": 0.1, "learning_rate": 1.9755593584545594e-05, "loss": 0.343, "step": 1148 }, { "epoch": 0.1, "learning_rate": 1.975498316864768e-05, "loss": 0.3651, "step": 1149 }, { "epoch": 0.1, "learning_rate": 1.975437200088386e-05, "loss": 0.3627, "step": 1150 }, { "epoch": 0.1, "learning_rate": 1.9753760081301242e-05, "loss": 0.3104, "step": 1151 }, { "epoch": 0.1, "learning_rate": 1.9753147409946997e-05, "loss": 0.2998, "step": 1152 }, { "epoch": 0.1, "learning_rate": 1.9752533986868337e-05, "loss": 0.3517, "step": 1153 }, { "epoch": 0.1, "learning_rate": 1.975191981211255e-05, "loss": 0.3054, "step": 1154 }, { "epoch": 0.1, "learning_rate": 1.9751304885726967e-05, "loss": 0.3893, "step": 1155 }, { "epoch": 0.1, "learning_rate": 1.9750689207758985e-05, "loss": 0.3115, "step": 1156 }, { "epoch": 0.1, "learning_rate": 1.975007277825606e-05, "loss": 0.2866, "step": 1157 }, { "epoch": 0.1, "learning_rate": 1.9749455597265704e-05, "loss": 0.3831, "step": 1158 }, { "epoch": 0.1, "learning_rate": 1.974883766483548e-05, "loss": 0.3689, "step": 1159 }, { "epoch": 0.1, "learning_rate": 1.974821898101302e-05, "loss": 0.3565, "step": 1160 }, { "epoch": 0.1, "learning_rate": 1.974759954584601e-05, "loss": 0.3069, "step": 1161 }, { "epoch": 0.1, "learning_rate": 1.9746979359382193e-05, "loss": 0.3671, "step": 1162 }, { "epoch": 0.1, "learning_rate": 1.9746358421669365e-05, "loss": 0.3289, "step": 1163 }, { "epoch": 0.1, "learning_rate": 1.974573673275539e-05, "loss": 0.2828, "step": 1164 }, { "epoch": 0.1, "learning_rate": 1.9745114292688183e-05, "loss": 0.3294, "step": 1165 }, { "epoch": 0.1, "learning_rate": 1.9744491101515715e-05, "loss": 0.3215, "step": 1166 }, { "epoch": 0.1, "learning_rate": 1.9743867159286022e-05, "loss": 0.2885, "step": 1167 }, { "epoch": 0.1, "learning_rate": 1.9743242466047196e-05, "loss": 0.2812, "step": 1168 }, { "epoch": 0.1, "learning_rate": 1.9742617021847385e-05, "loss": 0.3553, "step": 1169 }, { "epoch": 0.1, "learning_rate": 1.9741990826734793e-05, "loss": 0.3026, "step": 1170 }, { "epoch": 0.1, "learning_rate": 1.9741363880757682e-05, "loss": 0.3358, "step": 1171 }, { "epoch": 0.1, "learning_rate": 1.974073618396438e-05, "loss": 0.2916, "step": 1172 }, { "epoch": 0.1, "learning_rate": 1.974010773640326e-05, "loss": 0.3527, "step": 1173 }, { "epoch": 0.1, "learning_rate": 1.9739478538122765e-05, "loss": 0.6641, "step": 1174 }, { "epoch": 0.1, "learning_rate": 1.9738848589171388e-05, "loss": 0.3412, "step": 1175 }, { "epoch": 0.1, "learning_rate": 1.9738217889597684e-05, "loss": 0.3173, "step": 1176 }, { "epoch": 0.1, "learning_rate": 1.9737586439450262e-05, "loss": 0.319, "step": 1177 }, { "epoch": 0.1, "learning_rate": 1.9736954238777793e-05, "loss": 0.2921, "step": 1178 }, { "epoch": 0.1, "learning_rate": 1.9736321287629e-05, "loss": 0.3076, "step": 1179 }, { "epoch": 0.1, "learning_rate": 1.9735687586052673e-05, "loss": 0.3096, "step": 1180 }, { "epoch": 0.1, "learning_rate": 1.9735053134097653e-05, "loss": 0.356, "step": 1181 }, { "epoch": 0.1, "learning_rate": 1.973441793181284e-05, "loss": 0.2906, "step": 1182 }, { "epoch": 0.1, "learning_rate": 1.973378197924719e-05, "loss": 0.3333, "step": 1183 }, { "epoch": 0.1, "learning_rate": 1.973314527644972e-05, "loss": 0.3184, "step": 1184 }, { "epoch": 0.1, "learning_rate": 1.9732507823469507e-05, "loss": 0.3072, "step": 1185 }, { "epoch": 0.1, "learning_rate": 1.973186962035568e-05, "loss": 0.316, "step": 1186 }, { "epoch": 0.1, "learning_rate": 1.973123066715743e-05, "loss": 0.2676, "step": 1187 }, { "epoch": 0.1, "learning_rate": 1.9730590963924005e-05, "loss": 0.3167, "step": 1188 }, { "epoch": 0.1, "learning_rate": 1.9729950510704708e-05, "loss": 0.3391, "step": 1189 }, { "epoch": 0.1, "learning_rate": 1.97293093075489e-05, "loss": 0.3276, "step": 1190 }, { "epoch": 0.1, "learning_rate": 1.972866735450601e-05, "loss": 0.326, "step": 1191 }, { "epoch": 0.1, "learning_rate": 1.9728024651625506e-05, "loss": 0.3426, "step": 1192 }, { "epoch": 0.1, "learning_rate": 1.972738119895693e-05, "loss": 0.2963, "step": 1193 }, { "epoch": 0.1, "learning_rate": 1.972673699654988e-05, "loss": 0.3212, "step": 1194 }, { "epoch": 0.1, "learning_rate": 1.9726092044453996e-05, "loss": 0.3074, "step": 1195 }, { "epoch": 0.1, "learning_rate": 1.9725446342719e-05, "loss": 0.3315, "step": 1196 }, { "epoch": 0.1, "learning_rate": 1.9724799891394653e-05, "loss": 0.3264, "step": 1197 }, { "epoch": 0.1, "learning_rate": 1.9724152690530785e-05, "loss": 0.3649, "step": 1198 }, { "epoch": 0.1, "learning_rate": 1.9723504740177274e-05, "loss": 0.3009, "step": 1199 }, { "epoch": 0.1, "learning_rate": 1.972285604038406e-05, "loss": 0.3538, "step": 1200 }, { "epoch": 0.1, "learning_rate": 1.972220659120115e-05, "loss": 0.306, "step": 1201 }, { "epoch": 0.1, "learning_rate": 1.972155639267859e-05, "loss": 0.2983, "step": 1202 }, { "epoch": 0.1, "learning_rate": 1.97209054448665e-05, "loss": 0.3271, "step": 1203 }, { "epoch": 0.1, "learning_rate": 1.9720253747815055e-05, "loss": 0.3235, "step": 1204 }, { "epoch": 0.1, "learning_rate": 1.9719601301574476e-05, "loss": 0.3336, "step": 1205 }, { "epoch": 0.1, "learning_rate": 1.9718948106195055e-05, "loss": 0.3091, "step": 1206 }, { "epoch": 0.1, "learning_rate": 1.971829416172714e-05, "loss": 0.3513, "step": 1207 }, { "epoch": 0.1, "learning_rate": 1.9717639468221127e-05, "loss": 0.3291, "step": 1208 }, { "epoch": 0.1, "learning_rate": 1.9716984025727478e-05, "loss": 0.3432, "step": 1209 }, { "epoch": 0.1, "learning_rate": 1.971632783429672e-05, "loss": 0.3279, "step": 1210 }, { "epoch": 0.1, "learning_rate": 1.9715670893979416e-05, "loss": 0.3201, "step": 1211 }, { "epoch": 0.1, "learning_rate": 1.971501320482621e-05, "loss": 0.3038, "step": 1212 }, { "epoch": 0.1, "learning_rate": 1.9714354766887787e-05, "loss": 0.3313, "step": 1213 }, { "epoch": 0.1, "learning_rate": 1.97136955802149e-05, "loss": 0.3126, "step": 1214 }, { "epoch": 0.1, "learning_rate": 1.9713035644858354e-05, "loss": 0.3312, "step": 1215 }, { "epoch": 0.1, "learning_rate": 1.9712374960869015e-05, "loss": 0.6537, "step": 1216 }, { "epoch": 0.1, "learning_rate": 1.9711713528297802e-05, "loss": 0.2822, "step": 1217 }, { "epoch": 0.1, "learning_rate": 1.97110513471957e-05, "loss": 0.3393, "step": 1218 }, { "epoch": 0.1, "learning_rate": 1.9710388417613742e-05, "loss": 0.2998, "step": 1219 }, { "epoch": 0.1, "learning_rate": 1.9709724739603024e-05, "loss": 0.3191, "step": 1220 }, { "epoch": 0.1, "learning_rate": 1.9709060313214703e-05, "loss": 0.3303, "step": 1221 }, { "epoch": 0.1, "learning_rate": 1.9708395138499986e-05, "loss": 0.3647, "step": 1222 }, { "epoch": 0.1, "learning_rate": 1.9707729215510143e-05, "loss": 0.285, "step": 1223 }, { "epoch": 0.1, "learning_rate": 1.9707062544296497e-05, "loss": 0.3701, "step": 1224 }, { "epoch": 0.11, "learning_rate": 1.970639512491044e-05, "loss": 0.3284, "step": 1225 }, { "epoch": 0.11, "learning_rate": 1.9705726957403398e-05, "loss": 0.2914, "step": 1226 }, { "epoch": 0.11, "learning_rate": 1.9705058041826887e-05, "loss": 0.284, "step": 1227 }, { "epoch": 0.11, "learning_rate": 1.9704388378232454e-05, "loss": 0.3489, "step": 1228 }, { "epoch": 0.11, "learning_rate": 1.9703717966671715e-05, "loss": 0.2979, "step": 1229 }, { "epoch": 0.11, "learning_rate": 1.970304680719634e-05, "loss": 0.3108, "step": 1230 }, { "epoch": 0.11, "learning_rate": 1.9702374899858067e-05, "loss": 0.3232, "step": 1231 }, { "epoch": 0.11, "learning_rate": 1.9701702244708673e-05, "loss": 0.287, "step": 1232 }, { "epoch": 0.11, "learning_rate": 1.970102884180001e-05, "loss": 0.2937, "step": 1233 }, { "epoch": 0.11, "learning_rate": 1.9700354691183977e-05, "loss": 0.3445, "step": 1234 }, { "epoch": 0.11, "learning_rate": 1.969967979291254e-05, "loss": 0.3489, "step": 1235 }, { "epoch": 0.11, "learning_rate": 1.9699004147037706e-05, "loss": 0.3497, "step": 1236 }, { "epoch": 0.11, "learning_rate": 1.9698327753611557e-05, "loss": 0.3231, "step": 1237 }, { "epoch": 0.11, "learning_rate": 1.9697650612686228e-05, "loss": 0.3339, "step": 1238 }, { "epoch": 0.11, "learning_rate": 1.9696972724313904e-05, "loss": 0.3009, "step": 1239 }, { "epoch": 0.11, "learning_rate": 1.9696294088546842e-05, "loss": 0.3696, "step": 1240 }, { "epoch": 0.11, "learning_rate": 1.969561470543734e-05, "loss": 0.3711, "step": 1241 }, { "epoch": 0.11, "learning_rate": 1.9694934575037762e-05, "loss": 0.3234, "step": 1242 }, { "epoch": 0.11, "learning_rate": 1.9694253697400532e-05, "loss": 0.3335, "step": 1243 }, { "epoch": 0.11, "learning_rate": 1.9693572072578127e-05, "loss": 0.3385, "step": 1244 }, { "epoch": 0.11, "learning_rate": 1.9692889700623084e-05, "loss": 0.3211, "step": 1245 }, { "epoch": 0.11, "learning_rate": 1.9692206581588e-05, "loss": 0.3268, "step": 1246 }, { "epoch": 0.11, "learning_rate": 1.969152271552552e-05, "loss": 0.3088, "step": 1247 }, { "epoch": 0.11, "learning_rate": 1.9690838102488356e-05, "loss": 0.316, "step": 1248 }, { "epoch": 0.11, "learning_rate": 1.9690152742529277e-05, "loss": 0.3044, "step": 1249 }, { "epoch": 0.11, "learning_rate": 1.9689466635701106e-05, "loss": 0.2941, "step": 1250 }, { "epoch": 0.11, "learning_rate": 1.968877978205672e-05, "loss": 0.2966, "step": 1251 }, { "epoch": 0.11, "learning_rate": 1.9688092181649065e-05, "loss": 0.3041, "step": 1252 }, { "epoch": 0.11, "learning_rate": 1.9687403834531133e-05, "loss": 0.3092, "step": 1253 }, { "epoch": 0.11, "learning_rate": 1.968671474075598e-05, "loss": 0.3373, "step": 1254 }, { "epoch": 0.11, "learning_rate": 1.968602490037672e-05, "loss": 0.2792, "step": 1255 }, { "epoch": 0.11, "learning_rate": 1.9685334313446523e-05, "loss": 0.3065, "step": 1256 }, { "epoch": 0.11, "learning_rate": 1.968464298001861e-05, "loss": 0.3174, "step": 1257 }, { "epoch": 0.11, "learning_rate": 1.968395090014627e-05, "loss": 0.3482, "step": 1258 }, { "epoch": 0.11, "learning_rate": 1.9683258073882845e-05, "loss": 0.2797, "step": 1259 }, { "epoch": 0.11, "learning_rate": 1.9682564501281733e-05, "loss": 0.3176, "step": 1260 }, { "epoch": 0.11, "learning_rate": 1.9681870182396395e-05, "loss": 0.632, "step": 1261 }, { "epoch": 0.11, "learning_rate": 1.9681175117280343e-05, "loss": 0.3442, "step": 1262 }, { "epoch": 0.11, "learning_rate": 1.9680479305987147e-05, "loss": 0.3046, "step": 1263 }, { "epoch": 0.11, "learning_rate": 1.9679782748570434e-05, "loss": 0.3264, "step": 1264 }, { "epoch": 0.11, "learning_rate": 1.9679085445083903e-05, "loss": 0.321, "step": 1265 }, { "epoch": 0.11, "learning_rate": 1.9678387395581292e-05, "loss": 0.2701, "step": 1266 }, { "epoch": 0.11, "learning_rate": 1.96776886001164e-05, "loss": 0.2859, "step": 1267 }, { "epoch": 0.11, "learning_rate": 1.9676989058743088e-05, "loss": 0.3779, "step": 1268 }, { "epoch": 0.11, "learning_rate": 1.9676288771515275e-05, "loss": 0.335, "step": 1269 }, { "epoch": 0.11, "learning_rate": 1.9675587738486935e-05, "loss": 0.3172, "step": 1270 }, { "epoch": 0.11, "learning_rate": 1.9674885959712106e-05, "loss": 0.3051, "step": 1271 }, { "epoch": 0.11, "learning_rate": 1.9674183435244867e-05, "loss": 0.3927, "step": 1272 }, { "epoch": 0.11, "learning_rate": 1.9673480165139372e-05, "loss": 0.3196, "step": 1273 }, { "epoch": 0.11, "learning_rate": 1.9672776149449826e-05, "loss": 0.3372, "step": 1274 }, { "epoch": 0.11, "learning_rate": 1.9672071388230485e-05, "loss": 0.3205, "step": 1275 }, { "epoch": 0.11, "learning_rate": 1.967136588153568e-05, "loss": 0.604, "step": 1276 }, { "epoch": 0.11, "learning_rate": 1.9670659629419774e-05, "loss": 0.316, "step": 1277 }, { "epoch": 0.11, "learning_rate": 1.9669952631937206e-05, "loss": 0.3635, "step": 1278 }, { "epoch": 0.11, "learning_rate": 1.966924488914247e-05, "loss": 0.3298, "step": 1279 }, { "epoch": 0.11, "learning_rate": 1.9668536401090123e-05, "loss": 0.3095, "step": 1280 }, { "epoch": 0.11, "learning_rate": 1.9667827167834756e-05, "loss": 0.3401, "step": 1281 }, { "epoch": 0.11, "learning_rate": 1.9667117189431045e-05, "loss": 0.3059, "step": 1282 }, { "epoch": 0.11, "learning_rate": 1.9666406465933703e-05, "loss": 0.2832, "step": 1283 }, { "epoch": 0.11, "learning_rate": 1.966569499739752e-05, "loss": 0.3093, "step": 1284 }, { "epoch": 0.11, "learning_rate": 1.9664982783877323e-05, "loss": 0.3404, "step": 1285 }, { "epoch": 0.11, "learning_rate": 1.966426982542801e-05, "loss": 0.3428, "step": 1286 }, { "epoch": 0.11, "learning_rate": 1.966355612210453e-05, "loss": 0.3384, "step": 1287 }, { "epoch": 0.11, "learning_rate": 1.9662841673961893e-05, "loss": 0.2938, "step": 1288 }, { "epoch": 0.11, "learning_rate": 1.9662126481055165e-05, "loss": 0.3517, "step": 1289 }, { "epoch": 0.11, "learning_rate": 1.966141054343947e-05, "loss": 0.386, "step": 1290 }, { "epoch": 0.11, "learning_rate": 1.9660693861169992e-05, "loss": 0.3474, "step": 1291 }, { "epoch": 0.11, "learning_rate": 1.9659976434301967e-05, "loss": 0.2773, "step": 1292 }, { "epoch": 0.11, "learning_rate": 1.9659258262890683e-05, "loss": 0.3221, "step": 1293 }, { "epoch": 0.11, "learning_rate": 1.9658539346991504e-05, "loss": 0.3381, "step": 1294 }, { "epoch": 0.11, "learning_rate": 1.9657819686659838e-05, "loss": 0.3088, "step": 1295 }, { "epoch": 0.11, "learning_rate": 1.9657099281951148e-05, "loss": 0.2853, "step": 1296 }, { "epoch": 0.11, "learning_rate": 1.9656378132920964e-05, "loss": 0.3268, "step": 1297 }, { "epoch": 0.11, "learning_rate": 1.9655656239624864e-05, "loss": 0.3094, "step": 1298 }, { "epoch": 0.11, "learning_rate": 1.9654933602118494e-05, "loss": 0.3052, "step": 1299 }, { "epoch": 0.11, "learning_rate": 1.965421022045755e-05, "loss": 0.6509, "step": 1300 }, { "epoch": 0.11, "learning_rate": 1.9653486094697785e-05, "loss": 0.3095, "step": 1301 }, { "epoch": 0.11, "learning_rate": 1.9652761224895006e-05, "loss": 0.3526, "step": 1302 }, { "epoch": 0.11, "learning_rate": 1.9652035611105093e-05, "loss": 0.3631, "step": 1303 }, { "epoch": 0.11, "learning_rate": 1.9651309253383964e-05, "loss": 0.3007, "step": 1304 }, { "epoch": 0.11, "learning_rate": 1.9650582151787608e-05, "loss": 0.3162, "step": 1305 }, { "epoch": 0.11, "learning_rate": 1.9649854306372065e-05, "loss": 0.2925, "step": 1306 }, { "epoch": 0.11, "learning_rate": 1.964912571719343e-05, "loss": 0.3151, "step": 1307 }, { "epoch": 0.11, "learning_rate": 1.9648396384307866e-05, "loss": 0.3362, "step": 1308 }, { "epoch": 0.11, "learning_rate": 1.964766630777158e-05, "loss": 0.3009, "step": 1309 }, { "epoch": 0.11, "learning_rate": 1.9646935487640848e-05, "loss": 0.3253, "step": 1310 }, { "epoch": 0.11, "learning_rate": 1.9646203923971992e-05, "loss": 0.2739, "step": 1311 }, { "epoch": 0.11, "learning_rate": 1.9645471616821404e-05, "loss": 0.2913, "step": 1312 }, { "epoch": 0.11, "learning_rate": 1.9644738566245526e-05, "loss": 0.3408, "step": 1313 }, { "epoch": 0.11, "learning_rate": 1.964400477230085e-05, "loss": 0.348, "step": 1314 }, { "epoch": 0.11, "learning_rate": 1.9643270235043942e-05, "loss": 0.3062, "step": 1315 }, { "epoch": 0.11, "learning_rate": 1.964253495453141e-05, "loss": 0.358, "step": 1316 }, { "epoch": 0.11, "learning_rate": 1.9641798930819932e-05, "loss": 0.3432, "step": 1317 }, { "epoch": 0.11, "learning_rate": 1.9641062163966232e-05, "loss": 0.3134, "step": 1318 }, { "epoch": 0.11, "learning_rate": 1.96403246540271e-05, "loss": 0.3087, "step": 1319 }, { "epoch": 0.11, "learning_rate": 1.9639586401059376e-05, "loss": 0.3083, "step": 1320 }, { "epoch": 0.11, "learning_rate": 1.9638847405119966e-05, "loss": 0.3452, "step": 1321 }, { "epoch": 0.11, "learning_rate": 1.963810766626582e-05, "loss": 0.2977, "step": 1322 }, { "epoch": 0.11, "learning_rate": 1.963736718455396e-05, "loss": 0.3087, "step": 1323 }, { "epoch": 0.11, "learning_rate": 1.963662596004146e-05, "loss": 0.2944, "step": 1324 }, { "epoch": 0.11, "learning_rate": 1.9635883992785443e-05, "loss": 0.2821, "step": 1325 }, { "epoch": 0.11, "learning_rate": 1.9635141282843105e-05, "loss": 0.3586, "step": 1326 }, { "epoch": 0.11, "learning_rate": 1.9634397830271685e-05, "loss": 0.3307, "step": 1327 }, { "epoch": 0.11, "learning_rate": 1.963365363512848e-05, "loss": 0.2733, "step": 1328 }, { "epoch": 0.11, "learning_rate": 1.9632908697470857e-05, "loss": 0.3167, "step": 1329 }, { "epoch": 0.11, "learning_rate": 1.963216301735623e-05, "loss": 0.3754, "step": 1330 }, { "epoch": 0.11, "learning_rate": 1.963141659484207e-05, "loss": 0.3229, "step": 1331 }, { "epoch": 0.11, "learning_rate": 1.9630669429985908e-05, "loss": 0.2964, "step": 1332 }, { "epoch": 0.11, "learning_rate": 1.9629921522845338e-05, "loss": 0.2958, "step": 1333 }, { "epoch": 0.11, "learning_rate": 1.9629172873477995e-05, "loss": 0.3058, "step": 1334 }, { "epoch": 0.11, "learning_rate": 1.9628423481941587e-05, "loss": 0.3494, "step": 1335 }, { "epoch": 0.11, "learning_rate": 1.9627673348293874e-05, "loss": 0.3159, "step": 1336 }, { "epoch": 0.11, "learning_rate": 1.962692247259267e-05, "loss": 0.3448, "step": 1337 }, { "epoch": 0.11, "learning_rate": 1.962617085489585e-05, "loss": 0.3494, "step": 1338 }, { "epoch": 0.11, "learning_rate": 1.9625418495261343e-05, "loss": 0.3315, "step": 1339 }, { "epoch": 0.11, "learning_rate": 1.962466539374714e-05, "loss": 0.3564, "step": 1340 }, { "epoch": 0.11, "learning_rate": 1.9623911550411286e-05, "loss": 0.3583, "step": 1341 }, { "epoch": 0.12, "learning_rate": 1.9623156965311884e-05, "loss": 0.2705, "step": 1342 }, { "epoch": 0.12, "learning_rate": 1.9622401638507093e-05, "loss": 0.2719, "step": 1343 }, { "epoch": 0.12, "learning_rate": 1.9621645570055127e-05, "loss": 0.3119, "step": 1344 }, { "epoch": 0.12, "learning_rate": 1.9620888760014262e-05, "loss": 0.3333, "step": 1345 }, { "epoch": 0.12, "learning_rate": 1.962013120844283e-05, "loss": 0.3217, "step": 1346 }, { "epoch": 0.12, "learning_rate": 1.961937291539922e-05, "loss": 0.3124, "step": 1347 }, { "epoch": 0.12, "learning_rate": 1.9618613880941876e-05, "loss": 0.3288, "step": 1348 }, { "epoch": 0.12, "learning_rate": 1.9617854105129303e-05, "loss": 0.3229, "step": 1349 }, { "epoch": 0.12, "learning_rate": 1.9617093588020057e-05, "loss": 0.3679, "step": 1350 }, { "epoch": 0.12, "learning_rate": 1.9616332329672756e-05, "loss": 0.353, "step": 1351 }, { "epoch": 0.12, "learning_rate": 1.9615570330146077e-05, "loss": 0.3467, "step": 1352 }, { "epoch": 0.12, "learning_rate": 1.9614807589498746e-05, "loss": 0.3987, "step": 1353 }, { "epoch": 0.12, "learning_rate": 1.9614044107789553e-05, "loss": 0.3444, "step": 1354 }, { "epoch": 0.12, "learning_rate": 1.9613279885077347e-05, "loss": 0.2725, "step": 1355 }, { "epoch": 0.12, "learning_rate": 1.9612514921421028e-05, "loss": 0.3038, "step": 1356 }, { "epoch": 0.12, "learning_rate": 1.9611749216879555e-05, "loss": 0.3329, "step": 1357 }, { "epoch": 0.12, "learning_rate": 1.9610982771511947e-05, "loss": 0.2932, "step": 1358 }, { "epoch": 0.12, "learning_rate": 1.9610215585377275e-05, "loss": 0.3118, "step": 1359 }, { "epoch": 0.12, "learning_rate": 1.9609447658534673e-05, "loss": 0.3145, "step": 1360 }, { "epoch": 0.12, "learning_rate": 1.9608678991043325e-05, "loss": 0.3352, "step": 1361 }, { "epoch": 0.12, "learning_rate": 1.9607909582962478e-05, "loss": 0.2866, "step": 1362 }, { "epoch": 0.12, "learning_rate": 1.9607139434351435e-05, "loss": 0.2622, "step": 1363 }, { "epoch": 0.12, "learning_rate": 1.9606368545269557e-05, "loss": 0.3335, "step": 1364 }, { "epoch": 0.12, "learning_rate": 1.9605596915776254e-05, "loss": 0.2919, "step": 1365 }, { "epoch": 0.12, "learning_rate": 1.9604824545931005e-05, "loss": 0.3305, "step": 1366 }, { "epoch": 0.12, "learning_rate": 1.9604051435793342e-05, "loss": 0.3397, "step": 1367 }, { "epoch": 0.12, "learning_rate": 1.9603277585422847e-05, "loss": 0.3036, "step": 1368 }, { "epoch": 0.12, "learning_rate": 1.9602502994879163e-05, "loss": 0.3799, "step": 1369 }, { "epoch": 0.12, "learning_rate": 1.9601727664222e-05, "loss": 0.2985, "step": 1370 }, { "epoch": 0.12, "learning_rate": 1.960095159351111e-05, "loss": 0.3703, "step": 1371 }, { "epoch": 0.12, "learning_rate": 1.9600174782806313e-05, "loss": 0.3688, "step": 1372 }, { "epoch": 0.12, "learning_rate": 1.9599397232167478e-05, "loss": 0.2938, "step": 1373 }, { "epoch": 0.12, "learning_rate": 1.9598618941654535e-05, "loss": 0.3423, "step": 1374 }, { "epoch": 0.12, "learning_rate": 1.9597839911327475e-05, "loss": 0.3302, "step": 1375 }, { "epoch": 0.12, "learning_rate": 1.959706014124634e-05, "loss": 0.3286, "step": 1376 }, { "epoch": 0.12, "learning_rate": 1.9596279631471228e-05, "loss": 0.3239, "step": 1377 }, { "epoch": 0.12, "learning_rate": 1.9595498382062295e-05, "loss": 0.2922, "step": 1378 }, { "epoch": 0.12, "learning_rate": 1.9594716393079765e-05, "loss": 0.3518, "step": 1379 }, { "epoch": 0.12, "learning_rate": 1.9593933664583903e-05, "loss": 0.3268, "step": 1380 }, { "epoch": 0.12, "learning_rate": 1.9593150196635037e-05, "loss": 0.3065, "step": 1381 }, { "epoch": 0.12, "learning_rate": 1.9592365989293557e-05, "loss": 0.2964, "step": 1382 }, { "epoch": 0.12, "learning_rate": 1.95915810426199e-05, "loss": 0.3573, "step": 1383 }, { "epoch": 0.12, "learning_rate": 1.9590795356674578e-05, "loss": 0.34, "step": 1384 }, { "epoch": 0.12, "learning_rate": 1.9590008931518133e-05, "loss": 0.3181, "step": 1385 }, { "epoch": 0.12, "learning_rate": 1.9589221767211188e-05, "loss": 0.3452, "step": 1386 }, { "epoch": 0.12, "learning_rate": 1.9588433863814405e-05, "loss": 0.2997, "step": 1387 }, { "epoch": 0.12, "learning_rate": 1.9587645221388522e-05, "loss": 0.3014, "step": 1388 }, { "epoch": 0.12, "learning_rate": 1.958685583999432e-05, "loss": 0.3198, "step": 1389 }, { "epoch": 0.12, "learning_rate": 1.9586065719692636e-05, "loss": 0.3335, "step": 1390 }, { "epoch": 0.12, "learning_rate": 1.958527486054438e-05, "loss": 0.3108, "step": 1391 }, { "epoch": 0.12, "learning_rate": 1.9584483262610492e-05, "loss": 0.3513, "step": 1392 }, { "epoch": 0.12, "learning_rate": 1.9583690925951996e-05, "loss": 0.2822, "step": 1393 }, { "epoch": 0.12, "learning_rate": 1.9582897850629958e-05, "loss": 0.3318, "step": 1394 }, { "epoch": 0.12, "learning_rate": 1.9582104036705506e-05, "loss": 0.2783, "step": 1395 }, { "epoch": 0.12, "learning_rate": 1.9581309484239818e-05, "loss": 0.3094, "step": 1396 }, { "epoch": 0.12, "learning_rate": 1.9580514193294137e-05, "loss": 0.3391, "step": 1397 }, { "epoch": 0.12, "learning_rate": 1.9579718163929767e-05, "loss": 0.2934, "step": 1398 }, { "epoch": 0.12, "learning_rate": 1.957892139620805e-05, "loss": 0.3187, "step": 1399 }, { "epoch": 0.12, "learning_rate": 1.9578123890190405e-05, "loss": 0.3441, "step": 1400 }, { "epoch": 0.12, "learning_rate": 1.9577325645938294e-05, "loss": 0.3539, "step": 1401 }, { "epoch": 0.12, "learning_rate": 1.957652666351325e-05, "loss": 0.2948, "step": 1402 }, { "epoch": 0.12, "learning_rate": 1.9575726942976844e-05, "loss": 0.3293, "step": 1403 }, { "epoch": 0.12, "learning_rate": 1.9574926484390725e-05, "loss": 0.3058, "step": 1404 }, { "epoch": 0.12, "learning_rate": 1.9574125287816582e-05, "loss": 0.3119, "step": 1405 }, { "epoch": 0.12, "learning_rate": 1.957332335331617e-05, "loss": 0.3138, "step": 1406 }, { "epoch": 0.12, "learning_rate": 1.95725206809513e-05, "loss": 0.3298, "step": 1407 }, { "epoch": 0.12, "learning_rate": 1.9571717270783827e-05, "loss": 0.2903, "step": 1408 }, { "epoch": 0.12, "learning_rate": 1.9570913122875686e-05, "loss": 0.3236, "step": 1409 }, { "epoch": 0.12, "learning_rate": 1.9570108237288853e-05, "loss": 0.316, "step": 1410 }, { "epoch": 0.12, "learning_rate": 1.9569302614085366e-05, "loss": 0.3088, "step": 1411 }, { "epoch": 0.12, "learning_rate": 1.9568496253327317e-05, "loss": 0.3454, "step": 1412 }, { "epoch": 0.12, "learning_rate": 1.956768915507685e-05, "loss": 0.3477, "step": 1413 }, { "epoch": 0.12, "learning_rate": 1.9566881319396184e-05, "loss": 0.3123, "step": 1414 }, { "epoch": 0.12, "learning_rate": 1.9566072746347576e-05, "loss": 0.3279, "step": 1415 }, { "epoch": 0.12, "learning_rate": 1.956526343599335e-05, "loss": 0.3585, "step": 1416 }, { "epoch": 0.12, "learning_rate": 1.956445338839588e-05, "loss": 0.316, "step": 1417 }, { "epoch": 0.12, "learning_rate": 1.95636426036176e-05, "loss": 0.2994, "step": 1418 }, { "epoch": 0.12, "learning_rate": 1.9562831081721007e-05, "loss": 0.318, "step": 1419 }, { "epoch": 0.12, "learning_rate": 1.956201882276864e-05, "loss": 0.2904, "step": 1420 }, { "epoch": 0.12, "learning_rate": 1.9561205826823116e-05, "loss": 0.3336, "step": 1421 }, { "epoch": 0.12, "learning_rate": 1.956039209394709e-05, "loss": 0.3124, "step": 1422 }, { "epoch": 0.12, "learning_rate": 1.9559577624203277e-05, "loss": 0.3179, "step": 1423 }, { "epoch": 0.12, "learning_rate": 1.9558762417654456e-05, "loss": 0.313, "step": 1424 }, { "epoch": 0.12, "learning_rate": 1.9557946474363462e-05, "loss": 0.3146, "step": 1425 }, { "epoch": 0.12, "learning_rate": 1.955712979439318e-05, "loss": 0.3507, "step": 1426 }, { "epoch": 0.12, "learning_rate": 1.9556312377806555e-05, "loss": 0.3499, "step": 1427 }, { "epoch": 0.12, "learning_rate": 1.955549422466659e-05, "loss": 0.3262, "step": 1428 }, { "epoch": 0.12, "learning_rate": 1.955467533503635e-05, "loss": 0.282, "step": 1429 }, { "epoch": 0.12, "learning_rate": 1.9553855708978943e-05, "loss": 0.3256, "step": 1430 }, { "epoch": 0.12, "learning_rate": 1.9553035346557543e-05, "loss": 0.3435, "step": 1431 }, { "epoch": 0.12, "learning_rate": 1.9552214247835387e-05, "loss": 0.3371, "step": 1432 }, { "epoch": 0.12, "learning_rate": 1.955139241287575e-05, "loss": 0.3386, "step": 1433 }, { "epoch": 0.12, "learning_rate": 1.9550569841741984e-05, "loss": 0.3165, "step": 1434 }, { "epoch": 0.12, "learning_rate": 1.9549746534497484e-05, "loss": 0.3429, "step": 1435 }, { "epoch": 0.12, "learning_rate": 1.9548922491205708e-05, "loss": 0.3297, "step": 1436 }, { "epoch": 0.12, "learning_rate": 1.954809771193017e-05, "loss": 0.3159, "step": 1437 }, { "epoch": 0.12, "learning_rate": 1.9547272196734436e-05, "loss": 0.2679, "step": 1438 }, { "epoch": 0.12, "learning_rate": 1.954644594568214e-05, "loss": 0.28, "step": 1439 }, { "epoch": 0.12, "learning_rate": 1.9545618958836957e-05, "loss": 0.2993, "step": 1440 }, { "epoch": 0.12, "learning_rate": 1.9544791236262634e-05, "loss": 0.295, "step": 1441 }, { "epoch": 0.12, "learning_rate": 1.954396277802296e-05, "loss": 0.295, "step": 1442 }, { "epoch": 0.12, "learning_rate": 1.9543133584181798e-05, "loss": 0.345, "step": 1443 }, { "epoch": 0.12, "learning_rate": 1.954230365480305e-05, "loss": 0.3275, "step": 1444 }, { "epoch": 0.12, "learning_rate": 1.954147298995069e-05, "loss": 0.2953, "step": 1445 }, { "epoch": 0.12, "learning_rate": 1.9540641589688735e-05, "loss": 0.4037, "step": 1446 }, { "epoch": 0.12, "learning_rate": 1.953980945408127e-05, "loss": 0.3205, "step": 1447 }, { "epoch": 0.12, "learning_rate": 1.9538976583192428e-05, "loss": 0.3191, "step": 1448 }, { "epoch": 0.12, "learning_rate": 1.9538142977086406e-05, "loss": 0.3475, "step": 1449 }, { "epoch": 0.12, "learning_rate": 1.953730863582745e-05, "loss": 0.3641, "step": 1450 }, { "epoch": 0.12, "learning_rate": 1.9536473559479873e-05, "loss": 0.6105, "step": 1451 }, { "epoch": 0.12, "learning_rate": 1.953563774810803e-05, "loss": 0.2547, "step": 1452 }, { "epoch": 0.12, "learning_rate": 1.953480120177635e-05, "loss": 0.353, "step": 1453 }, { "epoch": 0.12, "learning_rate": 1.9533963920549307e-05, "loss": 0.6849, "step": 1454 }, { "epoch": 0.12, "learning_rate": 1.9533125904491433e-05, "loss": 0.384, "step": 1455 }, { "epoch": 0.12, "learning_rate": 1.953228715366732e-05, "loss": 0.3024, "step": 1456 }, { "epoch": 0.12, "learning_rate": 1.953144766814161e-05, "loss": 0.2779, "step": 1457 }, { "epoch": 0.12, "learning_rate": 1.953060744797901e-05, "loss": 0.2972, "step": 1458 }, { "epoch": 0.13, "learning_rate": 1.9529766493244284e-05, "loss": 0.3231, "step": 1459 }, { "epoch": 0.13, "learning_rate": 1.952892480400224e-05, "loss": 0.5813, "step": 1460 }, { "epoch": 0.13, "learning_rate": 1.952808238031776e-05, "loss": 0.3079, "step": 1461 }, { "epoch": 0.13, "learning_rate": 1.952723922225577e-05, "loss": 0.2817, "step": 1462 }, { "epoch": 0.13, "learning_rate": 1.9526395329881253e-05, "loss": 0.3154, "step": 1463 }, { "epoch": 0.13, "learning_rate": 1.9525550703259257e-05, "loss": 0.32, "step": 1464 }, { "epoch": 0.13, "learning_rate": 1.952470534245488e-05, "loss": 0.3211, "step": 1465 }, { "epoch": 0.13, "learning_rate": 1.952385924753328e-05, "loss": 0.2892, "step": 1466 }, { "epoch": 0.13, "learning_rate": 1.9523012418559663e-05, "loss": 0.3455, "step": 1467 }, { "epoch": 0.13, "learning_rate": 1.9522164855599306e-05, "loss": 0.3442, "step": 1468 }, { "epoch": 0.13, "learning_rate": 1.9521316558717534e-05, "loss": 0.2601, "step": 1469 }, { "epoch": 0.13, "learning_rate": 1.9520467527979726e-05, "loss": 0.2995, "step": 1470 }, { "epoch": 0.13, "learning_rate": 1.9519617763451322e-05, "loss": 0.2793, "step": 1471 }, { "epoch": 0.13, "learning_rate": 1.9518767265197823e-05, "loss": 0.2986, "step": 1472 }, { "epoch": 0.13, "learning_rate": 1.951791603328477e-05, "loss": 0.3049, "step": 1473 }, { "epoch": 0.13, "learning_rate": 1.9517064067777786e-05, "loss": 0.28, "step": 1474 }, { "epoch": 0.13, "learning_rate": 1.9516211368742524e-05, "loss": 0.3165, "step": 1475 }, { "epoch": 0.13, "learning_rate": 1.951535793624471e-05, "loss": 0.3018, "step": 1476 }, { "epoch": 0.13, "learning_rate": 1.9514503770350125e-05, "loss": 0.3166, "step": 1477 }, { "epoch": 0.13, "learning_rate": 1.9513648871124604e-05, "loss": 0.3506, "step": 1478 }, { "epoch": 0.13, "learning_rate": 1.9512793238634035e-05, "loss": 0.309, "step": 1479 }, { "epoch": 0.13, "learning_rate": 1.9511936872944367e-05, "loss": 0.3057, "step": 1480 }, { "epoch": 0.13, "learning_rate": 1.95110797741216e-05, "loss": 0.3157, "step": 1481 }, { "epoch": 0.13, "learning_rate": 1.9510221942231803e-05, "loss": 0.2953, "step": 1482 }, { "epoch": 0.13, "learning_rate": 1.950936337734109e-05, "loss": 0.3173, "step": 1483 }, { "epoch": 0.13, "learning_rate": 1.9508504079515637e-05, "loss": 0.3226, "step": 1484 }, { "epoch": 0.13, "learning_rate": 1.9507644048821674e-05, "loss": 0.2922, "step": 1485 }, { "epoch": 0.13, "learning_rate": 1.9506783285325482e-05, "loss": 0.3123, "step": 1486 }, { "epoch": 0.13, "learning_rate": 1.950592178909341e-05, "loss": 0.3209, "step": 1487 }, { "epoch": 0.13, "learning_rate": 1.950505956019186e-05, "loss": 0.2998, "step": 1488 }, { "epoch": 0.13, "learning_rate": 1.950419659868728e-05, "loss": 0.3032, "step": 1489 }, { "epoch": 0.13, "learning_rate": 1.9503332904646188e-05, "loss": 0.2921, "step": 1490 }, { "epoch": 0.13, "learning_rate": 1.9502468478135158e-05, "loss": 0.3018, "step": 1491 }, { "epoch": 0.13, "learning_rate": 1.9501603319220807e-05, "loss": 0.3274, "step": 1492 }, { "epoch": 0.13, "learning_rate": 1.950073742796982e-05, "loss": 0.3657, "step": 1493 }, { "epoch": 0.13, "learning_rate": 1.9499870804448936e-05, "loss": 0.3357, "step": 1494 }, { "epoch": 0.13, "learning_rate": 1.9499003448724956e-05, "loss": 0.3713, "step": 1495 }, { "epoch": 0.13, "learning_rate": 1.9498135360864724e-05, "loss": 0.3123, "step": 1496 }, { "epoch": 0.13, "learning_rate": 1.9497266540935146e-05, "loss": 0.324, "step": 1497 }, { "epoch": 0.13, "learning_rate": 1.9496396989003195e-05, "loss": 0.2944, "step": 1498 }, { "epoch": 0.13, "learning_rate": 1.9495526705135885e-05, "loss": 0.3209, "step": 1499 }, { "epoch": 0.13, "learning_rate": 1.9494655689400294e-05, "loss": 0.6093, "step": 1500 }, { "epoch": 0.13, "learning_rate": 1.949378394186356e-05, "loss": 0.3105, "step": 1501 }, { "epoch": 0.13, "learning_rate": 1.949291146259287e-05, "loss": 0.2971, "step": 1502 }, { "epoch": 0.13, "learning_rate": 1.949203825165547e-05, "loss": 0.3077, "step": 1503 }, { "epoch": 0.13, "learning_rate": 1.949116430911866e-05, "loss": 0.3217, "step": 1504 }, { "epoch": 0.13, "learning_rate": 1.9490289635049805e-05, "loss": 0.3035, "step": 1505 }, { "epoch": 0.13, "learning_rate": 1.9489414229516318e-05, "loss": 0.3545, "step": 1506 }, { "epoch": 0.13, "learning_rate": 1.948853809258567e-05, "loss": 0.3203, "step": 1507 }, { "epoch": 0.13, "learning_rate": 1.948766122432539e-05, "loss": 0.3126, "step": 1508 }, { "epoch": 0.13, "learning_rate": 1.948678362480306e-05, "loss": 0.3262, "step": 1509 }, { "epoch": 0.13, "learning_rate": 1.948590529408633e-05, "loss": 0.3683, "step": 1510 }, { "epoch": 0.13, "learning_rate": 1.9485026232242887e-05, "loss": 0.2927, "step": 1511 }, { "epoch": 0.13, "learning_rate": 1.948414643934049e-05, "loss": 0.3082, "step": 1512 }, { "epoch": 0.13, "learning_rate": 1.9483265915446946e-05, "loss": 0.3093, "step": 1513 }, { "epoch": 0.13, "learning_rate": 1.9482384660630125e-05, "loss": 0.2876, "step": 1514 }, { "epoch": 0.13, "learning_rate": 1.948150267495795e-05, "loss": 0.3708, "step": 1515 }, { "epoch": 0.13, "learning_rate": 1.9480619958498393e-05, "loss": 0.3249, "step": 1516 }, { "epoch": 0.13, "learning_rate": 1.94797365113195e-05, "loss": 0.3624, "step": 1517 }, { "epoch": 0.13, "learning_rate": 1.9478852333489356e-05, "loss": 0.2885, "step": 1518 }, { "epoch": 0.13, "learning_rate": 1.947796742507611e-05, "loss": 0.2864, "step": 1519 }, { "epoch": 0.13, "learning_rate": 1.947708178614797e-05, "loss": 0.2759, "step": 1520 }, { "epoch": 0.13, "learning_rate": 1.9476195416773188e-05, "loss": 0.2808, "step": 1521 }, { "epoch": 0.13, "learning_rate": 1.947530831702009e-05, "loss": 0.3254, "step": 1522 }, { "epoch": 0.13, "learning_rate": 1.9474420486957045e-05, "loss": 0.3075, "step": 1523 }, { "epoch": 0.13, "learning_rate": 1.9473531926652483e-05, "loss": 0.2863, "step": 1524 }, { "epoch": 0.13, "learning_rate": 1.947264263617489e-05, "loss": 0.3137, "step": 1525 }, { "epoch": 0.13, "learning_rate": 1.947175261559281e-05, "loss": 0.3484, "step": 1526 }, { "epoch": 0.13, "learning_rate": 1.9470861864974837e-05, "loss": 0.3353, "step": 1527 }, { "epoch": 0.13, "learning_rate": 1.946997038438963e-05, "loss": 0.2685, "step": 1528 }, { "epoch": 0.13, "learning_rate": 1.94690781739059e-05, "loss": 0.2676, "step": 1529 }, { "epoch": 0.13, "learning_rate": 1.946818523359241e-05, "loss": 0.3207, "step": 1530 }, { "epoch": 0.13, "learning_rate": 1.9467291563517987e-05, "loss": 0.3172, "step": 1531 }, { "epoch": 0.13, "learning_rate": 1.946639716375151e-05, "loss": 0.3126, "step": 1532 }, { "epoch": 0.13, "learning_rate": 1.9465502034361908e-05, "loss": 0.3037, "step": 1533 }, { "epoch": 0.13, "learning_rate": 1.946460617541819e-05, "loss": 0.3279, "step": 1534 }, { "epoch": 0.13, "learning_rate": 1.9463709586989387e-05, "loss": 0.2889, "step": 1535 }, { "epoch": 0.13, "learning_rate": 1.9462812269144613e-05, "loss": 0.275, "step": 1536 }, { "epoch": 0.13, "learning_rate": 1.9461914221953026e-05, "loss": 0.3031, "step": 1537 }, { "epoch": 0.13, "learning_rate": 1.9461015445483843e-05, "loss": 0.2833, "step": 1538 }, { "epoch": 0.13, "learning_rate": 1.946011593980634e-05, "loss": 0.3129, "step": 1539 }, { "epoch": 0.13, "learning_rate": 1.945921570498984e-05, "loss": 0.3472, "step": 1540 }, { "epoch": 0.13, "learning_rate": 1.9458314741103737e-05, "loss": 0.3273, "step": 1541 }, { "epoch": 0.13, "learning_rate": 1.9457413048217466e-05, "loss": 0.32, "step": 1542 }, { "epoch": 0.13, "learning_rate": 1.945651062640053e-05, "loss": 0.3675, "step": 1543 }, { "epoch": 0.13, "learning_rate": 1.9455607475722478e-05, "loss": 0.3088, "step": 1544 }, { "epoch": 0.13, "learning_rate": 1.9454703596252926e-05, "loss": 0.3207, "step": 1545 }, { "epoch": 0.13, "learning_rate": 1.9453798988061535e-05, "loss": 0.3292, "step": 1546 }, { "epoch": 0.13, "learning_rate": 1.945289365121803e-05, "loss": 0.309, "step": 1547 }, { "epoch": 0.13, "learning_rate": 1.9451987585792195e-05, "loss": 0.2941, "step": 1548 }, { "epoch": 0.13, "learning_rate": 1.945108079185386e-05, "loss": 0.3597, "step": 1549 }, { "epoch": 0.13, "learning_rate": 1.9450173269472915e-05, "loss": 0.2799, "step": 1550 }, { "epoch": 0.13, "learning_rate": 1.9449265018719307e-05, "loss": 0.3589, "step": 1551 }, { "epoch": 0.13, "learning_rate": 1.9448356039663044e-05, "loss": 0.3272, "step": 1552 }, { "epoch": 0.13, "learning_rate": 1.9447446332374182e-05, "loss": 0.3453, "step": 1553 }, { "epoch": 0.13, "learning_rate": 1.944653589692284e-05, "loss": 0.3012, "step": 1554 }, { "epoch": 0.13, "learning_rate": 1.9445624733379186e-05, "loss": 0.3046, "step": 1555 }, { "epoch": 0.13, "learning_rate": 1.944471284181345e-05, "loss": 0.316, "step": 1556 }, { "epoch": 0.13, "learning_rate": 1.9443800222295918e-05, "loss": 0.291, "step": 1557 }, { "epoch": 0.13, "learning_rate": 1.9442886874896924e-05, "loss": 0.3208, "step": 1558 }, { "epoch": 0.13, "learning_rate": 1.944197279968687e-05, "loss": 0.3055, "step": 1559 }, { "epoch": 0.13, "learning_rate": 1.9441057996736207e-05, "loss": 0.3518, "step": 1560 }, { "epoch": 0.13, "learning_rate": 1.9440142466115443e-05, "loss": 0.2662, "step": 1561 }, { "epoch": 0.13, "learning_rate": 1.9439226207895143e-05, "loss": 0.2976, "step": 1562 }, { "epoch": 0.13, "learning_rate": 1.943830922214593e-05, "loss": 0.2922, "step": 1563 }, { "epoch": 0.13, "learning_rate": 1.9437391508938476e-05, "loss": 0.3578, "step": 1564 }, { "epoch": 0.13, "learning_rate": 1.9436473068343516e-05, "loss": 0.31, "step": 1565 }, { "epoch": 0.13, "learning_rate": 1.9435553900431838e-05, "loss": 0.3405, "step": 1566 }, { "epoch": 0.13, "learning_rate": 1.943463400527429e-05, "loss": 0.306, "step": 1567 }, { "epoch": 0.13, "learning_rate": 1.9433713382941768e-05, "loss": 0.3032, "step": 1568 }, { "epoch": 0.13, "learning_rate": 1.943279203350523e-05, "loss": 0.288, "step": 1569 }, { "epoch": 0.13, "learning_rate": 1.9431869957035698e-05, "loss": 0.3475, "step": 1570 }, { "epoch": 0.13, "learning_rate": 1.943094715360423e-05, "loss": 0.3038, "step": 1571 }, { "epoch": 0.13, "learning_rate": 1.9430023623281955e-05, "loss": 0.3118, "step": 1572 }, { "epoch": 0.13, "learning_rate": 1.9429099366140055e-05, "loss": 0.3512, "step": 1573 }, { "epoch": 0.13, "learning_rate": 1.9428174382249764e-05, "loss": 0.3785, "step": 1574 }, { "epoch": 0.14, "learning_rate": 1.942724867168238e-05, "loss": 0.3773, "step": 1575 }, { "epoch": 0.14, "learning_rate": 1.9426322234509248e-05, "loss": 0.3151, "step": 1576 }, { "epoch": 0.14, "learning_rate": 1.9425395070801775e-05, "loss": 0.3328, "step": 1577 }, { "epoch": 0.14, "learning_rate": 1.9424467180631422e-05, "loss": 0.3016, "step": 1578 }, { "epoch": 0.14, "learning_rate": 1.942353856406971e-05, "loss": 0.3376, "step": 1579 }, { "epoch": 0.14, "learning_rate": 1.9422609221188208e-05, "loss": 0.2816, "step": 1580 }, { "epoch": 0.14, "learning_rate": 1.9421679152058545e-05, "loss": 0.3253, "step": 1581 }, { "epoch": 0.14, "learning_rate": 1.9420748356752405e-05, "loss": 0.3448, "step": 1582 }, { "epoch": 0.14, "learning_rate": 1.9419816835341528e-05, "loss": 0.2772, "step": 1583 }, { "epoch": 0.14, "learning_rate": 1.941888458789772e-05, "loss": 0.3293, "step": 1584 }, { "epoch": 0.14, "learning_rate": 1.9417951614492824e-05, "loss": 0.2995, "step": 1585 }, { "epoch": 0.14, "learning_rate": 1.9417017915198758e-05, "loss": 0.3094, "step": 1586 }, { "epoch": 0.14, "learning_rate": 1.9416083490087475e-05, "loss": 0.3469, "step": 1587 }, { "epoch": 0.14, "learning_rate": 1.9415148339231008e-05, "loss": 0.3573, "step": 1588 }, { "epoch": 0.14, "learning_rate": 1.9414212462701428e-05, "loss": 0.3596, "step": 1589 }, { "epoch": 0.14, "learning_rate": 1.941327586057087e-05, "loss": 0.3703, "step": 1590 }, { "epoch": 0.14, "learning_rate": 1.9412338532911515e-05, "loss": 0.3645, "step": 1591 }, { "epoch": 0.14, "learning_rate": 1.9411400479795618e-05, "loss": 0.2745, "step": 1592 }, { "epoch": 0.14, "learning_rate": 1.9410461701295474e-05, "loss": 0.2949, "step": 1593 }, { "epoch": 0.14, "learning_rate": 1.940952219748344e-05, "loss": 0.6079, "step": 1594 }, { "epoch": 0.14, "learning_rate": 1.940858196843193e-05, "loss": 0.3245, "step": 1595 }, { "epoch": 0.14, "learning_rate": 1.940764101421341e-05, "loss": 0.3019, "step": 1596 }, { "epoch": 0.14, "learning_rate": 1.9406699334900407e-05, "loss": 0.3044, "step": 1597 }, { "epoch": 0.14, "learning_rate": 1.9405756930565496e-05, "loss": 0.3335, "step": 1598 }, { "epoch": 0.14, "learning_rate": 1.940481380128132e-05, "loss": 0.2662, "step": 1599 }, { "epoch": 0.14, "learning_rate": 1.9403869947120563e-05, "loss": 0.3498, "step": 1600 }, { "epoch": 0.14, "learning_rate": 1.9402925368155978e-05, "loss": 0.3083, "step": 1601 }, { "epoch": 0.14, "learning_rate": 1.940198006446037e-05, "loss": 0.313, "step": 1602 }, { "epoch": 0.14, "learning_rate": 1.940103403610659e-05, "loss": 0.2927, "step": 1603 }, { "epoch": 0.14, "learning_rate": 1.940008728316756e-05, "loss": 0.2975, "step": 1604 }, { "epoch": 0.14, "learning_rate": 1.939913980571625e-05, "loss": 0.3222, "step": 1605 }, { "epoch": 0.14, "learning_rate": 1.9398191603825687e-05, "loss": 0.3059, "step": 1606 }, { "epoch": 0.14, "learning_rate": 1.939724267756895e-05, "loss": 0.3022, "step": 1607 }, { "epoch": 0.14, "learning_rate": 1.9396293027019186e-05, "loss": 0.2991, "step": 1608 }, { "epoch": 0.14, "learning_rate": 1.9395342652249578e-05, "loss": 0.3347, "step": 1609 }, { "epoch": 0.14, "learning_rate": 1.9394391553333384e-05, "loss": 0.3745, "step": 1610 }, { "epoch": 0.14, "learning_rate": 1.939343973034391e-05, "loss": 0.2777, "step": 1611 }, { "epoch": 0.14, "learning_rate": 1.9392487183354514e-05, "loss": 0.2809, "step": 1612 }, { "epoch": 0.14, "learning_rate": 1.9391533912438615e-05, "loss": 0.3855, "step": 1613 }, { "epoch": 0.14, "learning_rate": 1.939057991766969e-05, "loss": 0.3334, "step": 1614 }, { "epoch": 0.14, "learning_rate": 1.9389625199121264e-05, "loss": 0.271, "step": 1615 }, { "epoch": 0.14, "learning_rate": 1.938866975686692e-05, "loss": 0.2993, "step": 1616 }, { "epoch": 0.14, "learning_rate": 1.9387713590980305e-05, "loss": 0.3319, "step": 1617 }, { "epoch": 0.14, "learning_rate": 1.9386756701535115e-05, "loss": 0.3168, "step": 1618 }, { "epoch": 0.14, "learning_rate": 1.9385799088605095e-05, "loss": 0.2659, "step": 1619 }, { "epoch": 0.14, "learning_rate": 1.938484075226406e-05, "loss": 0.3383, "step": 1620 }, { "epoch": 0.14, "learning_rate": 1.938388169258587e-05, "loss": 0.299, "step": 1621 }, { "epoch": 0.14, "learning_rate": 1.9382921909644448e-05, "loss": 0.2908, "step": 1622 }, { "epoch": 0.14, "learning_rate": 1.938196140351377e-05, "loss": 0.3201, "step": 1623 }, { "epoch": 0.14, "learning_rate": 1.938100017426786e-05, "loss": 0.3209, "step": 1624 }, { "epoch": 0.14, "learning_rate": 1.938003822198081e-05, "loss": 0.2896, "step": 1625 }, { "epoch": 0.14, "learning_rate": 1.9379075546726764e-05, "loss": 0.2878, "step": 1626 }, { "epoch": 0.14, "learning_rate": 1.9378112148579916e-05, "loss": 0.3328, "step": 1627 }, { "epoch": 0.14, "learning_rate": 1.9377148027614523e-05, "loss": 0.3584, "step": 1628 }, { "epoch": 0.14, "learning_rate": 1.9376183183904896e-05, "loss": 0.3284, "step": 1629 }, { "epoch": 0.14, "learning_rate": 1.9375217617525396e-05, "loss": 0.3144, "step": 1630 }, { "epoch": 0.14, "learning_rate": 1.9374251328550448e-05, "loss": 0.2917, "step": 1631 }, { "epoch": 0.14, "learning_rate": 1.9373284317054525e-05, "loss": 0.3212, "step": 1632 }, { "epoch": 0.14, "learning_rate": 1.9372316583112163e-05, "loss": 0.3152, "step": 1633 }, { "epoch": 0.14, "learning_rate": 1.937134812679795e-05, "loss": 0.3564, "step": 1634 }, { "epoch": 0.14, "learning_rate": 1.9370378948186526e-05, "loss": 0.3588, "step": 1635 }, { "epoch": 0.14, "learning_rate": 1.9369409047352593e-05, "loss": 0.3024, "step": 1636 }, { "epoch": 0.14, "learning_rate": 1.936843842437091e-05, "loss": 0.3436, "step": 1637 }, { "epoch": 0.14, "learning_rate": 1.936746707931628e-05, "loss": 0.3406, "step": 1638 }, { "epoch": 0.14, "learning_rate": 1.9366495012263575e-05, "loss": 0.3167, "step": 1639 }, { "epoch": 0.14, "learning_rate": 1.9365522223287717e-05, "loss": 0.3439, "step": 1640 }, { "epoch": 0.14, "learning_rate": 1.936454871246368e-05, "loss": 0.364, "step": 1641 }, { "epoch": 0.14, "learning_rate": 1.9363574479866504e-05, "loss": 0.3007, "step": 1642 }, { "epoch": 0.14, "learning_rate": 1.936259952557127e-05, "loss": 0.3105, "step": 1643 }, { "epoch": 0.14, "learning_rate": 1.9361623849653126e-05, "loss": 0.3159, "step": 1644 }, { "epoch": 0.14, "learning_rate": 1.9360647452187273e-05, "loss": 0.3434, "step": 1645 }, { "epoch": 0.14, "learning_rate": 1.9359670333248967e-05, "loss": 0.35, "step": 1646 }, { "epoch": 0.14, "learning_rate": 1.935869249291352e-05, "loss": 0.3565, "step": 1647 }, { "epoch": 0.14, "learning_rate": 1.9357713931256298e-05, "loss": 0.2778, "step": 1648 }, { "epoch": 0.14, "learning_rate": 1.9356734648352727e-05, "loss": 0.3659, "step": 1649 }, { "epoch": 0.14, "learning_rate": 1.935575464427828e-05, "loss": 0.2652, "step": 1650 }, { "epoch": 0.14, "learning_rate": 1.935477391910849e-05, "loss": 0.3193, "step": 1651 }, { "epoch": 0.14, "learning_rate": 1.9353792472918954e-05, "loss": 0.2858, "step": 1652 }, { "epoch": 0.14, "learning_rate": 1.9352810305785314e-05, "loss": 0.2687, "step": 1653 }, { "epoch": 0.14, "learning_rate": 1.935182741778326e-05, "loss": 0.2885, "step": 1654 }, { "epoch": 0.14, "learning_rate": 1.9350843808988566e-05, "loss": 0.3188, "step": 1655 }, { "epoch": 0.14, "learning_rate": 1.9349859479477034e-05, "loss": 0.3995, "step": 1656 }, { "epoch": 0.14, "learning_rate": 1.934887442932453e-05, "loss": 0.308, "step": 1657 }, { "epoch": 0.14, "learning_rate": 1.934788865860698e-05, "loss": 0.3033, "step": 1658 }, { "epoch": 0.14, "learning_rate": 1.9346902167400363e-05, "loss": 0.33, "step": 1659 }, { "epoch": 0.14, "learning_rate": 1.9345914955780708e-05, "loss": 0.2574, "step": 1660 }, { "epoch": 0.14, "learning_rate": 1.9344927023824112e-05, "loss": 0.6281, "step": 1661 }, { "epoch": 0.14, "learning_rate": 1.9343938371606714e-05, "loss": 0.332, "step": 1662 }, { "epoch": 0.14, "learning_rate": 1.9342948999204712e-05, "loss": 0.2936, "step": 1663 }, { "epoch": 0.14, "learning_rate": 1.934195890669437e-05, "loss": 0.4017, "step": 1664 }, { "epoch": 0.14, "learning_rate": 1.9340968094151997e-05, "loss": 0.3555, "step": 1665 }, { "epoch": 0.14, "learning_rate": 1.9339976561653956e-05, "loss": 0.317, "step": 1666 }, { "epoch": 0.14, "learning_rate": 1.933898430927667e-05, "loss": 0.3245, "step": 1667 }, { "epoch": 0.14, "learning_rate": 1.933799133709662e-05, "loss": 0.2429, "step": 1668 }, { "epoch": 0.14, "learning_rate": 1.9336997645190342e-05, "loss": 0.3087, "step": 1669 }, { "epoch": 0.14, "learning_rate": 1.933600323363442e-05, "loss": 0.3079, "step": 1670 }, { "epoch": 0.14, "learning_rate": 1.9335008102505494e-05, "loss": 0.314, "step": 1671 }, { "epoch": 0.14, "learning_rate": 1.9334012251880274e-05, "loss": 0.3101, "step": 1672 }, { "epoch": 0.14, "learning_rate": 1.933301568183551e-05, "loss": 0.3582, "step": 1673 }, { "epoch": 0.14, "learning_rate": 1.933201839244801e-05, "loss": 0.3174, "step": 1674 }, { "epoch": 0.14, "learning_rate": 1.9331020383794647e-05, "loss": 0.3832, "step": 1675 }, { "epoch": 0.14, "learning_rate": 1.9330021655952333e-05, "loss": 0.3176, "step": 1676 }, { "epoch": 0.14, "learning_rate": 1.9329022208998056e-05, "loss": 0.35, "step": 1677 }, { "epoch": 0.14, "learning_rate": 1.9328022043008842e-05, "loss": 0.3019, "step": 1678 }, { "epoch": 0.14, "learning_rate": 1.9327021158061776e-05, "loss": 0.3632, "step": 1679 }, { "epoch": 0.14, "learning_rate": 1.932601955423401e-05, "loss": 0.2994, "step": 1680 }, { "epoch": 0.14, "learning_rate": 1.9325017231602737e-05, "loss": 0.3564, "step": 1681 }, { "epoch": 0.14, "learning_rate": 1.932401419024521e-05, "loss": 0.3143, "step": 1682 }, { "epoch": 0.14, "learning_rate": 1.9323010430238746e-05, "loss": 0.3065, "step": 1683 }, { "epoch": 0.14, "learning_rate": 1.93220059516607e-05, "loss": 0.3391, "step": 1684 }, { "epoch": 0.14, "learning_rate": 1.9321000754588497e-05, "loss": 0.3037, "step": 1685 }, { "epoch": 0.14, "learning_rate": 1.931999483909961e-05, "loss": 0.3147, "step": 1686 }, { "epoch": 0.14, "learning_rate": 1.9318988205271577e-05, "loss": 0.3432, "step": 1687 }, { "epoch": 0.14, "learning_rate": 1.9317980853181975e-05, "loss": 0.3123, "step": 1688 }, { "epoch": 0.14, "learning_rate": 1.9316972782908455e-05, "loss": 0.2975, "step": 1689 }, { "epoch": 0.14, "learning_rate": 1.9315963994528707e-05, "loss": 0.3085, "step": 1690 }, { "epoch": 0.14, "learning_rate": 1.9314954488120484e-05, "loss": 0.3236, "step": 1691 }, { "epoch": 0.15, "learning_rate": 1.93139442637616e-05, "loss": 0.306, "step": 1692 }, { "epoch": 0.15, "learning_rate": 1.9312933321529912e-05, "loss": 0.3127, "step": 1693 }, { "epoch": 0.15, "learning_rate": 1.9311921661503338e-05, "loss": 0.2866, "step": 1694 }, { "epoch": 0.15, "learning_rate": 1.9310909283759855e-05, "loss": 0.3159, "step": 1695 }, { "epoch": 0.15, "learning_rate": 1.930989618837749e-05, "loss": 0.3041, "step": 1696 }, { "epoch": 0.15, "learning_rate": 1.930888237543433e-05, "loss": 0.3199, "step": 1697 }, { "epoch": 0.15, "learning_rate": 1.9307867845008513e-05, "loss": 0.3293, "step": 1698 }, { "epoch": 0.15, "learning_rate": 1.9306852597178233e-05, "loss": 0.3046, "step": 1699 }, { "epoch": 0.15, "learning_rate": 1.9305836632021744e-05, "loss": 0.6284, "step": 1700 }, { "epoch": 0.15, "learning_rate": 1.930481994961735e-05, "loss": 0.3041, "step": 1701 }, { "epoch": 0.15, "learning_rate": 1.9303802550043404e-05, "loss": 0.3094, "step": 1702 }, { "epoch": 0.15, "learning_rate": 1.9302784433378333e-05, "loss": 0.32, "step": 1703 }, { "epoch": 0.15, "learning_rate": 1.9301765599700604e-05, "loss": 0.2931, "step": 1704 }, { "epoch": 0.15, "learning_rate": 1.9300746049088746e-05, "loss": 0.3551, "step": 1705 }, { "epoch": 0.15, "learning_rate": 1.9299725781621335e-05, "loss": 0.2919, "step": 1706 }, { "epoch": 0.15, "learning_rate": 1.9298704797377013e-05, "loss": 0.3267, "step": 1707 }, { "epoch": 0.15, "learning_rate": 1.929768309643447e-05, "loss": 0.2783, "step": 1708 }, { "epoch": 0.15, "learning_rate": 1.929666067887246e-05, "loss": 0.2867, "step": 1709 }, { "epoch": 0.15, "learning_rate": 1.929563754476978e-05, "loss": 0.3345, "step": 1710 }, { "epoch": 0.15, "learning_rate": 1.9294613694205285e-05, "loss": 0.3818, "step": 1711 }, { "epoch": 0.15, "learning_rate": 1.9293589127257896e-05, "loss": 0.3317, "step": 1712 }, { "epoch": 0.15, "learning_rate": 1.9292563844006578e-05, "loss": 0.3288, "step": 1713 }, { "epoch": 0.15, "learning_rate": 1.9291537844530352e-05, "loss": 0.316, "step": 1714 }, { "epoch": 0.15, "learning_rate": 1.92905111289083e-05, "loss": 0.3204, "step": 1715 }, { "epoch": 0.15, "learning_rate": 1.928948369721956e-05, "loss": 0.2514, "step": 1716 }, { "epoch": 0.15, "learning_rate": 1.9288455549543315e-05, "loss": 0.2993, "step": 1717 }, { "epoch": 0.15, "learning_rate": 1.928742668595881e-05, "loss": 0.3181, "step": 1718 }, { "epoch": 0.15, "learning_rate": 1.9286397106545348e-05, "loss": 0.3017, "step": 1719 }, { "epoch": 0.15, "learning_rate": 1.9285366811382283e-05, "loss": 0.2833, "step": 1720 }, { "epoch": 0.15, "learning_rate": 1.9284335800549026e-05, "loss": 0.2975, "step": 1721 }, { "epoch": 0.15, "learning_rate": 1.928330407412504e-05, "loss": 0.3507, "step": 1722 }, { "epoch": 0.15, "learning_rate": 1.9282271632189844e-05, "loss": 0.373, "step": 1723 }, { "epoch": 0.15, "learning_rate": 1.9281238474823016e-05, "loss": 0.3446, "step": 1724 }, { "epoch": 0.15, "learning_rate": 1.9280204602104185e-05, "loss": 0.2996, "step": 1725 }, { "epoch": 0.15, "learning_rate": 1.927917001411304e-05, "loss": 0.3392, "step": 1726 }, { "epoch": 0.15, "learning_rate": 1.9278134710929322e-05, "loss": 0.3228, "step": 1727 }, { "epoch": 0.15, "learning_rate": 1.9277098692632824e-05, "loss": 0.3207, "step": 1728 }, { "epoch": 0.15, "learning_rate": 1.9276061959303397e-05, "loss": 0.2594, "step": 1729 }, { "epoch": 0.15, "learning_rate": 1.927502451102095e-05, "loss": 0.3571, "step": 1730 }, { "epoch": 0.15, "learning_rate": 1.927398634786544e-05, "loss": 0.3116, "step": 1731 }, { "epoch": 0.15, "learning_rate": 1.927294746991689e-05, "loss": 0.3369, "step": 1732 }, { "epoch": 0.15, "learning_rate": 1.9271907877255364e-05, "loss": 0.2906, "step": 1733 }, { "epoch": 0.15, "learning_rate": 1.9270867569960994e-05, "loss": 0.325, "step": 1734 }, { "epoch": 0.15, "learning_rate": 1.9269826548113964e-05, "loss": 0.3072, "step": 1735 }, { "epoch": 0.15, "learning_rate": 1.9268784811794507e-05, "loss": 0.3358, "step": 1736 }, { "epoch": 0.15, "learning_rate": 1.926774236108291e-05, "loss": 0.3163, "step": 1737 }, { "epoch": 0.15, "learning_rate": 1.926669919605953e-05, "loss": 0.288, "step": 1738 }, { "epoch": 0.15, "learning_rate": 1.926565531680476e-05, "loss": 0.3553, "step": 1739 }, { "epoch": 0.15, "learning_rate": 1.9264610723399065e-05, "loss": 0.2657, "step": 1740 }, { "epoch": 0.15, "learning_rate": 1.926356541592295e-05, "loss": 0.2704, "step": 1741 }, { "epoch": 0.15, "learning_rate": 1.9262519394456985e-05, "loss": 0.3201, "step": 1742 }, { "epoch": 0.15, "learning_rate": 1.9261472659081793e-05, "loss": 0.3238, "step": 1743 }, { "epoch": 0.15, "learning_rate": 1.9260425209878052e-05, "loss": 0.3533, "step": 1744 }, { "epoch": 0.15, "learning_rate": 1.9259377046926494e-05, "loss": 0.2781, "step": 1745 }, { "epoch": 0.15, "learning_rate": 1.9258328170307905e-05, "loss": 0.3461, "step": 1746 }, { "epoch": 0.15, "learning_rate": 1.9257278580103124e-05, "loss": 0.3197, "step": 1747 }, { "epoch": 0.15, "learning_rate": 1.9256228276393055e-05, "loss": 0.3057, "step": 1748 }, { "epoch": 0.15, "learning_rate": 1.9255177259258647e-05, "loss": 0.3086, "step": 1749 }, { "epoch": 0.15, "learning_rate": 1.9254125528780908e-05, "loss": 0.3104, "step": 1750 }, { "epoch": 0.15, "learning_rate": 1.9253073085040895e-05, "loss": 0.3045, "step": 1751 }, { "epoch": 0.15, "learning_rate": 1.9252019928119733e-05, "loss": 0.2928, "step": 1752 }, { "epoch": 0.15, "learning_rate": 1.925096605809859e-05, "loss": 0.3046, "step": 1753 }, { "epoch": 0.15, "learning_rate": 1.924991147505869e-05, "loss": 0.3116, "step": 1754 }, { "epoch": 0.15, "learning_rate": 1.924885617908132e-05, "loss": 0.3054, "step": 1755 }, { "epoch": 0.15, "learning_rate": 1.9247800170247817e-05, "loss": 0.3149, "step": 1756 }, { "epoch": 0.15, "learning_rate": 1.924674344863957e-05, "loss": 0.2634, "step": 1757 }, { "epoch": 0.15, "learning_rate": 1.924568601433803e-05, "loss": 0.3314, "step": 1758 }, { "epoch": 0.15, "learning_rate": 1.9244627867424695e-05, "loss": 0.3168, "step": 1759 }, { "epoch": 0.15, "learning_rate": 1.9243569007981126e-05, "loss": 0.2955, "step": 1760 }, { "epoch": 0.15, "learning_rate": 1.9242509436088928e-05, "loss": 0.34, "step": 1761 }, { "epoch": 0.15, "learning_rate": 1.924144915182977e-05, "loss": 0.3589, "step": 1762 }, { "epoch": 0.15, "learning_rate": 1.9240388155285378e-05, "loss": 0.5908, "step": 1763 }, { "epoch": 0.15, "learning_rate": 1.9239326446537526e-05, "loss": 0.2861, "step": 1764 }, { "epoch": 0.15, "learning_rate": 1.9238264025668043e-05, "loss": 0.3053, "step": 1765 }, { "epoch": 0.15, "learning_rate": 1.9237200892758814e-05, "loss": 0.2995, "step": 1766 }, { "epoch": 0.15, "learning_rate": 1.9236137047891783e-05, "loss": 0.3101, "step": 1767 }, { "epoch": 0.15, "learning_rate": 1.9235072491148946e-05, "loss": 0.295, "step": 1768 }, { "epoch": 0.15, "learning_rate": 1.9234007222612356e-05, "loss": 0.3499, "step": 1769 }, { "epoch": 0.15, "learning_rate": 1.9232941242364114e-05, "loss": 0.6174, "step": 1770 }, { "epoch": 0.15, "learning_rate": 1.923187455048638e-05, "loss": 0.3262, "step": 1771 }, { "epoch": 0.15, "learning_rate": 1.9230807147061374e-05, "loss": 0.3286, "step": 1772 }, { "epoch": 0.15, "learning_rate": 1.9229739032171365e-05, "loss": 0.3104, "step": 1773 }, { "epoch": 0.15, "learning_rate": 1.9228670205898675e-05, "loss": 0.3065, "step": 1774 }, { "epoch": 0.15, "learning_rate": 1.9227600668325687e-05, "loss": 0.2874, "step": 1775 }, { "epoch": 0.15, "learning_rate": 1.9226530419534834e-05, "loss": 0.2909, "step": 1776 }, { "epoch": 0.15, "learning_rate": 1.9225459459608604e-05, "loss": 0.2921, "step": 1777 }, { "epoch": 0.15, "learning_rate": 1.9224387788629547e-05, "loss": 0.2999, "step": 1778 }, { "epoch": 0.15, "learning_rate": 1.9223315406680254e-05, "loss": 0.2816, "step": 1779 }, { "epoch": 0.15, "learning_rate": 1.9222242313843385e-05, "loss": 0.3271, "step": 1780 }, { "epoch": 0.15, "learning_rate": 1.922116851020165e-05, "loss": 0.2969, "step": 1781 }, { "epoch": 0.15, "learning_rate": 1.9220093995837805e-05, "loss": 0.3345, "step": 1782 }, { "epoch": 0.15, "learning_rate": 1.9219018770834676e-05, "loss": 0.3407, "step": 1783 }, { "epoch": 0.15, "learning_rate": 1.921794283527513e-05, "loss": 0.3454, "step": 1784 }, { "epoch": 0.15, "learning_rate": 1.9216866189242095e-05, "loss": 0.3488, "step": 1785 }, { "epoch": 0.15, "learning_rate": 1.921578883281856e-05, "loss": 0.3583, "step": 1786 }, { "epoch": 0.15, "learning_rate": 1.921471076608756e-05, "loss": 0.3239, "step": 1787 }, { "epoch": 0.15, "learning_rate": 1.9213631989132184e-05, "loss": 0.3118, "step": 1788 }, { "epoch": 0.15, "learning_rate": 1.9212552502035576e-05, "loss": 0.3254, "step": 1789 }, { "epoch": 0.15, "learning_rate": 1.9211472304880945e-05, "loss": 0.316, "step": 1790 }, { "epoch": 0.15, "learning_rate": 1.9210391397751544e-05, "loss": 0.3038, "step": 1791 }, { "epoch": 0.15, "learning_rate": 1.920930978073068e-05, "loss": 0.3088, "step": 1792 }, { "epoch": 0.15, "learning_rate": 1.920822745390173e-05, "loss": 0.3186, "step": 1793 }, { "epoch": 0.15, "learning_rate": 1.9207144417348103e-05, "loss": 0.3231, "step": 1794 }, { "epoch": 0.15, "learning_rate": 1.920606067115328e-05, "loss": 0.3181, "step": 1795 }, { "epoch": 0.15, "learning_rate": 1.9204976215400788e-05, "loss": 0.6426, "step": 1796 }, { "epoch": 0.15, "learning_rate": 1.920389105017421e-05, "loss": 0.2999, "step": 1797 }, { "epoch": 0.15, "learning_rate": 1.920280517555719e-05, "loss": 0.3567, "step": 1798 }, { "epoch": 0.15, "learning_rate": 1.9201718591633417e-05, "loss": 0.3383, "step": 1799 }, { "epoch": 0.15, "learning_rate": 1.9200631298486647e-05, "loss": 0.2611, "step": 1800 }, { "epoch": 0.15, "learning_rate": 1.9199543296200675e-05, "loss": 0.3422, "step": 1801 }, { "epoch": 0.15, "learning_rate": 1.919845458485936e-05, "loss": 0.3254, "step": 1802 }, { "epoch": 0.15, "learning_rate": 1.919736516454662e-05, "loss": 0.3239, "step": 1803 }, { "epoch": 0.15, "learning_rate": 1.919627503534642e-05, "loss": 0.3033, "step": 1804 }, { "epoch": 0.15, "learning_rate": 1.9195184197342775e-05, "loss": 0.3134, "step": 1805 }, { "epoch": 0.15, "learning_rate": 1.9194092650619767e-05, "loss": 0.3568, "step": 1806 }, { "epoch": 0.15, "learning_rate": 1.9193000395261532e-05, "loss": 0.3514, "step": 1807 }, { "epoch": 0.15, "learning_rate": 1.9191907431352248e-05, "loss": 0.3237, "step": 1808 }, { "epoch": 0.16, "learning_rate": 1.9190813758976156e-05, "loss": 0.2794, "step": 1809 }, { "epoch": 0.16, "learning_rate": 1.9189719378217554e-05, "loss": 0.3182, "step": 1810 }, { "epoch": 0.16, "learning_rate": 1.9188624289160792e-05, "loss": 0.2975, "step": 1811 }, { "epoch": 0.16, "learning_rate": 1.918752849189027e-05, "loss": 0.3179, "step": 1812 }, { "epoch": 0.16, "learning_rate": 1.918643198649045e-05, "loss": 0.3536, "step": 1813 }, { "epoch": 0.16, "learning_rate": 1.918533477304584e-05, "loss": 0.3471, "step": 1814 }, { "epoch": 0.16, "learning_rate": 1.9184236851641017e-05, "loss": 0.3458, "step": 1815 }, { "epoch": 0.16, "learning_rate": 1.9183138222360596e-05, "loss": 0.3276, "step": 1816 }, { "epoch": 0.16, "learning_rate": 1.9182038885289256e-05, "loss": 0.2862, "step": 1817 }, { "epoch": 0.16, "learning_rate": 1.9180938840511727e-05, "loss": 0.289, "step": 1818 }, { "epoch": 0.16, "learning_rate": 1.91798380881128e-05, "loss": 0.3012, "step": 1819 }, { "epoch": 0.16, "learning_rate": 1.9178736628177308e-05, "loss": 0.2905, "step": 1820 }, { "epoch": 0.16, "learning_rate": 1.9177634460790152e-05, "loss": 0.2877, "step": 1821 }, { "epoch": 0.16, "learning_rate": 1.9176531586036282e-05, "loss": 0.3076, "step": 1822 }, { "epoch": 0.16, "learning_rate": 1.9175428004000695e-05, "loss": 0.3479, "step": 1823 }, { "epoch": 0.16, "learning_rate": 1.917432371476846e-05, "loss": 0.3928, "step": 1824 }, { "epoch": 0.16, "learning_rate": 1.9173218718424683e-05, "loss": 0.3532, "step": 1825 }, { "epoch": 0.16, "learning_rate": 1.917211301505453e-05, "loss": 0.2908, "step": 1826 }, { "epoch": 0.16, "learning_rate": 1.917100660474323e-05, "loss": 0.3114, "step": 1827 }, { "epoch": 0.16, "learning_rate": 1.9169899487576056e-05, "loss": 0.3756, "step": 1828 }, { "epoch": 0.16, "learning_rate": 1.9168791663638338e-05, "loss": 0.2842, "step": 1829 }, { "epoch": 0.16, "learning_rate": 1.9167683133015465e-05, "loss": 0.3354, "step": 1830 }, { "epoch": 0.16, "learning_rate": 1.9166573895792873e-05, "loss": 0.5896, "step": 1831 }, { "epoch": 0.16, "learning_rate": 1.916546395205606e-05, "loss": 0.3014, "step": 1832 }, { "epoch": 0.16, "learning_rate": 1.916435330189057e-05, "loss": 0.2882, "step": 1833 }, { "epoch": 0.16, "learning_rate": 1.9163241945382012e-05, "loss": 0.3216, "step": 1834 }, { "epoch": 0.16, "learning_rate": 1.9162129882616046e-05, "loss": 0.3249, "step": 1835 }, { "epoch": 0.16, "learning_rate": 1.9161017113678376e-05, "loss": 0.3039, "step": 1836 }, { "epoch": 0.16, "learning_rate": 1.9159903638654773e-05, "loss": 0.3002, "step": 1837 }, { "epoch": 0.16, "learning_rate": 1.9158789457631054e-05, "loss": 0.2936, "step": 1838 }, { "epoch": 0.16, "learning_rate": 1.9157674570693104e-05, "loss": 0.3344, "step": 1839 }, { "epoch": 0.16, "learning_rate": 1.9156558977926847e-05, "loss": 0.3528, "step": 1840 }, { "epoch": 0.16, "learning_rate": 1.9155442679418267e-05, "loss": 0.3334, "step": 1841 }, { "epoch": 0.16, "learning_rate": 1.91543256752534e-05, "loss": 0.3035, "step": 1842 }, { "epoch": 0.16, "learning_rate": 1.915320796551835e-05, "loss": 0.3264, "step": 1843 }, { "epoch": 0.16, "learning_rate": 1.9152089550299253e-05, "loss": 0.2941, "step": 1844 }, { "epoch": 0.16, "learning_rate": 1.9150970429682316e-05, "loss": 0.3088, "step": 1845 }, { "epoch": 0.16, "learning_rate": 1.9149850603753793e-05, "loss": 0.3586, "step": 1846 }, { "epoch": 0.16, "learning_rate": 1.91487300726e-05, "loss": 0.3423, "step": 1847 }, { "epoch": 0.16, "learning_rate": 1.9147608836307296e-05, "loss": 0.2903, "step": 1848 }, { "epoch": 0.16, "learning_rate": 1.91464868949621e-05, "loss": 0.3341, "step": 1849 }, { "epoch": 0.16, "learning_rate": 1.9145364248650892e-05, "loss": 0.3488, "step": 1850 }, { "epoch": 0.16, "learning_rate": 1.9144240897460195e-05, "loss": 0.2908, "step": 1851 }, { "epoch": 0.16, "learning_rate": 1.914311684147659e-05, "loss": 0.3142, "step": 1852 }, { "epoch": 0.16, "learning_rate": 1.9141992080786718e-05, "loss": 0.3373, "step": 1853 }, { "epoch": 0.16, "learning_rate": 1.9140866615477272e-05, "loss": 0.2561, "step": 1854 }, { "epoch": 0.16, "learning_rate": 1.913974044563499e-05, "loss": 0.3109, "step": 1855 }, { "epoch": 0.16, "learning_rate": 1.9138613571346676e-05, "loss": 0.272, "step": 1856 }, { "epoch": 0.16, "learning_rate": 1.9137485992699182e-05, "loss": 0.289, "step": 1857 }, { "epoch": 0.16, "learning_rate": 1.9136357709779418e-05, "loss": 0.3573, "step": 1858 }, { "epoch": 0.16, "learning_rate": 1.9135228722674347e-05, "loss": 0.3286, "step": 1859 }, { "epoch": 0.16, "learning_rate": 1.913409903147098e-05, "loss": 0.27, "step": 1860 }, { "epoch": 0.16, "learning_rate": 1.91329686362564e-05, "loss": 0.2725, "step": 1861 }, { "epoch": 0.16, "learning_rate": 1.9131837537117724e-05, "loss": 0.3669, "step": 1862 }, { "epoch": 0.16, "learning_rate": 1.9130705734142127e-05, "loss": 0.3441, "step": 1863 }, { "epoch": 0.16, "learning_rate": 1.912957322741685e-05, "loss": 0.2839, "step": 1864 }, { "epoch": 0.16, "learning_rate": 1.912844001702918e-05, "loss": 0.3696, "step": 1865 }, { "epoch": 0.16, "learning_rate": 1.912730610306646e-05, "loss": 0.2759, "step": 1866 }, { "epoch": 0.16, "learning_rate": 1.912617148561608e-05, "loss": 0.3264, "step": 1867 }, { "epoch": 0.16, "learning_rate": 1.9125036164765502e-05, "loss": 0.2915, "step": 1868 }, { "epoch": 0.16, "learning_rate": 1.912390014060222e-05, "loss": 0.3107, "step": 1869 }, { "epoch": 0.16, "learning_rate": 1.91227634132138e-05, "loss": 0.3126, "step": 1870 }, { "epoch": 0.16, "learning_rate": 1.9121625982687854e-05, "loss": 0.3183, "step": 1871 }, { "epoch": 0.16, "learning_rate": 1.9120487849112046e-05, "loss": 0.3176, "step": 1872 }, { "epoch": 0.16, "learning_rate": 1.91193490125741e-05, "loss": 0.3382, "step": 1873 }, { "epoch": 0.16, "learning_rate": 1.9118209473161794e-05, "loss": 0.2983, "step": 1874 }, { "epoch": 0.16, "learning_rate": 1.9117069230962956e-05, "loss": 0.3129, "step": 1875 }, { "epoch": 0.16, "learning_rate": 1.9115928286065467e-05, "loss": 0.2848, "step": 1876 }, { "epoch": 0.16, "learning_rate": 1.9114786638557272e-05, "loss": 0.2885, "step": 1877 }, { "epoch": 0.16, "learning_rate": 1.911364428852636e-05, "loss": 0.2938, "step": 1878 }, { "epoch": 0.16, "learning_rate": 1.9112501236060777e-05, "loss": 0.2717, "step": 1879 }, { "epoch": 0.16, "learning_rate": 1.9111357481248627e-05, "loss": 0.3409, "step": 1880 }, { "epoch": 0.16, "learning_rate": 1.9110213024178062e-05, "loss": 0.3215, "step": 1881 }, { "epoch": 0.16, "learning_rate": 1.9109067864937292e-05, "loss": 0.3062, "step": 1882 }, { "epoch": 0.16, "learning_rate": 1.910792200361458e-05, "loss": 0.3286, "step": 1883 }, { "epoch": 0.16, "learning_rate": 1.9106775440298242e-05, "loss": 0.2964, "step": 1884 }, { "epoch": 0.16, "learning_rate": 1.9105628175076654e-05, "loss": 0.2551, "step": 1885 }, { "epoch": 0.16, "learning_rate": 1.9104480208038236e-05, "loss": 0.3083, "step": 1886 }, { "epoch": 0.16, "learning_rate": 1.9103331539271473e-05, "loss": 0.3112, "step": 1887 }, { "epoch": 0.16, "learning_rate": 1.9102182168864894e-05, "loss": 0.279, "step": 1888 }, { "epoch": 0.16, "learning_rate": 1.9101032096907088e-05, "loss": 0.2707, "step": 1889 }, { "epoch": 0.16, "learning_rate": 1.90998813234867e-05, "loss": 0.2793, "step": 1890 }, { "epoch": 0.16, "learning_rate": 1.909872984869242e-05, "loss": 0.3558, "step": 1891 }, { "epoch": 0.16, "learning_rate": 1.9097577672613002e-05, "loss": 0.2997, "step": 1892 }, { "epoch": 0.16, "learning_rate": 1.909642479533725e-05, "loss": 0.288, "step": 1893 }, { "epoch": 0.16, "learning_rate": 1.9095271216954022e-05, "loss": 0.2843, "step": 1894 }, { "epoch": 0.16, "learning_rate": 1.909411693755223e-05, "loss": 0.327, "step": 1895 }, { "epoch": 0.16, "learning_rate": 1.909296195722084e-05, "loss": 0.3427, "step": 1896 }, { "epoch": 0.16, "learning_rate": 1.909180627604887e-05, "loss": 0.2953, "step": 1897 }, { "epoch": 0.16, "learning_rate": 1.9090649894125395e-05, "loss": 0.2892, "step": 1898 }, { "epoch": 0.16, "learning_rate": 1.908949281153955e-05, "loss": 0.3901, "step": 1899 }, { "epoch": 0.16, "learning_rate": 1.9088335028380504e-05, "loss": 0.3151, "step": 1900 }, { "epoch": 0.16, "learning_rate": 1.9087176544737507e-05, "loss": 0.3711, "step": 1901 }, { "epoch": 0.16, "learning_rate": 1.9086017360699843e-05, "loss": 0.2921, "step": 1902 }, { "epoch": 0.16, "learning_rate": 1.9084857476356852e-05, "loss": 0.3477, "step": 1903 }, { "epoch": 0.16, "learning_rate": 1.908369689179794e-05, "loss": 0.2933, "step": 1904 }, { "epoch": 0.16, "learning_rate": 1.9082535607112554e-05, "loss": 0.2968, "step": 1905 }, { "epoch": 0.16, "learning_rate": 1.9081373622390204e-05, "loss": 0.3433, "step": 1906 }, { "epoch": 0.16, "learning_rate": 1.9080210937720443e-05, "loss": 0.2717, "step": 1907 }, { "epoch": 0.16, "learning_rate": 1.907904755319289e-05, "loss": 0.3653, "step": 1908 }, { "epoch": 0.16, "learning_rate": 1.9077883468897215e-05, "loss": 0.2932, "step": 1909 }, { "epoch": 0.16, "learning_rate": 1.9076718684923136e-05, "loss": 0.3005, "step": 1910 }, { "epoch": 0.16, "learning_rate": 1.9075553201360432e-05, "loss": 0.2938, "step": 1911 }, { "epoch": 0.16, "learning_rate": 1.907438701829893e-05, "loss": 0.3301, "step": 1912 }, { "epoch": 0.16, "learning_rate": 1.9073220135828513e-05, "loss": 0.3149, "step": 1913 }, { "epoch": 0.16, "learning_rate": 1.9072052554039123e-05, "loss": 0.3231, "step": 1914 }, { "epoch": 0.16, "learning_rate": 1.9070884273020745e-05, "loss": 0.3517, "step": 1915 }, { "epoch": 0.16, "learning_rate": 1.906971529286343e-05, "loss": 0.6, "step": 1916 }, { "epoch": 0.16, "learning_rate": 1.9068545613657273e-05, "loss": 0.3317, "step": 1917 }, { "epoch": 0.16, "learning_rate": 1.906737523549243e-05, "loss": 0.3067, "step": 1918 }, { "epoch": 0.16, "learning_rate": 1.906620415845911e-05, "loss": 0.2936, "step": 1919 }, { "epoch": 0.16, "learning_rate": 1.9065032382647566e-05, "loss": 0.3136, "step": 1920 }, { "epoch": 0.16, "learning_rate": 1.9063859908148123e-05, "loss": 0.3419, "step": 1921 }, { "epoch": 0.16, "learning_rate": 1.906268673505114e-05, "loss": 0.2871, "step": 1922 }, { "epoch": 0.16, "learning_rate": 1.9061512863447046e-05, "loss": 0.3053, "step": 1923 }, { "epoch": 0.16, "learning_rate": 1.906033829342631e-05, "loss": 0.2992, "step": 1924 }, { "epoch": 0.17, "learning_rate": 1.905916302507947e-05, "loss": 0.3175, "step": 1925 }, { "epoch": 0.17, "learning_rate": 1.9057987058497106e-05, "loss": 0.3081, "step": 1926 }, { "epoch": 0.17, "learning_rate": 1.9056810393769857e-05, "loss": 0.3236, "step": 1927 }, { "epoch": 0.17, "learning_rate": 1.9055633030988417e-05, "loss": 0.2919, "step": 1928 }, { "epoch": 0.17, "learning_rate": 1.9054454970243525e-05, "loss": 0.3991, "step": 1929 }, { "epoch": 0.17, "learning_rate": 1.905327621162598e-05, "loss": 0.338, "step": 1930 }, { "epoch": 0.17, "learning_rate": 1.9052096755226643e-05, "loss": 0.2745, "step": 1931 }, { "epoch": 0.17, "learning_rate": 1.9050916601136418e-05, "loss": 0.3105, "step": 1932 }, { "epoch": 0.17, "learning_rate": 1.904973574944626e-05, "loss": 0.3568, "step": 1933 }, { "epoch": 0.17, "learning_rate": 1.9048554200247184e-05, "loss": 0.2861, "step": 1934 }, { "epoch": 0.17, "learning_rate": 1.9047371953630262e-05, "loss": 0.3148, "step": 1935 }, { "epoch": 0.17, "learning_rate": 1.904618900968662e-05, "loss": 0.2726, "step": 1936 }, { "epoch": 0.17, "learning_rate": 1.9045005368507418e-05, "loss": 0.3517, "step": 1937 }, { "epoch": 0.17, "learning_rate": 1.90438210301839e-05, "loss": 0.3078, "step": 1938 }, { "epoch": 0.17, "learning_rate": 1.9042635994807344e-05, "loss": 0.2863, "step": 1939 }, { "epoch": 0.17, "learning_rate": 1.9041450262469087e-05, "loss": 0.4042, "step": 1940 }, { "epoch": 0.17, "learning_rate": 1.9040263833260513e-05, "loss": 0.3239, "step": 1941 }, { "epoch": 0.17, "learning_rate": 1.903907670727308e-05, "loss": 0.3154, "step": 1942 }, { "epoch": 0.17, "learning_rate": 1.9037888884598272e-05, "loss": 0.3273, "step": 1943 }, { "epoch": 0.17, "learning_rate": 1.9036700365327648e-05, "loss": 0.3491, "step": 1944 }, { "epoch": 0.17, "learning_rate": 1.9035511149552816e-05, "loss": 0.3012, "step": 1945 }, { "epoch": 0.17, "learning_rate": 1.9034321237365424e-05, "loss": 0.3345, "step": 1946 }, { "epoch": 0.17, "learning_rate": 1.9033130628857194e-05, "loss": 0.6304, "step": 1947 }, { "epoch": 0.17, "learning_rate": 1.903193932411989e-05, "loss": 0.3242, "step": 1948 }, { "epoch": 0.17, "learning_rate": 1.903074732324533e-05, "loss": 0.3118, "step": 1949 }, { "epoch": 0.17, "learning_rate": 1.9029554626325386e-05, "loss": 0.2962, "step": 1950 }, { "epoch": 0.17, "learning_rate": 1.902836123345199e-05, "loss": 0.2737, "step": 1951 }, { "epoch": 0.17, "learning_rate": 1.902716714471712e-05, "loss": 0.3114, "step": 1952 }, { "epoch": 0.17, "learning_rate": 1.9025972360212813e-05, "loss": 0.3246, "step": 1953 }, { "epoch": 0.17, "learning_rate": 1.9024776880031154e-05, "loss": 0.3302, "step": 1954 }, { "epoch": 0.17, "learning_rate": 1.9023580704264284e-05, "loss": 0.2795, "step": 1955 }, { "epoch": 0.17, "learning_rate": 1.9022383833004404e-05, "loss": 0.3148, "step": 1956 }, { "epoch": 0.17, "learning_rate": 1.9021186266343756e-05, "loss": 0.2803, "step": 1957 }, { "epoch": 0.17, "learning_rate": 1.9019988004374645e-05, "loss": 0.291, "step": 1958 }, { "epoch": 0.17, "learning_rate": 1.901878904718943e-05, "loss": 0.3284, "step": 1959 }, { "epoch": 0.17, "learning_rate": 1.9017589394880515e-05, "loss": 0.2957, "step": 1960 }, { "epoch": 0.17, "learning_rate": 1.9016389047540368e-05, "loss": 0.2958, "step": 1961 }, { "epoch": 0.17, "learning_rate": 1.9015188005261505e-05, "loss": 0.3433, "step": 1962 }, { "epoch": 0.17, "learning_rate": 1.90139862681365e-05, "loss": 0.2927, "step": 1963 }, { "epoch": 0.17, "learning_rate": 1.901278383625797e-05, "loss": 0.2879, "step": 1964 }, { "epoch": 0.17, "learning_rate": 1.9011580709718594e-05, "loss": 0.283, "step": 1965 }, { "epoch": 0.17, "learning_rate": 1.9010376888611106e-05, "loss": 0.3064, "step": 1966 }, { "epoch": 0.17, "learning_rate": 1.9009172373028286e-05, "loss": 0.3329, "step": 1967 }, { "epoch": 0.17, "learning_rate": 1.9007967163062978e-05, "loss": 0.2832, "step": 1968 }, { "epoch": 0.17, "learning_rate": 1.900676125880807e-05, "loss": 0.2922, "step": 1969 }, { "epoch": 0.17, "learning_rate": 1.9005554660356505e-05, "loss": 0.3642, "step": 1970 }, { "epoch": 0.17, "learning_rate": 1.9004347367801288e-05, "loss": 0.2974, "step": 1971 }, { "epoch": 0.17, "learning_rate": 1.9003139381235467e-05, "loss": 0.2886, "step": 1972 }, { "epoch": 0.17, "learning_rate": 1.900193070075215e-05, "loss": 0.2899, "step": 1973 }, { "epoch": 0.17, "learning_rate": 1.9000721326444492e-05, "loss": 0.6342, "step": 1974 }, { "epoch": 0.17, "learning_rate": 1.899951125840571e-05, "loss": 0.2646, "step": 1975 }, { "epoch": 0.17, "learning_rate": 1.8998300496729066e-05, "loss": 0.2878, "step": 1976 }, { "epoch": 0.17, "learning_rate": 1.8997089041507882e-05, "loss": 0.2545, "step": 1977 }, { "epoch": 0.17, "learning_rate": 1.899587689283553e-05, "loss": 0.2688, "step": 1978 }, { "epoch": 0.17, "learning_rate": 1.8994664050805437e-05, "loss": 0.3099, "step": 1979 }, { "epoch": 0.17, "learning_rate": 1.8993450515511086e-05, "loss": 0.3057, "step": 1980 }, { "epoch": 0.17, "learning_rate": 1.8992236287046008e-05, "loss": 0.6141, "step": 1981 }, { "epoch": 0.17, "learning_rate": 1.8991021365503782e-05, "loss": 0.3133, "step": 1982 }, { "epoch": 0.17, "learning_rate": 1.8989805750978062e-05, "loss": 0.3036, "step": 1983 }, { "epoch": 0.17, "learning_rate": 1.898858944356253e-05, "loss": 0.2885, "step": 1984 }, { "epoch": 0.17, "learning_rate": 1.898737244335094e-05, "loss": 0.2827, "step": 1985 }, { "epoch": 0.17, "learning_rate": 1.898615475043709e-05, "loss": 0.2995, "step": 1986 }, { "epoch": 0.17, "learning_rate": 1.8984936364914835e-05, "loss": 0.3311, "step": 1987 }, { "epoch": 0.17, "learning_rate": 1.8983717286878078e-05, "loss": 0.2932, "step": 1988 }, { "epoch": 0.17, "learning_rate": 1.898249751642078e-05, "loss": 0.3197, "step": 1989 }, { "epoch": 0.17, "learning_rate": 1.8981277053636963e-05, "loss": 0.3235, "step": 1990 }, { "epoch": 0.17, "learning_rate": 1.8980055898620688e-05, "loss": 0.3088, "step": 1991 }, { "epoch": 0.17, "learning_rate": 1.8978834051466073e-05, "loss": 0.3177, "step": 1992 }, { "epoch": 0.17, "learning_rate": 1.8977611512267294e-05, "loss": 0.2531, "step": 1993 }, { "epoch": 0.17, "learning_rate": 1.8976388281118584e-05, "loss": 0.3004, "step": 1994 }, { "epoch": 0.17, "learning_rate": 1.8975164358114216e-05, "loss": 0.3114, "step": 1995 }, { "epoch": 0.17, "learning_rate": 1.8973939743348527e-05, "loss": 0.3235, "step": 1996 }, { "epoch": 0.17, "learning_rate": 1.8972714436915905e-05, "loss": 0.3309, "step": 1997 }, { "epoch": 0.17, "learning_rate": 1.897148843891079e-05, "loss": 0.3483, "step": 1998 }, { "epoch": 0.17, "learning_rate": 1.8970261749427674e-05, "loss": 0.3162, "step": 1999 }, { "epoch": 0.17, "learning_rate": 1.8969034368561105e-05, "loss": 0.3036, "step": 2000 }, { "epoch": 0.17, "learning_rate": 1.8967806296405686e-05, "loss": 0.2573, "step": 2001 }, { "epoch": 0.17, "learning_rate": 1.896657753305607e-05, "loss": 0.2863, "step": 2002 }, { "epoch": 0.17, "learning_rate": 1.8965348078606962e-05, "loss": 0.317, "step": 2003 }, { "epoch": 0.17, "learning_rate": 1.896411793315312e-05, "loss": 0.3018, "step": 2004 }, { "epoch": 0.17, "learning_rate": 1.8962887096789363e-05, "loss": 0.3557, "step": 2005 }, { "epoch": 0.17, "learning_rate": 1.8961655569610557e-05, "loss": 0.3107, "step": 2006 }, { "epoch": 0.17, "learning_rate": 1.8960423351711622e-05, "loss": 0.3047, "step": 2007 }, { "epoch": 0.17, "learning_rate": 1.8959190443187525e-05, "loss": 0.3517, "step": 2008 }, { "epoch": 0.17, "learning_rate": 1.89579568441333e-05, "loss": 0.3871, "step": 2009 }, { "epoch": 0.17, "learning_rate": 1.8956722554644026e-05, "loss": 0.305, "step": 2010 }, { "epoch": 0.17, "learning_rate": 1.895548757481483e-05, "loss": 0.2921, "step": 2011 }, { "epoch": 0.17, "learning_rate": 1.8954251904740904e-05, "loss": 0.2544, "step": 2012 }, { "epoch": 0.17, "learning_rate": 1.8953015544517482e-05, "loss": 0.2961, "step": 2013 }, { "epoch": 0.17, "learning_rate": 1.8951778494239862e-05, "loss": 0.2822, "step": 2014 }, { "epoch": 0.17, "learning_rate": 1.895054075400339e-05, "loss": 0.2968, "step": 2015 }, { "epoch": 0.17, "learning_rate": 1.894930232390346e-05, "loss": 0.3514, "step": 2016 }, { "epoch": 0.17, "learning_rate": 1.894806320403553e-05, "loss": 0.3347, "step": 2017 }, { "epoch": 0.17, "learning_rate": 1.89468233944951e-05, "loss": 0.2739, "step": 2018 }, { "epoch": 0.17, "learning_rate": 1.894558289537773e-05, "loss": 0.2888, "step": 2019 }, { "epoch": 0.17, "learning_rate": 1.8944341706779033e-05, "loss": 0.3325, "step": 2020 }, { "epoch": 0.17, "learning_rate": 1.894309982879467e-05, "loss": 0.313, "step": 2021 }, { "epoch": 0.17, "learning_rate": 1.8941857261520363e-05, "loss": 0.2653, "step": 2022 }, { "epoch": 0.17, "learning_rate": 1.8940614005051882e-05, "loss": 0.3132, "step": 2023 }, { "epoch": 0.17, "learning_rate": 1.893937005948505e-05, "loss": 0.3065, "step": 2024 }, { "epoch": 0.17, "learning_rate": 1.8938125424915744e-05, "loss": 0.3253, "step": 2025 }, { "epoch": 0.17, "learning_rate": 1.8936880101439893e-05, "loss": 0.3522, "step": 2026 }, { "epoch": 0.17, "learning_rate": 1.8935634089153486e-05, "loss": 0.3019, "step": 2027 }, { "epoch": 0.17, "learning_rate": 1.8934387388152554e-05, "loss": 0.3073, "step": 2028 }, { "epoch": 0.17, "learning_rate": 1.893313999853319e-05, "loss": 0.2911, "step": 2029 }, { "epoch": 0.17, "learning_rate": 1.8931891920391533e-05, "loss": 0.2897, "step": 2030 }, { "epoch": 0.17, "learning_rate": 1.8930643153823777e-05, "loss": 0.3073, "step": 2031 }, { "epoch": 0.17, "learning_rate": 1.8929393698926177e-05, "loss": 0.2939, "step": 2032 }, { "epoch": 0.17, "learning_rate": 1.8928143555795034e-05, "loss": 0.3099, "step": 2033 }, { "epoch": 0.17, "learning_rate": 1.89268927245267e-05, "loss": 0.3145, "step": 2034 }, { "epoch": 0.17, "learning_rate": 1.8925641205217583e-05, "loss": 0.2874, "step": 2035 }, { "epoch": 0.17, "learning_rate": 1.8924388997964147e-05, "loss": 0.3191, "step": 2036 }, { "epoch": 0.17, "learning_rate": 1.8923136102862902e-05, "loss": 0.3145, "step": 2037 }, { "epoch": 0.17, "learning_rate": 1.8921882520010416e-05, "loss": 0.3859, "step": 2038 }, { "epoch": 0.17, "learning_rate": 1.892062824950331e-05, "loss": 0.312, "step": 2039 }, { "epoch": 0.17, "learning_rate": 1.8919373291438257e-05, "loss": 0.2589, "step": 2040 }, { "epoch": 0.17, "learning_rate": 1.8918117645911985e-05, "loss": 0.3284, "step": 2041 }, { "epoch": 0.18, "learning_rate": 1.8916861313021268e-05, "loss": 0.3251, "step": 2042 }, { "epoch": 0.18, "learning_rate": 1.891560429286294e-05, "loss": 0.3199, "step": 2043 }, { "epoch": 0.18, "learning_rate": 1.891434658553389e-05, "loss": 0.3227, "step": 2044 }, { "epoch": 0.18, "learning_rate": 1.8913088191131047e-05, "loss": 0.2993, "step": 2045 }, { "epoch": 0.18, "learning_rate": 1.891182910975141e-05, "loss": 0.3227, "step": 2046 }, { "epoch": 0.18, "learning_rate": 1.891056934149202e-05, "loss": 0.3022, "step": 2047 }, { "epoch": 0.18, "learning_rate": 1.890930888644997e-05, "loss": 0.3165, "step": 2048 }, { "epoch": 0.18, "learning_rate": 1.8908047744722417e-05, "loss": 0.3163, "step": 2049 }, { "epoch": 0.18, "learning_rate": 1.890678591640656e-05, "loss": 0.3312, "step": 2050 }, { "epoch": 0.18, "learning_rate": 1.8905523401599655e-05, "loss": 0.3138, "step": 2051 }, { "epoch": 0.18, "learning_rate": 1.890426020039901e-05, "loss": 0.3002, "step": 2052 }, { "epoch": 0.18, "learning_rate": 1.890299631290198e-05, "loss": 0.3511, "step": 2053 }, { "epoch": 0.18, "learning_rate": 1.8901731739205992e-05, "loss": 0.3078, "step": 2054 }, { "epoch": 0.18, "learning_rate": 1.8900466479408505e-05, "loss": 0.3206, "step": 2055 }, { "epoch": 0.18, "learning_rate": 1.8899200533607037e-05, "loss": 0.3278, "step": 2056 }, { "epoch": 0.18, "learning_rate": 1.8897933901899165e-05, "loss": 0.3536, "step": 2057 }, { "epoch": 0.18, "learning_rate": 1.8896666584382516e-05, "loss": 0.318, "step": 2058 }, { "epoch": 0.18, "learning_rate": 1.8895398581154763e-05, "loss": 0.2911, "step": 2059 }, { "epoch": 0.18, "learning_rate": 1.8894129892313643e-05, "loss": 0.2946, "step": 2060 }, { "epoch": 0.18, "learning_rate": 1.8892860517956938e-05, "loss": 0.287, "step": 2061 }, { "epoch": 0.18, "learning_rate": 1.8891590458182486e-05, "loss": 0.6182, "step": 2062 }, { "epoch": 0.18, "learning_rate": 1.8890319713088178e-05, "loss": 0.3275, "step": 2063 }, { "epoch": 0.18, "learning_rate": 1.888904828277195e-05, "loss": 0.2731, "step": 2064 }, { "epoch": 0.18, "learning_rate": 1.8887776167331803e-05, "loss": 0.3074, "step": 2065 }, { "epoch": 0.18, "learning_rate": 1.8886503366865786e-05, "loss": 0.2817, "step": 2066 }, { "epoch": 0.18, "learning_rate": 1.8885229881472002e-05, "loss": 0.3303, "step": 2067 }, { "epoch": 0.18, "learning_rate": 1.88839557112486e-05, "loss": 0.3032, "step": 2068 }, { "epoch": 0.18, "learning_rate": 1.8882680856293785e-05, "loss": 0.3172, "step": 2069 }, { "epoch": 0.18, "learning_rate": 1.8881405316705824e-05, "loss": 0.2908, "step": 2070 }, { "epoch": 0.18, "learning_rate": 1.8880129092583027e-05, "loss": 0.3937, "step": 2071 }, { "epoch": 0.18, "learning_rate": 1.8878852184023754e-05, "loss": 0.2861, "step": 2072 }, { "epoch": 0.18, "learning_rate": 1.8877574591126427e-05, "loss": 0.3243, "step": 2073 }, { "epoch": 0.18, "learning_rate": 1.8876296313989516e-05, "loss": 0.2978, "step": 2074 }, { "epoch": 0.18, "learning_rate": 1.8875017352711547e-05, "loss": 0.3172, "step": 2075 }, { "epoch": 0.18, "learning_rate": 1.887373770739109e-05, "loss": 0.3356, "step": 2076 }, { "epoch": 0.18, "learning_rate": 1.8872457378126778e-05, "loss": 0.3309, "step": 2077 }, { "epoch": 0.18, "learning_rate": 1.8871176365017293e-05, "loss": 0.3307, "step": 2078 }, { "epoch": 0.18, "learning_rate": 1.8869894668161365e-05, "loss": 0.2736, "step": 2079 }, { "epoch": 0.18, "learning_rate": 1.8868612287657783e-05, "loss": 0.2706, "step": 2080 }, { "epoch": 0.18, "learning_rate": 1.886732922360539e-05, "loss": 0.3145, "step": 2081 }, { "epoch": 0.18, "learning_rate": 1.8866045476103073e-05, "loss": 0.3381, "step": 2082 }, { "epoch": 0.18, "learning_rate": 1.8864761045249777e-05, "loss": 0.3324, "step": 2083 }, { "epoch": 0.18, "learning_rate": 1.8863475931144506e-05, "loss": 0.3838, "step": 2084 }, { "epoch": 0.18, "learning_rate": 1.8862190133886303e-05, "loss": 0.3732, "step": 2085 }, { "epoch": 0.18, "learning_rate": 1.8860903653574277e-05, "loss": 0.2498, "step": 2086 }, { "epoch": 0.18, "learning_rate": 1.8859616490307578e-05, "loss": 0.3399, "step": 2087 }, { "epoch": 0.18, "learning_rate": 1.8858328644185414e-05, "loss": 0.3198, "step": 2088 }, { "epoch": 0.18, "learning_rate": 1.8857040115307055e-05, "loss": 0.3476, "step": 2089 }, { "epoch": 0.18, "learning_rate": 1.8855750903771805e-05, "loss": 0.3046, "step": 2090 }, { "epoch": 0.18, "learning_rate": 1.885446100967903e-05, "loss": 0.3245, "step": 2091 }, { "epoch": 0.18, "learning_rate": 1.8853170433128155e-05, "loss": 0.3167, "step": 2092 }, { "epoch": 0.18, "learning_rate": 1.8851879174218645e-05, "loss": 0.2946, "step": 2093 }, { "epoch": 0.18, "learning_rate": 1.885058723305003e-05, "loss": 0.3139, "step": 2094 }, { "epoch": 0.18, "learning_rate": 1.884929460972188e-05, "loss": 0.3494, "step": 2095 }, { "epoch": 0.18, "learning_rate": 1.8848001304333828e-05, "loss": 0.2764, "step": 2096 }, { "epoch": 0.18, "learning_rate": 1.8846707316985556e-05, "loss": 0.3117, "step": 2097 }, { "epoch": 0.18, "learning_rate": 1.8845412647776795e-05, "loss": 0.2834, "step": 2098 }, { "epoch": 0.18, "learning_rate": 1.8844117296807332e-05, "loss": 0.2692, "step": 2099 }, { "epoch": 0.18, "learning_rate": 1.8842821264177012e-05, "loss": 0.2645, "step": 2100 }, { "epoch": 0.18, "learning_rate": 1.884152454998572e-05, "loss": 0.3059, "step": 2101 }, { "epoch": 0.18, "learning_rate": 1.8840227154333405e-05, "loss": 0.3278, "step": 2102 }, { "epoch": 0.18, "learning_rate": 1.883892907732006e-05, "loss": 0.3282, "step": 2103 }, { "epoch": 0.18, "learning_rate": 1.883763031904574e-05, "loss": 0.6036, "step": 2104 }, { "epoch": 0.18, "learning_rate": 1.883633087961054e-05, "loss": 0.2932, "step": 2105 }, { "epoch": 0.18, "learning_rate": 1.8835030759114617e-05, "loss": 0.2885, "step": 2106 }, { "epoch": 0.18, "learning_rate": 1.883372995765818e-05, "loss": 0.2918, "step": 2107 }, { "epoch": 0.18, "learning_rate": 1.8832428475341486e-05, "loss": 0.2858, "step": 2108 }, { "epoch": 0.18, "learning_rate": 1.8831126312264843e-05, "loss": 0.2882, "step": 2109 }, { "epoch": 0.18, "learning_rate": 1.8829823468528624e-05, "loss": 0.331, "step": 2110 }, { "epoch": 0.18, "learning_rate": 1.882851994423324e-05, "loss": 0.3467, "step": 2111 }, { "epoch": 0.18, "learning_rate": 1.8827215739479163e-05, "loss": 0.2709, "step": 2112 }, { "epoch": 0.18, "learning_rate": 1.8825910854366914e-05, "loss": 0.319, "step": 2113 }, { "epoch": 0.18, "learning_rate": 1.8824605288997064e-05, "loss": 0.306, "step": 2114 }, { "epoch": 0.18, "learning_rate": 1.8823299043470243e-05, "loss": 0.3165, "step": 2115 }, { "epoch": 0.18, "learning_rate": 1.882199211788713e-05, "loss": 0.3169, "step": 2116 }, { "epoch": 0.18, "learning_rate": 1.8820684512348455e-05, "loss": 0.3154, "step": 2117 }, { "epoch": 0.18, "learning_rate": 1.8819376226955e-05, "loss": 0.3041, "step": 2118 }, { "epoch": 0.18, "learning_rate": 1.8818067261807606e-05, "loss": 0.303, "step": 2119 }, { "epoch": 0.18, "learning_rate": 1.881675761700716e-05, "loss": 0.2918, "step": 2120 }, { "epoch": 0.18, "learning_rate": 1.8815447292654598e-05, "loss": 0.3339, "step": 2121 }, { "epoch": 0.18, "learning_rate": 1.881413628885092e-05, "loss": 0.2827, "step": 2122 }, { "epoch": 0.18, "learning_rate": 1.8812824605697163e-05, "loss": 0.3256, "step": 2123 }, { "epoch": 0.18, "learning_rate": 1.8811512243294436e-05, "loss": 0.3273, "step": 2124 }, { "epoch": 0.18, "learning_rate": 1.8810199201743884e-05, "loss": 0.2935, "step": 2125 }, { "epoch": 0.18, "learning_rate": 1.880888548114671e-05, "loss": 0.2778, "step": 2126 }, { "epoch": 0.18, "learning_rate": 1.8807571081604167e-05, "loss": 0.3264, "step": 2127 }, { "epoch": 0.18, "learning_rate": 1.8806256003217566e-05, "loss": 0.3231, "step": 2128 }, { "epoch": 0.18, "learning_rate": 1.8804940246088265e-05, "loss": 0.3083, "step": 2129 }, { "epoch": 0.18, "learning_rate": 1.8803623810317678e-05, "loss": 0.3403, "step": 2130 }, { "epoch": 0.18, "learning_rate": 1.8802306696007265e-05, "loss": 0.2906, "step": 2131 }, { "epoch": 0.18, "learning_rate": 1.880098890325855e-05, "loss": 0.3035, "step": 2132 }, { "epoch": 0.18, "learning_rate": 1.8799670432173093e-05, "loss": 0.2933, "step": 2133 }, { "epoch": 0.18, "learning_rate": 1.879835128285252e-05, "loss": 0.3176, "step": 2134 }, { "epoch": 0.18, "learning_rate": 1.8797031455398504e-05, "loss": 0.2634, "step": 2135 }, { "epoch": 0.18, "learning_rate": 1.879571094991277e-05, "loss": 0.3088, "step": 2136 }, { "epoch": 0.18, "learning_rate": 1.87943897664971e-05, "loss": 0.2938, "step": 2137 }, { "epoch": 0.18, "learning_rate": 1.8793067905253318e-05, "loss": 0.3134, "step": 2138 }, { "epoch": 0.18, "learning_rate": 1.8791745366283313e-05, "loss": 0.2851, "step": 2139 }, { "epoch": 0.18, "learning_rate": 1.879042214968901e-05, "loss": 0.317, "step": 2140 }, { "epoch": 0.18, "learning_rate": 1.878909825557241e-05, "loss": 0.2835, "step": 2141 }, { "epoch": 0.18, "learning_rate": 1.878777368403554e-05, "loss": 0.3166, "step": 2142 }, { "epoch": 0.18, "learning_rate": 1.87864484351805e-05, "loss": 0.2952, "step": 2143 }, { "epoch": 0.18, "learning_rate": 1.8785122509109425e-05, "loss": 0.2638, "step": 2144 }, { "epoch": 0.18, "learning_rate": 1.8783795905924516e-05, "loss": 0.2883, "step": 2145 }, { "epoch": 0.18, "learning_rate": 1.8782468625728027e-05, "loss": 0.3054, "step": 2146 }, { "epoch": 0.18, "learning_rate": 1.8781140668622243e-05, "loss": 0.3151, "step": 2147 }, { "epoch": 0.18, "learning_rate": 1.8779812034709532e-05, "loss": 0.2928, "step": 2148 }, { "epoch": 0.18, "learning_rate": 1.877848272409229e-05, "loss": 0.2657, "step": 2149 }, { "epoch": 0.18, "learning_rate": 1.877715273687297e-05, "loss": 0.3471, "step": 2150 }, { "epoch": 0.18, "learning_rate": 1.8775822073154093e-05, "loss": 0.3234, "step": 2151 }, { "epoch": 0.18, "learning_rate": 1.8774490733038214e-05, "loss": 0.3738, "step": 2152 }, { "epoch": 0.18, "learning_rate": 1.877315871662794e-05, "loss": 0.3361, "step": 2153 }, { "epoch": 0.18, "learning_rate": 1.8771826024025944e-05, "loss": 0.302, "step": 2154 }, { "epoch": 0.18, "learning_rate": 1.8770492655334938e-05, "loss": 0.3044, "step": 2155 }, { "epoch": 0.18, "learning_rate": 1.87691586106577e-05, "loss": 0.2828, "step": 2156 }, { "epoch": 0.18, "learning_rate": 1.8767823890097044e-05, "loss": 0.3239, "step": 2157 }, { "epoch": 0.18, "learning_rate": 1.8766488493755845e-05, "loss": 0.3028, "step": 2158 }, { "epoch": 0.19, "learning_rate": 1.876515242173703e-05, "loss": 0.3709, "step": 2159 }, { "epoch": 0.19, "learning_rate": 1.8763815674143574e-05, "loss": 0.2885, "step": 2160 }, { "epoch": 0.19, "learning_rate": 1.8762478251078508e-05, "loss": 0.2751, "step": 2161 }, { "epoch": 0.19, "learning_rate": 1.876114015264492e-05, "loss": 0.2958, "step": 2162 }, { "epoch": 0.19, "learning_rate": 1.8759801378945938e-05, "loss": 0.3157, "step": 2163 }, { "epoch": 0.19, "learning_rate": 1.8758461930084745e-05, "loss": 0.2953, "step": 2164 }, { "epoch": 0.19, "learning_rate": 1.8757121806164588e-05, "loss": 0.2949, "step": 2165 }, { "epoch": 0.19, "learning_rate": 1.875578100728875e-05, "loss": 0.3612, "step": 2166 }, { "epoch": 0.19, "learning_rate": 1.8754439533560576e-05, "loss": 0.3277, "step": 2167 }, { "epoch": 0.19, "learning_rate": 1.875309738508346e-05, "loss": 0.319, "step": 2168 }, { "epoch": 0.19, "learning_rate": 1.8751754561960847e-05, "loss": 0.286, "step": 2169 }, { "epoch": 0.19, "learning_rate": 1.8750411064296237e-05, "loss": 0.2993, "step": 2170 }, { "epoch": 0.19, "learning_rate": 1.874906689219318e-05, "loss": 0.3162, "step": 2171 }, { "epoch": 0.19, "learning_rate": 1.8747722045755273e-05, "loss": 0.2867, "step": 2172 }, { "epoch": 0.19, "learning_rate": 1.874637652508618e-05, "loss": 0.3014, "step": 2173 }, { "epoch": 0.19, "learning_rate": 1.87450303302896e-05, "loss": 0.2891, "step": 2174 }, { "epoch": 0.19, "learning_rate": 1.874368346146929e-05, "loss": 0.2783, "step": 2175 }, { "epoch": 0.19, "learning_rate": 1.8742335918729066e-05, "loss": 0.3362, "step": 2176 }, { "epoch": 0.19, "learning_rate": 1.8740987702172787e-05, "loss": 0.3497, "step": 2177 }, { "epoch": 0.19, "learning_rate": 1.8739638811904363e-05, "loss": 0.3555, "step": 2178 }, { "epoch": 0.19, "learning_rate": 1.8738289248027764e-05, "loss": 0.2637, "step": 2179 }, { "epoch": 0.19, "learning_rate": 1.8736939010647008e-05, "loss": 0.3163, "step": 2180 }, { "epoch": 0.19, "learning_rate": 1.873558809986616e-05, "loss": 0.3246, "step": 2181 }, { "epoch": 0.19, "learning_rate": 1.873423651578935e-05, "loss": 0.3236, "step": 2182 }, { "epoch": 0.19, "learning_rate": 1.8732884258520745e-05, "loss": 0.3394, "step": 2183 }, { "epoch": 0.19, "learning_rate": 1.8731531328164572e-05, "loss": 0.3182, "step": 2184 }, { "epoch": 0.19, "learning_rate": 1.8730177724825107e-05, "loss": 0.2806, "step": 2185 }, { "epoch": 0.19, "learning_rate": 1.872882344860668e-05, "loss": 0.3619, "step": 2186 }, { "epoch": 0.19, "learning_rate": 1.8727468499613675e-05, "loss": 0.2708, "step": 2187 }, { "epoch": 0.19, "learning_rate": 1.8726112877950517e-05, "loss": 0.3092, "step": 2188 }, { "epoch": 0.19, "learning_rate": 1.87247565837217e-05, "loss": 0.2755, "step": 2189 }, { "epoch": 0.19, "learning_rate": 1.8723399617031754e-05, "loss": 0.3241, "step": 2190 }, { "epoch": 0.19, "learning_rate": 1.8722041977985264e-05, "loss": 0.2998, "step": 2191 }, { "epoch": 0.19, "learning_rate": 1.8720683666686882e-05, "loss": 0.311, "step": 2192 }, { "epoch": 0.19, "learning_rate": 1.871932468324129e-05, "loss": 0.3071, "step": 2193 }, { "epoch": 0.19, "learning_rate": 1.8717965027753235e-05, "loss": 0.283, "step": 2194 }, { "epoch": 0.19, "learning_rate": 1.8716604700327516e-05, "loss": 0.3267, "step": 2195 }, { "epoch": 0.19, "learning_rate": 1.871524370106897e-05, "loss": 0.3349, "step": 2196 }, { "epoch": 0.19, "learning_rate": 1.871388203008251e-05, "loss": 0.3089, "step": 2197 }, { "epoch": 0.19, "learning_rate": 1.8712519687473075e-05, "loss": 0.3096, "step": 2198 }, { "epoch": 0.19, "learning_rate": 1.8711156673345675e-05, "loss": 0.3047, "step": 2199 }, { "epoch": 0.19, "learning_rate": 1.8709792987805357e-05, "loss": 0.3143, "step": 2200 }, { "epoch": 0.19, "learning_rate": 1.8708428630957236e-05, "loss": 0.3167, "step": 2201 }, { "epoch": 0.19, "learning_rate": 1.8707063602906466e-05, "loss": 0.2883, "step": 2202 }, { "epoch": 0.19, "learning_rate": 1.8705697903758254e-05, "loss": 0.2916, "step": 2203 }, { "epoch": 0.19, "learning_rate": 1.8704331533617866e-05, "loss": 0.2983, "step": 2204 }, { "epoch": 0.19, "learning_rate": 1.8702964492590613e-05, "loss": 0.3063, "step": 2205 }, { "epoch": 0.19, "learning_rate": 1.8701596780781855e-05, "loss": 0.2296, "step": 2206 }, { "epoch": 0.19, "learning_rate": 1.8700228398297017e-05, "loss": 0.3269, "step": 2207 }, { "epoch": 0.19, "learning_rate": 1.8698859345241562e-05, "loss": 0.3207, "step": 2208 }, { "epoch": 0.19, "learning_rate": 1.8697489621721013e-05, "loss": 0.3031, "step": 2209 }, { "epoch": 0.19, "learning_rate": 1.8696119227840937e-05, "loss": 0.2792, "step": 2210 }, { "epoch": 0.19, "learning_rate": 1.869474816370696e-05, "loss": 0.5715, "step": 2211 }, { "epoch": 0.19, "learning_rate": 1.8693376429424756e-05, "loss": 0.274, "step": 2212 }, { "epoch": 0.19, "learning_rate": 1.8692004025100054e-05, "loss": 0.2643, "step": 2213 }, { "epoch": 0.19, "learning_rate": 1.869063095083863e-05, "loss": 0.3497, "step": 2214 }, { "epoch": 0.19, "learning_rate": 1.8689257206746312e-05, "loss": 0.2688, "step": 2215 }, { "epoch": 0.19, "learning_rate": 1.8687882792928987e-05, "loss": 0.2903, "step": 2216 }, { "epoch": 0.19, "learning_rate": 1.868650770949258e-05, "loss": 0.3087, "step": 2217 }, { "epoch": 0.19, "learning_rate": 1.8685131956543082e-05, "loss": 0.3093, "step": 2218 }, { "epoch": 0.19, "learning_rate": 1.8683755534186528e-05, "loss": 0.3425, "step": 2219 }, { "epoch": 0.19, "learning_rate": 1.8682378442529005e-05, "loss": 0.3351, "step": 2220 }, { "epoch": 0.19, "learning_rate": 1.8681000681676652e-05, "loss": 0.3079, "step": 2221 }, { "epoch": 0.19, "learning_rate": 1.867962225173566e-05, "loss": 0.2826, "step": 2222 }, { "epoch": 0.19, "learning_rate": 1.8678243152812273e-05, "loss": 0.3386, "step": 2223 }, { "epoch": 0.19, "learning_rate": 1.8676863385012785e-05, "loss": 0.3022, "step": 2224 }, { "epoch": 0.19, "learning_rate": 1.867548294844354e-05, "loss": 0.3186, "step": 2225 }, { "epoch": 0.19, "learning_rate": 1.8674101843210935e-05, "loss": 0.278, "step": 2226 }, { "epoch": 0.19, "learning_rate": 1.867272006942142e-05, "loss": 0.6252, "step": 2227 }, { "epoch": 0.19, "learning_rate": 1.8671337627181497e-05, "loss": 0.3765, "step": 2228 }, { "epoch": 0.19, "learning_rate": 1.8669954516597717e-05, "loss": 0.3115, "step": 2229 }, { "epoch": 0.19, "learning_rate": 1.866857073777668e-05, "loss": 0.3462, "step": 2230 }, { "epoch": 0.19, "learning_rate": 1.866718629082504e-05, "loss": 0.3608, "step": 2231 }, { "epoch": 0.19, "learning_rate": 1.866580117584951e-05, "loss": 0.3091, "step": 2232 }, { "epoch": 0.19, "learning_rate": 1.8664415392956848e-05, "loss": 0.3033, "step": 2233 }, { "epoch": 0.19, "learning_rate": 1.8663028942253854e-05, "loss": 0.326, "step": 2234 }, { "epoch": 0.19, "learning_rate": 1.86616418238474e-05, "loss": 0.3051, "step": 2235 }, { "epoch": 0.19, "learning_rate": 1.866025403784439e-05, "loss": 0.2784, "step": 2236 }, { "epoch": 0.19, "learning_rate": 1.8658865584351787e-05, "loss": 0.273, "step": 2237 }, { "epoch": 0.19, "learning_rate": 1.865747646347661e-05, "loss": 0.338, "step": 2238 }, { "epoch": 0.19, "learning_rate": 1.865608667532593e-05, "loss": 0.3135, "step": 2239 }, { "epoch": 0.19, "learning_rate": 1.865469622000686e-05, "loss": 0.2948, "step": 2240 }, { "epoch": 0.19, "learning_rate": 1.8653305097626565e-05, "loss": 0.3037, "step": 2241 }, { "epoch": 0.19, "learning_rate": 1.865191330829227e-05, "loss": 0.3492, "step": 2242 }, { "epoch": 0.19, "learning_rate": 1.865052085211125e-05, "loss": 0.6414, "step": 2243 }, { "epoch": 0.19, "learning_rate": 1.8649127729190825e-05, "loss": 0.3001, "step": 2244 }, { "epoch": 0.19, "learning_rate": 1.8647733939638373e-05, "loss": 0.3071, "step": 2245 }, { "epoch": 0.19, "learning_rate": 1.864633948356132e-05, "loss": 0.2802, "step": 2246 }, { "epoch": 0.19, "learning_rate": 1.864494436106714e-05, "loss": 0.3492, "step": 2247 }, { "epoch": 0.19, "learning_rate": 1.8643548572263364e-05, "loss": 0.2723, "step": 2248 }, { "epoch": 0.19, "learning_rate": 1.8642152117257574e-05, "loss": 0.3063, "step": 2249 }, { "epoch": 0.19, "learning_rate": 1.8640754996157397e-05, "loss": 0.3141, "step": 2250 }, { "epoch": 0.19, "learning_rate": 1.8639357209070526e-05, "loss": 0.2906, "step": 2251 }, { "epoch": 0.19, "learning_rate": 1.8637958756104687e-05, "loss": 0.2592, "step": 2252 }, { "epoch": 0.19, "learning_rate": 1.863655963736767e-05, "loss": 0.308, "step": 2253 }, { "epoch": 0.19, "learning_rate": 1.863515985296731e-05, "loss": 0.325, "step": 2254 }, { "epoch": 0.19, "learning_rate": 1.8633759403011496e-05, "loss": 0.2852, "step": 2255 }, { "epoch": 0.19, "learning_rate": 1.8632358287608167e-05, "loss": 0.3242, "step": 2256 }, { "epoch": 0.19, "learning_rate": 1.8630956506865313e-05, "loss": 0.2997, "step": 2257 }, { "epoch": 0.19, "learning_rate": 1.8629554060890982e-05, "loss": 0.3124, "step": 2258 }, { "epoch": 0.19, "learning_rate": 1.8628150949793262e-05, "loss": 0.3447, "step": 2259 }, { "epoch": 0.19, "learning_rate": 1.8626747173680302e-05, "loss": 0.3174, "step": 2260 }, { "epoch": 0.19, "learning_rate": 1.862534273266029e-05, "loss": 0.3788, "step": 2261 }, { "epoch": 0.19, "learning_rate": 1.8623937626841485e-05, "loss": 0.3508, "step": 2262 }, { "epoch": 0.19, "learning_rate": 1.862253185633218e-05, "loss": 0.2986, "step": 2263 }, { "epoch": 0.19, "learning_rate": 1.8621125421240722e-05, "loss": 0.2567, "step": 2264 }, { "epoch": 0.19, "learning_rate": 1.8619718321675514e-05, "loss": 0.2867, "step": 2265 }, { "epoch": 0.19, "learning_rate": 1.861831055774501e-05, "loss": 0.2954, "step": 2266 }, { "epoch": 0.19, "learning_rate": 1.8616902129557718e-05, "loss": 0.3412, "step": 2267 }, { "epoch": 0.19, "learning_rate": 1.861549303722218e-05, "loss": 0.3212, "step": 2268 }, { "epoch": 0.19, "learning_rate": 1.8614083280847013e-05, "loss": 0.3067, "step": 2269 }, { "epoch": 0.19, "learning_rate": 1.8612672860540865e-05, "loss": 0.3444, "step": 2270 }, { "epoch": 0.19, "learning_rate": 1.8611261776412455e-05, "loss": 0.308, "step": 2271 }, { "epoch": 0.19, "learning_rate": 1.8609850028570536e-05, "loss": 0.279, "step": 2272 }, { "epoch": 0.19, "learning_rate": 1.8608437617123917e-05, "loss": 0.3302, "step": 2273 }, { "epoch": 0.19, "learning_rate": 1.8607024542181465e-05, "loss": 0.3188, "step": 2274 }, { "epoch": 0.2, "learning_rate": 1.8605610803852086e-05, "loss": 0.3171, "step": 2275 }, { "epoch": 0.2, "learning_rate": 1.8604196402244752e-05, "loss": 0.2758, "step": 2276 }, { "epoch": 0.2, "learning_rate": 1.8602781337468472e-05, "loss": 0.2822, "step": 2277 }, { "epoch": 0.2, "learning_rate": 1.8601365609632315e-05, "loss": 0.2995, "step": 2278 }, { "epoch": 0.2, "learning_rate": 1.8599949218845394e-05, "loss": 0.333, "step": 2279 }, { "epoch": 0.2, "learning_rate": 1.8598532165216882e-05, "loss": 0.2821, "step": 2280 }, { "epoch": 0.2, "learning_rate": 1.8597114448856e-05, "loss": 0.3109, "step": 2281 }, { "epoch": 0.2, "learning_rate": 1.8595696069872013e-05, "loss": 0.2556, "step": 2282 }, { "epoch": 0.2, "learning_rate": 1.8594277028374245e-05, "loss": 0.3195, "step": 2283 }, { "epoch": 0.2, "learning_rate": 1.8592857324472073e-05, "loss": 0.2954, "step": 2284 }, { "epoch": 0.2, "learning_rate": 1.8591436958274914e-05, "loss": 0.6501, "step": 2285 }, { "epoch": 0.2, "learning_rate": 1.8590015929892245e-05, "loss": 0.2958, "step": 2286 }, { "epoch": 0.2, "learning_rate": 1.85885942394336e-05, "loss": 0.5961, "step": 2287 }, { "epoch": 0.2, "learning_rate": 1.858717188700854e-05, "loss": 0.274, "step": 2288 }, { "epoch": 0.2, "learning_rate": 1.8585748872726704e-05, "loss": 0.3195, "step": 2289 }, { "epoch": 0.2, "learning_rate": 1.8584325196697767e-05, "loss": 0.2817, "step": 2290 }, { "epoch": 0.2, "learning_rate": 1.858290085903146e-05, "loss": 0.2811, "step": 2291 }, { "epoch": 0.2, "learning_rate": 1.858147585983757e-05, "loss": 0.3346, "step": 2292 }, { "epoch": 0.2, "learning_rate": 1.858005019922592e-05, "loss": 0.2809, "step": 2293 }, { "epoch": 0.2, "learning_rate": 1.8578623877306394e-05, "loss": 0.3316, "step": 2294 }, { "epoch": 0.2, "learning_rate": 1.8577196894188926e-05, "loss": 0.3046, "step": 2295 }, { "epoch": 0.2, "learning_rate": 1.857576924998351e-05, "loss": 0.3535, "step": 2296 }, { "epoch": 0.2, "learning_rate": 1.8574340944800165e-05, "loss": 0.2656, "step": 2297 }, { "epoch": 0.2, "learning_rate": 1.8572911978748993e-05, "loss": 0.3254, "step": 2298 }, { "epoch": 0.2, "learning_rate": 1.8571482351940124e-05, "loss": 0.296, "step": 2299 }, { "epoch": 0.2, "learning_rate": 1.857005206448375e-05, "loss": 0.3737, "step": 2300 }, { "epoch": 0.2, "learning_rate": 1.8568621116490108e-05, "loss": 0.3925, "step": 2301 }, { "epoch": 0.2, "learning_rate": 1.856718950806949e-05, "loss": 0.2954, "step": 2302 }, { "epoch": 0.2, "learning_rate": 1.8565757239332232e-05, "loss": 0.3333, "step": 2303 }, { "epoch": 0.2, "learning_rate": 1.8564324310388735e-05, "loss": 0.3148, "step": 2304 }, { "epoch": 0.2, "learning_rate": 1.8562890721349434e-05, "loss": 0.3179, "step": 2305 }, { "epoch": 0.2, "learning_rate": 1.856145647232483e-05, "loss": 0.3184, "step": 2306 }, { "epoch": 0.2, "learning_rate": 1.8560021563425462e-05, "loss": 0.2841, "step": 2307 }, { "epoch": 0.2, "learning_rate": 1.8558585994761932e-05, "loss": 0.3315, "step": 2308 }, { "epoch": 0.2, "learning_rate": 1.855714976644488e-05, "loss": 0.3156, "step": 2309 }, { "epoch": 0.2, "learning_rate": 1.8555712878585005e-05, "loss": 0.3032, "step": 2310 }, { "epoch": 0.2, "learning_rate": 1.855427533129306e-05, "loss": 0.2994, "step": 2311 }, { "epoch": 0.2, "learning_rate": 1.8552837124679835e-05, "loss": 0.334, "step": 2312 }, { "epoch": 0.2, "learning_rate": 1.855139825885619e-05, "loss": 0.2925, "step": 2313 }, { "epoch": 0.2, "learning_rate": 1.854995873393302e-05, "loss": 0.3335, "step": 2314 }, { "epoch": 0.2, "learning_rate": 1.8548518550021274e-05, "loss": 0.327, "step": 2315 }, { "epoch": 0.2, "learning_rate": 1.8547077707231963e-05, "loss": 0.3067, "step": 2316 }, { "epoch": 0.2, "learning_rate": 1.8545636205676133e-05, "loss": 0.2688, "step": 2317 }, { "epoch": 0.2, "learning_rate": 1.8544194045464888e-05, "loss": 0.2933, "step": 2318 }, { "epoch": 0.2, "learning_rate": 1.8542751226709382e-05, "loss": 0.3052, "step": 2319 }, { "epoch": 0.2, "learning_rate": 1.8541307749520828e-05, "loss": 0.2762, "step": 2320 }, { "epoch": 0.2, "learning_rate": 1.853986361401047e-05, "loss": 0.2846, "step": 2321 }, { "epoch": 0.2, "learning_rate": 1.8538418820289628e-05, "loss": 0.3182, "step": 2322 }, { "epoch": 0.2, "learning_rate": 1.853697336846965e-05, "loss": 0.3107, "step": 2323 }, { "epoch": 0.2, "learning_rate": 1.8535527258661944e-05, "loss": 0.3061, "step": 2324 }, { "epoch": 0.2, "learning_rate": 1.8534080490977977e-05, "loss": 0.3303, "step": 2325 }, { "epoch": 0.2, "learning_rate": 1.853263306552925e-05, "loss": 0.2715, "step": 2326 }, { "epoch": 0.2, "learning_rate": 1.853118498242733e-05, "loss": 0.2714, "step": 2327 }, { "epoch": 0.2, "learning_rate": 1.8529736241783825e-05, "loss": 0.3378, "step": 2328 }, { "epoch": 0.2, "learning_rate": 1.8528286843710398e-05, "loss": 0.2717, "step": 2329 }, { "epoch": 0.2, "learning_rate": 1.852683678831876e-05, "loss": 0.3435, "step": 2330 }, { "epoch": 0.2, "learning_rate": 1.8525386075720675e-05, "loss": 0.3502, "step": 2331 }, { "epoch": 0.2, "learning_rate": 1.8523934706027952e-05, "loss": 0.3094, "step": 2332 }, { "epoch": 0.2, "learning_rate": 1.8522482679352464e-05, "loss": 0.3027, "step": 2333 }, { "epoch": 0.2, "learning_rate": 1.8521029995806123e-05, "loss": 0.3154, "step": 2334 }, { "epoch": 0.2, "learning_rate": 1.851957665550089e-05, "loss": 0.2781, "step": 2335 }, { "epoch": 0.2, "learning_rate": 1.851812265854879e-05, "loss": 0.3049, "step": 2336 }, { "epoch": 0.2, "learning_rate": 1.851666800506188e-05, "loss": 0.2771, "step": 2337 }, { "epoch": 0.2, "learning_rate": 1.8515212695152284e-05, "loss": 0.3619, "step": 2338 }, { "epoch": 0.2, "learning_rate": 1.851375672893217e-05, "loss": 0.311, "step": 2339 }, { "epoch": 0.2, "learning_rate": 1.851230010651375e-05, "loss": 0.6145, "step": 2340 }, { "epoch": 0.2, "learning_rate": 1.8510842828009303e-05, "loss": 0.293, "step": 2341 }, { "epoch": 0.2, "learning_rate": 1.850938489353114e-05, "loss": 0.279, "step": 2342 }, { "epoch": 0.2, "learning_rate": 1.850792630319164e-05, "loss": 0.3413, "step": 2343 }, { "epoch": 0.2, "learning_rate": 1.8506467057103217e-05, "loss": 0.2908, "step": 2344 }, { "epoch": 0.2, "learning_rate": 1.8505007155378347e-05, "loss": 0.2761, "step": 2345 }, { "epoch": 0.2, "learning_rate": 1.8503546598129547e-05, "loss": 0.2833, "step": 2346 }, { "epoch": 0.2, "learning_rate": 1.8502085385469396e-05, "loss": 0.2955, "step": 2347 }, { "epoch": 0.2, "learning_rate": 1.850062351751051e-05, "loss": 0.304, "step": 2348 }, { "epoch": 0.2, "learning_rate": 1.8499160994365568e-05, "loss": 0.2961, "step": 2349 }, { "epoch": 0.2, "learning_rate": 1.849769781614729e-05, "loss": 0.2924, "step": 2350 }, { "epoch": 0.2, "learning_rate": 1.849623398296846e-05, "loss": 0.4272, "step": 2351 }, { "epoch": 0.2, "learning_rate": 1.8494769494941883e-05, "loss": 0.3087, "step": 2352 }, { "epoch": 0.2, "learning_rate": 1.8493304352180455e-05, "loss": 0.3331, "step": 2353 }, { "epoch": 0.2, "learning_rate": 1.8491838554797096e-05, "loss": 0.3338, "step": 2354 }, { "epoch": 0.2, "learning_rate": 1.8490372102904778e-05, "loss": 0.2849, "step": 2355 }, { "epoch": 0.2, "learning_rate": 1.848890499661653e-05, "loss": 0.2883, "step": 2356 }, { "epoch": 0.2, "learning_rate": 1.848743723604543e-05, "loss": 0.3171, "step": 2357 }, { "epoch": 0.2, "learning_rate": 1.8485968821304604e-05, "loss": 0.2959, "step": 2358 }, { "epoch": 0.2, "learning_rate": 1.8484499752507234e-05, "loss": 0.3009, "step": 2359 }, { "epoch": 0.2, "learning_rate": 1.8483030029766548e-05, "loss": 0.3013, "step": 2360 }, { "epoch": 0.2, "learning_rate": 1.848155965319582e-05, "loss": 0.3588, "step": 2361 }, { "epoch": 0.2, "learning_rate": 1.8480088622908382e-05, "loss": 0.3114, "step": 2362 }, { "epoch": 0.2, "learning_rate": 1.8478616939017615e-05, "loss": 0.3317, "step": 2363 }, { "epoch": 0.2, "learning_rate": 1.847714460163695e-05, "loss": 0.3439, "step": 2364 }, { "epoch": 0.2, "learning_rate": 1.8475671610879864e-05, "loss": 0.3274, "step": 2365 }, { "epoch": 0.2, "learning_rate": 1.847419796685989e-05, "loss": 0.3441, "step": 2366 }, { "epoch": 0.2, "learning_rate": 1.847272366969061e-05, "loss": 0.2842, "step": 2367 }, { "epoch": 0.2, "learning_rate": 1.8471248719485654e-05, "loss": 0.2921, "step": 2368 }, { "epoch": 0.2, "learning_rate": 1.8469773116358705e-05, "loss": 0.2912, "step": 2369 }, { "epoch": 0.2, "learning_rate": 1.8468296860423494e-05, "loss": 0.3018, "step": 2370 }, { "epoch": 0.2, "learning_rate": 1.8466819951793805e-05, "loss": 0.2886, "step": 2371 }, { "epoch": 0.2, "learning_rate": 1.846534239058347e-05, "loss": 0.3138, "step": 2372 }, { "epoch": 0.2, "learning_rate": 1.846386417690637e-05, "loss": 0.3499, "step": 2373 }, { "epoch": 0.2, "learning_rate": 1.8462385310876444e-05, "loss": 0.3368, "step": 2374 }, { "epoch": 0.2, "learning_rate": 1.8460905792607667e-05, "loss": 0.2726, "step": 2375 }, { "epoch": 0.2, "learning_rate": 1.8459425622214082e-05, "loss": 0.298, "step": 2376 }, { "epoch": 0.2, "learning_rate": 1.8457944799809765e-05, "loss": 0.2847, "step": 2377 }, { "epoch": 0.2, "learning_rate": 1.845646332550886e-05, "loss": 0.2843, "step": 2378 }, { "epoch": 0.2, "learning_rate": 1.8454981199425542e-05, "loss": 0.2782, "step": 2379 }, { "epoch": 0.2, "learning_rate": 1.8453498421674055e-05, "loss": 0.2857, "step": 2380 }, { "epoch": 0.2, "learning_rate": 1.845201499236868e-05, "loss": 0.3391, "step": 2381 }, { "epoch": 0.2, "learning_rate": 1.8450530911623747e-05, "loss": 0.3425, "step": 2382 }, { "epoch": 0.2, "learning_rate": 1.844904617955365e-05, "loss": 0.264, "step": 2383 }, { "epoch": 0.2, "learning_rate": 1.8447560796272817e-05, "loss": 0.2936, "step": 2384 }, { "epoch": 0.2, "learning_rate": 1.8446074761895746e-05, "loss": 0.2835, "step": 2385 }, { "epoch": 0.2, "learning_rate": 1.844458807653696e-05, "loss": 0.2734, "step": 2386 }, { "epoch": 0.2, "learning_rate": 1.844310074031105e-05, "loss": 0.2825, "step": 2387 }, { "epoch": 0.2, "learning_rate": 1.8441612753332658e-05, "loss": 0.2701, "step": 2388 }, { "epoch": 0.2, "learning_rate": 1.8440124115716463e-05, "loss": 0.373, "step": 2389 }, { "epoch": 0.2, "learning_rate": 1.843863482757721e-05, "loss": 0.3769, "step": 2390 }, { "epoch": 0.2, "learning_rate": 1.8437144889029675e-05, "loss": 0.351, "step": 2391 }, { "epoch": 0.21, "learning_rate": 1.8435654300188705e-05, "loss": 0.3513, "step": 2392 }, { "epoch": 0.21, "learning_rate": 1.8434163061169178e-05, "loss": 0.3262, "step": 2393 }, { "epoch": 0.21, "learning_rate": 1.8432671172086044e-05, "loss": 0.3374, "step": 2394 }, { "epoch": 0.21, "learning_rate": 1.8431178633054275e-05, "loss": 0.2755, "step": 2395 }, { "epoch": 0.21, "learning_rate": 1.8429685444188922e-05, "loss": 0.289, "step": 2396 }, { "epoch": 0.21, "learning_rate": 1.8428191605605067e-05, "loss": 0.307, "step": 2397 }, { "epoch": 0.21, "learning_rate": 1.8426697117417848e-05, "loss": 0.3223, "step": 2398 }, { "epoch": 0.21, "learning_rate": 1.8425201979742455e-05, "loss": 0.2998, "step": 2399 }, { "epoch": 0.21, "learning_rate": 1.8423706192694118e-05, "loss": 0.3238, "step": 2400 }, { "epoch": 0.21, "learning_rate": 1.8422209756388132e-05, "loss": 0.3258, "step": 2401 }, { "epoch": 0.21, "learning_rate": 1.8420712670939837e-05, "loss": 0.3314, "step": 2402 }, { "epoch": 0.21, "learning_rate": 1.8419214936464613e-05, "loss": 0.2272, "step": 2403 }, { "epoch": 0.21, "learning_rate": 1.8417716553077903e-05, "loss": 0.2912, "step": 2404 }, { "epoch": 0.21, "learning_rate": 1.8416217520895198e-05, "loss": 0.259, "step": 2405 }, { "epoch": 0.21, "learning_rate": 1.841471784003203e-05, "loss": 0.3311, "step": 2406 }, { "epoch": 0.21, "learning_rate": 1.8413217510603988e-05, "loss": 0.2832, "step": 2407 }, { "epoch": 0.21, "learning_rate": 1.8411716532726707e-05, "loss": 0.3456, "step": 2408 }, { "epoch": 0.21, "learning_rate": 1.8410214906515887e-05, "loss": 0.2956, "step": 2409 }, { "epoch": 0.21, "learning_rate": 1.8408712632087256e-05, "loss": 0.2783, "step": 2410 }, { "epoch": 0.21, "learning_rate": 1.8407209709556603e-05, "loss": 0.3012, "step": 2411 }, { "epoch": 0.21, "learning_rate": 1.8405706139039766e-05, "loss": 0.3292, "step": 2412 }, { "epoch": 0.21, "learning_rate": 1.8404201920652635e-05, "loss": 0.311, "step": 2413 }, { "epoch": 0.21, "learning_rate": 1.8402697054511145e-05, "loss": 0.2782, "step": 2414 }, { "epoch": 0.21, "learning_rate": 1.8401191540731286e-05, "loss": 0.3084, "step": 2415 }, { "epoch": 0.21, "learning_rate": 1.839968537942909e-05, "loss": 0.251, "step": 2416 }, { "epoch": 0.21, "learning_rate": 1.839817857072066e-05, "loss": 0.3433, "step": 2417 }, { "epoch": 0.21, "learning_rate": 1.8396671114722112e-05, "loss": 0.3035, "step": 2418 }, { "epoch": 0.21, "learning_rate": 1.839516301154965e-05, "loss": 0.2882, "step": 2419 }, { "epoch": 0.21, "learning_rate": 1.8393654261319504e-05, "loss": 0.3397, "step": 2420 }, { "epoch": 0.21, "learning_rate": 1.839214486414796e-05, "loss": 0.2822, "step": 2421 }, { "epoch": 0.21, "learning_rate": 1.8390634820151353e-05, "loss": 0.3478, "step": 2422 }, { "epoch": 0.21, "learning_rate": 1.8389124129446078e-05, "loss": 0.3356, "step": 2423 }, { "epoch": 0.21, "learning_rate": 1.8387612792148566e-05, "loss": 0.2992, "step": 2424 }, { "epoch": 0.21, "learning_rate": 1.8386100808375305e-05, "loss": 0.3185, "step": 2425 }, { "epoch": 0.21, "learning_rate": 1.8384588178242828e-05, "loss": 0.2957, "step": 2426 }, { "epoch": 0.21, "learning_rate": 1.8383074901867728e-05, "loss": 0.3258, "step": 2427 }, { "epoch": 0.21, "learning_rate": 1.8381560979366633e-05, "loss": 0.3, "step": 2428 }, { "epoch": 0.21, "learning_rate": 1.8380046410856234e-05, "loss": 0.2985, "step": 2429 }, { "epoch": 0.21, "learning_rate": 1.8378531196453265e-05, "loss": 0.3295, "step": 2430 }, { "epoch": 0.21, "learning_rate": 1.8377015336274507e-05, "loss": 0.6105, "step": 2431 }, { "epoch": 0.21, "learning_rate": 1.8375498830436805e-05, "loss": 0.3095, "step": 2432 }, { "epoch": 0.21, "learning_rate": 1.8373981679057036e-05, "loss": 0.3124, "step": 2433 }, { "epoch": 0.21, "learning_rate": 1.8372463882252133e-05, "loss": 0.3447, "step": 2434 }, { "epoch": 0.21, "learning_rate": 1.837094544013909e-05, "loss": 0.3058, "step": 2435 }, { "epoch": 0.21, "learning_rate": 1.8369426352834927e-05, "loss": 0.3349, "step": 2436 }, { "epoch": 0.21, "learning_rate": 1.8367906620456737e-05, "loss": 0.2932, "step": 2437 }, { "epoch": 0.21, "learning_rate": 1.8366386243121654e-05, "loss": 0.2755, "step": 2438 }, { "epoch": 0.21, "learning_rate": 1.8364865220946856e-05, "loss": 0.3089, "step": 2439 }, { "epoch": 0.21, "learning_rate": 1.8363343554049582e-05, "loss": 0.3532, "step": 2440 }, { "epoch": 0.21, "learning_rate": 1.836182124254711e-05, "loss": 0.2551, "step": 2441 }, { "epoch": 0.21, "learning_rate": 1.8360298286556774e-05, "loss": 0.361, "step": 2442 }, { "epoch": 0.21, "learning_rate": 1.8358774686195956e-05, "loss": 0.2964, "step": 2443 }, { "epoch": 0.21, "learning_rate": 1.8357250441582085e-05, "loss": 0.3021, "step": 2444 }, { "epoch": 0.21, "learning_rate": 1.8355725552832648e-05, "loss": 0.2558, "step": 2445 }, { "epoch": 0.21, "learning_rate": 1.8354200020065168e-05, "loss": 0.2985, "step": 2446 }, { "epoch": 0.21, "learning_rate": 1.8352673843397232e-05, "loss": 0.2946, "step": 2447 }, { "epoch": 0.21, "learning_rate": 1.8351147022946468e-05, "loss": 0.3085, "step": 2448 }, { "epoch": 0.21, "learning_rate": 1.8349619558830553e-05, "loss": 0.6111, "step": 2449 }, { "epoch": 0.21, "learning_rate": 1.8348091451167224e-05, "loss": 0.3421, "step": 2450 }, { "epoch": 0.21, "learning_rate": 1.8346562700074253e-05, "loss": 0.2653, "step": 2451 }, { "epoch": 0.21, "learning_rate": 1.834503330566947e-05, "loss": 0.293, "step": 2452 }, { "epoch": 0.21, "learning_rate": 1.8343503268070752e-05, "loss": 0.315, "step": 2453 }, { "epoch": 0.21, "learning_rate": 1.8341972587396032e-05, "loss": 0.2822, "step": 2454 }, { "epoch": 0.21, "learning_rate": 1.8340441263763282e-05, "loss": 0.3389, "step": 2455 }, { "epoch": 0.21, "learning_rate": 1.833890929729053e-05, "loss": 0.3104, "step": 2456 }, { "epoch": 0.21, "learning_rate": 1.8337376688095854e-05, "loss": 0.325, "step": 2457 }, { "epoch": 0.21, "learning_rate": 1.833584343629738e-05, "loss": 0.3056, "step": 2458 }, { "epoch": 0.21, "learning_rate": 1.8334309542013282e-05, "loss": 0.2958, "step": 2459 }, { "epoch": 0.21, "learning_rate": 1.8332775005361786e-05, "loss": 0.3148, "step": 2460 }, { "epoch": 0.21, "learning_rate": 1.8331239826461165e-05, "loss": 0.3084, "step": 2461 }, { "epoch": 0.21, "learning_rate": 1.8329704005429745e-05, "loss": 0.3241, "step": 2462 }, { "epoch": 0.21, "learning_rate": 1.8328167542385898e-05, "loss": 0.3234, "step": 2463 }, { "epoch": 0.21, "learning_rate": 1.8326630437448045e-05, "loss": 0.2882, "step": 2464 }, { "epoch": 0.21, "learning_rate": 1.8325092690734663e-05, "loss": 0.3796, "step": 2465 }, { "epoch": 0.21, "learning_rate": 1.8323554302364273e-05, "loss": 0.3371, "step": 2466 }, { "epoch": 0.21, "learning_rate": 1.8322015272455445e-05, "loss": 0.3239, "step": 2467 }, { "epoch": 0.21, "learning_rate": 1.83204756011268e-05, "loss": 0.2818, "step": 2468 }, { "epoch": 0.21, "learning_rate": 1.8318935288497007e-05, "loss": 0.312, "step": 2469 }, { "epoch": 0.21, "learning_rate": 1.831739433468479e-05, "loss": 0.2676, "step": 2470 }, { "epoch": 0.21, "learning_rate": 1.8315852739808914e-05, "loss": 0.3077, "step": 2471 }, { "epoch": 0.21, "learning_rate": 1.8314310503988198e-05, "loss": 0.2911, "step": 2472 }, { "epoch": 0.21, "learning_rate": 1.831276762734151e-05, "loss": 0.3268, "step": 2473 }, { "epoch": 0.21, "learning_rate": 1.8311224109987768e-05, "loss": 0.2565, "step": 2474 }, { "epoch": 0.21, "learning_rate": 1.830967995204594e-05, "loss": 0.3039, "step": 2475 }, { "epoch": 0.21, "learning_rate": 1.8308135153635037e-05, "loss": 0.2855, "step": 2476 }, { "epoch": 0.21, "learning_rate": 1.830658971487413e-05, "loss": 0.6318, "step": 2477 }, { "epoch": 0.21, "learning_rate": 1.8305043635882334e-05, "loss": 0.293, "step": 2478 }, { "epoch": 0.21, "learning_rate": 1.830349691677881e-05, "loss": 0.258, "step": 2479 }, { "epoch": 0.21, "learning_rate": 1.830194955768277e-05, "loss": 0.2902, "step": 2480 }, { "epoch": 0.21, "learning_rate": 1.830040155871348e-05, "loss": 0.3034, "step": 2481 }, { "epoch": 0.21, "learning_rate": 1.8298852919990254e-05, "loss": 0.3347, "step": 2482 }, { "epoch": 0.21, "learning_rate": 1.8297303641632448e-05, "loss": 0.2749, "step": 2483 }, { "epoch": 0.21, "learning_rate": 1.829575372375948e-05, "loss": 0.293, "step": 2484 }, { "epoch": 0.21, "learning_rate": 1.8294203166490797e-05, "loss": 0.3049, "step": 2485 }, { "epoch": 0.21, "learning_rate": 1.8292651969945923e-05, "loss": 0.2784, "step": 2486 }, { "epoch": 0.21, "learning_rate": 1.8291100134244407e-05, "loss": 0.2871, "step": 2487 }, { "epoch": 0.21, "learning_rate": 1.8289547659505867e-05, "loss": 0.3049, "step": 2488 }, { "epoch": 0.21, "learning_rate": 1.8287994545849948e-05, "loss": 0.295, "step": 2489 }, { "epoch": 0.21, "learning_rate": 1.828644079339636e-05, "loss": 0.3129, "step": 2490 }, { "epoch": 0.21, "learning_rate": 1.8284886402264864e-05, "loss": 0.2702, "step": 2491 }, { "epoch": 0.21, "learning_rate": 1.8283331372575258e-05, "loss": 0.3613, "step": 2492 }, { "epoch": 0.21, "learning_rate": 1.8281775704447402e-05, "loss": 0.2748, "step": 2493 }, { "epoch": 0.21, "learning_rate": 1.8280219398001192e-05, "loss": 0.2891, "step": 2494 }, { "epoch": 0.21, "learning_rate": 1.8278662453356588e-05, "loss": 0.3527, "step": 2495 }, { "epoch": 0.21, "learning_rate": 1.8277104870633588e-05, "loss": 0.2952, "step": 2496 }, { "epoch": 0.21, "learning_rate": 1.827554664995224e-05, "loss": 0.306, "step": 2497 }, { "epoch": 0.21, "learning_rate": 1.827398779143265e-05, "loss": 0.2878, "step": 2498 }, { "epoch": 0.21, "learning_rate": 1.8272428295194965e-05, "loss": 0.3098, "step": 2499 }, { "epoch": 0.21, "learning_rate": 1.8270868161359377e-05, "loss": 0.327, "step": 2500 }, { "epoch": 0.21, "learning_rate": 1.8269307390046143e-05, "loss": 0.3403, "step": 2501 }, { "epoch": 0.21, "learning_rate": 1.8267745981375555e-05, "loss": 0.3116, "step": 2502 }, { "epoch": 0.21, "learning_rate": 1.826618393546796e-05, "loss": 0.6324, "step": 2503 }, { "epoch": 0.21, "learning_rate": 1.826462125244375e-05, "loss": 0.5909, "step": 2504 }, { "epoch": 0.21, "learning_rate": 1.826305793242337e-05, "loss": 0.2809, "step": 2505 }, { "epoch": 0.21, "learning_rate": 1.8261493975527312e-05, "loss": 0.2873, "step": 2506 }, { "epoch": 0.21, "learning_rate": 1.825992938187612e-05, "loss": 0.3083, "step": 2507 }, { "epoch": 0.21, "learning_rate": 1.8258364151590386e-05, "loss": 0.2934, "step": 2508 }, { "epoch": 0.22, "learning_rate": 1.825679828479075e-05, "loss": 0.3502, "step": 2509 }, { "epoch": 0.22, "learning_rate": 1.82552317815979e-05, "loss": 0.2945, "step": 2510 }, { "epoch": 0.22, "learning_rate": 1.8253664642132576e-05, "loss": 0.2768, "step": 2511 }, { "epoch": 0.22, "learning_rate": 1.825209686651556e-05, "loss": 0.3544, "step": 2512 }, { "epoch": 0.22, "learning_rate": 1.8250528454867695e-05, "loss": 0.3334, "step": 2513 }, { "epoch": 0.22, "learning_rate": 1.8248959407309862e-05, "loss": 0.2967, "step": 2514 }, { "epoch": 0.22, "learning_rate": 1.8247389723962998e-05, "loss": 0.2784, "step": 2515 }, { "epoch": 0.22, "learning_rate": 1.8245819404948088e-05, "loss": 0.2604, "step": 2516 }, { "epoch": 0.22, "learning_rate": 1.8244248450386156e-05, "loss": 0.265, "step": 2517 }, { "epoch": 0.22, "learning_rate": 1.8242676860398295e-05, "loss": 0.2751, "step": 2518 }, { "epoch": 0.22, "learning_rate": 1.8241104635105627e-05, "loss": 0.2861, "step": 2519 }, { "epoch": 0.22, "learning_rate": 1.823953177462934e-05, "loss": 0.3376, "step": 2520 }, { "epoch": 0.22, "learning_rate": 1.823795827909065e-05, "loss": 0.2474, "step": 2521 }, { "epoch": 0.22, "learning_rate": 1.8236384148610843e-05, "loss": 0.2532, "step": 2522 }, { "epoch": 0.22, "learning_rate": 1.823480938331124e-05, "loss": 0.2949, "step": 2523 }, { "epoch": 0.22, "learning_rate": 1.8233233983313224e-05, "loss": 0.325, "step": 2524 }, { "epoch": 0.22, "learning_rate": 1.8231657948738212e-05, "loss": 0.3331, "step": 2525 }, { "epoch": 0.22, "learning_rate": 1.8230081279707675e-05, "loss": 0.3224, "step": 2526 }, { "epoch": 0.22, "learning_rate": 1.8228503976343147e-05, "loss": 0.3739, "step": 2527 }, { "epoch": 0.22, "learning_rate": 1.8226926038766185e-05, "loss": 0.2748, "step": 2528 }, { "epoch": 0.22, "learning_rate": 1.8225347467098418e-05, "loss": 0.3304, "step": 2529 }, { "epoch": 0.22, "learning_rate": 1.822376826146151e-05, "loss": 0.2908, "step": 2530 }, { "epoch": 0.22, "learning_rate": 1.8222188421977178e-05, "loss": 0.3076, "step": 2531 }, { "epoch": 0.22, "learning_rate": 1.8220607948767187e-05, "loss": 0.3163, "step": 2532 }, { "epoch": 0.22, "learning_rate": 1.821902684195336e-05, "loss": 0.2876, "step": 2533 }, { "epoch": 0.22, "learning_rate": 1.8217445101657553e-05, "loss": 0.3032, "step": 2534 }, { "epoch": 0.22, "learning_rate": 1.821586272800168e-05, "loss": 0.319, "step": 2535 }, { "epoch": 0.22, "learning_rate": 1.8214279721107705e-05, "loss": 0.3348, "step": 2536 }, { "epoch": 0.22, "learning_rate": 1.8212696081097636e-05, "loss": 0.301, "step": 2537 }, { "epoch": 0.22, "learning_rate": 1.8211111808093534e-05, "loss": 0.3395, "step": 2538 }, { "epoch": 0.22, "learning_rate": 1.8209526902217506e-05, "loss": 0.3055, "step": 2539 }, { "epoch": 0.22, "learning_rate": 1.820794136359171e-05, "loss": 0.3011, "step": 2540 }, { "epoch": 0.22, "learning_rate": 1.820635519233835e-05, "loss": 0.3148, "step": 2541 }, { "epoch": 0.22, "learning_rate": 1.820476838857968e-05, "loss": 0.3375, "step": 2542 }, { "epoch": 0.22, "learning_rate": 1.8203180952438e-05, "loss": 0.3148, "step": 2543 }, { "epoch": 0.22, "learning_rate": 1.820159288403567e-05, "loss": 0.3963, "step": 2544 }, { "epoch": 0.22, "learning_rate": 1.8200004183495085e-05, "loss": 0.2939, "step": 2545 }, { "epoch": 0.22, "learning_rate": 1.8198414850938694e-05, "loss": 0.3329, "step": 2546 }, { "epoch": 0.22, "learning_rate": 1.8196824886488996e-05, "loss": 0.3519, "step": 2547 }, { "epoch": 0.22, "learning_rate": 1.8195234290268536e-05, "loss": 0.2755, "step": 2548 }, { "epoch": 0.22, "learning_rate": 1.8193643062399913e-05, "loss": 0.2719, "step": 2549 }, { "epoch": 0.22, "learning_rate": 1.8192051203005768e-05, "loss": 0.3317, "step": 2550 }, { "epoch": 0.22, "learning_rate": 1.8190458712208795e-05, "loss": 0.3207, "step": 2551 }, { "epoch": 0.22, "learning_rate": 1.8188865590131733e-05, "loss": 0.2989, "step": 2552 }, { "epoch": 0.22, "learning_rate": 1.8187271836897377e-05, "loss": 0.295, "step": 2553 }, { "epoch": 0.22, "learning_rate": 1.8185677452628557e-05, "loss": 0.2838, "step": 2554 }, { "epoch": 0.22, "learning_rate": 1.818408243744817e-05, "loss": 0.6395, "step": 2555 }, { "epoch": 0.22, "learning_rate": 1.8182486791479145e-05, "loss": 0.2875, "step": 2556 }, { "epoch": 0.22, "learning_rate": 1.818089051484447e-05, "loss": 0.2908, "step": 2557 }, { "epoch": 0.22, "learning_rate": 1.8179293607667177e-05, "loss": 0.2991, "step": 2558 }, { "epoch": 0.22, "learning_rate": 1.817769607007035e-05, "loss": 0.276, "step": 2559 }, { "epoch": 0.22, "learning_rate": 1.8176097902177115e-05, "loss": 0.2872, "step": 2560 }, { "epoch": 0.22, "learning_rate": 1.8174499104110653e-05, "loss": 0.3059, "step": 2561 }, { "epoch": 0.22, "learning_rate": 1.817289967599419e-05, "loss": 0.3077, "step": 2562 }, { "epoch": 0.22, "learning_rate": 1.8171299617951007e-05, "loss": 0.299, "step": 2563 }, { "epoch": 0.22, "learning_rate": 1.816969893010442e-05, "loss": 0.2706, "step": 2564 }, { "epoch": 0.22, "learning_rate": 1.816809761257781e-05, "loss": 0.3182, "step": 2565 }, { "epoch": 0.22, "learning_rate": 1.81664956654946e-05, "loss": 0.3344, "step": 2566 }, { "epoch": 0.22, "learning_rate": 1.816489308897825e-05, "loss": 0.3079, "step": 2567 }, { "epoch": 0.22, "learning_rate": 1.8163289883152285e-05, "loss": 0.2955, "step": 2568 }, { "epoch": 0.22, "learning_rate": 1.8161686048140275e-05, "loss": 0.2844, "step": 2569 }, { "epoch": 0.22, "learning_rate": 1.8160081584065833e-05, "loss": 0.2552, "step": 2570 }, { "epoch": 0.22, "learning_rate": 1.8158476491052616e-05, "loss": 0.2571, "step": 2571 }, { "epoch": 0.22, "learning_rate": 1.815687076922435e-05, "loss": 0.3284, "step": 2572 }, { "epoch": 0.22, "learning_rate": 1.8155264418704785e-05, "loss": 0.3126, "step": 2573 }, { "epoch": 0.22, "learning_rate": 1.8153657439617738e-05, "loss": 0.3306, "step": 2574 }, { "epoch": 0.22, "learning_rate": 1.8152049832087063e-05, "loss": 0.2721, "step": 2575 }, { "epoch": 0.22, "learning_rate": 1.8150441596236667e-05, "loss": 0.3096, "step": 2576 }, { "epoch": 0.22, "learning_rate": 1.8148832732190508e-05, "loss": 0.3085, "step": 2577 }, { "epoch": 0.22, "learning_rate": 1.814722324007258e-05, "loss": 0.3082, "step": 2578 }, { "epoch": 0.22, "learning_rate": 1.8145613120006947e-05, "loss": 0.3274, "step": 2579 }, { "epoch": 0.22, "learning_rate": 1.8144002372117705e-05, "loss": 0.3143, "step": 2580 }, { "epoch": 0.22, "learning_rate": 1.8142390996528994e-05, "loss": 0.3163, "step": 2581 }, { "epoch": 0.22, "learning_rate": 1.814077899336502e-05, "loss": 0.3377, "step": 2582 }, { "epoch": 0.22, "learning_rate": 1.813916636275003e-05, "loss": 0.2891, "step": 2583 }, { "epoch": 0.22, "learning_rate": 1.8137553104808308e-05, "loss": 0.3035, "step": 2584 }, { "epoch": 0.22, "learning_rate": 1.8135939219664205e-05, "loss": 0.3094, "step": 2585 }, { "epoch": 0.22, "learning_rate": 1.813432470744211e-05, "loss": 0.2516, "step": 2586 }, { "epoch": 0.22, "learning_rate": 1.8132709568266457e-05, "loss": 0.2946, "step": 2587 }, { "epoch": 0.22, "learning_rate": 1.8131093802261738e-05, "loss": 0.3115, "step": 2588 }, { "epoch": 0.22, "learning_rate": 1.812947740955248e-05, "loss": 0.2675, "step": 2589 }, { "epoch": 0.22, "learning_rate": 1.8127860390263275e-05, "loss": 0.3096, "step": 2590 }, { "epoch": 0.22, "learning_rate": 1.8126242744518753e-05, "loss": 0.2975, "step": 2591 }, { "epoch": 0.22, "learning_rate": 1.8124624472443592e-05, "loss": 0.2581, "step": 2592 }, { "epoch": 0.22, "learning_rate": 1.812300557416252e-05, "loss": 0.2588, "step": 2593 }, { "epoch": 0.22, "learning_rate": 1.8121386049800317e-05, "loss": 0.3183, "step": 2594 }, { "epoch": 0.22, "learning_rate": 1.8119765899481807e-05, "loss": 0.2561, "step": 2595 }, { "epoch": 0.22, "learning_rate": 1.811814512333186e-05, "loss": 0.3161, "step": 2596 }, { "epoch": 0.22, "learning_rate": 1.81165237214754e-05, "loss": 0.2949, "step": 2597 }, { "epoch": 0.22, "learning_rate": 1.8114901694037402e-05, "loss": 0.3259, "step": 2598 }, { "epoch": 0.22, "learning_rate": 1.811327904114287e-05, "loss": 0.2724, "step": 2599 }, { "epoch": 0.22, "learning_rate": 1.8111655762916885e-05, "loss": 0.3187, "step": 2600 }, { "epoch": 0.22, "learning_rate": 1.8110031859484554e-05, "loss": 0.309, "step": 2601 }, { "epoch": 0.22, "learning_rate": 1.810840733097104e-05, "loss": 0.3133, "step": 2602 }, { "epoch": 0.22, "learning_rate": 1.810678217750155e-05, "loss": 0.3854, "step": 2603 }, { "epoch": 0.22, "learning_rate": 1.810515639920135e-05, "loss": 0.2874, "step": 2604 }, { "epoch": 0.22, "learning_rate": 1.810352999619574e-05, "loss": 0.2714, "step": 2605 }, { "epoch": 0.22, "learning_rate": 1.8101902968610082e-05, "loss": 0.3195, "step": 2606 }, { "epoch": 0.22, "learning_rate": 1.8100275316569774e-05, "loss": 0.2803, "step": 2607 }, { "epoch": 0.22, "learning_rate": 1.809864704020027e-05, "loss": 0.3149, "step": 2608 }, { "epoch": 0.22, "learning_rate": 1.8097018139627068e-05, "loss": 0.2936, "step": 2609 }, { "epoch": 0.22, "learning_rate": 1.809538861497572e-05, "loss": 0.3329, "step": 2610 }, { "epoch": 0.22, "learning_rate": 1.8093758466371812e-05, "loss": 0.2933, "step": 2611 }, { "epoch": 0.22, "learning_rate": 1.8092127693940998e-05, "loss": 0.2873, "step": 2612 }, { "epoch": 0.22, "learning_rate": 1.8090496297808962e-05, "loss": 0.2746, "step": 2613 }, { "epoch": 0.22, "learning_rate": 1.8088864278101452e-05, "loss": 0.3024, "step": 2614 }, { "epoch": 0.22, "learning_rate": 1.808723163494425e-05, "loss": 0.3531, "step": 2615 }, { "epoch": 0.22, "learning_rate": 1.8085598368463194e-05, "loss": 0.2829, "step": 2616 }, { "epoch": 0.22, "learning_rate": 1.8083964478784167e-05, "loss": 0.2899, "step": 2617 }, { "epoch": 0.22, "learning_rate": 1.8082329966033105e-05, "loss": 0.272, "step": 2618 }, { "epoch": 0.22, "learning_rate": 1.8080694830335985e-05, "loss": 0.2876, "step": 2619 }, { "epoch": 0.22, "learning_rate": 1.8079059071818828e-05, "loss": 0.3261, "step": 2620 }, { "epoch": 0.22, "learning_rate": 1.8077422690607725e-05, "loss": 0.3115, "step": 2621 }, { "epoch": 0.22, "learning_rate": 1.807578568682879e-05, "loss": 0.3017, "step": 2622 }, { "epoch": 0.22, "learning_rate": 1.8074148060608197e-05, "loss": 0.2876, "step": 2623 }, { "epoch": 0.22, "learning_rate": 1.807250981207217e-05, "loss": 0.2832, "step": 2624 }, { "epoch": 0.23, "learning_rate": 1.807087094134697e-05, "loss": 0.3242, "step": 2625 }, { "epoch": 0.23, "learning_rate": 1.8069231448558923e-05, "loss": 0.3752, "step": 2626 }, { "epoch": 0.23, "learning_rate": 1.8067591333834382e-05, "loss": 0.339, "step": 2627 }, { "epoch": 0.23, "learning_rate": 1.806595059729977e-05, "loss": 0.2871, "step": 2628 }, { "epoch": 0.23, "learning_rate": 1.8064309239081535e-05, "loss": 0.3015, "step": 2629 }, { "epoch": 0.23, "learning_rate": 1.8062667259306193e-05, "loss": 0.2793, "step": 2630 }, { "epoch": 0.23, "learning_rate": 1.8061024658100298e-05, "loss": 0.291, "step": 2631 }, { "epoch": 0.23, "learning_rate": 1.805938143559045e-05, "loss": 0.3187, "step": 2632 }, { "epoch": 0.23, "learning_rate": 1.8057737591903306e-05, "loss": 0.2576, "step": 2633 }, { "epoch": 0.23, "learning_rate": 1.8056093127165564e-05, "loss": 0.3286, "step": 2634 }, { "epoch": 0.23, "learning_rate": 1.8054448041503966e-05, "loss": 0.2805, "step": 2635 }, { "epoch": 0.23, "learning_rate": 1.8052802335045315e-05, "loss": 0.2883, "step": 2636 }, { "epoch": 0.23, "learning_rate": 1.805115600791645e-05, "loss": 0.3074, "step": 2637 }, { "epoch": 0.23, "learning_rate": 1.804950906024426e-05, "loss": 0.3187, "step": 2638 }, { "epoch": 0.23, "learning_rate": 1.8047861492155687e-05, "loss": 0.3398, "step": 2639 }, { "epoch": 0.23, "learning_rate": 1.8046213303777717e-05, "loss": 0.312, "step": 2640 }, { "epoch": 0.23, "learning_rate": 1.804456449523738e-05, "loss": 0.3007, "step": 2641 }, { "epoch": 0.23, "learning_rate": 1.804291506666176e-05, "loss": 0.3564, "step": 2642 }, { "epoch": 0.23, "learning_rate": 1.804126501817799e-05, "loss": 0.31, "step": 2643 }, { "epoch": 0.23, "learning_rate": 1.8039614349913245e-05, "loss": 0.3165, "step": 2644 }, { "epoch": 0.23, "learning_rate": 1.8037963061994756e-05, "loss": 0.28, "step": 2645 }, { "epoch": 0.23, "learning_rate": 1.8036311154549783e-05, "loss": 0.3203, "step": 2646 }, { "epoch": 0.23, "learning_rate": 1.803465862770566e-05, "loss": 0.2863, "step": 2647 }, { "epoch": 0.23, "learning_rate": 1.8033005481589746e-05, "loss": 0.2887, "step": 2648 }, { "epoch": 0.23, "learning_rate": 1.8031351716329462e-05, "loss": 0.2966, "step": 2649 }, { "epoch": 0.23, "learning_rate": 1.8029697332052277e-05, "loss": 0.2929, "step": 2650 }, { "epoch": 0.23, "learning_rate": 1.8028042328885694e-05, "loss": 0.29, "step": 2651 }, { "epoch": 0.23, "learning_rate": 1.8026386706957278e-05, "loss": 0.2834, "step": 2652 }, { "epoch": 0.23, "learning_rate": 1.8024730466394632e-05, "loss": 0.3765, "step": 2653 }, { "epoch": 0.23, "learning_rate": 1.802307360732541e-05, "loss": 0.2834, "step": 2654 }, { "epoch": 0.23, "learning_rate": 1.8021416129877324e-05, "loss": 0.3094, "step": 2655 }, { "epoch": 0.23, "learning_rate": 1.8019758034178116e-05, "loss": 0.3654, "step": 2656 }, { "epoch": 0.23, "learning_rate": 1.8018099320355586e-05, "loss": 0.3066, "step": 2657 }, { "epoch": 0.23, "learning_rate": 1.8016439988537576e-05, "loss": 0.3427, "step": 2658 }, { "epoch": 0.23, "learning_rate": 1.8014780038851983e-05, "loss": 0.3094, "step": 2659 }, { "epoch": 0.23, "learning_rate": 1.8013119471426748e-05, "loss": 0.2836, "step": 2660 }, { "epoch": 0.23, "learning_rate": 1.8011458286389856e-05, "loss": 0.3178, "step": 2661 }, { "epoch": 0.23, "learning_rate": 1.8009796483869347e-05, "loss": 0.2842, "step": 2662 }, { "epoch": 0.23, "learning_rate": 1.80081340639933e-05, "loss": 0.2948, "step": 2663 }, { "epoch": 0.23, "learning_rate": 1.8006471026889852e-05, "loss": 0.3229, "step": 2664 }, { "epoch": 0.23, "learning_rate": 1.8004807372687175e-05, "loss": 0.2938, "step": 2665 }, { "epoch": 0.23, "learning_rate": 1.8003143101513502e-05, "loss": 0.3467, "step": 2666 }, { "epoch": 0.23, "learning_rate": 1.8001478213497104e-05, "loss": 0.37, "step": 2667 }, { "epoch": 0.23, "learning_rate": 1.79998127087663e-05, "loss": 0.2593, "step": 2668 }, { "epoch": 0.23, "learning_rate": 1.7998146587449457e-05, "loss": 0.2997, "step": 2669 }, { "epoch": 0.23, "learning_rate": 1.7996479849675e-05, "loss": 0.3116, "step": 2670 }, { "epoch": 0.23, "learning_rate": 1.7994812495571387e-05, "loss": 0.3419, "step": 2671 }, { "epoch": 0.23, "learning_rate": 1.799314452526713e-05, "loss": 0.3844, "step": 2672 }, { "epoch": 0.23, "learning_rate": 1.799147593889079e-05, "loss": 0.3381, "step": 2673 }, { "epoch": 0.23, "learning_rate": 1.798980673657097e-05, "loss": 0.2622, "step": 2674 }, { "epoch": 0.23, "learning_rate": 1.7988136918436324e-05, "loss": 0.3355, "step": 2675 }, { "epoch": 0.23, "learning_rate": 1.7986466484615557e-05, "loss": 0.272, "step": 2676 }, { "epoch": 0.23, "learning_rate": 1.7984795435237418e-05, "loss": 0.3593, "step": 2677 }, { "epoch": 0.23, "learning_rate": 1.7983123770430696e-05, "loss": 0.3203, "step": 2678 }, { "epoch": 0.23, "learning_rate": 1.798145149032424e-05, "loss": 0.3458, "step": 2679 }, { "epoch": 0.23, "learning_rate": 1.797977859504694e-05, "loss": 0.3336, "step": 2680 }, { "epoch": 0.23, "learning_rate": 1.797810508472774e-05, "loss": 0.3352, "step": 2681 }, { "epoch": 0.23, "learning_rate": 1.7976430959495617e-05, "loss": 0.2817, "step": 2682 }, { "epoch": 0.23, "learning_rate": 1.797475621947961e-05, "loss": 0.2972, "step": 2683 }, { "epoch": 0.23, "learning_rate": 1.7973080864808795e-05, "loss": 0.2954, "step": 2684 }, { "epoch": 0.23, "learning_rate": 1.797140489561231e-05, "loss": 0.299, "step": 2685 }, { "epoch": 0.23, "learning_rate": 1.7969728312019316e-05, "loss": 0.3123, "step": 2686 }, { "epoch": 0.23, "learning_rate": 1.7968051114159046e-05, "loss": 0.2874, "step": 2687 }, { "epoch": 0.23, "learning_rate": 1.796637330216077e-05, "loss": 0.3176, "step": 2688 }, { "epoch": 0.23, "learning_rate": 1.7964694876153802e-05, "loss": 0.3162, "step": 2689 }, { "epoch": 0.23, "learning_rate": 1.7963015836267502e-05, "loss": 0.327, "step": 2690 }, { "epoch": 0.23, "learning_rate": 1.7961336182631293e-05, "loss": 0.2814, "step": 2691 }, { "epoch": 0.23, "learning_rate": 1.795965591537463e-05, "loss": 0.2719, "step": 2692 }, { "epoch": 0.23, "learning_rate": 1.7957975034627017e-05, "loss": 0.3038, "step": 2693 }, { "epoch": 0.23, "learning_rate": 1.795629354051801e-05, "loss": 0.3014, "step": 2694 }, { "epoch": 0.23, "learning_rate": 1.795461143317721e-05, "loss": 0.3409, "step": 2695 }, { "epoch": 0.23, "learning_rate": 1.7952928712734266e-05, "loss": 0.3441, "step": 2696 }, { "epoch": 0.23, "learning_rate": 1.7951245379318872e-05, "loss": 0.2802, "step": 2697 }, { "epoch": 0.23, "learning_rate": 1.7949561433060775e-05, "loss": 0.3172, "step": 2698 }, { "epoch": 0.23, "learning_rate": 1.794787687408976e-05, "loss": 0.3266, "step": 2699 }, { "epoch": 0.23, "learning_rate": 1.794619170253567e-05, "loss": 0.2952, "step": 2700 }, { "epoch": 0.23, "learning_rate": 1.7944505918528384e-05, "loss": 0.3082, "step": 2701 }, { "epoch": 0.23, "learning_rate": 1.7942819522197837e-05, "loss": 0.2697, "step": 2702 }, { "epoch": 0.23, "learning_rate": 1.794113251367401e-05, "loss": 0.2893, "step": 2703 }, { "epoch": 0.23, "learning_rate": 1.7939444893086925e-05, "loss": 0.2945, "step": 2704 }, { "epoch": 0.23, "learning_rate": 1.793775666056666e-05, "loss": 0.3228, "step": 2705 }, { "epoch": 0.23, "learning_rate": 1.793606781624333e-05, "loss": 0.2875, "step": 2706 }, { "epoch": 0.23, "learning_rate": 1.793437836024711e-05, "loss": 0.2986, "step": 2707 }, { "epoch": 0.23, "learning_rate": 1.793268829270821e-05, "loss": 0.2694, "step": 2708 }, { "epoch": 0.23, "learning_rate": 1.7930997613756892e-05, "loss": 0.3179, "step": 2709 }, { "epoch": 0.23, "learning_rate": 1.7929306323523463e-05, "loss": 0.3358, "step": 2710 }, { "epoch": 0.23, "learning_rate": 1.7927614422138286e-05, "loss": 0.3011, "step": 2711 }, { "epoch": 0.23, "learning_rate": 1.792592190973176e-05, "loss": 0.3267, "step": 2712 }, { "epoch": 0.23, "learning_rate": 1.7924228786434333e-05, "loss": 0.3232, "step": 2713 }, { "epoch": 0.23, "learning_rate": 1.792253505237651e-05, "loss": 0.307, "step": 2714 }, { "epoch": 0.23, "learning_rate": 1.7920840707688833e-05, "loss": 0.2712, "step": 2715 }, { "epoch": 0.23, "learning_rate": 1.791914575250189e-05, "loss": 0.2921, "step": 2716 }, { "epoch": 0.23, "learning_rate": 1.7917450186946323e-05, "loss": 0.29, "step": 2717 }, { "epoch": 0.23, "learning_rate": 1.7915754011152815e-05, "loss": 0.2966, "step": 2718 }, { "epoch": 0.23, "learning_rate": 1.7914057225252103e-05, "loss": 0.313, "step": 2719 }, { "epoch": 0.23, "learning_rate": 1.7912359829374963e-05, "loss": 0.2825, "step": 2720 }, { "epoch": 0.23, "learning_rate": 1.7910661823652223e-05, "loss": 0.3318, "step": 2721 }, { "epoch": 0.23, "learning_rate": 1.790896320821476e-05, "loss": 0.3194, "step": 2722 }, { "epoch": 0.23, "learning_rate": 1.790726398319349e-05, "loss": 0.329, "step": 2723 }, { "epoch": 0.23, "learning_rate": 1.7905564148719383e-05, "loss": 0.3492, "step": 2724 }, { "epoch": 0.23, "learning_rate": 1.7903863704923453e-05, "loss": 0.2863, "step": 2725 }, { "epoch": 0.23, "learning_rate": 1.7902162651936766e-05, "loss": 0.3461, "step": 2726 }, { "epoch": 0.23, "learning_rate": 1.7900460989890424e-05, "loss": 0.3307, "step": 2727 }, { "epoch": 0.23, "learning_rate": 1.789875871891559e-05, "loss": 0.3148, "step": 2728 }, { "epoch": 0.23, "learning_rate": 1.7897055839143457e-05, "loss": 0.2863, "step": 2729 }, { "epoch": 0.23, "learning_rate": 1.7895352350705288e-05, "loss": 0.305, "step": 2730 }, { "epoch": 0.23, "learning_rate": 1.7893648253732364e-05, "loss": 0.6641, "step": 2731 }, { "epoch": 0.23, "learning_rate": 1.7891943548356043e-05, "loss": 0.3036, "step": 2732 }, { "epoch": 0.23, "learning_rate": 1.7890238234707708e-05, "loss": 0.3477, "step": 2733 }, { "epoch": 0.23, "learning_rate": 1.7888532312918793e-05, "loss": 0.2765, "step": 2734 }, { "epoch": 0.23, "learning_rate": 1.7886825783120786e-05, "loss": 0.3339, "step": 2735 }, { "epoch": 0.23, "learning_rate": 1.788511864544522e-05, "loss": 0.3472, "step": 2736 }, { "epoch": 0.23, "learning_rate": 1.7883410900023667e-05, "loss": 0.3389, "step": 2737 }, { "epoch": 0.23, "learning_rate": 1.788170254698776e-05, "loss": 0.3372, "step": 2738 }, { "epoch": 0.23, "learning_rate": 1.787999358646916e-05, "loss": 0.2847, "step": 2739 }, { "epoch": 0.23, "learning_rate": 1.7878284018599594e-05, "loss": 0.2751, "step": 2740 }, { "epoch": 0.23, "learning_rate": 1.7876573843510822e-05, "loss": 0.2642, "step": 2741 }, { "epoch": 0.24, "learning_rate": 1.7874863061334658e-05, "loss": 0.2982, "step": 2742 }, { "epoch": 0.24, "learning_rate": 1.787315167220296e-05, "loss": 0.2791, "step": 2743 }, { "epoch": 0.24, "learning_rate": 1.7871439676247632e-05, "loss": 0.2979, "step": 2744 }, { "epoch": 0.24, "learning_rate": 1.786972707360063e-05, "loss": 0.2662, "step": 2745 }, { "epoch": 0.24, "learning_rate": 1.786801386439395e-05, "loss": 0.3167, "step": 2746 }, { "epoch": 0.24, "learning_rate": 1.786630004875964e-05, "loss": 0.614, "step": 2747 }, { "epoch": 0.24, "learning_rate": 1.7864585626829786e-05, "loss": 0.2914, "step": 2748 }, { "epoch": 0.24, "learning_rate": 1.7862870598736534e-05, "loss": 0.3242, "step": 2749 }, { "epoch": 0.24, "learning_rate": 1.786115496461207e-05, "loss": 0.3099, "step": 2750 }, { "epoch": 0.24, "learning_rate": 1.7859438724588623e-05, "loss": 0.2908, "step": 2751 }, { "epoch": 0.24, "learning_rate": 1.7857721878798476e-05, "loss": 0.3257, "step": 2752 }, { "epoch": 0.24, "learning_rate": 1.785600442737395e-05, "loss": 0.2869, "step": 2753 }, { "epoch": 0.24, "learning_rate": 1.785428637044742e-05, "loss": 0.2927, "step": 2754 }, { "epoch": 0.24, "learning_rate": 1.7852567708151306e-05, "loss": 0.3333, "step": 2755 }, { "epoch": 0.24, "learning_rate": 1.7850848440618075e-05, "loss": 0.3088, "step": 2756 }, { "epoch": 0.24, "learning_rate": 1.7849128567980238e-05, "loss": 0.3142, "step": 2757 }, { "epoch": 0.24, "learning_rate": 1.7847408090370355e-05, "loss": 0.3382, "step": 2758 }, { "epoch": 0.24, "learning_rate": 1.7845687007921034e-05, "loss": 0.3124, "step": 2759 }, { "epoch": 0.24, "learning_rate": 1.784396532076492e-05, "loss": 0.3123, "step": 2760 }, { "epoch": 0.24, "learning_rate": 1.784224302903472e-05, "loss": 0.2925, "step": 2761 }, { "epoch": 0.24, "learning_rate": 1.7840520132863173e-05, "loss": 0.2896, "step": 2762 }, { "epoch": 0.24, "learning_rate": 1.783879663238308e-05, "loss": 0.2937, "step": 2763 }, { "epoch": 0.24, "learning_rate": 1.7837072527727275e-05, "loss": 0.3057, "step": 2764 }, { "epoch": 0.24, "learning_rate": 1.7835347819028642e-05, "loss": 0.3079, "step": 2765 }, { "epoch": 0.24, "learning_rate": 1.7833622506420116e-05, "loss": 0.2964, "step": 2766 }, { "epoch": 0.24, "learning_rate": 1.783189659003467e-05, "loss": 0.2938, "step": 2767 }, { "epoch": 0.24, "learning_rate": 1.783017007000534e-05, "loss": 0.3206, "step": 2768 }, { "epoch": 0.24, "learning_rate": 1.7828442946465188e-05, "loss": 0.3374, "step": 2769 }, { "epoch": 0.24, "learning_rate": 1.7826715219547336e-05, "loss": 0.3456, "step": 2770 }, { "epoch": 0.24, "learning_rate": 1.7824986889384948e-05, "loss": 0.3585, "step": 2771 }, { "epoch": 0.24, "learning_rate": 1.7823257956111233e-05, "loss": 0.2675, "step": 2772 }, { "epoch": 0.24, "learning_rate": 1.782152841985945e-05, "loss": 0.3141, "step": 2773 }, { "epoch": 0.24, "learning_rate": 1.7819798280762907e-05, "loss": 0.3415, "step": 2774 }, { "epoch": 0.24, "learning_rate": 1.781806753895495e-05, "loss": 0.3244, "step": 2775 }, { "epoch": 0.24, "learning_rate": 1.7816336194568976e-05, "loss": 0.2383, "step": 2776 }, { "epoch": 0.24, "learning_rate": 1.781460424773843e-05, "loss": 0.2368, "step": 2777 }, { "epoch": 0.24, "learning_rate": 1.78128716985968e-05, "loss": 0.3093, "step": 2778 }, { "epoch": 0.24, "learning_rate": 1.781113854727763e-05, "loss": 0.2943, "step": 2779 }, { "epoch": 0.24, "learning_rate": 1.780940479391449e-05, "loss": 0.2877, "step": 2780 }, { "epoch": 0.24, "learning_rate": 1.780767043864102e-05, "loss": 0.308, "step": 2781 }, { "epoch": 0.24, "learning_rate": 1.780593548159089e-05, "loss": 0.269, "step": 2782 }, { "epoch": 0.24, "learning_rate": 1.780419992289782e-05, "loss": 0.2917, "step": 2783 }, { "epoch": 0.24, "learning_rate": 1.7802463762695588e-05, "loss": 0.3575, "step": 2784 }, { "epoch": 0.24, "learning_rate": 1.7800727001118e-05, "loss": 0.2968, "step": 2785 }, { "epoch": 0.24, "learning_rate": 1.779898963829892e-05, "loss": 0.3095, "step": 2786 }, { "epoch": 0.24, "learning_rate": 1.7797251674372253e-05, "loss": 0.3192, "step": 2787 }, { "epoch": 0.24, "learning_rate": 1.7795513109471952e-05, "loss": 0.5713, "step": 2788 }, { "epoch": 0.24, "learning_rate": 1.7793773943732026e-05, "loss": 0.2745, "step": 2789 }, { "epoch": 0.24, "learning_rate": 1.7792034177286508e-05, "loss": 0.368, "step": 2790 }, { "epoch": 0.24, "learning_rate": 1.77902938102695e-05, "loss": 0.2783, "step": 2791 }, { "epoch": 0.24, "learning_rate": 1.7788552842815136e-05, "loss": 0.3057, "step": 2792 }, { "epoch": 0.24, "learning_rate": 1.7786811275057606e-05, "loss": 0.3386, "step": 2793 }, { "epoch": 0.24, "learning_rate": 1.778506910713114e-05, "loss": 0.3011, "step": 2794 }, { "epoch": 0.24, "learning_rate": 1.778332633917001e-05, "loss": 0.3047, "step": 2795 }, { "epoch": 0.24, "learning_rate": 1.7781582971308547e-05, "loss": 0.322, "step": 2796 }, { "epoch": 0.24, "learning_rate": 1.777983900368112e-05, "loss": 0.3665, "step": 2797 }, { "epoch": 0.24, "learning_rate": 1.777809443642214e-05, "loss": 0.2877, "step": 2798 }, { "epoch": 0.24, "learning_rate": 1.7776349269666076e-05, "loss": 0.2598, "step": 2799 }, { "epoch": 0.24, "learning_rate": 1.7774603503547432e-05, "loss": 0.3298, "step": 2800 }, { "epoch": 0.24, "learning_rate": 1.7772857138200767e-05, "loss": 0.3345, "step": 2801 }, { "epoch": 0.24, "learning_rate": 1.777111017376068e-05, "loss": 0.3273, "step": 2802 }, { "epoch": 0.24, "learning_rate": 1.7769362610361824e-05, "loss": 0.3007, "step": 2803 }, { "epoch": 0.24, "learning_rate": 1.7767614448138882e-05, "loss": 0.3427, "step": 2804 }, { "epoch": 0.24, "learning_rate": 1.77658656872266e-05, "loss": 0.3618, "step": 2805 }, { "epoch": 0.24, "learning_rate": 1.776411632775976e-05, "loss": 0.3187, "step": 2806 }, { "epoch": 0.24, "learning_rate": 1.7762366369873204e-05, "loss": 0.2852, "step": 2807 }, { "epoch": 0.24, "learning_rate": 1.77606158137018e-05, "loss": 0.3292, "step": 2808 }, { "epoch": 0.24, "learning_rate": 1.7758864659380474e-05, "loss": 0.314, "step": 2809 }, { "epoch": 0.24, "learning_rate": 1.77571129070442e-05, "loss": 0.3109, "step": 2810 }, { "epoch": 0.24, "learning_rate": 1.775536055682799e-05, "loss": 0.3658, "step": 2811 }, { "epoch": 0.24, "learning_rate": 1.775360760886691e-05, "loss": 0.2958, "step": 2812 }, { "epoch": 0.24, "learning_rate": 1.7751854063296068e-05, "loss": 0.2924, "step": 2813 }, { "epoch": 0.24, "learning_rate": 1.7750099920250616e-05, "loss": 0.2863, "step": 2814 }, { "epoch": 0.24, "learning_rate": 1.774834517986576e-05, "loss": 0.3532, "step": 2815 }, { "epoch": 0.24, "learning_rate": 1.774658984227674e-05, "loss": 0.3488, "step": 2816 }, { "epoch": 0.24, "learning_rate": 1.774483390761885e-05, "loss": 0.308, "step": 2817 }, { "epoch": 0.24, "learning_rate": 1.7743077376027433e-05, "loss": 0.2943, "step": 2818 }, { "epoch": 0.24, "learning_rate": 1.7741320247637875e-05, "loss": 0.2876, "step": 2819 }, { "epoch": 0.24, "learning_rate": 1.7739562522585598e-05, "loss": 0.2805, "step": 2820 }, { "epoch": 0.24, "learning_rate": 1.7737804201006084e-05, "loss": 0.2848, "step": 2821 }, { "epoch": 0.24, "learning_rate": 1.773604528303486e-05, "loss": 0.285, "step": 2822 }, { "epoch": 0.24, "learning_rate": 1.773428576880749e-05, "loss": 0.3236, "step": 2823 }, { "epoch": 0.24, "learning_rate": 1.7732525658459586e-05, "loss": 0.3361, "step": 2824 }, { "epoch": 0.24, "learning_rate": 1.7730764952126813e-05, "loss": 0.2708, "step": 2825 }, { "epoch": 0.24, "learning_rate": 1.7729003649944878e-05, "loss": 0.2791, "step": 2826 }, { "epoch": 0.24, "learning_rate": 1.772724175204953e-05, "loss": 0.2586, "step": 2827 }, { "epoch": 0.24, "learning_rate": 1.772547925857657e-05, "loss": 0.3179, "step": 2828 }, { "epoch": 0.24, "learning_rate": 1.7723716169661843e-05, "loss": 0.2417, "step": 2829 }, { "epoch": 0.24, "learning_rate": 1.7721952485441232e-05, "loss": 0.3043, "step": 2830 }, { "epoch": 0.24, "learning_rate": 1.7720188206050682e-05, "loss": 0.2878, "step": 2831 }, { "epoch": 0.24, "learning_rate": 1.7718423331626175e-05, "loss": 0.2891, "step": 2832 }, { "epoch": 0.24, "learning_rate": 1.7716657862303733e-05, "loss": 0.3344, "step": 2833 }, { "epoch": 0.24, "learning_rate": 1.7714891798219432e-05, "loss": 0.3486, "step": 2834 }, { "epoch": 0.24, "learning_rate": 1.771312513950939e-05, "loss": 0.3138, "step": 2835 }, { "epoch": 0.24, "learning_rate": 1.7711357886309777e-05, "loss": 0.294, "step": 2836 }, { "epoch": 0.24, "learning_rate": 1.77095900387568e-05, "loss": 0.2626, "step": 2837 }, { "epoch": 0.24, "learning_rate": 1.7707821596986715e-05, "loss": 0.3339, "step": 2838 }, { "epoch": 0.24, "learning_rate": 1.7706052561135826e-05, "loss": 0.3048, "step": 2839 }, { "epoch": 0.24, "learning_rate": 1.7704282931340488e-05, "loss": 0.6313, "step": 2840 }, { "epoch": 0.24, "learning_rate": 1.7702512707737086e-05, "loss": 0.2995, "step": 2841 }, { "epoch": 0.24, "learning_rate": 1.770074189046206e-05, "loss": 0.2975, "step": 2842 }, { "epoch": 0.24, "learning_rate": 1.7698970479651904e-05, "loss": 0.2623, "step": 2843 }, { "epoch": 0.24, "learning_rate": 1.7697198475443146e-05, "loss": 0.3027, "step": 2844 }, { "epoch": 0.24, "learning_rate": 1.769542587797236e-05, "loss": 0.3005, "step": 2845 }, { "epoch": 0.24, "learning_rate": 1.7693652687376173e-05, "loss": 0.2664, "step": 2846 }, { "epoch": 0.24, "learning_rate": 1.7691878903791252e-05, "loss": 0.6333, "step": 2847 }, { "epoch": 0.24, "learning_rate": 1.7690104527354313e-05, "loss": 0.3271, "step": 2848 }, { "epoch": 0.24, "learning_rate": 1.768832955820211e-05, "loss": 0.283, "step": 2849 }, { "epoch": 0.24, "learning_rate": 1.768655399647146e-05, "loss": 0.3414, "step": 2850 }, { "epoch": 0.24, "learning_rate": 1.7684777842299206e-05, "loss": 0.3184, "step": 2851 }, { "epoch": 0.24, "learning_rate": 1.7683001095822245e-05, "loss": 0.3103, "step": 2852 }, { "epoch": 0.24, "learning_rate": 1.7681223757177526e-05, "loss": 0.3055, "step": 2853 }, { "epoch": 0.24, "learning_rate": 1.7679445826502033e-05, "loss": 0.306, "step": 2854 }, { "epoch": 0.24, "learning_rate": 1.7677667303932797e-05, "loss": 0.3315, "step": 2855 }, { "epoch": 0.24, "learning_rate": 1.7675888189606907e-05, "loss": 0.3152, "step": 2856 }, { "epoch": 0.24, "learning_rate": 1.767410848366148e-05, "loss": 0.2598, "step": 2857 }, { "epoch": 0.24, "learning_rate": 1.7672328186233692e-05, "loss": 0.3183, "step": 2858 }, { "epoch": 0.25, "learning_rate": 1.7670547297460758e-05, "loss": 0.2806, "step": 2859 }, { "epoch": 0.25, "learning_rate": 1.7668765817479937e-05, "loss": 0.2607, "step": 2860 }, { "epoch": 0.25, "learning_rate": 1.766698374642854e-05, "loss": 0.3216, "step": 2861 }, { "epoch": 0.25, "learning_rate": 1.766520108444392e-05, "loss": 0.262, "step": 2862 }, { "epoch": 0.25, "learning_rate": 1.7663417831663474e-05, "loss": 0.3249, "step": 2863 }, { "epoch": 0.25, "learning_rate": 1.766163398822465e-05, "loss": 0.3141, "step": 2864 }, { "epoch": 0.25, "learning_rate": 1.765984955426493e-05, "loss": 0.3157, "step": 2865 }, { "epoch": 0.25, "learning_rate": 1.765806452992186e-05, "loss": 0.3539, "step": 2866 }, { "epoch": 0.25, "learning_rate": 1.7656278915333017e-05, "loss": 0.2562, "step": 2867 }, { "epoch": 0.25, "learning_rate": 1.765449271063602e-05, "loss": 0.3212, "step": 2868 }, { "epoch": 0.25, "learning_rate": 1.7652705915968552e-05, "loss": 0.3331, "step": 2869 }, { "epoch": 0.25, "learning_rate": 1.7650918531468326e-05, "loss": 0.321, "step": 2870 }, { "epoch": 0.25, "learning_rate": 1.76491305572731e-05, "loss": 0.2733, "step": 2871 }, { "epoch": 0.25, "learning_rate": 1.7647341993520687e-05, "loss": 0.3022, "step": 2872 }, { "epoch": 0.25, "learning_rate": 1.764555284034894e-05, "loss": 0.3089, "step": 2873 }, { "epoch": 0.25, "learning_rate": 1.764376309789576e-05, "loss": 0.3743, "step": 2874 }, { "epoch": 0.25, "learning_rate": 1.7641972766299088e-05, "loss": 0.2706, "step": 2875 }, { "epoch": 0.25, "learning_rate": 1.7640181845696914e-05, "loss": 0.2991, "step": 2876 }, { "epoch": 0.25, "learning_rate": 1.7638390336227275e-05, "loss": 0.3155, "step": 2877 }, { "epoch": 0.25, "learning_rate": 1.7636598238028253e-05, "loss": 0.2684, "step": 2878 }, { "epoch": 0.25, "learning_rate": 1.763480555123797e-05, "loss": 0.2991, "step": 2879 }, { "epoch": 0.25, "learning_rate": 1.76330122759946e-05, "loss": 0.2953, "step": 2880 }, { "epoch": 0.25, "learning_rate": 1.7631218412436362e-05, "loss": 0.2729, "step": 2881 }, { "epoch": 0.25, "learning_rate": 1.7629423960701513e-05, "loss": 0.3105, "step": 2882 }, { "epoch": 0.25, "learning_rate": 1.7627628920928366e-05, "loss": 0.3604, "step": 2883 }, { "epoch": 0.25, "learning_rate": 1.7625833293255268e-05, "loss": 0.311, "step": 2884 }, { "epoch": 0.25, "learning_rate": 1.762403707782062e-05, "loss": 0.3134, "step": 2885 }, { "epoch": 0.25, "learning_rate": 1.762224027476287e-05, "loss": 0.3066, "step": 2886 }, { "epoch": 0.25, "learning_rate": 1.7620442884220495e-05, "loss": 0.2988, "step": 2887 }, { "epoch": 0.25, "learning_rate": 1.761864490633204e-05, "loss": 0.3286, "step": 2888 }, { "epoch": 0.25, "learning_rate": 1.7616846341236082e-05, "loss": 0.2817, "step": 2889 }, { "epoch": 0.25, "learning_rate": 1.761504718907124e-05, "loss": 0.2957, "step": 2890 }, { "epoch": 0.25, "learning_rate": 1.7613247449976195e-05, "loss": 0.2975, "step": 2891 }, { "epoch": 0.25, "learning_rate": 1.761144712408965e-05, "loss": 0.2997, "step": 2892 }, { "epoch": 0.25, "learning_rate": 1.760964621155037e-05, "loss": 0.3035, "step": 2893 }, { "epoch": 0.25, "learning_rate": 1.760784471249716e-05, "loss": 0.3041, "step": 2894 }, { "epoch": 0.25, "learning_rate": 1.760604262706887e-05, "loss": 0.3328, "step": 2895 }, { "epoch": 0.25, "learning_rate": 1.76042399554044e-05, "loss": 0.3155, "step": 2896 }, { "epoch": 0.25, "learning_rate": 1.7602436697642686e-05, "loss": 0.3151, "step": 2897 }, { "epoch": 0.25, "learning_rate": 1.7600632853922713e-05, "loss": 0.265, "step": 2898 }, { "epoch": 0.25, "learning_rate": 1.7598828424383522e-05, "loss": 0.3282, "step": 2899 }, { "epoch": 0.25, "learning_rate": 1.759702340916418e-05, "loss": 0.288, "step": 2900 }, { "epoch": 0.25, "learning_rate": 1.7595217808403814e-05, "loss": 0.2635, "step": 2901 }, { "epoch": 0.25, "learning_rate": 1.7593411622241584e-05, "loss": 0.3057, "step": 2902 }, { "epoch": 0.25, "learning_rate": 1.7591604850816705e-05, "loss": 0.3115, "step": 2903 }, { "epoch": 0.25, "learning_rate": 1.758979749426844e-05, "loss": 0.2652, "step": 2904 }, { "epoch": 0.25, "learning_rate": 1.7587989552736085e-05, "loss": 0.3064, "step": 2905 }, { "epoch": 0.25, "learning_rate": 1.7586181026358987e-05, "loss": 0.3276, "step": 2906 }, { "epoch": 0.25, "learning_rate": 1.758437191527654e-05, "loss": 0.2938, "step": 2907 }, { "epoch": 0.25, "learning_rate": 1.7582562219628185e-05, "loss": 0.2686, "step": 2908 }, { "epoch": 0.25, "learning_rate": 1.7580751939553396e-05, "loss": 0.2975, "step": 2909 }, { "epoch": 0.25, "learning_rate": 1.757894107519171e-05, "loss": 0.3219, "step": 2910 }, { "epoch": 0.25, "learning_rate": 1.757712962668269e-05, "loss": 0.3459, "step": 2911 }, { "epoch": 0.25, "learning_rate": 1.7575317594165963e-05, "loss": 0.3243, "step": 2912 }, { "epoch": 0.25, "learning_rate": 1.757350497778118e-05, "loss": 0.3334, "step": 2913 }, { "epoch": 0.25, "learning_rate": 1.757169177766806e-05, "loss": 0.2744, "step": 2914 }, { "epoch": 0.25, "learning_rate": 1.756987799396635e-05, "loss": 0.2953, "step": 2915 }, { "epoch": 0.25, "learning_rate": 1.7568063626815844e-05, "loss": 0.3466, "step": 2916 }, { "epoch": 0.25, "learning_rate": 1.7566248676356394e-05, "loss": 0.3323, "step": 2917 }, { "epoch": 0.25, "learning_rate": 1.7564433142727882e-05, "loss": 0.3196, "step": 2918 }, { "epoch": 0.25, "learning_rate": 1.7562617026070238e-05, "loss": 0.2961, "step": 2919 }, { "epoch": 0.25, "learning_rate": 1.7560800326523442e-05, "loss": 0.3482, "step": 2920 }, { "epoch": 0.25, "learning_rate": 1.7558983044227513e-05, "loss": 0.2996, "step": 2921 }, { "epoch": 0.25, "learning_rate": 1.7557165179322522e-05, "loss": 0.3297, "step": 2922 }, { "epoch": 0.25, "learning_rate": 1.7555346731948587e-05, "loss": 0.2964, "step": 2923 }, { "epoch": 0.25, "learning_rate": 1.755352770224585e-05, "loss": 0.2969, "step": 2924 }, { "epoch": 0.25, "learning_rate": 1.755170809035452e-05, "loss": 0.2765, "step": 2925 }, { "epoch": 0.25, "learning_rate": 1.7549887896414853e-05, "loss": 0.3168, "step": 2926 }, { "epoch": 0.25, "learning_rate": 1.7548067120567126e-05, "loss": 0.3132, "step": 2927 }, { "epoch": 0.25, "learning_rate": 1.754624576295168e-05, "loss": 0.2957, "step": 2928 }, { "epoch": 0.25, "learning_rate": 1.7544423823708903e-05, "loss": 0.2717, "step": 2929 }, { "epoch": 0.25, "learning_rate": 1.7542601302979213e-05, "loss": 0.3103, "step": 2930 }, { "epoch": 0.25, "learning_rate": 1.7540778200903082e-05, "loss": 0.3256, "step": 2931 }, { "epoch": 0.25, "learning_rate": 1.753895451762103e-05, "loss": 0.2991, "step": 2932 }, { "epoch": 0.25, "learning_rate": 1.7537130253273613e-05, "loss": 0.2603, "step": 2933 }, { "epoch": 0.25, "learning_rate": 1.753530540800144e-05, "loss": 0.3221, "step": 2934 }, { "epoch": 0.25, "learning_rate": 1.7533479981945157e-05, "loss": 0.3535, "step": 2935 }, { "epoch": 0.25, "learning_rate": 1.7531653975245463e-05, "loss": 0.3038, "step": 2936 }, { "epoch": 0.25, "learning_rate": 1.7529827388043093e-05, "loss": 0.2844, "step": 2937 }, { "epoch": 0.25, "learning_rate": 1.7528000220478836e-05, "loss": 0.5901, "step": 2938 }, { "epoch": 0.25, "learning_rate": 1.7526172472693518e-05, "loss": 0.3172, "step": 2939 }, { "epoch": 0.25, "learning_rate": 1.7524344144828015e-05, "loss": 0.327, "step": 2940 }, { "epoch": 0.25, "learning_rate": 1.7522515237023242e-05, "loss": 0.2545, "step": 2941 }, { "epoch": 0.25, "learning_rate": 1.7520685749420164e-05, "loss": 0.274, "step": 2942 }, { "epoch": 0.25, "learning_rate": 1.7518855682159793e-05, "loss": 0.2668, "step": 2943 }, { "epoch": 0.25, "learning_rate": 1.7517025035383175e-05, "loss": 0.2944, "step": 2944 }, { "epoch": 0.25, "learning_rate": 1.751519380923141e-05, "loss": 0.2792, "step": 2945 }, { "epoch": 0.25, "learning_rate": 1.751336200384564e-05, "loss": 0.2963, "step": 2946 }, { "epoch": 0.25, "learning_rate": 1.7511529619367055e-05, "loss": 0.2906, "step": 2947 }, { "epoch": 0.25, "learning_rate": 1.7509696655936878e-05, "loss": 0.2549, "step": 2948 }, { "epoch": 0.25, "learning_rate": 1.750786311369639e-05, "loss": 0.3553, "step": 2949 }, { "epoch": 0.25, "learning_rate": 1.7506028992786912e-05, "loss": 0.2766, "step": 2950 }, { "epoch": 0.25, "learning_rate": 1.7504194293349805e-05, "loss": 0.3689, "step": 2951 }, { "epoch": 0.25, "learning_rate": 1.7502359015526488e-05, "loss": 0.3091, "step": 2952 }, { "epoch": 0.25, "learning_rate": 1.75005231594584e-05, "loss": 0.2937, "step": 2953 }, { "epoch": 0.25, "learning_rate": 1.749868672528705e-05, "loss": 0.3443, "step": 2954 }, { "epoch": 0.25, "learning_rate": 1.749684971315398e-05, "loss": 0.3121, "step": 2955 }, { "epoch": 0.25, "learning_rate": 1.749501212320078e-05, "loss": 0.2676, "step": 2956 }, { "epoch": 0.25, "learning_rate": 1.749317395556908e-05, "loss": 0.2698, "step": 2957 }, { "epoch": 0.25, "learning_rate": 1.7491335210400554e-05, "loss": 0.3367, "step": 2958 }, { "epoch": 0.25, "learning_rate": 1.7489495887836922e-05, "loss": 0.2939, "step": 2959 }, { "epoch": 0.25, "learning_rate": 1.7487655988019957e-05, "loss": 0.2722, "step": 2960 }, { "epoch": 0.25, "learning_rate": 1.7485815511091466e-05, "loss": 0.3036, "step": 2961 }, { "epoch": 0.25, "learning_rate": 1.7483974457193307e-05, "loss": 0.3098, "step": 2962 }, { "epoch": 0.25, "learning_rate": 1.748213282646737e-05, "loss": 0.2928, "step": 2963 }, { "epoch": 0.25, "learning_rate": 1.748029061905561e-05, "loss": 0.295, "step": 2964 }, { "epoch": 0.25, "learning_rate": 1.747844783510001e-05, "loss": 0.3275, "step": 2965 }, { "epoch": 0.25, "learning_rate": 1.74766044747426e-05, "loss": 0.2881, "step": 2966 }, { "epoch": 0.25, "learning_rate": 1.747476053812546e-05, "loss": 0.301, "step": 2967 }, { "epoch": 0.25, "learning_rate": 1.7472916025390714e-05, "loss": 0.2794, "step": 2968 }, { "epoch": 0.25, "learning_rate": 1.7471070936680527e-05, "loss": 0.2684, "step": 2969 }, { "epoch": 0.25, "learning_rate": 1.7469225272137104e-05, "loss": 0.3401, "step": 2970 }, { "epoch": 0.25, "learning_rate": 1.7467379031902707e-05, "loss": 0.3375, "step": 2971 }, { "epoch": 0.25, "learning_rate": 1.7465532216119628e-05, "loss": 0.3051, "step": 2972 }, { "epoch": 0.25, "learning_rate": 1.7463684824930215e-05, "loss": 0.3447, "step": 2973 }, { "epoch": 0.25, "learning_rate": 1.7461836858476858e-05, "loss": 0.2483, "step": 2974 }, { "epoch": 0.26, "learning_rate": 1.7459988316901984e-05, "loss": 0.2654, "step": 2975 }, { "epoch": 0.26, "learning_rate": 1.745813920034807e-05, "loss": 0.6151, "step": 2976 }, { "epoch": 0.26, "learning_rate": 1.745628950895764e-05, "loss": 0.3342, "step": 2977 }, { "epoch": 0.26, "learning_rate": 1.7454439242873257e-05, "loss": 0.2723, "step": 2978 }, { "epoch": 0.26, "learning_rate": 1.7452588402237525e-05, "loss": 0.613, "step": 2979 }, { "epoch": 0.26, "learning_rate": 1.7450736987193113e-05, "loss": 0.3793, "step": 2980 }, { "epoch": 0.26, "learning_rate": 1.7448884997882706e-05, "loss": 0.3612, "step": 2981 }, { "epoch": 0.26, "learning_rate": 1.7447032434449045e-05, "loss": 0.2988, "step": 2982 }, { "epoch": 0.26, "learning_rate": 1.7445179297034925e-05, "loss": 0.3465, "step": 2983 }, { "epoch": 0.26, "learning_rate": 1.744332558578317e-05, "loss": 0.3279, "step": 2984 }, { "epoch": 0.26, "learning_rate": 1.744147130083666e-05, "loss": 0.3226, "step": 2985 }, { "epoch": 0.26, "learning_rate": 1.743961644233831e-05, "loss": 0.3051, "step": 2986 }, { "epoch": 0.26, "learning_rate": 1.7437761010431083e-05, "loss": 0.2554, "step": 2987 }, { "epoch": 0.26, "learning_rate": 1.743590500525799e-05, "loss": 0.2751, "step": 2988 }, { "epoch": 0.26, "learning_rate": 1.7434048426962086e-05, "loss": 0.2797, "step": 2989 }, { "epoch": 0.26, "learning_rate": 1.7432191275686454e-05, "loss": 0.3446, "step": 2990 }, { "epoch": 0.26, "learning_rate": 1.7430333551574247e-05, "loss": 0.6001, "step": 2991 }, { "epoch": 0.26, "learning_rate": 1.742847525476864e-05, "loss": 0.2774, "step": 2992 }, { "epoch": 0.26, "learning_rate": 1.742661638541287e-05, "loss": 0.3793, "step": 2993 }, { "epoch": 0.26, "learning_rate": 1.7424756943650203e-05, "loss": 0.3013, "step": 2994 }, { "epoch": 0.26, "learning_rate": 1.7422896929623957e-05, "loss": 0.2703, "step": 2995 }, { "epoch": 0.26, "learning_rate": 1.7421036343477498e-05, "loss": 0.3277, "step": 2996 }, { "epoch": 0.26, "learning_rate": 1.741917518535422e-05, "loss": 0.4084, "step": 2997 }, { "epoch": 0.26, "learning_rate": 1.741731345539758e-05, "loss": 0.3613, "step": 2998 }, { "epoch": 0.26, "learning_rate": 1.7415451153751068e-05, "loss": 0.3193, "step": 2999 }, { "epoch": 0.26, "learning_rate": 1.7413588280558223e-05, "loss": 0.3335, "step": 3000 }, { "epoch": 0.26, "learning_rate": 1.7411724835962623e-05, "loss": 0.2513, "step": 3001 }, { "epoch": 0.26, "learning_rate": 1.74098608201079e-05, "loss": 0.292, "step": 3002 }, { "epoch": 0.26, "learning_rate": 1.7407996233137713e-05, "loss": 0.2734, "step": 3003 }, { "epoch": 0.26, "learning_rate": 1.7406131075195784e-05, "loss": 0.2944, "step": 3004 }, { "epoch": 0.26, "learning_rate": 1.7404265346425867e-05, "loss": 0.3594, "step": 3005 }, { "epoch": 0.26, "learning_rate": 1.740239904697176e-05, "loss": 0.269, "step": 3006 }, { "epoch": 0.26, "learning_rate": 1.740053217697731e-05, "loss": 0.2882, "step": 3007 }, { "epoch": 0.26, "learning_rate": 1.739866473658641e-05, "loss": 0.2902, "step": 3008 }, { "epoch": 0.26, "learning_rate": 1.7396796725942986e-05, "loss": 0.3079, "step": 3009 }, { "epoch": 0.26, "learning_rate": 1.739492814519102e-05, "loss": 0.2763, "step": 3010 }, { "epoch": 0.26, "learning_rate": 1.7393058994474535e-05, "loss": 0.2678, "step": 3011 }, { "epoch": 0.26, "learning_rate": 1.739118927393759e-05, "loss": 0.3147, "step": 3012 }, { "epoch": 0.26, "learning_rate": 1.73893189837243e-05, "loss": 0.3236, "step": 3013 }, { "epoch": 0.26, "learning_rate": 1.7387448123978813e-05, "loss": 0.2949, "step": 3014 }, { "epoch": 0.26, "learning_rate": 1.7385576694845324e-05, "loss": 0.3339, "step": 3015 }, { "epoch": 0.26, "learning_rate": 1.738370469646808e-05, "loss": 0.307, "step": 3016 }, { "epoch": 0.26, "learning_rate": 1.738183212899136e-05, "loss": 0.2856, "step": 3017 }, { "epoch": 0.26, "learning_rate": 1.7379958992559494e-05, "loss": 0.2893, "step": 3018 }, { "epoch": 0.26, "learning_rate": 1.7378085287316853e-05, "loss": 0.3495, "step": 3019 }, { "epoch": 0.26, "learning_rate": 1.737621101340786e-05, "loss": 0.3138, "step": 3020 }, { "epoch": 0.26, "learning_rate": 1.7374336170976964e-05, "loss": 0.2986, "step": 3021 }, { "epoch": 0.26, "learning_rate": 1.7372460760168676e-05, "loss": 0.3237, "step": 3022 }, { "epoch": 0.26, "learning_rate": 1.737058478112754e-05, "loss": 0.2622, "step": 3023 }, { "epoch": 0.26, "learning_rate": 1.7368708233998148e-05, "loss": 0.2722, "step": 3024 }, { "epoch": 0.26, "learning_rate": 1.7366831118925133e-05, "loss": 0.3397, "step": 3025 }, { "epoch": 0.26, "learning_rate": 1.736495343605318e-05, "loss": 0.3392, "step": 3026 }, { "epoch": 0.26, "learning_rate": 1.7363075185527007e-05, "loss": 0.2745, "step": 3027 }, { "epoch": 0.26, "learning_rate": 1.7361196367491378e-05, "loss": 0.2632, "step": 3028 }, { "epoch": 0.26, "learning_rate": 1.735931698209111e-05, "loss": 0.302, "step": 3029 }, { "epoch": 0.26, "learning_rate": 1.735743702947105e-05, "loss": 0.3304, "step": 3030 }, { "epoch": 0.26, "learning_rate": 1.7355556509776093e-05, "loss": 0.3356, "step": 3031 }, { "epoch": 0.26, "learning_rate": 1.7353675423151194e-05, "loss": 0.2728, "step": 3032 }, { "epoch": 0.26, "learning_rate": 1.7351793769741326e-05, "loss": 0.2748, "step": 3033 }, { "epoch": 0.26, "learning_rate": 1.734991154969152e-05, "loss": 0.33, "step": 3034 }, { "epoch": 0.26, "learning_rate": 1.7348028763146843e-05, "loss": 0.3058, "step": 3035 }, { "epoch": 0.26, "learning_rate": 1.7346145410252422e-05, "loss": 0.3177, "step": 3036 }, { "epoch": 0.26, "learning_rate": 1.7344261491153412e-05, "loss": 0.2975, "step": 3037 }, { "epoch": 0.26, "learning_rate": 1.7342377005995014e-05, "loss": 0.3472, "step": 3038 }, { "epoch": 0.26, "learning_rate": 1.7340491954922474e-05, "loss": 0.2886, "step": 3039 }, { "epoch": 0.26, "learning_rate": 1.733860633808109e-05, "loss": 0.2708, "step": 3040 }, { "epoch": 0.26, "learning_rate": 1.7336720155616186e-05, "loss": 0.2965, "step": 3041 }, { "epoch": 0.26, "learning_rate": 1.7334833407673145e-05, "loss": 0.6443, "step": 3042 }, { "epoch": 0.26, "learning_rate": 1.733294609439739e-05, "loss": 0.2844, "step": 3043 }, { "epoch": 0.26, "learning_rate": 1.733105821593438e-05, "loss": 0.283, "step": 3044 }, { "epoch": 0.26, "learning_rate": 1.7329169772429628e-05, "loss": 0.3101, "step": 3045 }, { "epoch": 0.26, "learning_rate": 1.7327280764028683e-05, "loss": 0.3412, "step": 3046 }, { "epoch": 0.26, "learning_rate": 1.7325391190877144e-05, "loss": 0.6011, "step": 3047 }, { "epoch": 0.26, "learning_rate": 1.732350105312065e-05, "loss": 0.3168, "step": 3048 }, { "epoch": 0.26, "learning_rate": 1.7321610350904877e-05, "loss": 0.2737, "step": 3049 }, { "epoch": 0.26, "learning_rate": 1.7319719084375556e-05, "loss": 0.3342, "step": 3050 }, { "epoch": 0.26, "learning_rate": 1.7317827253678456e-05, "loss": 0.2621, "step": 3051 }, { "epoch": 0.26, "learning_rate": 1.731593485895939e-05, "loss": 0.2642, "step": 3052 }, { "epoch": 0.26, "learning_rate": 1.7314041900364215e-05, "loss": 0.3472, "step": 3053 }, { "epoch": 0.26, "learning_rate": 1.731214837803883e-05, "loss": 0.2865, "step": 3054 }, { "epoch": 0.26, "learning_rate": 1.7310254292129175e-05, "loss": 0.5972, "step": 3055 }, { "epoch": 0.26, "learning_rate": 1.730835964278124e-05, "loss": 0.3008, "step": 3056 }, { "epoch": 0.26, "learning_rate": 1.730646443014106e-05, "loss": 0.2531, "step": 3057 }, { "epoch": 0.26, "learning_rate": 1.7304568654354703e-05, "loss": 0.3138, "step": 3058 }, { "epoch": 0.26, "learning_rate": 1.7302672315568284e-05, "loss": 0.2844, "step": 3059 }, { "epoch": 0.26, "learning_rate": 1.730077541392797e-05, "loss": 0.3058, "step": 3060 }, { "epoch": 0.26, "learning_rate": 1.7298877949579962e-05, "loss": 0.2876, "step": 3061 }, { "epoch": 0.26, "learning_rate": 1.7296979922670502e-05, "loss": 0.324, "step": 3062 }, { "epoch": 0.26, "learning_rate": 1.7295081333345887e-05, "loss": 0.3231, "step": 3063 }, { "epoch": 0.26, "learning_rate": 1.729318218175245e-05, "loss": 0.2648, "step": 3064 }, { "epoch": 0.26, "learning_rate": 1.729128246803657e-05, "loss": 0.317, "step": 3065 }, { "epoch": 0.26, "learning_rate": 1.728938219234466e-05, "loss": 0.2813, "step": 3066 }, { "epoch": 0.26, "learning_rate": 1.7287481354823187e-05, "loss": 0.276, "step": 3067 }, { "epoch": 0.26, "learning_rate": 1.7285579955618663e-05, "loss": 0.3404, "step": 3068 }, { "epoch": 0.26, "learning_rate": 1.7283677994877634e-05, "loss": 0.3178, "step": 3069 }, { "epoch": 0.26, "learning_rate": 1.7281775472746695e-05, "loss": 0.3302, "step": 3070 }, { "epoch": 0.26, "learning_rate": 1.7279872389372484e-05, "loss": 0.2679, "step": 3071 }, { "epoch": 0.26, "learning_rate": 1.727796874490168e-05, "loss": 0.3051, "step": 3072 }, { "epoch": 0.26, "learning_rate": 1.7276064539481007e-05, "loss": 0.3224, "step": 3073 }, { "epoch": 0.26, "learning_rate": 1.7274159773257227e-05, "loss": 0.329, "step": 3074 }, { "epoch": 0.26, "learning_rate": 1.727225444637716e-05, "loss": 0.3153, "step": 3075 }, { "epoch": 0.26, "learning_rate": 1.727034855898765e-05, "loss": 0.335, "step": 3076 }, { "epoch": 0.26, "learning_rate": 1.72684421112356e-05, "loss": 0.3243, "step": 3077 }, { "epoch": 0.26, "learning_rate": 1.7266535103267943e-05, "loss": 0.295, "step": 3078 }, { "epoch": 0.26, "learning_rate": 1.7264627535231667e-05, "loss": 0.3094, "step": 3079 }, { "epoch": 0.26, "learning_rate": 1.7262719407273795e-05, "loss": 0.2799, "step": 3080 }, { "epoch": 0.26, "learning_rate": 1.72608107195414e-05, "loss": 0.3039, "step": 3081 }, { "epoch": 0.26, "learning_rate": 1.7258901472181587e-05, "loss": 0.2769, "step": 3082 }, { "epoch": 0.26, "learning_rate": 1.725699166534152e-05, "loss": 0.3543, "step": 3083 }, { "epoch": 0.26, "learning_rate": 1.7255081299168393e-05, "loss": 0.6128, "step": 3084 }, { "epoch": 0.26, "learning_rate": 1.7253170373809447e-05, "loss": 0.3118, "step": 3085 }, { "epoch": 0.26, "learning_rate": 1.7251258889411964e-05, "loss": 0.3437, "step": 3086 }, { "epoch": 0.26, "learning_rate": 1.724934684612328e-05, "loss": 0.2847, "step": 3087 }, { "epoch": 0.26, "learning_rate": 1.724743424409076e-05, "loss": 0.2795, "step": 3088 }, { "epoch": 0.26, "learning_rate": 1.724552108346182e-05, "loss": 0.297, "step": 3089 }, { "epoch": 0.26, "learning_rate": 1.7243607364383916e-05, "loss": 0.3001, "step": 3090 }, { "epoch": 0.26, "learning_rate": 1.7241693087004546e-05, "loss": 0.303, "step": 3091 }, { "epoch": 0.27, "learning_rate": 1.7239778251471255e-05, "loss": 0.2795, "step": 3092 }, { "epoch": 0.27, "learning_rate": 1.723786285793163e-05, "loss": 0.3063, "step": 3093 }, { "epoch": 0.27, "learning_rate": 1.72359469065333e-05, "loss": 0.3005, "step": 3094 }, { "epoch": 0.27, "learning_rate": 1.7234030397423935e-05, "loss": 0.3278, "step": 3095 }, { "epoch": 0.27, "learning_rate": 1.723211333075125e-05, "loss": 0.2925, "step": 3096 }, { "epoch": 0.27, "learning_rate": 1.723019570666301e-05, "loss": 0.3046, "step": 3097 }, { "epoch": 0.27, "learning_rate": 1.7228277525307007e-05, "loss": 0.2824, "step": 3098 }, { "epoch": 0.27, "learning_rate": 1.7226358786831087e-05, "loss": 0.2944, "step": 3099 }, { "epoch": 0.27, "learning_rate": 1.722443949138314e-05, "loss": 0.3724, "step": 3100 }, { "epoch": 0.27, "learning_rate": 1.7222519639111094e-05, "loss": 0.3059, "step": 3101 }, { "epoch": 0.27, "learning_rate": 1.7220599230162917e-05, "loss": 0.29, "step": 3102 }, { "epoch": 0.27, "learning_rate": 1.7218678264686634e-05, "loss": 0.3082, "step": 3103 }, { "epoch": 0.27, "learning_rate": 1.72167567428303e-05, "loss": 0.2892, "step": 3104 }, { "epoch": 0.27, "learning_rate": 1.7214834664742014e-05, "loss": 0.2596, "step": 3105 }, { "epoch": 0.27, "learning_rate": 1.7212912030569923e-05, "loss": 0.2877, "step": 3106 }, { "epoch": 0.27, "learning_rate": 1.7210988840462207e-05, "loss": 0.2654, "step": 3107 }, { "epoch": 0.27, "learning_rate": 1.7209065094567107e-05, "loss": 0.2755, "step": 3108 }, { "epoch": 0.27, "learning_rate": 1.7207140793032892e-05, "loss": 0.2919, "step": 3109 }, { "epoch": 0.27, "learning_rate": 1.720521593600787e-05, "loss": 0.316, "step": 3110 }, { "epoch": 0.27, "learning_rate": 1.720329052364041e-05, "loss": 0.308, "step": 3111 }, { "epoch": 0.27, "learning_rate": 1.7201364556078908e-05, "loss": 0.2589, "step": 3112 }, { "epoch": 0.27, "learning_rate": 1.7199438033471812e-05, "loss": 0.3171, "step": 3113 }, { "epoch": 0.27, "learning_rate": 1.71975109559676e-05, "loss": 0.2607, "step": 3114 }, { "epoch": 0.27, "learning_rate": 1.7195583323714812e-05, "loss": 0.342, "step": 3115 }, { "epoch": 0.27, "learning_rate": 1.7193655136862016e-05, "loss": 0.3469, "step": 3116 }, { "epoch": 0.27, "learning_rate": 1.719172639555782e-05, "loss": 0.2828, "step": 3117 }, { "epoch": 0.27, "learning_rate": 1.7189797099950895e-05, "loss": 0.3389, "step": 3118 }, { "epoch": 0.27, "learning_rate": 1.7187867250189936e-05, "loss": 0.322, "step": 3119 }, { "epoch": 0.27, "learning_rate": 1.7185936846423686e-05, "loss": 0.6334, "step": 3120 }, { "epoch": 0.27, "learning_rate": 1.718400588880093e-05, "loss": 0.2861, "step": 3121 }, { "epoch": 0.27, "learning_rate": 1.7182074377470494e-05, "loss": 0.2823, "step": 3122 }, { "epoch": 0.27, "learning_rate": 1.7180142312581253e-05, "loss": 0.2714, "step": 3123 }, { "epoch": 0.27, "learning_rate": 1.717820969428212e-05, "loss": 0.3376, "step": 3124 }, { "epoch": 0.27, "learning_rate": 1.7176276522722054e-05, "loss": 0.2822, "step": 3125 }, { "epoch": 0.27, "learning_rate": 1.7174342798050056e-05, "loss": 0.319, "step": 3126 }, { "epoch": 0.27, "learning_rate": 1.717240852041516e-05, "loss": 0.3087, "step": 3127 }, { "epoch": 0.27, "learning_rate": 1.717047368996646e-05, "loss": 0.298, "step": 3128 }, { "epoch": 0.27, "learning_rate": 1.7168538306853075e-05, "loss": 0.2981, "step": 3129 }, { "epoch": 0.27, "learning_rate": 1.7166602371224178e-05, "loss": 0.2862, "step": 3130 }, { "epoch": 0.27, "learning_rate": 1.7164665883228982e-05, "loss": 0.2809, "step": 3131 }, { "epoch": 0.27, "learning_rate": 1.716272884301674e-05, "loss": 0.3267, "step": 3132 }, { "epoch": 0.27, "learning_rate": 1.7160791250736754e-05, "loss": 0.3401, "step": 3133 }, { "epoch": 0.27, "learning_rate": 1.7158853106538358e-05, "loss": 0.27, "step": 3134 }, { "epoch": 0.27, "learning_rate": 1.7156914410570937e-05, "loss": 0.29, "step": 3135 }, { "epoch": 0.27, "learning_rate": 1.7154975162983917e-05, "loss": 0.2925, "step": 3136 }, { "epoch": 0.27, "learning_rate": 1.7153035363926766e-05, "loss": 0.2865, "step": 3137 }, { "epoch": 0.27, "learning_rate": 1.7151095013548996e-05, "loss": 0.3004, "step": 3138 }, { "epoch": 0.27, "learning_rate": 1.7149154112000154e-05, "loss": 0.3009, "step": 3139 }, { "epoch": 0.27, "learning_rate": 1.7147212659429837e-05, "loss": 0.2823, "step": 3140 }, { "epoch": 0.27, "learning_rate": 1.7145270655987686e-05, "loss": 0.2711, "step": 3141 }, { "epoch": 0.27, "learning_rate": 1.714332810182338e-05, "loss": 0.3141, "step": 3142 }, { "epoch": 0.27, "learning_rate": 1.7141384997086638e-05, "loss": 0.299, "step": 3143 }, { "epoch": 0.27, "learning_rate": 1.7139441341927224e-05, "loss": 0.2684, "step": 3144 }, { "epoch": 0.27, "learning_rate": 1.7137497136494953e-05, "loss": 0.2828, "step": 3145 }, { "epoch": 0.27, "learning_rate": 1.713555238093967e-05, "loss": 0.3049, "step": 3146 }, { "epoch": 0.27, "learning_rate": 1.7133607075411266e-05, "loss": 0.3228, "step": 3147 }, { "epoch": 0.27, "learning_rate": 1.7131661220059675e-05, "loss": 0.3062, "step": 3148 }, { "epoch": 0.27, "learning_rate": 1.7129714815034876e-05, "loss": 0.2823, "step": 3149 }, { "epoch": 0.27, "learning_rate": 1.7127767860486892e-05, "loss": 0.3042, "step": 3150 }, { "epoch": 0.27, "learning_rate": 1.7125820356565776e-05, "loss": 0.2822, "step": 3151 }, { "epoch": 0.27, "learning_rate": 1.712387230342164e-05, "loss": 0.308, "step": 3152 }, { "epoch": 0.27, "learning_rate": 1.7121923701204623e-05, "loss": 0.3065, "step": 3153 }, { "epoch": 0.27, "learning_rate": 1.711997455006492e-05, "loss": 0.3438, "step": 3154 }, { "epoch": 0.27, "learning_rate": 1.7118024850152763e-05, "loss": 0.3024, "step": 3155 }, { "epoch": 0.27, "learning_rate": 1.7116074601618418e-05, "loss": 0.2725, "step": 3156 }, { "epoch": 0.27, "learning_rate": 1.7114123804612205e-05, "loss": 0.3035, "step": 3157 }, { "epoch": 0.27, "learning_rate": 1.7112172459284478e-05, "loss": 0.2926, "step": 3158 }, { "epoch": 0.27, "learning_rate": 1.7110220565785644e-05, "loss": 0.2766, "step": 3159 }, { "epoch": 0.27, "learning_rate": 1.710826812426614e-05, "loss": 0.2586, "step": 3160 }, { "epoch": 0.27, "learning_rate": 1.710631513487645e-05, "loss": 0.2908, "step": 3161 }, { "epoch": 0.27, "learning_rate": 1.7104361597767107e-05, "loss": 0.3331, "step": 3162 }, { "epoch": 0.27, "learning_rate": 1.7102407513088676e-05, "loss": 0.2949, "step": 3163 }, { "epoch": 0.27, "learning_rate": 1.7100452880991764e-05, "loss": 0.3762, "step": 3164 }, { "epoch": 0.27, "learning_rate": 1.7098497701627027e-05, "loss": 0.3291, "step": 3165 }, { "epoch": 0.27, "learning_rate": 1.709654197514517e-05, "loss": 0.288, "step": 3166 }, { "epoch": 0.27, "learning_rate": 1.7094585701696916e-05, "loss": 0.2801, "step": 3167 }, { "epoch": 0.27, "learning_rate": 1.7092628881433052e-05, "loss": 0.2711, "step": 3168 }, { "epoch": 0.27, "learning_rate": 1.70906715145044e-05, "loss": 0.2989, "step": 3169 }, { "epoch": 0.27, "learning_rate": 1.7088713601061823e-05, "loss": 0.264, "step": 3170 }, { "epoch": 0.27, "learning_rate": 1.708675514125623e-05, "loss": 0.2844, "step": 3171 }, { "epoch": 0.27, "learning_rate": 1.7084796135238566e-05, "loss": 0.3286, "step": 3172 }, { "epoch": 0.27, "learning_rate": 1.7082836583159826e-05, "loss": 0.304, "step": 3173 }, { "epoch": 0.27, "learning_rate": 1.7080876485171035e-05, "loss": 0.285, "step": 3174 }, { "epoch": 0.27, "learning_rate": 1.7078915841423273e-05, "loss": 0.2709, "step": 3175 }, { "epoch": 0.27, "learning_rate": 1.7076954652067657e-05, "loss": 0.2842, "step": 3176 }, { "epoch": 0.27, "learning_rate": 1.7074992917255343e-05, "loss": 0.3191, "step": 3177 }, { "epoch": 0.27, "learning_rate": 1.7073030637137535e-05, "loss": 0.3039, "step": 3178 }, { "epoch": 0.27, "learning_rate": 1.7071067811865477e-05, "loss": 0.2858, "step": 3179 }, { "epoch": 0.27, "learning_rate": 1.706910444159045e-05, "loss": 0.289, "step": 3180 }, { "epoch": 0.27, "learning_rate": 1.7067140526463778e-05, "loss": 0.3314, "step": 3181 }, { "epoch": 0.27, "learning_rate": 1.7065176066636836e-05, "loss": 0.2966, "step": 3182 }, { "epoch": 0.27, "learning_rate": 1.7063211062261034e-05, "loss": 0.3057, "step": 3183 }, { "epoch": 0.27, "learning_rate": 1.7061245513487824e-05, "loss": 0.28, "step": 3184 }, { "epoch": 0.27, "learning_rate": 1.70592794204687e-05, "loss": 0.3273, "step": 3185 }, { "epoch": 0.27, "learning_rate": 1.70573127833552e-05, "loss": 0.3187, "step": 3186 }, { "epoch": 0.27, "learning_rate": 1.70553456022989e-05, "loss": 0.3081, "step": 3187 }, { "epoch": 0.27, "learning_rate": 1.7053377877451424e-05, "loss": 0.365, "step": 3188 }, { "epoch": 0.27, "learning_rate": 1.7051409608964433e-05, "loss": 0.2942, "step": 3189 }, { "epoch": 0.27, "learning_rate": 1.704944079698963e-05, "loss": 0.299, "step": 3190 }, { "epoch": 0.27, "learning_rate": 1.7047471441678764e-05, "loss": 0.2977, "step": 3191 }, { "epoch": 0.27, "learning_rate": 1.704550154318362e-05, "loss": 0.2982, "step": 3192 }, { "epoch": 0.27, "learning_rate": 1.704353110165603e-05, "loss": 0.3495, "step": 3193 }, { "epoch": 0.27, "learning_rate": 1.704156011724787e-05, "loss": 0.2707, "step": 3194 }, { "epoch": 0.27, "learning_rate": 1.7039588590111045e-05, "loss": 0.2767, "step": 3195 }, { "epoch": 0.27, "learning_rate": 1.7037616520397515e-05, "loss": 0.3471, "step": 3196 }, { "epoch": 0.27, "learning_rate": 1.7035643908259278e-05, "loss": 0.2956, "step": 3197 }, { "epoch": 0.27, "learning_rate": 1.7033670753848373e-05, "loss": 0.3131, "step": 3198 }, { "epoch": 0.27, "learning_rate": 1.7031697057316883e-05, "loss": 0.3559, "step": 3199 }, { "epoch": 0.27, "learning_rate": 1.702972281881693e-05, "loss": 0.2711, "step": 3200 }, { "epoch": 0.27, "learning_rate": 1.702774803850067e-05, "loss": 0.2862, "step": 3201 }, { "epoch": 0.27, "learning_rate": 1.7025772716520324e-05, "loss": 0.2958, "step": 3202 }, { "epoch": 0.27, "learning_rate": 1.7023796853028125e-05, "loss": 0.3347, "step": 3203 }, { "epoch": 0.27, "learning_rate": 1.7021820448176372e-05, "loss": 0.2975, "step": 3204 }, { "epoch": 0.27, "learning_rate": 1.7019843502117398e-05, "loss": 0.3403, "step": 3205 }, { "epoch": 0.27, "learning_rate": 1.701786601500357e-05, "loss": 0.2828, "step": 3206 }, { "epoch": 0.27, "learning_rate": 1.701588798698731e-05, "loss": 0.2838, "step": 3207 }, { "epoch": 0.27, "learning_rate": 1.7013909418221065e-05, "loss": 0.3041, "step": 3208 }, { "epoch": 0.28, "learning_rate": 1.701193030885734e-05, "loss": 0.2836, "step": 3209 }, { "epoch": 0.28, "learning_rate": 1.7009950659048677e-05, "loss": 0.3325, "step": 3210 }, { "epoch": 0.28, "learning_rate": 1.7007970468947653e-05, "loss": 0.325, "step": 3211 }, { "epoch": 0.28, "learning_rate": 1.7005989738706892e-05, "loss": 0.3261, "step": 3212 }, { "epoch": 0.28, "learning_rate": 1.700400846847906e-05, "loss": 0.2954, "step": 3213 }, { "epoch": 0.28, "learning_rate": 1.7002026658416862e-05, "loss": 0.3056, "step": 3214 }, { "epoch": 0.28, "learning_rate": 1.700004430867305e-05, "loss": 0.3188, "step": 3215 }, { "epoch": 0.28, "learning_rate": 1.6998061419400408e-05, "loss": 0.2596, "step": 3216 }, { "epoch": 0.28, "learning_rate": 1.699607799075177e-05, "loss": 0.3025, "step": 3217 }, { "epoch": 0.28, "learning_rate": 1.699409402288001e-05, "loss": 0.3573, "step": 3218 }, { "epoch": 0.28, "learning_rate": 1.6992109515938042e-05, "loss": 0.2955, "step": 3219 }, { "epoch": 0.28, "learning_rate": 1.699012447007882e-05, "loss": 0.3124, "step": 3220 }, { "epoch": 0.28, "learning_rate": 1.698813888545535e-05, "loss": 0.2769, "step": 3221 }, { "epoch": 0.28, "learning_rate": 1.6986152762220655e-05, "loss": 0.3116, "step": 3222 }, { "epoch": 0.28, "learning_rate": 1.698416610052783e-05, "loss": 0.3093, "step": 3223 }, { "epoch": 0.28, "learning_rate": 1.6982178900529988e-05, "loss": 0.2751, "step": 3224 }, { "epoch": 0.28, "learning_rate": 1.6980191162380298e-05, "loss": 0.3327, "step": 3225 }, { "epoch": 0.28, "learning_rate": 1.6978202886231963e-05, "loss": 0.287, "step": 3226 }, { "epoch": 0.28, "learning_rate": 1.697621407223823e-05, "loss": 0.3455, "step": 3227 }, { "epoch": 0.28, "learning_rate": 1.697422472055239e-05, "loss": 0.2659, "step": 3228 }, { "epoch": 0.28, "learning_rate": 1.6972234831327767e-05, "loss": 0.2847, "step": 3229 }, { "epoch": 0.28, "learning_rate": 1.6970244404717732e-05, "loss": 0.3066, "step": 3230 }, { "epoch": 0.28, "learning_rate": 1.6968253440875702e-05, "loss": 0.3514, "step": 3231 }, { "epoch": 0.28, "learning_rate": 1.6966261939955125e-05, "loss": 0.2875, "step": 3232 }, { "epoch": 0.28, "learning_rate": 1.69642699021095e-05, "loss": 0.2546, "step": 3233 }, { "epoch": 0.28, "learning_rate": 1.6962277327492366e-05, "loss": 0.3536, "step": 3234 }, { "epoch": 0.28, "learning_rate": 1.6960284216257293e-05, "loss": 0.3553, "step": 3235 }, { "epoch": 0.28, "learning_rate": 1.6958290568557905e-05, "loss": 0.2839, "step": 3236 }, { "epoch": 0.28, "learning_rate": 1.695629638454786e-05, "loss": 0.3006, "step": 3237 }, { "epoch": 0.28, "learning_rate": 1.6954301664380867e-05, "loss": 0.2579, "step": 3238 }, { "epoch": 0.28, "learning_rate": 1.6952306408210663e-05, "loss": 0.284, "step": 3239 }, { "epoch": 0.28, "learning_rate": 1.695031061619103e-05, "loss": 0.2819, "step": 3240 }, { "epoch": 0.28, "learning_rate": 1.6948314288475796e-05, "loss": 0.3775, "step": 3241 }, { "epoch": 0.28, "learning_rate": 1.6946317425218834e-05, "loss": 0.29, "step": 3242 }, { "epoch": 0.28, "learning_rate": 1.6944320026574047e-05, "loss": 0.3154, "step": 3243 }, { "epoch": 0.28, "learning_rate": 1.694232209269538e-05, "loss": 0.3047, "step": 3244 }, { "epoch": 0.28, "learning_rate": 1.6940323623736835e-05, "loss": 0.2916, "step": 3245 }, { "epoch": 0.28, "learning_rate": 1.6938324619852435e-05, "loss": 0.3168, "step": 3246 }, { "epoch": 0.28, "learning_rate": 1.693632508119626e-05, "loss": 0.2517, "step": 3247 }, { "epoch": 0.28, "learning_rate": 1.6934325007922418e-05, "loss": 0.2792, "step": 3248 }, { "epoch": 0.28, "learning_rate": 1.6932324400185073e-05, "loss": 0.3254, "step": 3249 }, { "epoch": 0.28, "learning_rate": 1.693032325813841e-05, "loss": 0.3464, "step": 3250 }, { "epoch": 0.28, "learning_rate": 1.6928321581936676e-05, "loss": 0.2496, "step": 3251 }, { "epoch": 0.28, "learning_rate": 1.692631937173415e-05, "loss": 0.2857, "step": 3252 }, { "epoch": 0.28, "learning_rate": 1.692431662768515e-05, "loss": 0.3168, "step": 3253 }, { "epoch": 0.28, "learning_rate": 1.6922313349944037e-05, "loss": 0.2949, "step": 3254 }, { "epoch": 0.28, "learning_rate": 1.6920309538665215e-05, "loss": 0.3461, "step": 3255 }, { "epoch": 0.28, "learning_rate": 1.691830519400313e-05, "loss": 0.2751, "step": 3256 }, { "epoch": 0.28, "learning_rate": 1.6916300316112265e-05, "loss": 0.3233, "step": 3257 }, { "epoch": 0.28, "learning_rate": 1.6914294905147144e-05, "loss": 0.2827, "step": 3258 }, { "epoch": 0.28, "learning_rate": 1.691228896126234e-05, "loss": 0.2757, "step": 3259 }, { "epoch": 0.28, "learning_rate": 1.6910282484612452e-05, "loss": 0.266, "step": 3260 }, { "epoch": 0.28, "learning_rate": 1.690827547535214e-05, "loss": 0.2808, "step": 3261 }, { "epoch": 0.28, "learning_rate": 1.6906267933636087e-05, "loss": 0.3284, "step": 3262 }, { "epoch": 0.28, "learning_rate": 1.6904259859619028e-05, "loss": 0.2706, "step": 3263 }, { "epoch": 0.28, "learning_rate": 1.690225125345573e-05, "loss": 0.2667, "step": 3264 }, { "epoch": 0.28, "learning_rate": 1.6900242115301014e-05, "loss": 0.2571, "step": 3265 }, { "epoch": 0.28, "learning_rate": 1.689823244530973e-05, "loss": 0.2697, "step": 3266 }, { "epoch": 0.28, "learning_rate": 1.6896222243636775e-05, "loss": 0.3256, "step": 3267 }, { "epoch": 0.28, "learning_rate": 1.6894211510437086e-05, "loss": 0.3239, "step": 3268 }, { "epoch": 0.28, "learning_rate": 1.6892200245865635e-05, "loss": 0.2856, "step": 3269 }, { "epoch": 0.28, "learning_rate": 1.6890188450077445e-05, "loss": 0.2835, "step": 3270 }, { "epoch": 0.28, "learning_rate": 1.6888176123227576e-05, "loss": 0.3461, "step": 3271 }, { "epoch": 0.28, "learning_rate": 1.6886163265471127e-05, "loss": 0.3284, "step": 3272 }, { "epoch": 0.28, "learning_rate": 1.688414987696324e-05, "loss": 0.2874, "step": 3273 }, { "epoch": 0.28, "learning_rate": 1.6882135957859095e-05, "loss": 0.2784, "step": 3274 }, { "epoch": 0.28, "learning_rate": 1.6880121508313916e-05, "loss": 0.3162, "step": 3275 }, { "epoch": 0.28, "learning_rate": 1.6878106528482968e-05, "loss": 0.2836, "step": 3276 }, { "epoch": 0.28, "learning_rate": 1.687609101852155e-05, "loss": 0.2795, "step": 3277 }, { "epoch": 0.28, "learning_rate": 1.6874074978585018e-05, "loss": 0.3029, "step": 3278 }, { "epoch": 0.28, "learning_rate": 1.687205840882875e-05, "loss": 0.265, "step": 3279 }, { "epoch": 0.28, "learning_rate": 1.6870041309408174e-05, "loss": 0.3556, "step": 3280 }, { "epoch": 0.28, "learning_rate": 1.6868023680478763e-05, "loss": 0.2484, "step": 3281 }, { "epoch": 0.28, "learning_rate": 1.686600552219602e-05, "loss": 0.3536, "step": 3282 }, { "epoch": 0.28, "learning_rate": 1.6863986834715497e-05, "loss": 0.2745, "step": 3283 }, { "epoch": 0.28, "learning_rate": 1.686196761819279e-05, "loss": 0.2825, "step": 3284 }, { "epoch": 0.28, "learning_rate": 1.6859947872783517e-05, "loss": 0.2963, "step": 3285 }, { "epoch": 0.28, "learning_rate": 1.6857927598643362e-05, "loss": 0.2841, "step": 3286 }, { "epoch": 0.28, "learning_rate": 1.6855906795928033e-05, "loss": 0.3093, "step": 3287 }, { "epoch": 0.28, "learning_rate": 1.6853885464793287e-05, "loss": 0.2869, "step": 3288 }, { "epoch": 0.28, "learning_rate": 1.685186360539491e-05, "loss": 0.2253, "step": 3289 }, { "epoch": 0.28, "learning_rate": 1.6849841217888748e-05, "loss": 0.2869, "step": 3290 }, { "epoch": 0.28, "learning_rate": 1.684781830243067e-05, "loss": 0.6086, "step": 3291 }, { "epoch": 0.28, "learning_rate": 1.684579485917659e-05, "loss": 0.2919, "step": 3292 }, { "epoch": 0.28, "learning_rate": 1.684377088828247e-05, "loss": 0.3093, "step": 3293 }, { "epoch": 0.28, "learning_rate": 1.6841746389904306e-05, "loss": 0.2957, "step": 3294 }, { "epoch": 0.28, "learning_rate": 1.6839721364198134e-05, "loss": 0.3026, "step": 3295 }, { "epoch": 0.28, "learning_rate": 1.683769581132004e-05, "loss": 0.2643, "step": 3296 }, { "epoch": 0.28, "learning_rate": 1.6835669731426137e-05, "loss": 0.2872, "step": 3297 }, { "epoch": 0.28, "learning_rate": 1.6833643124672586e-05, "loss": 0.3674, "step": 3298 }, { "epoch": 0.28, "learning_rate": 1.683161599121559e-05, "loss": 0.3269, "step": 3299 }, { "epoch": 0.28, "learning_rate": 1.682958833121139e-05, "loss": 0.2796, "step": 3300 }, { "epoch": 0.28, "learning_rate": 1.6827560144816266e-05, "loss": 0.285, "step": 3301 }, { "epoch": 0.28, "learning_rate": 1.6825531432186545e-05, "loss": 0.2755, "step": 3302 }, { "epoch": 0.28, "learning_rate": 1.6823502193478583e-05, "loss": 0.3187, "step": 3303 }, { "epoch": 0.28, "learning_rate": 1.6821472428848788e-05, "loss": 0.283, "step": 3304 }, { "epoch": 0.28, "learning_rate": 1.6819442138453605e-05, "loss": 0.3214, "step": 3305 }, { "epoch": 0.28, "learning_rate": 1.681741132244952e-05, "loss": 0.3398, "step": 3306 }, { "epoch": 0.28, "learning_rate": 1.6815379980993055e-05, "loss": 0.3513, "step": 3307 }, { "epoch": 0.28, "learning_rate": 1.6813348114240775e-05, "loss": 0.2922, "step": 3308 }, { "epoch": 0.28, "learning_rate": 1.681131572234929e-05, "loss": 0.379, "step": 3309 }, { "epoch": 0.28, "learning_rate": 1.6809282805475243e-05, "loss": 0.2955, "step": 3310 }, { "epoch": 0.28, "learning_rate": 1.6807249363775326e-05, "loss": 0.2535, "step": 3311 }, { "epoch": 0.28, "learning_rate": 1.6805215397406264e-05, "loss": 0.3207, "step": 3312 }, { "epoch": 0.28, "learning_rate": 1.6803180906524822e-05, "loss": 0.34, "step": 3313 }, { "epoch": 0.28, "learning_rate": 1.680114589128781e-05, "loss": 0.3007, "step": 3314 }, { "epoch": 0.28, "learning_rate": 1.679911035185208e-05, "loss": 0.3101, "step": 3315 }, { "epoch": 0.28, "learning_rate": 1.679707428837452e-05, "loss": 0.5824, "step": 3316 }, { "epoch": 0.28, "learning_rate": 1.679503770101206e-05, "loss": 0.3018, "step": 3317 }, { "epoch": 0.28, "learning_rate": 1.6793000589921666e-05, "loss": 0.2902, "step": 3318 }, { "epoch": 0.28, "learning_rate": 1.679096295526035e-05, "loss": 0.2965, "step": 3319 }, { "epoch": 0.28, "learning_rate": 1.6788924797185174e-05, "loss": 0.2875, "step": 3320 }, { "epoch": 0.28, "learning_rate": 1.6786886115853214e-05, "loss": 0.3323, "step": 3321 }, { "epoch": 0.28, "learning_rate": 1.6784846911421605e-05, "loss": 0.2855, "step": 3322 }, { "epoch": 0.28, "learning_rate": 1.6782807184047524e-05, "loss": 0.3079, "step": 3323 }, { "epoch": 0.28, "learning_rate": 1.678076693388818e-05, "loss": 0.5862, "step": 3324 }, { "epoch": 0.29, "learning_rate": 1.6778726161100825e-05, "loss": 0.2911, "step": 3325 }, { "epoch": 0.29, "learning_rate": 1.6776684865842748e-05, "loss": 0.2958, "step": 3326 }, { "epoch": 0.29, "learning_rate": 1.677464304827129e-05, "loss": 0.2935, "step": 3327 }, { "epoch": 0.29, "learning_rate": 1.6772600708543822e-05, "loss": 0.3343, "step": 3328 }, { "epoch": 0.29, "learning_rate": 1.6770557846817754e-05, "loss": 0.2747, "step": 3329 }, { "epoch": 0.29, "learning_rate": 1.6768514463250544e-05, "loss": 0.3183, "step": 3330 }, { "epoch": 0.29, "learning_rate": 1.676647055799968e-05, "loss": 0.2773, "step": 3331 }, { "epoch": 0.29, "learning_rate": 1.67644261312227e-05, "loss": 0.3177, "step": 3332 }, { "epoch": 0.29, "learning_rate": 1.6762381183077178e-05, "loss": 0.3218, "step": 3333 }, { "epoch": 0.29, "learning_rate": 1.6760335713720727e-05, "loss": 0.3254, "step": 3334 }, { "epoch": 0.29, "learning_rate": 1.6758289723311007e-05, "loss": 0.278, "step": 3335 }, { "epoch": 0.29, "learning_rate": 1.6756243212005704e-05, "loss": 0.2692, "step": 3336 }, { "epoch": 0.29, "learning_rate": 1.6754196179962563e-05, "loss": 0.3147, "step": 3337 }, { "epoch": 0.29, "learning_rate": 1.675214862733935e-05, "loss": 0.324, "step": 3338 }, { "epoch": 0.29, "learning_rate": 1.6750100554293886e-05, "loss": 0.2687, "step": 3339 }, { "epoch": 0.29, "learning_rate": 1.674805196098402e-05, "loss": 0.2871, "step": 3340 }, { "epoch": 0.29, "learning_rate": 1.6746002847567656e-05, "loss": 0.3069, "step": 3341 }, { "epoch": 0.29, "learning_rate": 1.674395321420273e-05, "loss": 0.2973, "step": 3342 }, { "epoch": 0.29, "learning_rate": 1.6741903061047204e-05, "loss": 0.2627, "step": 3343 }, { "epoch": 0.29, "learning_rate": 1.6739852388259107e-05, "loss": 0.2808, "step": 3344 }, { "epoch": 0.29, "learning_rate": 1.6737801195996492e-05, "loss": 0.3117, "step": 3345 }, { "epoch": 0.29, "learning_rate": 1.6735749484417452e-05, "loss": 0.2991, "step": 3346 }, { "epoch": 0.29, "learning_rate": 1.6733697253680124e-05, "loss": 0.3022, "step": 3347 }, { "epoch": 0.29, "learning_rate": 1.6731644503942684e-05, "loss": 0.2876, "step": 3348 }, { "epoch": 0.29, "learning_rate": 1.6729591235363346e-05, "loss": 0.2958, "step": 3349 }, { "epoch": 0.29, "learning_rate": 1.672753744810037e-05, "loss": 0.2692, "step": 3350 }, { "epoch": 0.29, "learning_rate": 1.6725483142312046e-05, "loss": 0.2693, "step": 3351 }, { "epoch": 0.29, "learning_rate": 1.6723428318156715e-05, "loss": 0.3182, "step": 3352 }, { "epoch": 0.29, "learning_rate": 1.6721372975792752e-05, "loss": 0.3208, "step": 3353 }, { "epoch": 0.29, "learning_rate": 1.671931711537857e-05, "loss": 0.2614, "step": 3354 }, { "epoch": 0.29, "learning_rate": 1.6717260737072628e-05, "loss": 0.3231, "step": 3355 }, { "epoch": 0.29, "learning_rate": 1.6715203841033417e-05, "loss": 0.3244, "step": 3356 }, { "epoch": 0.29, "learning_rate": 1.6713146427419473e-05, "loss": 0.2794, "step": 3357 }, { "epoch": 0.29, "learning_rate": 1.6711088496389375e-05, "loss": 0.3044, "step": 3358 }, { "epoch": 0.29, "learning_rate": 1.6709030048101738e-05, "loss": 0.2933, "step": 3359 }, { "epoch": 0.29, "learning_rate": 1.6706971082715212e-05, "loss": 0.2614, "step": 3360 }, { "epoch": 0.29, "learning_rate": 1.6704911600388496e-05, "loss": 0.3378, "step": 3361 }, { "epoch": 0.29, "learning_rate": 1.6702851601280322e-05, "loss": 0.3207, "step": 3362 }, { "epoch": 0.29, "learning_rate": 1.6700791085549474e-05, "loss": 0.3201, "step": 3363 }, { "epoch": 0.29, "learning_rate": 1.669873005335475e-05, "loss": 0.2818, "step": 3364 }, { "epoch": 0.29, "learning_rate": 1.6696668504855016e-05, "loss": 0.3013, "step": 3365 }, { "epoch": 0.29, "learning_rate": 1.6694606440209163e-05, "loss": 0.2685, "step": 3366 }, { "epoch": 0.29, "learning_rate": 1.6692543859576124e-05, "loss": 0.292, "step": 3367 }, { "epoch": 0.29, "learning_rate": 1.6690480763114877e-05, "loss": 0.2921, "step": 3368 }, { "epoch": 0.29, "learning_rate": 1.6688417150984423e-05, "loss": 0.2457, "step": 3369 }, { "epoch": 0.29, "learning_rate": 1.668635302334383e-05, "loss": 0.326, "step": 3370 }, { "epoch": 0.29, "learning_rate": 1.6684288380352182e-05, "loss": 0.299, "step": 3371 }, { "epoch": 0.29, "learning_rate": 1.6682223222168614e-05, "loss": 0.3047, "step": 3372 }, { "epoch": 0.29, "learning_rate": 1.66801575489523e-05, "loss": 0.3005, "step": 3373 }, { "epoch": 0.29, "learning_rate": 1.6678091360862447e-05, "loss": 0.278, "step": 3374 }, { "epoch": 0.29, "learning_rate": 1.667602465805831e-05, "loss": 0.3246, "step": 3375 }, { "epoch": 0.29, "learning_rate": 1.6673957440699183e-05, "loss": 0.2977, "step": 3376 }, { "epoch": 0.29, "learning_rate": 1.667188970894439e-05, "loss": 0.3099, "step": 3377 }, { "epoch": 0.29, "learning_rate": 1.6669821462953303e-05, "loss": 0.238, "step": 3378 }, { "epoch": 0.29, "learning_rate": 1.666775270288534e-05, "loss": 0.2957, "step": 3379 }, { "epoch": 0.29, "learning_rate": 1.666568342889994e-05, "loss": 0.2776, "step": 3380 }, { "epoch": 0.29, "learning_rate": 1.66636136411566e-05, "loss": 0.3134, "step": 3381 }, { "epoch": 0.29, "learning_rate": 1.6661543339814847e-05, "loss": 0.3475, "step": 3382 }, { "epoch": 0.29, "learning_rate": 1.6659472525034245e-05, "loss": 0.2585, "step": 3383 }, { "epoch": 0.29, "learning_rate": 1.6657401196974405e-05, "loss": 0.3091, "step": 3384 }, { "epoch": 0.29, "learning_rate": 1.6655329355794982e-05, "loss": 0.2728, "step": 3385 }, { "epoch": 0.29, "learning_rate": 1.6653257001655652e-05, "loss": 0.295, "step": 3386 }, { "epoch": 0.29, "learning_rate": 1.6651184134716156e-05, "loss": 0.2575, "step": 3387 }, { "epoch": 0.29, "learning_rate": 1.6649110755136242e-05, "loss": 0.2902, "step": 3388 }, { "epoch": 0.29, "learning_rate": 1.664703686307573e-05, "loss": 0.2437, "step": 3389 }, { "epoch": 0.29, "learning_rate": 1.6644962458694457e-05, "loss": 0.3054, "step": 3390 }, { "epoch": 0.29, "learning_rate": 1.6642887542152312e-05, "loss": 0.3232, "step": 3391 }, { "epoch": 0.29, "learning_rate": 1.664081211360922e-05, "loss": 0.3205, "step": 3392 }, { "epoch": 0.29, "learning_rate": 1.663873617322514e-05, "loss": 0.2703, "step": 3393 }, { "epoch": 0.29, "learning_rate": 1.6636659721160088e-05, "loss": 0.3407, "step": 3394 }, { "epoch": 0.29, "learning_rate": 1.663458275757409e-05, "loss": 0.3225, "step": 3395 }, { "epoch": 0.29, "learning_rate": 1.6632505282627238e-05, "loss": 0.3024, "step": 3396 }, { "epoch": 0.29, "learning_rate": 1.663042729647965e-05, "loss": 0.2923, "step": 3397 }, { "epoch": 0.29, "learning_rate": 1.662834879929149e-05, "loss": 0.2529, "step": 3398 }, { "epoch": 0.29, "learning_rate": 1.662626979122295e-05, "loss": 0.3021, "step": 3399 }, { "epoch": 0.29, "learning_rate": 1.6624190272434282e-05, "loss": 0.321, "step": 3400 }, { "epoch": 0.29, "learning_rate": 1.662211024308576e-05, "loss": 0.3032, "step": 3401 }, { "epoch": 0.29, "learning_rate": 1.6620029703337697e-05, "loss": 0.3085, "step": 3402 }, { "epoch": 0.29, "learning_rate": 1.6617948653350455e-05, "loss": 0.3025, "step": 3403 }, { "epoch": 0.29, "learning_rate": 1.6615867093284434e-05, "loss": 0.308, "step": 3404 }, { "epoch": 0.29, "learning_rate": 1.6613785023300063e-05, "loss": 0.2805, "step": 3405 }, { "epoch": 0.29, "learning_rate": 1.6611702443557826e-05, "loss": 0.259, "step": 3406 }, { "epoch": 0.29, "learning_rate": 1.660961935421823e-05, "loss": 0.3278, "step": 3407 }, { "epoch": 0.29, "learning_rate": 1.6607535755441837e-05, "loss": 0.3074, "step": 3408 }, { "epoch": 0.29, "learning_rate": 1.6605451647389238e-05, "loss": 0.28, "step": 3409 }, { "epoch": 0.29, "learning_rate": 1.660336703022106e-05, "loss": 0.2719, "step": 3410 }, { "epoch": 0.29, "learning_rate": 1.6601281904097985e-05, "loss": 0.6416, "step": 3411 }, { "epoch": 0.29, "learning_rate": 1.6599196269180712e-05, "loss": 0.3318, "step": 3412 }, { "epoch": 0.29, "learning_rate": 1.6597110125630004e-05, "loss": 0.3199, "step": 3413 }, { "epoch": 0.29, "learning_rate": 1.659502347360664e-05, "loss": 0.5948, "step": 3414 }, { "epoch": 0.29, "learning_rate": 1.6592936313271456e-05, "loss": 0.3248, "step": 3415 }, { "epoch": 0.29, "learning_rate": 1.659084864478532e-05, "loss": 0.2456, "step": 3416 }, { "epoch": 0.29, "learning_rate": 1.6588760468309134e-05, "loss": 0.2679, "step": 3417 }, { "epoch": 0.29, "learning_rate": 1.6586671784003846e-05, "loss": 0.2751, "step": 3418 }, { "epoch": 0.29, "learning_rate": 1.6584582592030444e-05, "loss": 0.3278, "step": 3419 }, { "epoch": 0.29, "learning_rate": 1.6582492892549954e-05, "loss": 0.3103, "step": 3420 }, { "epoch": 0.29, "learning_rate": 1.6580402685723434e-05, "loss": 0.3233, "step": 3421 }, { "epoch": 0.29, "learning_rate": 1.657831197171199e-05, "loss": 0.329, "step": 3422 }, { "epoch": 0.29, "learning_rate": 1.6576220750676763e-05, "loss": 0.2979, "step": 3423 }, { "epoch": 0.29, "learning_rate": 1.6574129022778936e-05, "loss": 0.3029, "step": 3424 }, { "epoch": 0.29, "learning_rate": 1.6572036788179728e-05, "loss": 0.259, "step": 3425 }, { "epoch": 0.29, "learning_rate": 1.6569944047040394e-05, "loss": 0.2759, "step": 3426 }, { "epoch": 0.29, "learning_rate": 1.6567850799522237e-05, "loss": 0.3139, "step": 3427 }, { "epoch": 0.29, "learning_rate": 1.6565757045786595e-05, "loss": 0.2843, "step": 3428 }, { "epoch": 0.29, "learning_rate": 1.6563662785994843e-05, "loss": 0.319, "step": 3429 }, { "epoch": 0.29, "learning_rate": 1.6561568020308397e-05, "loss": 0.2744, "step": 3430 }, { "epoch": 0.29, "learning_rate": 1.6559472748888703e-05, "loss": 0.2747, "step": 3431 }, { "epoch": 0.29, "learning_rate": 1.655737697189727e-05, "loss": 0.6115, "step": 3432 }, { "epoch": 0.29, "learning_rate": 1.6555280689495613e-05, "loss": 0.3093, "step": 3433 }, { "epoch": 0.29, "learning_rate": 1.6553183901845313e-05, "loss": 0.3391, "step": 3434 }, { "epoch": 0.29, "learning_rate": 1.655108660910798e-05, "loss": 0.2307, "step": 3435 }, { "epoch": 0.29, "learning_rate": 1.654898881144526e-05, "loss": 0.2841, "step": 3436 }, { "epoch": 0.29, "learning_rate": 1.654689050901884e-05, "loss": 0.292, "step": 3437 }, { "epoch": 0.29, "learning_rate": 1.654479170199045e-05, "loss": 0.3267, "step": 3438 }, { "epoch": 0.29, "learning_rate": 1.654269239052186e-05, "loss": 0.2788, "step": 3439 }, { "epoch": 0.29, "learning_rate": 1.654059257477486e-05, "loss": 0.3215, "step": 3440 }, { "epoch": 0.29, "learning_rate": 1.6538492254911305e-05, "loss": 0.2996, "step": 3441 }, { "epoch": 0.3, "learning_rate": 1.6536391431093077e-05, "loss": 0.3179, "step": 3442 }, { "epoch": 0.3, "learning_rate": 1.6534290103482094e-05, "loss": 0.2978, "step": 3443 }, { "epoch": 0.3, "learning_rate": 1.6532188272240314e-05, "loss": 0.2497, "step": 3444 }, { "epoch": 0.3, "learning_rate": 1.6530085937529743e-05, "loss": 0.3055, "step": 3445 }, { "epoch": 0.3, "learning_rate": 1.6527983099512414e-05, "loss": 0.2488, "step": 3446 }, { "epoch": 0.3, "learning_rate": 1.65258797583504e-05, "loss": 0.2935, "step": 3447 }, { "epoch": 0.3, "learning_rate": 1.652377591420582e-05, "loss": 0.3154, "step": 3448 }, { "epoch": 0.3, "learning_rate": 1.6521671567240826e-05, "loss": 0.2964, "step": 3449 }, { "epoch": 0.3, "learning_rate": 1.6519566717617616e-05, "loss": 0.2855, "step": 3450 }, { "epoch": 0.3, "learning_rate": 1.6517461365498416e-05, "loss": 0.2857, "step": 3451 }, { "epoch": 0.3, "learning_rate": 1.6515355511045495e-05, "loss": 0.3333, "step": 3452 }, { "epoch": 0.3, "learning_rate": 1.651324915442117e-05, "loss": 0.3373, "step": 3453 }, { "epoch": 0.3, "learning_rate": 1.651114229578778e-05, "loss": 0.3002, "step": 3454 }, { "epoch": 0.3, "learning_rate": 1.6509034935307716e-05, "loss": 0.2891, "step": 3455 }, { "epoch": 0.3, "learning_rate": 1.6506927073143396e-05, "loss": 0.2403, "step": 3456 }, { "epoch": 0.3, "learning_rate": 1.6504818709457294e-05, "loss": 0.3245, "step": 3457 }, { "epoch": 0.3, "learning_rate": 1.6502709844411907e-05, "loss": 0.2944, "step": 3458 }, { "epoch": 0.3, "learning_rate": 1.6500600478169776e-05, "loss": 0.2986, "step": 3459 }, { "epoch": 0.3, "learning_rate": 1.6498490610893478e-05, "loss": 0.2884, "step": 3460 }, { "epoch": 0.3, "learning_rate": 1.6496380242745635e-05, "loss": 0.3492, "step": 3461 }, { "epoch": 0.3, "learning_rate": 1.6494269373888902e-05, "loss": 0.3046, "step": 3462 }, { "epoch": 0.3, "learning_rate": 1.6492158004485977e-05, "loss": 0.269, "step": 3463 }, { "epoch": 0.3, "learning_rate": 1.6490046134699586e-05, "loss": 0.2934, "step": 3464 }, { "epoch": 0.3, "learning_rate": 1.6487933764692514e-05, "loss": 0.27, "step": 3465 }, { "epoch": 0.3, "learning_rate": 1.648582089462756e-05, "loss": 0.2817, "step": 3466 }, { "epoch": 0.3, "learning_rate": 1.648370752466758e-05, "loss": 0.2922, "step": 3467 }, { "epoch": 0.3, "learning_rate": 1.6481593654975466e-05, "loss": 0.3057, "step": 3468 }, { "epoch": 0.3, "learning_rate": 1.6479479285714133e-05, "loss": 0.2835, "step": 3469 }, { "epoch": 0.3, "learning_rate": 1.647736441704656e-05, "loss": 0.3217, "step": 3470 }, { "epoch": 0.3, "learning_rate": 1.6475249049135736e-05, "loss": 0.3239, "step": 3471 }, { "epoch": 0.3, "learning_rate": 1.6473133182144715e-05, "loss": 0.2886, "step": 3472 }, { "epoch": 0.3, "learning_rate": 1.647101681623657e-05, "loss": 0.312, "step": 3473 }, { "epoch": 0.3, "learning_rate": 1.6468899951574423e-05, "loss": 0.2891, "step": 3474 }, { "epoch": 0.3, "learning_rate": 1.646678258832143e-05, "loss": 0.2823, "step": 3475 }, { "epoch": 0.3, "learning_rate": 1.6464664726640793e-05, "loss": 0.3081, "step": 3476 }, { "epoch": 0.3, "learning_rate": 1.646254636669574e-05, "loss": 0.2733, "step": 3477 }, { "epoch": 0.3, "learning_rate": 1.6460427508649546e-05, "loss": 0.3147, "step": 3478 }, { "epoch": 0.3, "learning_rate": 1.6458308152665522e-05, "loss": 0.3237, "step": 3479 }, { "epoch": 0.3, "learning_rate": 1.6456188298907015e-05, "loss": 0.3083, "step": 3480 }, { "epoch": 0.3, "learning_rate": 1.6454067947537417e-05, "loss": 0.2651, "step": 3481 }, { "epoch": 0.3, "learning_rate": 1.6451947098720148e-05, "loss": 0.3128, "step": 3482 }, { "epoch": 0.3, "learning_rate": 1.644982575261868e-05, "loss": 0.3183, "step": 3483 }, { "epoch": 0.3, "learning_rate": 1.6447703909396514e-05, "loss": 0.2783, "step": 3484 }, { "epoch": 0.3, "learning_rate": 1.6445581569217184e-05, "loss": 0.3288, "step": 3485 }, { "epoch": 0.3, "learning_rate": 1.644345873224428e-05, "loss": 0.2936, "step": 3486 }, { "epoch": 0.3, "learning_rate": 1.6441335398641417e-05, "loss": 0.3128, "step": 3487 }, { "epoch": 0.3, "learning_rate": 1.643921156857224e-05, "loss": 0.2819, "step": 3488 }, { "epoch": 0.3, "learning_rate": 1.6437087242200462e-05, "loss": 0.3202, "step": 3489 }, { "epoch": 0.3, "learning_rate": 1.6434962419689803e-05, "loss": 0.2733, "step": 3490 }, { "epoch": 0.3, "learning_rate": 1.6432837101204036e-05, "loss": 0.2684, "step": 3491 }, { "epoch": 0.3, "learning_rate": 1.6430711286906968e-05, "loss": 0.2916, "step": 3492 }, { "epoch": 0.3, "learning_rate": 1.642858497696245e-05, "loss": 0.6289, "step": 3493 }, { "epoch": 0.3, "learning_rate": 1.642645817153437e-05, "loss": 0.3239, "step": 3494 }, { "epoch": 0.3, "learning_rate": 1.6424330870786648e-05, "loss": 0.2929, "step": 3495 }, { "epoch": 0.3, "learning_rate": 1.6422203074883247e-05, "loss": 0.2653, "step": 3496 }, { "epoch": 0.3, "learning_rate": 1.6420074783988164e-05, "loss": 0.2754, "step": 3497 }, { "epoch": 0.3, "learning_rate": 1.6417945998265436e-05, "loss": 0.3062, "step": 3498 }, { "epoch": 0.3, "learning_rate": 1.6415816717879146e-05, "loss": 0.3181, "step": 3499 }, { "epoch": 0.3, "learning_rate": 1.6413686942993405e-05, "loss": 0.2699, "step": 3500 }, { "epoch": 0.3, "learning_rate": 1.6411556673772365e-05, "loss": 0.3083, "step": 3501 }, { "epoch": 0.3, "learning_rate": 1.6409425910380215e-05, "loss": 0.2824, "step": 3502 }, { "epoch": 0.3, "learning_rate": 1.6407294652981187e-05, "loss": 0.3623, "step": 3503 }, { "epoch": 0.3, "learning_rate": 1.6405162901739545e-05, "loss": 0.6411, "step": 3504 }, { "epoch": 0.3, "learning_rate": 1.6403030656819597e-05, "loss": 0.3047, "step": 3505 }, { "epoch": 0.3, "learning_rate": 1.6400897918385687e-05, "loss": 0.337, "step": 3506 }, { "epoch": 0.3, "learning_rate": 1.6398764686602188e-05, "loss": 0.5853, "step": 3507 }, { "epoch": 0.3, "learning_rate": 1.639663096163353e-05, "loss": 0.2921, "step": 3508 }, { "epoch": 0.3, "learning_rate": 1.639449674364416e-05, "loss": 0.2695, "step": 3509 }, { "epoch": 0.3, "learning_rate": 1.6392362032798578e-05, "loss": 0.2542, "step": 3510 }, { "epoch": 0.3, "learning_rate": 1.6390226829261317e-05, "loss": 0.2558, "step": 3511 }, { "epoch": 0.3, "learning_rate": 1.6388091133196946e-05, "loss": 0.2843, "step": 3512 }, { "epoch": 0.3, "learning_rate": 1.6385954944770076e-05, "loss": 0.2916, "step": 3513 }, { "epoch": 0.3, "learning_rate": 1.638381826414535e-05, "loss": 0.2812, "step": 3514 }, { "epoch": 0.3, "learning_rate": 1.638168109148746e-05, "loss": 0.2671, "step": 3515 }, { "epoch": 0.3, "learning_rate": 1.6379543426961122e-05, "loss": 0.338, "step": 3516 }, { "epoch": 0.3, "learning_rate": 1.6377405270731097e-05, "loss": 0.2952, "step": 3517 }, { "epoch": 0.3, "learning_rate": 1.6375266622962188e-05, "loss": 0.3425, "step": 3518 }, { "epoch": 0.3, "learning_rate": 1.6373127483819227e-05, "loss": 0.3367, "step": 3519 }, { "epoch": 0.3, "learning_rate": 1.637098785346709e-05, "loss": 0.2607, "step": 3520 }, { "epoch": 0.3, "learning_rate": 1.636884773207069e-05, "loss": 0.308, "step": 3521 }, { "epoch": 0.3, "learning_rate": 1.6366707119794978e-05, "loss": 0.2767, "step": 3522 }, { "epoch": 0.3, "learning_rate": 1.636456601680494e-05, "loss": 0.2756, "step": 3523 }, { "epoch": 0.3, "learning_rate": 1.63624244232656e-05, "loss": 0.2422, "step": 3524 }, { "epoch": 0.3, "learning_rate": 1.6360282339342023e-05, "loss": 0.3226, "step": 3525 }, { "epoch": 0.3, "learning_rate": 1.635813976519931e-05, "loss": 0.2922, "step": 3526 }, { "epoch": 0.3, "learning_rate": 1.6355996701002603e-05, "loss": 0.3536, "step": 3527 }, { "epoch": 0.3, "learning_rate": 1.6353853146917073e-05, "loss": 0.2979, "step": 3528 }, { "epoch": 0.3, "learning_rate": 1.635170910310794e-05, "loss": 0.2936, "step": 3529 }, { "epoch": 0.3, "learning_rate": 1.6349564569740454e-05, "loss": 0.2738, "step": 3530 }, { "epoch": 0.3, "learning_rate": 1.6347419546979902e-05, "loss": 0.3466, "step": 3531 }, { "epoch": 0.3, "learning_rate": 1.6345274034991615e-05, "loss": 0.2838, "step": 3532 }, { "epoch": 0.3, "learning_rate": 1.6343128033940962e-05, "loss": 0.277, "step": 3533 }, { "epoch": 0.3, "learning_rate": 1.634098154399334e-05, "loss": 0.2803, "step": 3534 }, { "epoch": 0.3, "learning_rate": 1.6338834565314193e-05, "loss": 0.2666, "step": 3535 }, { "epoch": 0.3, "learning_rate": 1.6336687098069e-05, "loss": 0.3102, "step": 3536 }, { "epoch": 0.3, "learning_rate": 1.6334539142423275e-05, "loss": 0.3169, "step": 3537 }, { "epoch": 0.3, "learning_rate": 1.633239069854257e-05, "loss": 0.2339, "step": 3538 }, { "epoch": 0.3, "learning_rate": 1.6330241766592485e-05, "loss": 0.3302, "step": 3539 }, { "epoch": 0.3, "learning_rate": 1.632809234673864e-05, "loss": 0.3147, "step": 3540 }, { "epoch": 0.3, "learning_rate": 1.6325942439146705e-05, "loss": 0.2712, "step": 3541 }, { "epoch": 0.3, "learning_rate": 1.632379204398238e-05, "loss": 0.2921, "step": 3542 }, { "epoch": 0.3, "learning_rate": 1.632164116141142e-05, "loss": 0.2944, "step": 3543 }, { "epoch": 0.3, "learning_rate": 1.631948979159959e-05, "loss": 0.2791, "step": 3544 }, { "epoch": 0.3, "learning_rate": 1.631733793471271e-05, "loss": 0.2876, "step": 3545 }, { "epoch": 0.3, "learning_rate": 1.6315185590916644e-05, "loss": 0.2985, "step": 3546 }, { "epoch": 0.3, "learning_rate": 1.631303276037727e-05, "loss": 0.2849, "step": 3547 }, { "epoch": 0.3, "learning_rate": 1.631087944326053e-05, "loss": 0.2609, "step": 3548 }, { "epoch": 0.3, "learning_rate": 1.630872563973238e-05, "loss": 0.269, "step": 3549 }, { "epoch": 0.3, "learning_rate": 1.6306571349958833e-05, "loss": 0.3069, "step": 3550 }, { "epoch": 0.3, "learning_rate": 1.630441657410593e-05, "loss": 0.3502, "step": 3551 }, { "epoch": 0.3, "learning_rate": 1.6302261312339745e-05, "loss": 0.2947, "step": 3552 }, { "epoch": 0.3, "learning_rate": 1.63001055648264e-05, "loss": 0.2584, "step": 3553 }, { "epoch": 0.3, "learning_rate": 1.6297949331732047e-05, "loss": 0.2751, "step": 3554 }, { "epoch": 0.3, "learning_rate": 1.629579261322288e-05, "loss": 0.278, "step": 3555 }, { "epoch": 0.3, "learning_rate": 1.6293635409465127e-05, "loss": 0.2886, "step": 3556 }, { "epoch": 0.3, "learning_rate": 1.6291477720625052e-05, "loss": 0.2904, "step": 3557 }, { "epoch": 0.3, "learning_rate": 1.6289319546868966e-05, "loss": 0.2527, "step": 3558 }, { "epoch": 0.31, "learning_rate": 1.62871608883632e-05, "loss": 0.3637, "step": 3559 }, { "epoch": 0.31, "learning_rate": 1.6285001745274143e-05, "loss": 0.2681, "step": 3560 }, { "epoch": 0.31, "learning_rate": 1.62828421177682e-05, "loss": 0.3171, "step": 3561 }, { "epoch": 0.31, "learning_rate": 1.628068200601184e-05, "loss": 0.256, "step": 3562 }, { "epoch": 0.31, "learning_rate": 1.6278521410171538e-05, "loss": 0.2552, "step": 3563 }, { "epoch": 0.31, "learning_rate": 1.627636033041383e-05, "loss": 0.3043, "step": 3564 }, { "epoch": 0.31, "learning_rate": 1.6274198766905286e-05, "loss": 0.267, "step": 3565 }, { "epoch": 0.31, "learning_rate": 1.6272036719812496e-05, "loss": 0.2993, "step": 3566 }, { "epoch": 0.31, "learning_rate": 1.626987418930211e-05, "loss": 0.3393, "step": 3567 }, { "epoch": 0.31, "learning_rate": 1.6267711175540795e-05, "loss": 0.3575, "step": 3568 }, { "epoch": 0.31, "learning_rate": 1.626554767869528e-05, "loss": 0.3052, "step": 3569 }, { "epoch": 0.31, "learning_rate": 1.6263383698932307e-05, "loss": 0.2604, "step": 3570 }, { "epoch": 0.31, "learning_rate": 1.6261219236418667e-05, "loss": 0.25, "step": 3571 }, { "epoch": 0.31, "learning_rate": 1.6259054291321186e-05, "loss": 0.2901, "step": 3572 }, { "epoch": 0.31, "learning_rate": 1.6256888863806724e-05, "loss": 0.3086, "step": 3573 }, { "epoch": 0.31, "learning_rate": 1.625472295404219e-05, "loss": 0.3322, "step": 3574 }, { "epoch": 0.31, "learning_rate": 1.6252556562194514e-05, "loss": 0.2962, "step": 3575 }, { "epoch": 0.31, "learning_rate": 1.625038968843067e-05, "loss": 0.2575, "step": 3576 }, { "epoch": 0.31, "learning_rate": 1.6248222332917672e-05, "loss": 0.2944, "step": 3577 }, { "epoch": 0.31, "learning_rate": 1.6246054495822575e-05, "loss": 0.2662, "step": 3578 }, { "epoch": 0.31, "learning_rate": 1.624388617731246e-05, "loss": 0.3342, "step": 3579 }, { "epoch": 0.31, "learning_rate": 1.624171737755445e-05, "loss": 0.3099, "step": 3580 }, { "epoch": 0.31, "learning_rate": 1.6239548096715703e-05, "loss": 0.3141, "step": 3581 }, { "epoch": 0.31, "learning_rate": 1.6237378334963422e-05, "loss": 0.2843, "step": 3582 }, { "epoch": 0.31, "learning_rate": 1.6235208092464832e-05, "loss": 0.2454, "step": 3583 }, { "epoch": 0.31, "learning_rate": 1.6233037369387223e-05, "loss": 0.3518, "step": 3584 }, { "epoch": 0.31, "learning_rate": 1.6230866165897882e-05, "loss": 0.2885, "step": 3585 }, { "epoch": 0.31, "learning_rate": 1.6228694482164167e-05, "loss": 0.3163, "step": 3586 }, { "epoch": 0.31, "learning_rate": 1.6226522318353462e-05, "loss": 0.3243, "step": 3587 }, { "epoch": 0.31, "learning_rate": 1.6224349674633178e-05, "loss": 0.2996, "step": 3588 }, { "epoch": 0.31, "learning_rate": 1.622217655117078e-05, "loss": 0.6091, "step": 3589 }, { "epoch": 0.31, "learning_rate": 1.6220002948133756e-05, "loss": 0.3337, "step": 3590 }, { "epoch": 0.31, "learning_rate": 1.621782886568964e-05, "loss": 0.3636, "step": 3591 }, { "epoch": 0.31, "learning_rate": 1.6215654304005995e-05, "loss": 0.2984, "step": 3592 }, { "epoch": 0.31, "learning_rate": 1.6213479263250433e-05, "loss": 0.2817, "step": 3593 }, { "epoch": 0.31, "learning_rate": 1.621130374359059e-05, "loss": 0.2816, "step": 3594 }, { "epoch": 0.31, "learning_rate": 1.6209127745194143e-05, "loss": 0.2599, "step": 3595 }, { "epoch": 0.31, "learning_rate": 1.620695126822881e-05, "loss": 0.3114, "step": 3596 }, { "epoch": 0.31, "learning_rate": 1.6204774312862346e-05, "loss": 0.2878, "step": 3597 }, { "epoch": 0.31, "learning_rate": 1.6202596879262536e-05, "loss": 0.2787, "step": 3598 }, { "epoch": 0.31, "learning_rate": 1.6200418967597205e-05, "loss": 0.3201, "step": 3599 }, { "epoch": 0.31, "learning_rate": 1.6198240578034216e-05, "loss": 0.2548, "step": 3600 }, { "epoch": 0.31, "learning_rate": 1.6196061710741472e-05, "loss": 0.2433, "step": 3601 }, { "epoch": 0.31, "learning_rate": 1.6193882365886905e-05, "loss": 0.3009, "step": 3602 }, { "epoch": 0.31, "learning_rate": 1.6191702543638493e-05, "loss": 0.3177, "step": 3603 }, { "epoch": 0.31, "learning_rate": 1.618952224416424e-05, "loss": 0.2981, "step": 3604 }, { "epoch": 0.31, "learning_rate": 1.6187341467632198e-05, "loss": 0.621, "step": 3605 }, { "epoch": 0.31, "learning_rate": 1.6185160214210447e-05, "loss": 0.337, "step": 3606 }, { "epoch": 0.31, "learning_rate": 1.6182978484067106e-05, "loss": 0.2773, "step": 3607 }, { "epoch": 0.31, "learning_rate": 1.618079627737034e-05, "loss": 0.649, "step": 3608 }, { "epoch": 0.31, "learning_rate": 1.617861359428833e-05, "loss": 0.282, "step": 3609 }, { "epoch": 0.31, "learning_rate": 1.617643043498932e-05, "loss": 0.3079, "step": 3610 }, { "epoch": 0.31, "learning_rate": 1.617424679964157e-05, "loss": 0.2839, "step": 3611 }, { "epoch": 0.31, "learning_rate": 1.617206268841338e-05, "loss": 0.3082, "step": 3612 }, { "epoch": 0.31, "learning_rate": 1.61698781014731e-05, "loss": 0.3101, "step": 3613 }, { "epoch": 0.31, "learning_rate": 1.6167693038989098e-05, "loss": 0.2913, "step": 3614 }, { "epoch": 0.31, "learning_rate": 1.6165507501129796e-05, "loss": 0.2648, "step": 3615 }, { "epoch": 0.31, "learning_rate": 1.6163321488063636e-05, "loss": 0.2505, "step": 3616 }, { "epoch": 0.31, "learning_rate": 1.6161134999959115e-05, "loss": 0.3237, "step": 3617 }, { "epoch": 0.31, "learning_rate": 1.615894803698475e-05, "loss": 0.2932, "step": 3618 }, { "epoch": 0.31, "learning_rate": 1.61567605993091e-05, "loss": 0.2676, "step": 3619 }, { "epoch": 0.31, "learning_rate": 1.6154572687100766e-05, "loss": 0.3368, "step": 3620 }, { "epoch": 0.31, "learning_rate": 1.6152384300528375e-05, "loss": 0.29, "step": 3621 }, { "epoch": 0.31, "learning_rate": 1.615019543976061e-05, "loss": 0.2947, "step": 3622 }, { "epoch": 0.31, "learning_rate": 1.6148006104966164e-05, "loss": 0.3134, "step": 3623 }, { "epoch": 0.31, "learning_rate": 1.614581629631379e-05, "loss": 0.259, "step": 3624 }, { "epoch": 0.31, "learning_rate": 1.614362601397226e-05, "loss": 0.3729, "step": 3625 }, { "epoch": 0.31, "learning_rate": 1.6141435258110397e-05, "loss": 0.2959, "step": 3626 }, { "epoch": 0.31, "learning_rate": 1.6139244028897044e-05, "loss": 0.2861, "step": 3627 }, { "epoch": 0.31, "learning_rate": 1.6137052326501098e-05, "loss": 0.2738, "step": 3628 }, { "epoch": 0.31, "learning_rate": 1.613486015109149e-05, "loss": 0.246, "step": 3629 }, { "epoch": 0.31, "learning_rate": 1.6132667502837164e-05, "loss": 0.3085, "step": 3630 }, { "epoch": 0.31, "learning_rate": 1.6130474381907135e-05, "loss": 0.3641, "step": 3631 }, { "epoch": 0.31, "learning_rate": 1.6128280788470432e-05, "loss": 0.3244, "step": 3632 }, { "epoch": 0.31, "learning_rate": 1.6126086722696123e-05, "loss": 0.2672, "step": 3633 }, { "epoch": 0.31, "learning_rate": 1.6123892184753324e-05, "loss": 0.3281, "step": 3634 }, { "epoch": 0.31, "learning_rate": 1.612169717481117e-05, "loss": 0.3254, "step": 3635 }, { "epoch": 0.31, "learning_rate": 1.611950169303885e-05, "loss": 0.2674, "step": 3636 }, { "epoch": 0.31, "learning_rate": 1.6117305739605574e-05, "loss": 0.3234, "step": 3637 }, { "epoch": 0.31, "learning_rate": 1.6115109314680603e-05, "loss": 0.268, "step": 3638 }, { "epoch": 0.31, "learning_rate": 1.6112912418433218e-05, "loss": 0.2967, "step": 3639 }, { "epoch": 0.31, "learning_rate": 1.6110715051032748e-05, "loss": 0.2777, "step": 3640 }, { "epoch": 0.31, "learning_rate": 1.6108517212648556e-05, "loss": 0.2784, "step": 3641 }, { "epoch": 0.31, "learning_rate": 1.6106318903450042e-05, "loss": 0.2413, "step": 3642 }, { "epoch": 0.31, "learning_rate": 1.610412012360664e-05, "loss": 0.2925, "step": 3643 }, { "epoch": 0.31, "learning_rate": 1.6101920873287815e-05, "loss": 0.2543, "step": 3644 }, { "epoch": 0.31, "learning_rate": 1.6099721152663084e-05, "loss": 0.2842, "step": 3645 }, { "epoch": 0.31, "learning_rate": 1.6097520961901983e-05, "loss": 0.2676, "step": 3646 }, { "epoch": 0.31, "learning_rate": 1.6095320301174097e-05, "loss": 0.2871, "step": 3647 }, { "epoch": 0.31, "learning_rate": 1.609311917064904e-05, "loss": 0.2686, "step": 3648 }, { "epoch": 0.31, "learning_rate": 1.6090917570496465e-05, "loss": 0.3093, "step": 3649 }, { "epoch": 0.31, "learning_rate": 1.608871550088606e-05, "loss": 0.3452, "step": 3650 }, { "epoch": 0.31, "learning_rate": 1.6086512961987548e-05, "loss": 0.3013, "step": 3651 }, { "epoch": 0.31, "learning_rate": 1.608430995397069e-05, "loss": 0.3232, "step": 3652 }, { "epoch": 0.31, "learning_rate": 1.608210647700529e-05, "loss": 0.2588, "step": 3653 }, { "epoch": 0.31, "learning_rate": 1.607990253126117e-05, "loss": 0.2703, "step": 3654 }, { "epoch": 0.31, "learning_rate": 1.6077698116908204e-05, "loss": 0.2668, "step": 3655 }, { "epoch": 0.31, "learning_rate": 1.60754932341163e-05, "loss": 0.32, "step": 3656 }, { "epoch": 0.31, "learning_rate": 1.60732878830554e-05, "loss": 0.3237, "step": 3657 }, { "epoch": 0.31, "learning_rate": 1.6071082063895476e-05, "loss": 0.3441, "step": 3658 }, { "epoch": 0.31, "learning_rate": 1.606887577680654e-05, "loss": 0.3376, "step": 3659 }, { "epoch": 0.31, "learning_rate": 1.6066669021958653e-05, "loss": 0.3409, "step": 3660 }, { "epoch": 0.31, "learning_rate": 1.6064461799521892e-05, "loss": 0.2643, "step": 3661 }, { "epoch": 0.31, "learning_rate": 1.6062254109666383e-05, "loss": 0.2762, "step": 3662 }, { "epoch": 0.31, "learning_rate": 1.606004595256228e-05, "loss": 0.2946, "step": 3663 }, { "epoch": 0.31, "learning_rate": 1.6057837328379778e-05, "loss": 0.2802, "step": 3664 }, { "epoch": 0.31, "learning_rate": 1.6055628237289103e-05, "loss": 0.3182, "step": 3665 }, { "epoch": 0.31, "learning_rate": 1.6053418679460534e-05, "loss": 0.2902, "step": 3666 }, { "epoch": 0.31, "learning_rate": 1.605120865506436e-05, "loss": 0.2957, "step": 3667 }, { "epoch": 0.31, "learning_rate": 1.604899816427092e-05, "loss": 0.2753, "step": 3668 }, { "epoch": 0.31, "learning_rate": 1.6046787207250597e-05, "loss": 0.3199, "step": 3669 }, { "epoch": 0.31, "learning_rate": 1.604457578417379e-05, "loss": 0.3224, "step": 3670 }, { "epoch": 0.31, "learning_rate": 1.6042363895210948e-05, "loss": 0.3215, "step": 3671 }, { "epoch": 0.31, "learning_rate": 1.6040151540532553e-05, "loss": 0.2979, "step": 3672 }, { "epoch": 0.31, "learning_rate": 1.6037938720309122e-05, "loss": 0.3066, "step": 3673 }, { "epoch": 0.31, "learning_rate": 1.603572543471121e-05, "loss": 0.3035, "step": 3674 }, { "epoch": 0.32, "learning_rate": 1.6033511683909406e-05, "loss": 0.2681, "step": 3675 }, { "epoch": 0.32, "learning_rate": 1.6031297468074335e-05, "loss": 0.2849, "step": 3676 }, { "epoch": 0.32, "learning_rate": 1.6029082787376653e-05, "loss": 0.2579, "step": 3677 }, { "epoch": 0.32, "learning_rate": 1.602686764198706e-05, "loss": 0.582, "step": 3678 }, { "epoch": 0.32, "learning_rate": 1.6024652032076295e-05, "loss": 0.2867, "step": 3679 }, { "epoch": 0.32, "learning_rate": 1.6022435957815116e-05, "loss": 0.283, "step": 3680 }, { "epoch": 0.32, "learning_rate": 1.602021941937433e-05, "loss": 0.2457, "step": 3681 }, { "epoch": 0.32, "learning_rate": 1.601800241692478e-05, "loss": 0.3286, "step": 3682 }, { "epoch": 0.32, "learning_rate": 1.6015784950637338e-05, "loss": 0.3573, "step": 3683 }, { "epoch": 0.32, "learning_rate": 1.6013567020682917e-05, "loss": 0.3053, "step": 3684 }, { "epoch": 0.32, "learning_rate": 1.6011348627232463e-05, "loss": 0.2863, "step": 3685 }, { "epoch": 0.32, "learning_rate": 1.6009129770456962e-05, "loss": 0.3561, "step": 3686 }, { "epoch": 0.32, "learning_rate": 1.6006910450527428e-05, "loss": 0.2638, "step": 3687 }, { "epoch": 0.32, "learning_rate": 1.600469066761492e-05, "loss": 0.34, "step": 3688 }, { "epoch": 0.32, "learning_rate": 1.6002470421890522e-05, "loss": 0.2606, "step": 3689 }, { "epoch": 0.32, "learning_rate": 1.6000249713525366e-05, "loss": 0.2829, "step": 3690 }, { "epoch": 0.32, "learning_rate": 1.599802854269061e-05, "loss": 0.3141, "step": 3691 }, { "epoch": 0.32, "learning_rate": 1.599580690955745e-05, "loss": 0.6311, "step": 3692 }, { "epoch": 0.32, "learning_rate": 1.599358481429712e-05, "loss": 0.2985, "step": 3693 }, { "epoch": 0.32, "learning_rate": 1.599136225708089e-05, "loss": 0.3333, "step": 3694 }, { "epoch": 0.32, "learning_rate": 1.598913923808006e-05, "loss": 0.2717, "step": 3695 }, { "epoch": 0.32, "learning_rate": 1.5986915757465968e-05, "loss": 0.3118, "step": 3696 }, { "epoch": 0.32, "learning_rate": 1.598469181540999e-05, "loss": 0.257, "step": 3697 }, { "epoch": 0.32, "learning_rate": 1.5982467412083543e-05, "loss": 0.2535, "step": 3698 }, { "epoch": 0.32, "learning_rate": 1.5980242547658068e-05, "loss": 0.2959, "step": 3699 }, { "epoch": 0.32, "learning_rate": 1.5978017222305046e-05, "loss": 0.3043, "step": 3700 }, { "epoch": 0.32, "learning_rate": 1.5975791436195994e-05, "loss": 0.2791, "step": 3701 }, { "epoch": 0.32, "learning_rate": 1.5973565189502463e-05, "loss": 0.3318, "step": 3702 }, { "epoch": 0.32, "learning_rate": 1.597133848239605e-05, "loss": 0.2953, "step": 3703 }, { "epoch": 0.32, "learning_rate": 1.5969111315048365e-05, "loss": 0.2986, "step": 3704 }, { "epoch": 0.32, "learning_rate": 1.5966883687631075e-05, "loss": 0.2959, "step": 3705 }, { "epoch": 0.32, "learning_rate": 1.596465560031588e-05, "loss": 0.2951, "step": 3706 }, { "epoch": 0.32, "learning_rate": 1.5962427053274495e-05, "loss": 0.2969, "step": 3707 }, { "epoch": 0.32, "learning_rate": 1.59601980466787e-05, "loss": 0.2968, "step": 3708 }, { "epoch": 0.32, "learning_rate": 1.595796858070029e-05, "loss": 0.293, "step": 3709 }, { "epoch": 0.32, "learning_rate": 1.5955738655511094e-05, "loss": 0.3122, "step": 3710 }, { "epoch": 0.32, "learning_rate": 1.5953508271282997e-05, "loss": 0.2566, "step": 3711 }, { "epoch": 0.32, "learning_rate": 1.59512774281879e-05, "loss": 0.2891, "step": 3712 }, { "epoch": 0.32, "learning_rate": 1.594904612639774e-05, "loss": 0.2946, "step": 3713 }, { "epoch": 0.32, "learning_rate": 1.5946814366084505e-05, "loss": 0.317, "step": 3714 }, { "epoch": 0.32, "learning_rate": 1.59445821474202e-05, "loss": 0.2724, "step": 3715 }, { "epoch": 0.32, "learning_rate": 1.5942349470576878e-05, "loss": 0.3065, "step": 3716 }, { "epoch": 0.32, "learning_rate": 1.5940116335726615e-05, "loss": 0.2784, "step": 3717 }, { "epoch": 0.32, "learning_rate": 1.5937882743041543e-05, "loss": 0.3358, "step": 3718 }, { "epoch": 0.32, "learning_rate": 1.5935648692693805e-05, "loss": 0.3549, "step": 3719 }, { "epoch": 0.32, "learning_rate": 1.5933414184855597e-05, "loss": 0.3517, "step": 3720 }, { "epoch": 0.32, "learning_rate": 1.5931179219699144e-05, "loss": 0.2758, "step": 3721 }, { "epoch": 0.32, "learning_rate": 1.5928943797396695e-05, "loss": 0.3008, "step": 3722 }, { "epoch": 0.32, "learning_rate": 1.592670791812056e-05, "loss": 0.3405, "step": 3723 }, { "epoch": 0.32, "learning_rate": 1.592447158204306e-05, "loss": 0.2762, "step": 3724 }, { "epoch": 0.32, "learning_rate": 1.5922234789336567e-05, "loss": 0.3245, "step": 3725 }, { "epoch": 0.32, "learning_rate": 1.591999754017348e-05, "loss": 0.3244, "step": 3726 }, { "epoch": 0.32, "learning_rate": 1.5917759834726233e-05, "loss": 0.2568, "step": 3727 }, { "epoch": 0.32, "learning_rate": 1.5915521673167296e-05, "loss": 0.3459, "step": 3728 }, { "epoch": 0.32, "learning_rate": 1.591328305566918e-05, "loss": 0.3571, "step": 3729 }, { "epoch": 0.32, "learning_rate": 1.5911043982404426e-05, "loss": 0.2926, "step": 3730 }, { "epoch": 0.32, "learning_rate": 1.5908804453545608e-05, "loss": 0.3193, "step": 3731 }, { "epoch": 0.32, "learning_rate": 1.590656446926534e-05, "loss": 0.3146, "step": 3732 }, { "epoch": 0.32, "learning_rate": 1.5904324029736266e-05, "loss": 0.298, "step": 3733 }, { "epoch": 0.32, "learning_rate": 1.5902083135131067e-05, "loss": 0.2803, "step": 3734 }, { "epoch": 0.32, "learning_rate": 1.5899841785622468e-05, "loss": 0.2667, "step": 3735 }, { "epoch": 0.32, "learning_rate": 1.5897599981383214e-05, "loss": 0.2776, "step": 3736 }, { "epoch": 0.32, "learning_rate": 1.5895357722586093e-05, "loss": 0.2954, "step": 3737 }, { "epoch": 0.32, "learning_rate": 1.5893115009403932e-05, "loss": 0.3359, "step": 3738 }, { "epoch": 0.32, "learning_rate": 1.5890871842009582e-05, "loss": 0.3189, "step": 3739 }, { "epoch": 0.32, "learning_rate": 1.588862822057594e-05, "loss": 0.615, "step": 3740 }, { "epoch": 0.32, "learning_rate": 1.588638414527593e-05, "loss": 0.3118, "step": 3741 }, { "epoch": 0.32, "learning_rate": 1.5884139616282517e-05, "loss": 0.3116, "step": 3742 }, { "epoch": 0.32, "learning_rate": 1.5881894633768697e-05, "loss": 0.324, "step": 3743 }, { "epoch": 0.32, "learning_rate": 1.58796491979075e-05, "loss": 0.3676, "step": 3744 }, { "epoch": 0.32, "learning_rate": 1.5877403308871997e-05, "loss": 0.3316, "step": 3745 }, { "epoch": 0.32, "learning_rate": 1.5875156966835285e-05, "loss": 0.2591, "step": 3746 }, { "epoch": 0.32, "learning_rate": 1.5872910171970506e-05, "loss": 0.3391, "step": 3747 }, { "epoch": 0.32, "learning_rate": 1.587066292445083e-05, "loss": 0.5974, "step": 3748 }, { "epoch": 0.32, "learning_rate": 1.5868415224449463e-05, "loss": 0.3163, "step": 3749 }, { "epoch": 0.32, "learning_rate": 1.5866167072139645e-05, "loss": 0.31, "step": 3750 }, { "epoch": 0.32, "learning_rate": 1.586391846769466e-05, "loss": 0.3144, "step": 3751 }, { "epoch": 0.32, "learning_rate": 1.586166941128781e-05, "loss": 0.3107, "step": 3752 }, { "epoch": 0.32, "learning_rate": 1.585941990309245e-05, "loss": 0.2744, "step": 3753 }, { "epoch": 0.32, "learning_rate": 1.5857169943281948e-05, "loss": 0.2595, "step": 3754 }, { "epoch": 0.32, "learning_rate": 1.5854919532029734e-05, "loss": 0.2889, "step": 3755 }, { "epoch": 0.32, "learning_rate": 1.5852668669509252e-05, "loss": 0.3166, "step": 3756 }, { "epoch": 0.32, "learning_rate": 1.5850417355893984e-05, "loss": 0.2836, "step": 3757 }, { "epoch": 0.32, "learning_rate": 1.5848165591357458e-05, "loss": 0.3284, "step": 3758 }, { "epoch": 0.32, "learning_rate": 1.5845913376073222e-05, "loss": 0.2581, "step": 3759 }, { "epoch": 0.32, "learning_rate": 1.5843660710214872e-05, "loss": 0.4052, "step": 3760 }, { "epoch": 0.32, "learning_rate": 1.5841407593956026e-05, "loss": 0.2761, "step": 3761 }, { "epoch": 0.32, "learning_rate": 1.5839154027470346e-05, "loss": 0.3372, "step": 3762 }, { "epoch": 0.32, "learning_rate": 1.5836900010931527e-05, "loss": 0.3278, "step": 3763 }, { "epoch": 0.32, "learning_rate": 1.5834645544513296e-05, "loss": 0.296, "step": 3764 }, { "epoch": 0.32, "learning_rate": 1.5832390628389417e-05, "loss": 0.3231, "step": 3765 }, { "epoch": 0.32, "learning_rate": 1.5830135262733684e-05, "loss": 0.3066, "step": 3766 }, { "epoch": 0.32, "learning_rate": 1.5827879447719932e-05, "loss": 0.2812, "step": 3767 }, { "epoch": 0.32, "learning_rate": 1.582562318352203e-05, "loss": 0.2911, "step": 3768 }, { "epoch": 0.32, "learning_rate": 1.582336647031388e-05, "loss": 0.2672, "step": 3769 }, { "epoch": 0.32, "learning_rate": 1.5821109308269416e-05, "loss": 0.3187, "step": 3770 }, { "epoch": 0.32, "learning_rate": 1.581885169756261e-05, "loss": 0.2862, "step": 3771 }, { "epoch": 0.32, "learning_rate": 1.581659363836747e-05, "loss": 0.2764, "step": 3772 }, { "epoch": 0.32, "learning_rate": 1.5814335130858026e-05, "loss": 0.257, "step": 3773 }, { "epoch": 0.32, "learning_rate": 1.581207617520836e-05, "loss": 0.261, "step": 3774 }, { "epoch": 0.32, "learning_rate": 1.5809816771592584e-05, "loss": 0.2747, "step": 3775 }, { "epoch": 0.32, "learning_rate": 1.5807556920184837e-05, "loss": 0.3214, "step": 3776 }, { "epoch": 0.32, "learning_rate": 1.58052966211593e-05, "loss": 0.2962, "step": 3777 }, { "epoch": 0.32, "learning_rate": 1.5803035874690186e-05, "loss": 0.2794, "step": 3778 }, { "epoch": 0.32, "learning_rate": 1.5800774680951736e-05, "loss": 0.3008, "step": 3779 }, { "epoch": 0.32, "learning_rate": 1.579851304011824e-05, "loss": 0.2864, "step": 3780 }, { "epoch": 0.32, "learning_rate": 1.5796250952364008e-05, "loss": 0.2719, "step": 3781 }, { "epoch": 0.32, "learning_rate": 1.579398841786339e-05, "loss": 0.3212, "step": 3782 }, { "epoch": 0.32, "learning_rate": 1.579172543679078e-05, "loss": 0.2466, "step": 3783 }, { "epoch": 0.32, "learning_rate": 1.5789462009320586e-05, "loss": 0.27, "step": 3784 }, { "epoch": 0.32, "learning_rate": 1.578719813562727e-05, "loss": 0.2386, "step": 3785 }, { "epoch": 0.32, "learning_rate": 1.5784933815885315e-05, "loss": 0.2562, "step": 3786 }, { "epoch": 0.32, "learning_rate": 1.5782669050269243e-05, "loss": 0.3107, "step": 3787 }, { "epoch": 0.32, "learning_rate": 1.578040383895362e-05, "loss": 0.2988, "step": 3788 }, { "epoch": 0.32, "learning_rate": 1.5778138182113027e-05, "loss": 0.3156, "step": 3789 }, { "epoch": 0.32, "learning_rate": 1.5775872079922098e-05, "loss": 0.2906, "step": 3790 }, { "epoch": 0.32, "learning_rate": 1.5773605532555484e-05, "loss": 0.287, "step": 3791 }, { "epoch": 0.33, "learning_rate": 1.5771338540187883e-05, "loss": 0.2913, "step": 3792 }, { "epoch": 0.33, "learning_rate": 1.5769071102994024e-05, "loss": 0.5977, "step": 3793 }, { "epoch": 0.33, "learning_rate": 1.5766803221148676e-05, "loss": 0.3246, "step": 3794 }, { "epoch": 0.33, "learning_rate": 1.5764534894826623e-05, "loss": 0.2846, "step": 3795 }, { "epoch": 0.33, "learning_rate": 1.5762266124202708e-05, "loss": 0.28, "step": 3796 }, { "epoch": 0.33, "learning_rate": 1.5759996909451795e-05, "loss": 0.2986, "step": 3797 }, { "epoch": 0.33, "learning_rate": 1.5757727250748773e-05, "loss": 0.302, "step": 3798 }, { "epoch": 0.33, "learning_rate": 1.575545714826859e-05, "loss": 0.356, "step": 3799 }, { "epoch": 0.33, "learning_rate": 1.5753186602186207e-05, "loss": 0.3202, "step": 3800 }, { "epoch": 0.33, "learning_rate": 1.575091561267663e-05, "loss": 0.307, "step": 3801 }, { "epoch": 0.33, "learning_rate": 1.574864417991489e-05, "loss": 0.2645, "step": 3802 }, { "epoch": 0.33, "learning_rate": 1.5746372304076065e-05, "loss": 0.2705, "step": 3803 }, { "epoch": 0.33, "learning_rate": 1.5744099985335255e-05, "loss": 0.3011, "step": 3804 }, { "epoch": 0.33, "learning_rate": 1.5741827223867602e-05, "loss": 0.6638, "step": 3805 }, { "epoch": 0.33, "learning_rate": 1.5739554019848274e-05, "loss": 0.3149, "step": 3806 }, { "epoch": 0.33, "learning_rate": 1.5737280373452487e-05, "loss": 0.2958, "step": 3807 }, { "epoch": 0.33, "learning_rate": 1.5735006284855473e-05, "loss": 0.3245, "step": 3808 }, { "epoch": 0.33, "learning_rate": 1.5732731754232516e-05, "loss": 0.2841, "step": 3809 }, { "epoch": 0.33, "learning_rate": 1.573045678175892e-05, "loss": 0.3588, "step": 3810 }, { "epoch": 0.33, "learning_rate": 1.572818136761003e-05, "loss": 0.2849, "step": 3811 }, { "epoch": 0.33, "learning_rate": 1.5725905511961226e-05, "loss": 0.3043, "step": 3812 }, { "epoch": 0.33, "learning_rate": 1.5723629214987915e-05, "loss": 0.3508, "step": 3813 }, { "epoch": 0.33, "learning_rate": 1.5721352476865546e-05, "loss": 0.2888, "step": 3814 }, { "epoch": 0.33, "learning_rate": 1.5719075297769596e-05, "loss": 0.2879, "step": 3815 }, { "epoch": 0.33, "learning_rate": 1.5716797677875586e-05, "loss": 0.3232, "step": 3816 }, { "epoch": 0.33, "learning_rate": 1.5714519617359054e-05, "loss": 0.2796, "step": 3817 }, { "epoch": 0.33, "learning_rate": 1.571224111639559e-05, "loss": 0.2831, "step": 3818 }, { "epoch": 0.33, "learning_rate": 1.5709962175160806e-05, "loss": 0.2746, "step": 3819 }, { "epoch": 0.33, "learning_rate": 1.5707682793830347e-05, "loss": 0.2793, "step": 3820 }, { "epoch": 0.33, "learning_rate": 1.5705402972579902e-05, "loss": 0.3566, "step": 3821 }, { "epoch": 0.33, "learning_rate": 1.570312271158519e-05, "loss": 0.2615, "step": 3822 }, { "epoch": 0.33, "learning_rate": 1.5700842011021954e-05, "loss": 0.2676, "step": 3823 }, { "epoch": 0.33, "learning_rate": 1.5698560871065986e-05, "loss": 0.3256, "step": 3824 }, { "epoch": 0.33, "learning_rate": 1.5696279291893107e-05, "loss": 0.2972, "step": 3825 }, { "epoch": 0.33, "learning_rate": 1.5693997273679165e-05, "loss": 0.2914, "step": 3826 }, { "epoch": 0.33, "learning_rate": 1.5691714816600045e-05, "loss": 0.303, "step": 3827 }, { "epoch": 0.33, "learning_rate": 1.5689431920831676e-05, "loss": 0.3151, "step": 3828 }, { "epoch": 0.33, "learning_rate": 1.5687148586550003e-05, "loss": 0.2988, "step": 3829 }, { "epoch": 0.33, "learning_rate": 1.568486481393102e-05, "loss": 0.2723, "step": 3830 }, { "epoch": 0.33, "learning_rate": 1.5682580603150742e-05, "loss": 0.2534, "step": 3831 }, { "epoch": 0.33, "learning_rate": 1.5680295954385235e-05, "loss": 0.2994, "step": 3832 }, { "epoch": 0.33, "learning_rate": 1.5678010867810583e-05, "loss": 0.3505, "step": 3833 }, { "epoch": 0.33, "learning_rate": 1.5675725343602904e-05, "loss": 0.2736, "step": 3834 }, { "epoch": 0.33, "learning_rate": 1.5673439381938365e-05, "loss": 0.2769, "step": 3835 }, { "epoch": 0.33, "learning_rate": 1.567115298299315e-05, "loss": 0.2642, "step": 3836 }, { "epoch": 0.33, "learning_rate": 1.5668866146943484e-05, "loss": 0.2465, "step": 3837 }, { "epoch": 0.33, "learning_rate": 1.5666578873965627e-05, "loss": 0.2972, "step": 3838 }, { "epoch": 0.33, "learning_rate": 1.566429116423587e-05, "loss": 0.293, "step": 3839 }, { "epoch": 0.33, "learning_rate": 1.566200301793054e-05, "loss": 0.2595, "step": 3840 }, { "epoch": 0.33, "learning_rate": 1.5659714435225993e-05, "loss": 0.3223, "step": 3841 }, { "epoch": 0.33, "learning_rate": 1.5657425416298623e-05, "loss": 0.2939, "step": 3842 }, { "epoch": 0.33, "learning_rate": 1.5655135961324856e-05, "loss": 0.2733, "step": 3843 }, { "epoch": 0.33, "learning_rate": 1.565284607048115e-05, "loss": 0.2607, "step": 3844 }, { "epoch": 0.33, "learning_rate": 1.5650555743944002e-05, "loss": 0.3189, "step": 3845 }, { "epoch": 0.33, "learning_rate": 1.5648264981889936e-05, "loss": 0.2677, "step": 3846 }, { "epoch": 0.33, "learning_rate": 1.5645973784495517e-05, "loss": 0.2839, "step": 3847 }, { "epoch": 0.33, "learning_rate": 1.5643682151937333e-05, "loss": 0.296, "step": 3848 }, { "epoch": 0.33, "learning_rate": 1.564139008439202e-05, "loss": 0.2631, "step": 3849 }, { "epoch": 0.33, "learning_rate": 1.5639097582036226e-05, "loss": 0.3555, "step": 3850 }, { "epoch": 0.33, "learning_rate": 1.563680464504666e-05, "loss": 0.2966, "step": 3851 }, { "epoch": 0.33, "learning_rate": 1.5634511273600042e-05, "loss": 0.2507, "step": 3852 }, { "epoch": 0.33, "learning_rate": 1.563221746787314e-05, "loss": 0.2749, "step": 3853 }, { "epoch": 0.33, "learning_rate": 1.562992322804274e-05, "loss": 0.2935, "step": 3854 }, { "epoch": 0.33, "learning_rate": 1.5627628554285678e-05, "loss": 0.3058, "step": 3855 }, { "epoch": 0.33, "learning_rate": 1.5625333446778812e-05, "loss": 0.2646, "step": 3856 }, { "epoch": 0.33, "learning_rate": 1.5623037905699043e-05, "loss": 0.2944, "step": 3857 }, { "epoch": 0.33, "learning_rate": 1.5620741931223292e-05, "loss": 0.2922, "step": 3858 }, { "epoch": 0.33, "learning_rate": 1.5618445523528533e-05, "loss": 0.3265, "step": 3859 }, { "epoch": 0.33, "learning_rate": 1.561614868279175e-05, "loss": 0.287, "step": 3860 }, { "epoch": 0.33, "learning_rate": 1.5613851409189974e-05, "loss": 0.2684, "step": 3861 }, { "epoch": 0.33, "learning_rate": 1.5611553702900275e-05, "loss": 0.301, "step": 3862 }, { "epoch": 0.33, "learning_rate": 1.560925556409974e-05, "loss": 0.2579, "step": 3863 }, { "epoch": 0.33, "learning_rate": 1.5606956992965504e-05, "loss": 0.2759, "step": 3864 }, { "epoch": 0.33, "learning_rate": 1.560465798967473e-05, "loss": 0.2783, "step": 3865 }, { "epoch": 0.33, "learning_rate": 1.5602358554404613e-05, "loss": 0.2875, "step": 3866 }, { "epoch": 0.33, "learning_rate": 1.5600058687332375e-05, "loss": 0.2493, "step": 3867 }, { "epoch": 0.33, "learning_rate": 1.5597758388635288e-05, "loss": 0.2921, "step": 3868 }, { "epoch": 0.33, "learning_rate": 1.5595457658490643e-05, "loss": 0.2942, "step": 3869 }, { "epoch": 0.33, "learning_rate": 1.5593156497075767e-05, "loss": 0.3015, "step": 3870 }, { "epoch": 0.33, "learning_rate": 1.559085490456803e-05, "loss": 0.3109, "step": 3871 }, { "epoch": 0.33, "learning_rate": 1.5588552881144814e-05, "loss": 0.6384, "step": 3872 }, { "epoch": 0.33, "learning_rate": 1.5586250426983566e-05, "loss": 0.333, "step": 3873 }, { "epoch": 0.33, "learning_rate": 1.558394754226173e-05, "loss": 0.3302, "step": 3874 }, { "epoch": 0.33, "learning_rate": 1.5581644227156815e-05, "loss": 0.3281, "step": 3875 }, { "epoch": 0.33, "learning_rate": 1.5579340481846338e-05, "loss": 0.2907, "step": 3876 }, { "epoch": 0.33, "learning_rate": 1.5577036306507863e-05, "loss": 0.3022, "step": 3877 }, { "epoch": 0.33, "learning_rate": 1.5574731701318987e-05, "loss": 0.2982, "step": 3878 }, { "epoch": 0.33, "learning_rate": 1.5572426666457342e-05, "loss": 0.3142, "step": 3879 }, { "epoch": 0.33, "learning_rate": 1.557012120210058e-05, "loss": 0.3098, "step": 3880 }, { "epoch": 0.33, "learning_rate": 1.55678153084264e-05, "loss": 0.2721, "step": 3881 }, { "epoch": 0.33, "learning_rate": 1.5565508985612525e-05, "loss": 0.2855, "step": 3882 }, { "epoch": 0.33, "learning_rate": 1.556320223383672e-05, "loss": 0.3185, "step": 3883 }, { "epoch": 0.33, "learning_rate": 1.556089505327677e-05, "loss": 0.265, "step": 3884 }, { "epoch": 0.33, "learning_rate": 1.555858744411052e-05, "loss": 0.3051, "step": 3885 }, { "epoch": 0.33, "learning_rate": 1.5556279406515802e-05, "loss": 0.2719, "step": 3886 }, { "epoch": 0.33, "learning_rate": 1.5553970940670527e-05, "loss": 0.304, "step": 3887 }, { "epoch": 0.33, "learning_rate": 1.5551662046752612e-05, "loss": 0.2966, "step": 3888 }, { "epoch": 0.33, "learning_rate": 1.554935272494002e-05, "loss": 0.3229, "step": 3889 }, { "epoch": 0.33, "learning_rate": 1.554704297541074e-05, "loss": 0.239, "step": 3890 }, { "epoch": 0.33, "learning_rate": 1.5544732798342798e-05, "loss": 0.2798, "step": 3891 }, { "epoch": 0.33, "learning_rate": 1.554242219391425e-05, "loss": 0.2979, "step": 3892 }, { "epoch": 0.33, "learning_rate": 1.554011116230318e-05, "loss": 0.3093, "step": 3893 }, { "epoch": 0.33, "learning_rate": 1.553779970368772e-05, "loss": 0.332, "step": 3894 }, { "epoch": 0.33, "learning_rate": 1.5535487818246023e-05, "loss": 0.3015, "step": 3895 }, { "epoch": 0.33, "learning_rate": 1.553317550615627e-05, "loss": 0.3169, "step": 3896 }, { "epoch": 0.33, "learning_rate": 1.5530862767596697e-05, "loss": 0.3001, "step": 3897 }, { "epoch": 0.33, "learning_rate": 1.5528549602745545e-05, "loss": 0.3043, "step": 3898 }, { "epoch": 0.33, "learning_rate": 1.552623601178111e-05, "loss": 0.3199, "step": 3899 }, { "epoch": 0.33, "learning_rate": 1.552392199488171e-05, "loss": 0.2902, "step": 3900 }, { "epoch": 0.33, "learning_rate": 1.5521607552225698e-05, "loss": 0.2822, "step": 3901 }, { "epoch": 0.33, "learning_rate": 1.5519292683991455e-05, "loss": 0.2686, "step": 3902 }, { "epoch": 0.33, "learning_rate": 1.5516977390357405e-05, "loss": 0.3297, "step": 3903 }, { "epoch": 0.33, "learning_rate": 1.5514661671502e-05, "loss": 0.2429, "step": 3904 }, { "epoch": 0.33, "learning_rate": 1.5512345527603718e-05, "loss": 0.3002, "step": 3905 }, { "epoch": 0.33, "learning_rate": 1.5510028958841085e-05, "loss": 0.2615, "step": 3906 }, { "epoch": 0.33, "learning_rate": 1.5507711965392643e-05, "loss": 0.2908, "step": 3907 }, { "epoch": 0.33, "learning_rate": 1.5505394547436976e-05, "loss": 0.2653, "step": 3908 }, { "epoch": 0.34, "learning_rate": 1.55030767051527e-05, "loss": 0.2932, "step": 3909 }, { "epoch": 0.34, "learning_rate": 1.5500758438718463e-05, "loss": 0.3299, "step": 3910 }, { "epoch": 0.34, "learning_rate": 1.549843974831295e-05, "loss": 0.3065, "step": 3911 }, { "epoch": 0.34, "learning_rate": 1.5496120634114865e-05, "loss": 0.5555, "step": 3912 }, { "epoch": 0.34, "learning_rate": 1.5493801096302964e-05, "loss": 0.2878, "step": 3913 }, { "epoch": 0.34, "learning_rate": 1.5491481135056012e-05, "loss": 0.2618, "step": 3914 }, { "epoch": 0.34, "learning_rate": 1.5489160750552833e-05, "loss": 0.3099, "step": 3915 }, { "epoch": 0.34, "learning_rate": 1.548683994297227e-05, "loss": 0.3718, "step": 3916 }, { "epoch": 0.34, "learning_rate": 1.5484518712493188e-05, "loss": 0.2573, "step": 3917 }, { "epoch": 0.34, "learning_rate": 1.548219705929451e-05, "loss": 0.3022, "step": 3918 }, { "epoch": 0.34, "learning_rate": 1.5479874983555166e-05, "loss": 0.2458, "step": 3919 }, { "epoch": 0.34, "learning_rate": 1.5477552485454136e-05, "loss": 0.2794, "step": 3920 }, { "epoch": 0.34, "learning_rate": 1.5475229565170428e-05, "loss": 0.3146, "step": 3921 }, { "epoch": 0.34, "learning_rate": 1.5472906222883075e-05, "loss": 0.3178, "step": 3922 }, { "epoch": 0.34, "learning_rate": 1.547058245877116e-05, "loss": 0.2927, "step": 3923 }, { "epoch": 0.34, "learning_rate": 1.5468258273013773e-05, "loss": 0.3034, "step": 3924 }, { "epoch": 0.34, "learning_rate": 1.546593366579006e-05, "loss": 0.3246, "step": 3925 }, { "epoch": 0.34, "learning_rate": 1.546360863727919e-05, "loss": 0.3193, "step": 3926 }, { "epoch": 0.34, "learning_rate": 1.546128318766036e-05, "loss": 0.2811, "step": 3927 }, { "epoch": 0.34, "learning_rate": 1.545895731711281e-05, "loss": 0.2975, "step": 3928 }, { "epoch": 0.34, "learning_rate": 1.54566310258158e-05, "loss": 0.3044, "step": 3929 }, { "epoch": 0.34, "learning_rate": 1.5454304313948635e-05, "loss": 0.2925, "step": 3930 }, { "epoch": 0.34, "learning_rate": 1.545197718169064e-05, "loss": 0.2997, "step": 3931 }, { "epoch": 0.34, "learning_rate": 1.544964962922119e-05, "loss": 0.2923, "step": 3932 }, { "epoch": 0.34, "learning_rate": 1.5447321656719668e-05, "loss": 0.3475, "step": 3933 }, { "epoch": 0.34, "learning_rate": 1.544499326436551e-05, "loss": 0.2952, "step": 3934 }, { "epoch": 0.34, "learning_rate": 1.5442664452338178e-05, "loss": 0.3317, "step": 3935 }, { "epoch": 0.34, "learning_rate": 1.544033522081716e-05, "loss": 0.2699, "step": 3936 }, { "epoch": 0.34, "learning_rate": 1.5438005569981986e-05, "loss": 0.2758, "step": 3937 }, { "epoch": 0.34, "learning_rate": 1.5435675500012212e-05, "loss": 0.2902, "step": 3938 }, { "epoch": 0.34, "learning_rate": 1.5433345011087427e-05, "loss": 0.3263, "step": 3939 }, { "epoch": 0.34, "learning_rate": 1.5431014103387263e-05, "loss": 0.2625, "step": 3940 }, { "epoch": 0.34, "learning_rate": 1.542868277709136e-05, "loss": 0.3054, "step": 3941 }, { "epoch": 0.34, "learning_rate": 1.5426351032379418e-05, "loss": 0.326, "step": 3942 }, { "epoch": 0.34, "learning_rate": 1.5424018869431144e-05, "loss": 0.6332, "step": 3943 }, { "epoch": 0.34, "learning_rate": 1.5421686288426303e-05, "loss": 0.3088, "step": 3944 }, { "epoch": 0.34, "learning_rate": 1.541935328954467e-05, "loss": 0.2856, "step": 3945 }, { "epoch": 0.34, "learning_rate": 1.541701987296606e-05, "loss": 0.3416, "step": 3946 }, { "epoch": 0.34, "learning_rate": 1.5414686038870327e-05, "loss": 0.2977, "step": 3947 }, { "epoch": 0.34, "learning_rate": 1.541235178743735e-05, "loss": 0.2991, "step": 3948 }, { "epoch": 0.34, "learning_rate": 1.541001711884704e-05, "loss": 0.2932, "step": 3949 }, { "epoch": 0.34, "learning_rate": 1.540768203327934e-05, "loss": 0.2958, "step": 3950 }, { "epoch": 0.34, "learning_rate": 1.5405346530914233e-05, "loss": 0.3158, "step": 3951 }, { "epoch": 0.34, "learning_rate": 1.5403010611931718e-05, "loss": 0.308, "step": 3952 }, { "epoch": 0.34, "learning_rate": 1.5400674276511848e-05, "loss": 0.2679, "step": 3953 }, { "epoch": 0.34, "learning_rate": 1.5398337524834688e-05, "loss": 0.2838, "step": 3954 }, { "epoch": 0.34, "learning_rate": 1.5396000357080345e-05, "loss": 0.2902, "step": 3955 }, { "epoch": 0.34, "learning_rate": 1.5393662773428956e-05, "loss": 0.2709, "step": 3956 }, { "epoch": 0.34, "learning_rate": 1.5391324774060695e-05, "loss": 0.2622, "step": 3957 }, { "epoch": 0.34, "learning_rate": 1.538898635915576e-05, "loss": 0.3279, "step": 3958 }, { "epoch": 0.34, "learning_rate": 1.5386647528894377e-05, "loss": 0.3098, "step": 3959 }, { "epoch": 0.34, "learning_rate": 1.5384308283456824e-05, "loss": 0.3135, "step": 3960 }, { "epoch": 0.34, "learning_rate": 1.538196862302339e-05, "loss": 0.3124, "step": 3961 }, { "epoch": 0.34, "learning_rate": 1.5379628547774412e-05, "loss": 0.2665, "step": 3962 }, { "epoch": 0.34, "learning_rate": 1.5377288057890246e-05, "loss": 0.2953, "step": 3963 }, { "epoch": 0.34, "learning_rate": 1.5374947153551284e-05, "loss": 0.2875, "step": 3964 }, { "epoch": 0.34, "learning_rate": 1.5372605834937953e-05, "loss": 0.2756, "step": 3965 }, { "epoch": 0.34, "learning_rate": 1.5370264102230716e-05, "loss": 0.3225, "step": 3966 }, { "epoch": 0.34, "learning_rate": 1.5367921955610055e-05, "loss": 0.2899, "step": 3967 }, { "epoch": 0.34, "learning_rate": 1.5365579395256493e-05, "loss": 0.2505, "step": 3968 }, { "epoch": 0.34, "learning_rate": 1.5363236421350584e-05, "loss": 0.3038, "step": 3969 }, { "epoch": 0.34, "learning_rate": 1.536089303407291e-05, "loss": 0.2897, "step": 3970 }, { "epoch": 0.34, "learning_rate": 1.5358549233604093e-05, "loss": 0.2704, "step": 3971 }, { "epoch": 0.34, "learning_rate": 1.535620502012478e-05, "loss": 0.2425, "step": 3972 }, { "epoch": 0.34, "learning_rate": 1.5353860393815642e-05, "loss": 0.3146, "step": 3973 }, { "epoch": 0.34, "learning_rate": 1.5351515354857404e-05, "loss": 0.2863, "step": 3974 }, { "epoch": 0.34, "learning_rate": 1.5349169903430804e-05, "loss": 0.2706, "step": 3975 }, { "epoch": 0.34, "learning_rate": 1.5346824039716622e-05, "loss": 0.2833, "step": 3976 }, { "epoch": 0.34, "learning_rate": 1.5344477763895663e-05, "loss": 0.3802, "step": 3977 }, { "epoch": 0.34, "learning_rate": 1.534213107614876e-05, "loss": 0.2939, "step": 3978 }, { "epoch": 0.34, "learning_rate": 1.5339783976656793e-05, "loss": 0.3157, "step": 3979 }, { "epoch": 0.34, "learning_rate": 1.533743646560066e-05, "loss": 0.3018, "step": 3980 }, { "epoch": 0.34, "learning_rate": 1.53350885431613e-05, "loss": 0.2964, "step": 3981 }, { "epoch": 0.34, "learning_rate": 1.5332740209519674e-05, "loss": 0.2876, "step": 3982 }, { "epoch": 0.34, "learning_rate": 1.5330391464856784e-05, "loss": 0.3074, "step": 3983 }, { "epoch": 0.34, "learning_rate": 1.5328042309353655e-05, "loss": 0.3092, "step": 3984 }, { "epoch": 0.34, "learning_rate": 1.532569274319136e-05, "loss": 0.59, "step": 3985 }, { "epoch": 0.34, "learning_rate": 1.5323342766550978e-05, "loss": 0.2571, "step": 3986 }, { "epoch": 0.34, "learning_rate": 1.5320992379613637e-05, "loss": 0.2595, "step": 3987 }, { "epoch": 0.34, "learning_rate": 1.5318641582560497e-05, "loss": 0.2527, "step": 3988 }, { "epoch": 0.34, "learning_rate": 1.5316290375572746e-05, "loss": 0.3005, "step": 3989 }, { "epoch": 0.34, "learning_rate": 1.5313938758831596e-05, "loss": 0.2512, "step": 3990 }, { "epoch": 0.34, "learning_rate": 1.5311586732518307e-05, "loss": 0.348, "step": 3991 }, { "epoch": 0.34, "learning_rate": 1.530923429681416e-05, "loss": 0.2892, "step": 3992 }, { "epoch": 0.34, "learning_rate": 1.5306881451900462e-05, "loss": 0.2681, "step": 3993 }, { "epoch": 0.34, "learning_rate": 1.5304528197958565e-05, "loss": 0.3127, "step": 3994 }, { "epoch": 0.34, "learning_rate": 1.530217453516985e-05, "loss": 0.3745, "step": 3995 }, { "epoch": 0.34, "learning_rate": 1.5299820463715716e-05, "loss": 0.2645, "step": 3996 }, { "epoch": 0.34, "learning_rate": 1.5297465983777612e-05, "loss": 0.3236, "step": 3997 }, { "epoch": 0.34, "learning_rate": 1.5295111095536997e-05, "loss": 0.2545, "step": 3998 }, { "epoch": 0.34, "learning_rate": 1.529275579917539e-05, "loss": 0.3099, "step": 3999 }, { "epoch": 0.34, "learning_rate": 1.5290400094874316e-05, "loss": 0.3007, "step": 4000 }, { "epoch": 0.34, "learning_rate": 1.5288043982815345e-05, "loss": 0.2887, "step": 4001 }, { "epoch": 0.34, "learning_rate": 1.528568746318007e-05, "loss": 0.2738, "step": 4002 }, { "epoch": 0.34, "learning_rate": 1.5283330536150126e-05, "loss": 0.2722, "step": 4003 }, { "epoch": 0.34, "learning_rate": 1.528097320190717e-05, "loss": 0.3093, "step": 4004 }, { "epoch": 0.34, "learning_rate": 1.5278615460632892e-05, "loss": 0.3059, "step": 4005 }, { "epoch": 0.34, "learning_rate": 1.527625731250901e-05, "loss": 0.2981, "step": 4006 }, { "epoch": 0.34, "learning_rate": 1.5273898757717295e-05, "loss": 0.2946, "step": 4007 }, { "epoch": 0.34, "learning_rate": 1.527153979643952e-05, "loss": 0.2905, "step": 4008 }, { "epoch": 0.34, "learning_rate": 1.5269180428857506e-05, "loss": 0.2736, "step": 4009 }, { "epoch": 0.34, "learning_rate": 1.52668206551531e-05, "loss": 0.3032, "step": 4010 }, { "epoch": 0.34, "learning_rate": 1.526446047550818e-05, "loss": 0.2682, "step": 4011 }, { "epoch": 0.34, "learning_rate": 1.526209989010466e-05, "loss": 0.2825, "step": 4012 }, { "epoch": 0.34, "learning_rate": 1.525973889912448e-05, "loss": 0.2828, "step": 4013 }, { "epoch": 0.34, "learning_rate": 1.5257377502749614e-05, "loss": 0.3463, "step": 4014 }, { "epoch": 0.34, "learning_rate": 1.5255015701162071e-05, "loss": 0.2434, "step": 4015 }, { "epoch": 0.34, "learning_rate": 1.5252653494543883e-05, "loss": 0.2639, "step": 4016 }, { "epoch": 0.34, "learning_rate": 1.5250290883077114e-05, "loss": 0.2601, "step": 4017 }, { "epoch": 0.34, "learning_rate": 1.5247927866943869e-05, "loss": 0.2635, "step": 4018 }, { "epoch": 0.34, "learning_rate": 1.5245564446326273e-05, "loss": 0.2798, "step": 4019 }, { "epoch": 0.34, "learning_rate": 1.5243200621406492e-05, "loss": 0.284, "step": 4020 }, { "epoch": 0.34, "learning_rate": 1.5240836392366713e-05, "loss": 0.2991, "step": 4021 }, { "epoch": 0.34, "learning_rate": 1.523847175938916e-05, "loss": 0.304, "step": 4022 }, { "epoch": 0.34, "learning_rate": 1.523610672265609e-05, "loss": 0.3409, "step": 4023 }, { "epoch": 0.34, "learning_rate": 1.5233741282349783e-05, "loss": 0.3078, "step": 4024 }, { "epoch": 0.35, "learning_rate": 1.523137543865256e-05, "loss": 0.2864, "step": 4025 }, { "epoch": 0.35, "learning_rate": 1.5229009191746769e-05, "loss": 0.2774, "step": 4026 }, { "epoch": 0.35, "learning_rate": 1.5226642541814785e-05, "loss": 0.2698, "step": 4027 }, { "epoch": 0.35, "learning_rate": 1.522427548903902e-05, "loss": 0.2891, "step": 4028 }, { "epoch": 0.35, "learning_rate": 1.5221908033601911e-05, "loss": 0.3114, "step": 4029 }, { "epoch": 0.35, "learning_rate": 1.5219540175685938e-05, "loss": 0.2944, "step": 4030 }, { "epoch": 0.35, "learning_rate": 1.5217171915473592e-05, "loss": 0.2927, "step": 4031 }, { "epoch": 0.35, "learning_rate": 1.5214803253147421e-05, "loss": 0.348, "step": 4032 }, { "epoch": 0.35, "learning_rate": 1.521243418888998e-05, "loss": 0.2881, "step": 4033 }, { "epoch": 0.35, "learning_rate": 1.5210064722883865e-05, "loss": 0.2661, "step": 4034 }, { "epoch": 0.35, "learning_rate": 1.5207694855311707e-05, "loss": 0.2761, "step": 4035 }, { "epoch": 0.35, "learning_rate": 1.5205324586356161e-05, "loss": 0.2665, "step": 4036 }, { "epoch": 0.35, "learning_rate": 1.5202953916199916e-05, "loss": 0.2978, "step": 4037 }, { "epoch": 0.35, "learning_rate": 1.5200582845025688e-05, "loss": 0.2997, "step": 4038 }, { "epoch": 0.35, "learning_rate": 1.5198211373016239e-05, "loss": 0.3301, "step": 4039 }, { "epoch": 0.35, "learning_rate": 1.5195839500354337e-05, "loss": 0.4129, "step": 4040 }, { "epoch": 0.35, "learning_rate": 1.5193467227222803e-05, "loss": 0.2791, "step": 4041 }, { "epoch": 0.35, "learning_rate": 1.5191094553804476e-05, "loss": 0.2906, "step": 4042 }, { "epoch": 0.35, "learning_rate": 1.5188721480282226e-05, "loss": 0.3106, "step": 4043 }, { "epoch": 0.35, "learning_rate": 1.5186348006838966e-05, "loss": 0.2775, "step": 4044 }, { "epoch": 0.35, "learning_rate": 1.5183974133657628e-05, "loss": 0.288, "step": 4045 }, { "epoch": 0.35, "learning_rate": 1.5181599860921182e-05, "loss": 0.2826, "step": 4046 }, { "epoch": 0.35, "learning_rate": 1.5179225188812616e-05, "loss": 0.6179, "step": 4047 }, { "epoch": 0.35, "learning_rate": 1.5176850117514964e-05, "loss": 0.3457, "step": 4048 }, { "epoch": 0.35, "learning_rate": 1.5174474647211291e-05, "loss": 0.2855, "step": 4049 }, { "epoch": 0.35, "learning_rate": 1.5172098778084672e-05, "loss": 0.3423, "step": 4050 }, { "epoch": 0.35, "learning_rate": 1.5169722510318242e-05, "loss": 0.2305, "step": 4051 }, { "epoch": 0.35, "learning_rate": 1.516734584409514e-05, "loss": 0.3118, "step": 4052 }, { "epoch": 0.35, "learning_rate": 1.5164968779598558e-05, "loss": 0.3058, "step": 4053 }, { "epoch": 0.35, "learning_rate": 1.51625913170117e-05, "loss": 0.2775, "step": 4054 }, { "epoch": 0.35, "learning_rate": 1.5160213456517812e-05, "loss": 0.2543, "step": 4055 }, { "epoch": 0.35, "learning_rate": 1.5157835198300169e-05, "loss": 0.2903, "step": 4056 }, { "epoch": 0.35, "learning_rate": 1.5155456542542072e-05, "loss": 0.2977, "step": 4057 }, { "epoch": 0.35, "learning_rate": 1.5153077489426865e-05, "loss": 0.2975, "step": 4058 }, { "epoch": 0.35, "learning_rate": 1.5150698039137903e-05, "loss": 0.3013, "step": 4059 }, { "epoch": 0.35, "learning_rate": 1.5148318191858588e-05, "loss": 0.2806, "step": 4060 }, { "epoch": 0.35, "learning_rate": 1.5145937947772344e-05, "loss": 0.3035, "step": 4061 }, { "epoch": 0.35, "learning_rate": 1.514355730706263e-05, "loss": 0.3047, "step": 4062 }, { "epoch": 0.35, "learning_rate": 1.5141176269912931e-05, "loss": 0.2446, "step": 4063 }, { "epoch": 0.35, "learning_rate": 1.5138794836506772e-05, "loss": 0.3136, "step": 4064 }, { "epoch": 0.35, "learning_rate": 1.5136413007027699e-05, "loss": 0.2704, "step": 4065 }, { "epoch": 0.35, "learning_rate": 1.5134030781659288e-05, "loss": 0.3084, "step": 4066 }, { "epoch": 0.35, "learning_rate": 1.5131648160585155e-05, "loss": 0.2347, "step": 4067 }, { "epoch": 0.35, "learning_rate": 1.512926514398894e-05, "loss": 0.278, "step": 4068 }, { "epoch": 0.35, "learning_rate": 1.5126881732054305e-05, "loss": 0.2468, "step": 4069 }, { "epoch": 0.35, "learning_rate": 1.5124497924964966e-05, "loss": 0.3073, "step": 4070 }, { "epoch": 0.35, "learning_rate": 1.5122113722904646e-05, "loss": 0.2711, "step": 4071 }, { "epoch": 0.35, "learning_rate": 1.5119729126057108e-05, "loss": 0.2786, "step": 4072 }, { "epoch": 0.35, "learning_rate": 1.5117344134606146e-05, "loss": 0.3053, "step": 4073 }, { "epoch": 0.35, "learning_rate": 1.5114958748735584e-05, "loss": 0.2523, "step": 4074 }, { "epoch": 0.35, "learning_rate": 1.5112572968629275e-05, "loss": 0.2957, "step": 4075 }, { "epoch": 0.35, "learning_rate": 1.5110186794471105e-05, "loss": 0.2795, "step": 4076 }, { "epoch": 0.35, "learning_rate": 1.5107800226444988e-05, "loss": 0.3082, "step": 4077 }, { "epoch": 0.35, "learning_rate": 1.5105413264734866e-05, "loss": 0.2845, "step": 4078 }, { "epoch": 0.35, "learning_rate": 1.5103025909524718e-05, "loss": 0.2731, "step": 4079 }, { "epoch": 0.35, "learning_rate": 1.5100638160998544e-05, "loss": 0.6196, "step": 4080 }, { "epoch": 0.35, "learning_rate": 1.5098250019340385e-05, "loss": 0.3105, "step": 4081 }, { "epoch": 0.35, "learning_rate": 1.5095861484734307e-05, "loss": 0.2779, "step": 4082 }, { "epoch": 0.35, "learning_rate": 1.5093472557364404e-05, "loss": 0.2938, "step": 4083 }, { "epoch": 0.35, "learning_rate": 1.5091083237414806e-05, "loss": 0.2758, "step": 4084 }, { "epoch": 0.35, "learning_rate": 1.5088693525069664e-05, "loss": 0.2797, "step": 4085 }, { "epoch": 0.35, "learning_rate": 1.508630342051317e-05, "loss": 0.3054, "step": 4086 }, { "epoch": 0.35, "learning_rate": 1.5083912923929545e-05, "loss": 0.2538, "step": 4087 }, { "epoch": 0.35, "learning_rate": 1.5081522035503025e-05, "loss": 0.2682, "step": 4088 }, { "epoch": 0.35, "learning_rate": 1.50791307554179e-05, "loss": 0.2856, "step": 4089 }, { "epoch": 0.35, "learning_rate": 1.5076739083858472e-05, "loss": 0.297, "step": 4090 }, { "epoch": 0.35, "learning_rate": 1.5074347021009081e-05, "loss": 0.2659, "step": 4091 }, { "epoch": 0.35, "learning_rate": 1.5071954567054094e-05, "loss": 0.3081, "step": 4092 }, { "epoch": 0.35, "learning_rate": 1.506956172217791e-05, "loss": 0.3233, "step": 4093 }, { "epoch": 0.35, "learning_rate": 1.5067168486564959e-05, "loss": 0.2972, "step": 4094 }, { "epoch": 0.35, "learning_rate": 1.5064774860399699e-05, "loss": 0.3282, "step": 4095 }, { "epoch": 0.35, "learning_rate": 1.5062380843866618e-05, "loss": 0.2553, "step": 4096 }, { "epoch": 0.35, "learning_rate": 1.5059986437150233e-05, "loss": 0.3025, "step": 4097 }, { "epoch": 0.35, "learning_rate": 1.5057591640435098e-05, "loss": 0.2498, "step": 4098 }, { "epoch": 0.35, "learning_rate": 1.505519645390579e-05, "loss": 0.2912, "step": 4099 }, { "epoch": 0.35, "learning_rate": 1.5052800877746915e-05, "loss": 0.2963, "step": 4100 }, { "epoch": 0.35, "learning_rate": 1.5050404912143118e-05, "loss": 0.3167, "step": 4101 }, { "epoch": 0.35, "learning_rate": 1.5048008557279064e-05, "loss": 0.3188, "step": 4102 }, { "epoch": 0.35, "learning_rate": 1.5045611813339456e-05, "loss": 0.3483, "step": 4103 }, { "epoch": 0.35, "learning_rate": 1.5043214680509018e-05, "loss": 0.2523, "step": 4104 }, { "epoch": 0.35, "learning_rate": 1.5040817158972509e-05, "loss": 0.2682, "step": 4105 }, { "epoch": 0.35, "learning_rate": 1.5038419248914725e-05, "loss": 0.3566, "step": 4106 }, { "epoch": 0.35, "learning_rate": 1.5036020950520476e-05, "loss": 0.3027, "step": 4107 }, { "epoch": 0.35, "learning_rate": 1.5033622263974621e-05, "loss": 0.2971, "step": 4108 }, { "epoch": 0.35, "learning_rate": 1.5031223189462031e-05, "loss": 0.2941, "step": 4109 }, { "epoch": 0.35, "learning_rate": 1.5028823727167621e-05, "loss": 0.3111, "step": 4110 }, { "epoch": 0.35, "learning_rate": 1.5026423877276322e-05, "loss": 0.3297, "step": 4111 }, { "epoch": 0.35, "learning_rate": 1.5024023639973109e-05, "loss": 0.3239, "step": 4112 }, { "epoch": 0.35, "learning_rate": 1.5021623015442976e-05, "loss": 0.3541, "step": 4113 }, { "epoch": 0.35, "learning_rate": 1.5019222003870954e-05, "loss": 0.3144, "step": 4114 }, { "epoch": 0.35, "learning_rate": 1.5016820605442105e-05, "loss": 0.2922, "step": 4115 }, { "epoch": 0.35, "learning_rate": 1.501441882034151e-05, "loss": 0.321, "step": 4116 }, { "epoch": 0.35, "learning_rate": 1.5012016648754291e-05, "loss": 0.2629, "step": 4117 }, { "epoch": 0.35, "learning_rate": 1.500961409086559e-05, "loss": 0.2981, "step": 4118 }, { "epoch": 0.35, "learning_rate": 1.500721114686059e-05, "loss": 0.2495, "step": 4119 }, { "epoch": 0.35, "learning_rate": 1.5004807816924494e-05, "loss": 0.3297, "step": 4120 }, { "epoch": 0.35, "learning_rate": 1.5002404101242543e-05, "loss": 0.2517, "step": 4121 }, { "epoch": 0.35, "learning_rate": 1.5000000000000002e-05, "loss": 0.3335, "step": 4122 }, { "epoch": 0.35, "learning_rate": 1.4997595513382166e-05, "loss": 0.3159, "step": 4123 }, { "epoch": 0.35, "learning_rate": 1.4995190641574361e-05, "loss": 0.3029, "step": 4124 }, { "epoch": 0.35, "learning_rate": 1.4992785384761945e-05, "loss": 0.2993, "step": 4125 }, { "epoch": 0.35, "learning_rate": 1.49903797431303e-05, "loss": 0.2734, "step": 4126 }, { "epoch": 0.35, "learning_rate": 1.4987973716864843e-05, "loss": 0.3224, "step": 4127 }, { "epoch": 0.35, "learning_rate": 1.4985567306151018e-05, "loss": 0.3124, "step": 4128 }, { "epoch": 0.35, "learning_rate": 1.4983160511174302e-05, "loss": 0.2655, "step": 4129 }, { "epoch": 0.35, "learning_rate": 1.4980753332120193e-05, "loss": 0.3051, "step": 4130 }, { "epoch": 0.35, "learning_rate": 1.497834576917423e-05, "loss": 0.2711, "step": 4131 }, { "epoch": 0.35, "learning_rate": 1.4975937822521972e-05, "loss": 0.3042, "step": 4132 }, { "epoch": 0.35, "learning_rate": 1.4973529492349013e-05, "loss": 0.3492, "step": 4133 }, { "epoch": 0.35, "learning_rate": 1.497112077884098e-05, "loss": 0.2485, "step": 4134 }, { "epoch": 0.35, "learning_rate": 1.4968711682183515e-05, "loss": 0.2875, "step": 4135 }, { "epoch": 0.35, "learning_rate": 1.4966302202562308e-05, "loss": 0.2903, "step": 4136 }, { "epoch": 0.35, "learning_rate": 1.4963892340163067e-05, "loss": 0.6328, "step": 4137 }, { "epoch": 0.35, "learning_rate": 1.4961482095171529e-05, "loss": 0.2867, "step": 4138 }, { "epoch": 0.35, "learning_rate": 1.4959071467773467e-05, "loss": 0.329, "step": 4139 }, { "epoch": 0.35, "learning_rate": 1.4956660458154679e-05, "loss": 0.2579, "step": 4140 }, { "epoch": 0.35, "learning_rate": 1.4954249066501e-05, "loss": 0.2817, "step": 4141 }, { "epoch": 0.36, "learning_rate": 1.4951837292998277e-05, "loss": 0.3085, "step": 4142 }, { "epoch": 0.36, "learning_rate": 1.4949425137832406e-05, "loss": 0.2899, "step": 4143 }, { "epoch": 0.36, "learning_rate": 1.4947012601189299e-05, "loss": 0.2602, "step": 4144 }, { "epoch": 0.36, "learning_rate": 1.4944599683254903e-05, "loss": 0.3393, "step": 4145 }, { "epoch": 0.36, "learning_rate": 1.4942186384215198e-05, "loss": 0.2863, "step": 4146 }, { "epoch": 0.36, "learning_rate": 1.4939772704256187e-05, "loss": 0.3138, "step": 4147 }, { "epoch": 0.36, "learning_rate": 1.4937358643563906e-05, "loss": 0.3074, "step": 4148 }, { "epoch": 0.36, "learning_rate": 1.4934944202324413e-05, "loss": 0.3161, "step": 4149 }, { "epoch": 0.36, "learning_rate": 1.4932529380723806e-05, "loss": 0.2722, "step": 4150 }, { "epoch": 0.36, "learning_rate": 1.4930114178948207e-05, "loss": 0.2604, "step": 4151 }, { "epoch": 0.36, "learning_rate": 1.4927698597183768e-05, "loss": 0.3197, "step": 4152 }, { "epoch": 0.36, "learning_rate": 1.4925282635616671e-05, "loss": 0.2986, "step": 4153 }, { "epoch": 0.36, "learning_rate": 1.4922866294433122e-05, "loss": 0.2798, "step": 4154 }, { "epoch": 0.36, "learning_rate": 1.4920449573819366e-05, "loss": 0.2947, "step": 4155 }, { "epoch": 0.36, "learning_rate": 1.491803247396167e-05, "loss": 0.2785, "step": 4156 }, { "epoch": 0.36, "learning_rate": 1.4915614995046329e-05, "loss": 0.3163, "step": 4157 }, { "epoch": 0.36, "learning_rate": 1.4913197137259675e-05, "loss": 0.3738, "step": 4158 }, { "epoch": 0.36, "learning_rate": 1.4910778900788061e-05, "loss": 0.2656, "step": 4159 }, { "epoch": 0.36, "learning_rate": 1.4908360285817875e-05, "loss": 0.3268, "step": 4160 }, { "epoch": 0.36, "learning_rate": 1.490594129253553e-05, "loss": 0.344, "step": 4161 }, { "epoch": 0.36, "learning_rate": 1.4903521921127472e-05, "loss": 0.5894, "step": 4162 }, { "epoch": 0.36, "learning_rate": 1.4901102171780175e-05, "loss": 0.3341, "step": 4163 }, { "epoch": 0.36, "learning_rate": 1.4898682044680135e-05, "loss": 0.2888, "step": 4164 }, { "epoch": 0.36, "learning_rate": 1.4896261540013894e-05, "loss": 0.6628, "step": 4165 }, { "epoch": 0.36, "learning_rate": 1.4893840657968001e-05, "loss": 0.2863, "step": 4166 }, { "epoch": 0.36, "learning_rate": 1.4891419398729057e-05, "loss": 0.2621, "step": 4167 }, { "epoch": 0.36, "learning_rate": 1.488899776248367e-05, "loss": 0.2929, "step": 4168 }, { "epoch": 0.36, "learning_rate": 1.4886575749418494e-05, "loss": 0.3234, "step": 4169 }, { "epoch": 0.36, "learning_rate": 1.4884153359720205e-05, "loss": 0.3334, "step": 4170 }, { "epoch": 0.36, "learning_rate": 1.488173059357551e-05, "loss": 0.269, "step": 4171 }, { "epoch": 0.36, "learning_rate": 1.4879307451171141e-05, "loss": 0.2747, "step": 4172 }, { "epoch": 0.36, "learning_rate": 1.4876883932693864e-05, "loss": 0.3294, "step": 4173 }, { "epoch": 0.36, "learning_rate": 1.4874460038330469e-05, "loss": 0.2809, "step": 4174 }, { "epoch": 0.36, "learning_rate": 1.487203576826778e-05, "loss": 0.296, "step": 4175 }, { "epoch": 0.36, "learning_rate": 1.4869611122692649e-05, "loss": 0.3059, "step": 4176 }, { "epoch": 0.36, "learning_rate": 1.4867186101791951e-05, "loss": 0.3608, "step": 4177 }, { "epoch": 0.36, "learning_rate": 1.48647607057526e-05, "loss": 0.2745, "step": 4178 }, { "epoch": 0.36, "learning_rate": 1.4862334934761533e-05, "loss": 0.3418, "step": 4179 }, { "epoch": 0.36, "learning_rate": 1.485990878900571e-05, "loss": 0.2679, "step": 4180 }, { "epoch": 0.36, "learning_rate": 1.4857482268672136e-05, "loss": 0.2737, "step": 4181 }, { "epoch": 0.36, "learning_rate": 1.4855055373947829e-05, "loss": 0.2919, "step": 4182 }, { "epoch": 0.36, "learning_rate": 1.485262810501984e-05, "loss": 0.3055, "step": 4183 }, { "epoch": 0.36, "learning_rate": 1.4850200462075255e-05, "loss": 0.3151, "step": 4184 }, { "epoch": 0.36, "learning_rate": 1.4847772445301186e-05, "loss": 0.2498, "step": 4185 }, { "epoch": 0.36, "learning_rate": 1.4845344054884772e-05, "loss": 0.2716, "step": 4186 }, { "epoch": 0.36, "learning_rate": 1.4842915291013176e-05, "loss": 0.3465, "step": 4187 }, { "epoch": 0.36, "learning_rate": 1.4840486153873599e-05, "loss": 0.2859, "step": 4188 }, { "epoch": 0.36, "learning_rate": 1.483805664365327e-05, "loss": 0.2778, "step": 4189 }, { "epoch": 0.36, "learning_rate": 1.4835626760539437e-05, "loss": 0.3149, "step": 4190 }, { "epoch": 0.36, "learning_rate": 1.4833196504719389e-05, "loss": 0.308, "step": 4191 }, { "epoch": 0.36, "learning_rate": 1.4830765876380438e-05, "loss": 0.2543, "step": 4192 }, { "epoch": 0.36, "learning_rate": 1.482833487570992e-05, "loss": 0.3069, "step": 4193 }, { "epoch": 0.36, "learning_rate": 1.4825903502895207e-05, "loss": 0.3235, "step": 4194 }, { "epoch": 0.36, "learning_rate": 1.4823471758123697e-05, "loss": 0.2879, "step": 4195 }, { "epoch": 0.36, "learning_rate": 1.482103964158282e-05, "loss": 0.2627, "step": 4196 }, { "epoch": 0.36, "learning_rate": 1.4818607153460025e-05, "loss": 0.2626, "step": 4197 }, { "epoch": 0.36, "learning_rate": 1.4816174293942804e-05, "loss": 0.3358, "step": 4198 }, { "epoch": 0.36, "learning_rate": 1.4813741063218662e-05, "loss": 0.2737, "step": 4199 }, { "epoch": 0.36, "learning_rate": 1.4811307461475151e-05, "loss": 0.3, "step": 4200 }, { "epoch": 0.36, "learning_rate": 1.4808873488899829e-05, "loss": 0.2836, "step": 4201 }, { "epoch": 0.36, "learning_rate": 1.4806439145680298e-05, "loss": 0.2899, "step": 4202 }, { "epoch": 0.36, "learning_rate": 1.4804004432004191e-05, "loss": 0.2928, "step": 4203 }, { "epoch": 0.36, "learning_rate": 1.4801569348059158e-05, "loss": 0.2711, "step": 4204 }, { "epoch": 0.36, "learning_rate": 1.4799133894032887e-05, "loss": 0.3206, "step": 4205 }, { "epoch": 0.36, "learning_rate": 1.4796698070113084e-05, "loss": 0.3285, "step": 4206 }, { "epoch": 0.36, "learning_rate": 1.4794261876487496e-05, "loss": 0.2906, "step": 4207 }, { "epoch": 0.36, "learning_rate": 1.4791825313343896e-05, "loss": 0.3313, "step": 4208 }, { "epoch": 0.36, "learning_rate": 1.4789388380870074e-05, "loss": 0.257, "step": 4209 }, { "epoch": 0.36, "learning_rate": 1.4786951079253861e-05, "loss": 0.3441, "step": 4210 }, { "epoch": 0.36, "learning_rate": 1.4784513408683115e-05, "loss": 0.299, "step": 4211 }, { "epoch": 0.36, "learning_rate": 1.4782075369345715e-05, "loss": 0.2814, "step": 4212 }, { "epoch": 0.36, "learning_rate": 1.4779636961429573e-05, "loss": 0.257, "step": 4213 }, { "epoch": 0.36, "learning_rate": 1.477719818512263e-05, "loss": 0.3127, "step": 4214 }, { "epoch": 0.36, "learning_rate": 1.4774759040612859e-05, "loss": 0.2875, "step": 4215 }, { "epoch": 0.36, "learning_rate": 1.477231952808825e-05, "loss": 0.3006, "step": 4216 }, { "epoch": 0.36, "learning_rate": 1.4769879647736835e-05, "loss": 0.3078, "step": 4217 }, { "epoch": 0.36, "learning_rate": 1.4767439399746666e-05, "loss": 0.255, "step": 4218 }, { "epoch": 0.36, "learning_rate": 1.4764998784305825e-05, "loss": 0.2906, "step": 4219 }, { "epoch": 0.36, "learning_rate": 1.4762557801602422e-05, "loss": 0.2764, "step": 4220 }, { "epoch": 0.36, "learning_rate": 1.476011645182459e-05, "loss": 0.2819, "step": 4221 }, { "epoch": 0.36, "learning_rate": 1.4757674735160512e-05, "loss": 0.2874, "step": 4222 }, { "epoch": 0.36, "learning_rate": 1.4755232651798368e-05, "loss": 0.2423, "step": 4223 }, { "epoch": 0.36, "learning_rate": 1.475279020192639e-05, "loss": 0.3406, "step": 4224 }, { "epoch": 0.36, "learning_rate": 1.4750347385732826e-05, "loss": 0.2884, "step": 4225 }, { "epoch": 0.36, "learning_rate": 1.4747904203405959e-05, "loss": 0.3337, "step": 4226 }, { "epoch": 0.36, "learning_rate": 1.4745460655134091e-05, "loss": 0.2497, "step": 4227 }, { "epoch": 0.36, "learning_rate": 1.474301674110557e-05, "loss": 0.3538, "step": 4228 }, { "epoch": 0.36, "learning_rate": 1.4740572461508753e-05, "loss": 0.3043, "step": 4229 }, { "epoch": 0.36, "learning_rate": 1.4738127816532034e-05, "loss": 0.2672, "step": 4230 }, { "epoch": 0.36, "learning_rate": 1.4735682806363834e-05, "loss": 0.3094, "step": 4231 }, { "epoch": 0.36, "learning_rate": 1.4733237431192604e-05, "loss": 0.2589, "step": 4232 }, { "epoch": 0.36, "learning_rate": 1.4730791691206818e-05, "loss": 0.3208, "step": 4233 }, { "epoch": 0.36, "learning_rate": 1.4728345586594986e-05, "loss": 0.3356, "step": 4234 }, { "epoch": 0.36, "learning_rate": 1.4725899117545638e-05, "loss": 0.2686, "step": 4235 }, { "epoch": 0.36, "learning_rate": 1.4723452284247341e-05, "loss": 0.3402, "step": 4236 }, { "epoch": 0.36, "learning_rate": 1.4721005086888678e-05, "loss": 0.2506, "step": 4237 }, { "epoch": 0.36, "learning_rate": 1.4718557525658272e-05, "loss": 0.3091, "step": 4238 }, { "epoch": 0.36, "learning_rate": 1.4716109600744766e-05, "loss": 0.2957, "step": 4239 }, { "epoch": 0.36, "learning_rate": 1.4713661312336832e-05, "loss": 0.2809, "step": 4240 }, { "epoch": 0.36, "learning_rate": 1.4711212660623181e-05, "loss": 0.2968, "step": 4241 }, { "epoch": 0.36, "learning_rate": 1.4708763645792531e-05, "loss": 0.3277, "step": 4242 }, { "epoch": 0.36, "learning_rate": 1.4706314268033652e-05, "loss": 0.2744, "step": 4243 }, { "epoch": 0.36, "learning_rate": 1.4703864527535321e-05, "loss": 0.3157, "step": 4244 }, { "epoch": 0.36, "learning_rate": 1.4701414424486353e-05, "loss": 0.3535, "step": 4245 }, { "epoch": 0.36, "learning_rate": 1.4698963959075592e-05, "loss": 0.3309, "step": 4246 }, { "epoch": 0.36, "learning_rate": 1.4696513131491907e-05, "loss": 0.2908, "step": 4247 }, { "epoch": 0.36, "learning_rate": 1.4694061941924199e-05, "loss": 0.2975, "step": 4248 }, { "epoch": 0.36, "learning_rate": 1.4691610390561389e-05, "loss": 0.286, "step": 4249 }, { "epoch": 0.36, "learning_rate": 1.4689158477592433e-05, "loss": 0.3047, "step": 4250 }, { "epoch": 0.36, "learning_rate": 1.4686706203206309e-05, "loss": 0.2431, "step": 4251 }, { "epoch": 0.36, "learning_rate": 1.4684253567592029e-05, "loss": 0.3116, "step": 4252 }, { "epoch": 0.36, "learning_rate": 1.4681800570938628e-05, "loss": 0.2908, "step": 4253 }, { "epoch": 0.36, "learning_rate": 1.4679347213435176e-05, "loss": 0.2996, "step": 4254 }, { "epoch": 0.36, "learning_rate": 1.4676893495270762e-05, "loss": 0.2726, "step": 4255 }, { "epoch": 0.36, "learning_rate": 1.4674439416634505e-05, "loss": 0.3203, "step": 4256 }, { "epoch": 0.36, "learning_rate": 1.4671984977715556e-05, "loss": 0.329, "step": 4257 }, { "epoch": 0.36, "learning_rate": 1.4669530178703089e-05, "loss": 0.288, "step": 4258 }, { "epoch": 0.37, "learning_rate": 1.4667075019786306e-05, "loss": 0.2962, "step": 4259 }, { "epoch": 0.37, "learning_rate": 1.4664619501154445e-05, "loss": 0.2955, "step": 4260 }, { "epoch": 0.37, "learning_rate": 1.4662163622996758e-05, "loss": 0.296, "step": 4261 }, { "epoch": 0.37, "learning_rate": 1.465970738550254e-05, "loss": 0.2678, "step": 4262 }, { "epoch": 0.37, "learning_rate": 1.4657250788861099e-05, "loss": 0.2631, "step": 4263 }, { "epoch": 0.37, "learning_rate": 1.4654793833261777e-05, "loss": 0.2744, "step": 4264 }, { "epoch": 0.37, "learning_rate": 1.4652336518893948e-05, "loss": 0.2983, "step": 4265 }, { "epoch": 0.37, "learning_rate": 1.464987884594701e-05, "loss": 0.2576, "step": 4266 }, { "epoch": 0.37, "learning_rate": 1.4647420814610384e-05, "loss": 0.2767, "step": 4267 }, { "epoch": 0.37, "learning_rate": 1.4644962425073526e-05, "loss": 0.3093, "step": 4268 }, { "epoch": 0.37, "learning_rate": 1.4642503677525917e-05, "loss": 0.6454, "step": 4269 }, { "epoch": 0.37, "learning_rate": 1.4640044572157062e-05, "loss": 0.3005, "step": 4270 }, { "epoch": 0.37, "learning_rate": 1.4637585109156498e-05, "loss": 0.254, "step": 4271 }, { "epoch": 0.37, "learning_rate": 1.4635125288713789e-05, "loss": 0.3408, "step": 4272 }, { "epoch": 0.37, "learning_rate": 1.4632665111018525e-05, "loss": 0.3262, "step": 4273 }, { "epoch": 0.37, "learning_rate": 1.4630204576260328e-05, "loss": 0.282, "step": 4274 }, { "epoch": 0.37, "learning_rate": 1.4627743684628838e-05, "loss": 0.278, "step": 4275 }, { "epoch": 0.37, "learning_rate": 1.4625282436313733e-05, "loss": 0.2499, "step": 4276 }, { "epoch": 0.37, "learning_rate": 1.4622820831504712e-05, "loss": 0.2925, "step": 4277 }, { "epoch": 0.37, "learning_rate": 1.46203588703915e-05, "loss": 0.2936, "step": 4278 }, { "epoch": 0.37, "learning_rate": 1.461789655316386e-05, "loss": 0.3112, "step": 4279 }, { "epoch": 0.37, "learning_rate": 1.461543388001157e-05, "loss": 0.2738, "step": 4280 }, { "epoch": 0.37, "learning_rate": 1.4612970851124442e-05, "loss": 0.3252, "step": 4281 }, { "epoch": 0.37, "learning_rate": 1.4610507466692312e-05, "loss": 0.2949, "step": 4282 }, { "epoch": 0.37, "learning_rate": 1.460804372690505e-05, "loss": 0.2961, "step": 4283 }, { "epoch": 0.37, "learning_rate": 1.4605579631952544e-05, "loss": 0.2932, "step": 4284 }, { "epoch": 0.37, "learning_rate": 1.4603115182024721e-05, "loss": 0.2913, "step": 4285 }, { "epoch": 0.37, "learning_rate": 1.4600650377311523e-05, "loss": 0.323, "step": 4286 }, { "epoch": 0.37, "learning_rate": 1.4598185218002925e-05, "loss": 0.2891, "step": 4287 }, { "epoch": 0.37, "learning_rate": 1.4595719704288932e-05, "loss": 0.2695, "step": 4288 }, { "epoch": 0.37, "learning_rate": 1.4593253836359573e-05, "loss": 0.2529, "step": 4289 }, { "epoch": 0.37, "learning_rate": 1.4590787614404902e-05, "loss": 0.3531, "step": 4290 }, { "epoch": 0.37, "learning_rate": 1.4588321038615005e-05, "loss": 0.2762, "step": 4291 }, { "epoch": 0.37, "learning_rate": 1.4585854109179995e-05, "loss": 0.2966, "step": 4292 }, { "epoch": 0.37, "learning_rate": 1.4583386826290013e-05, "loss": 0.2784, "step": 4293 }, { "epoch": 0.37, "learning_rate": 1.4580919190135219e-05, "loss": 0.2909, "step": 4294 }, { "epoch": 0.37, "learning_rate": 1.457845120090581e-05, "loss": 0.2937, "step": 4295 }, { "epoch": 0.37, "learning_rate": 1.4575982858792002e-05, "loss": 0.3417, "step": 4296 }, { "epoch": 0.37, "learning_rate": 1.4573514163984044e-05, "loss": 0.3081, "step": 4297 }, { "epoch": 0.37, "learning_rate": 1.4571045116672219e-05, "loss": 0.2868, "step": 4298 }, { "epoch": 0.37, "learning_rate": 1.4568575717046819e-05, "loss": 0.3148, "step": 4299 }, { "epoch": 0.37, "learning_rate": 1.4566105965298179e-05, "loss": 0.2949, "step": 4300 }, { "epoch": 0.37, "learning_rate": 1.4563635861616652e-05, "loss": 0.3117, "step": 4301 }, { "epoch": 0.37, "learning_rate": 1.4561165406192622e-05, "loss": 0.3089, "step": 4302 }, { "epoch": 0.37, "learning_rate": 1.4558694599216496e-05, "loss": 0.2485, "step": 4303 }, { "epoch": 0.37, "learning_rate": 1.455622344087872e-05, "loss": 0.3022, "step": 4304 }, { "epoch": 0.37, "learning_rate": 1.4553751931369755e-05, "loss": 0.3172, "step": 4305 }, { "epoch": 0.37, "learning_rate": 1.4551280070880089e-05, "loss": 0.2928, "step": 4306 }, { "epoch": 0.37, "learning_rate": 1.4548807859600248e-05, "loss": 0.3112, "step": 4307 }, { "epoch": 0.37, "learning_rate": 1.4546335297720769e-05, "loss": 0.2914, "step": 4308 }, { "epoch": 0.37, "learning_rate": 1.454386238543223e-05, "loss": 0.3488, "step": 4309 }, { "epoch": 0.37, "learning_rate": 1.4541389122925229e-05, "loss": 0.3497, "step": 4310 }, { "epoch": 0.37, "learning_rate": 1.4538915510390397e-05, "loss": 0.2661, "step": 4311 }, { "epoch": 0.37, "learning_rate": 1.4536441548018385e-05, "loss": 0.2687, "step": 4312 }, { "epoch": 0.37, "learning_rate": 1.4533967235999872e-05, "loss": 0.3354, "step": 4313 }, { "epoch": 0.37, "learning_rate": 1.453149257452557e-05, "loss": 0.2734, "step": 4314 }, { "epoch": 0.37, "learning_rate": 1.4529017563786208e-05, "loss": 0.2393, "step": 4315 }, { "epoch": 0.37, "learning_rate": 1.452654220397255e-05, "loss": 0.3446, "step": 4316 }, { "epoch": 0.37, "learning_rate": 1.4524066495275388e-05, "loss": 0.2862, "step": 4317 }, { "epoch": 0.37, "learning_rate": 1.4521590437885533e-05, "loss": 0.2607, "step": 4318 }, { "epoch": 0.37, "learning_rate": 1.451911403199383e-05, "loss": 0.2426, "step": 4319 }, { "epoch": 0.37, "learning_rate": 1.4516637277791149e-05, "loss": 0.3161, "step": 4320 }, { "epoch": 0.37, "learning_rate": 1.4514160175468379e-05, "loss": 0.3187, "step": 4321 }, { "epoch": 0.37, "learning_rate": 1.451168272521645e-05, "loss": 0.2853, "step": 4322 }, { "epoch": 0.37, "learning_rate": 1.4509204927226307e-05, "loss": 0.3265, "step": 4323 }, { "epoch": 0.37, "learning_rate": 1.4506726781688935e-05, "loss": 0.3212, "step": 4324 }, { "epoch": 0.37, "learning_rate": 1.4504248288795328e-05, "loss": 0.2377, "step": 4325 }, { "epoch": 0.37, "learning_rate": 1.450176944873652e-05, "loss": 0.3303, "step": 4326 }, { "epoch": 0.37, "learning_rate": 1.4499290261703565e-05, "loss": 0.3043, "step": 4327 }, { "epoch": 0.37, "learning_rate": 1.4496810727887547e-05, "loss": 0.2609, "step": 4328 }, { "epoch": 0.37, "learning_rate": 1.449433084747958e-05, "loss": 0.2778, "step": 4329 }, { "epoch": 0.37, "learning_rate": 1.4491850620670798e-05, "loss": 0.3074, "step": 4330 }, { "epoch": 0.37, "learning_rate": 1.4489370047652364e-05, "loss": 0.2453, "step": 4331 }, { "epoch": 0.37, "learning_rate": 1.4486889128615472e-05, "loss": 0.2794, "step": 4332 }, { "epoch": 0.37, "learning_rate": 1.4484407863751335e-05, "loss": 0.2837, "step": 4333 }, { "epoch": 0.37, "learning_rate": 1.4481926253251197e-05, "loss": 0.2922, "step": 4334 }, { "epoch": 0.37, "learning_rate": 1.4479444297306326e-05, "loss": 0.2958, "step": 4335 }, { "epoch": 0.37, "learning_rate": 1.4476961996108027e-05, "loss": 0.2794, "step": 4336 }, { "epoch": 0.37, "learning_rate": 1.4474479349847617e-05, "loss": 0.2917, "step": 4337 }, { "epoch": 0.37, "learning_rate": 1.4471996358716451e-05, "loss": 0.311, "step": 4338 }, { "epoch": 0.37, "learning_rate": 1.4469513022905898e-05, "loss": 0.3389, "step": 4339 }, { "epoch": 0.37, "learning_rate": 1.4467029342607368e-05, "loss": 0.3246, "step": 4340 }, { "epoch": 0.37, "learning_rate": 1.4464545318012286e-05, "loss": 0.2924, "step": 4341 }, { "epoch": 0.37, "learning_rate": 1.4462060949312114e-05, "loss": 0.3387, "step": 4342 }, { "epoch": 0.37, "learning_rate": 1.4459576236698331e-05, "loss": 0.2733, "step": 4343 }, { "epoch": 0.37, "learning_rate": 1.4457091180362445e-05, "loss": 0.2754, "step": 4344 }, { "epoch": 0.37, "learning_rate": 1.4454605780495998e-05, "loss": 0.2828, "step": 4345 }, { "epoch": 0.37, "learning_rate": 1.4452120037290547e-05, "loss": 0.3292, "step": 4346 }, { "epoch": 0.37, "learning_rate": 1.4449633950937678e-05, "loss": 0.3133, "step": 4347 }, { "epoch": 0.37, "learning_rate": 1.4447147521629013e-05, "loss": 0.2632, "step": 4348 }, { "epoch": 0.37, "learning_rate": 1.4444660749556192e-05, "loss": 0.2484, "step": 4349 }, { "epoch": 0.37, "learning_rate": 1.4442173634910881e-05, "loss": 0.2896, "step": 4350 }, { "epoch": 0.37, "learning_rate": 1.4439686177884778e-05, "loss": 0.2784, "step": 4351 }, { "epoch": 0.37, "learning_rate": 1.4437198378669598e-05, "loss": 0.2953, "step": 4352 }, { "epoch": 0.37, "learning_rate": 1.4434710237457094e-05, "loss": 0.3451, "step": 4353 }, { "epoch": 0.37, "learning_rate": 1.4432221754439037e-05, "loss": 0.2583, "step": 4354 }, { "epoch": 0.37, "learning_rate": 1.4429732929807227e-05, "loss": 0.2822, "step": 4355 }, { "epoch": 0.37, "learning_rate": 1.4427243763753488e-05, "loss": 0.25, "step": 4356 }, { "epoch": 0.37, "learning_rate": 1.4424754256469681e-05, "loss": 0.2886, "step": 4357 }, { "epoch": 0.37, "learning_rate": 1.4422264408147676e-05, "loss": 0.2718, "step": 4358 }, { "epoch": 0.37, "learning_rate": 1.4419774218979383e-05, "loss": 0.283, "step": 4359 }, { "epoch": 0.37, "learning_rate": 1.4417283689156731e-05, "loss": 0.2921, "step": 4360 }, { "epoch": 0.37, "learning_rate": 1.4414792818871676e-05, "loss": 0.3076, "step": 4361 }, { "epoch": 0.37, "learning_rate": 1.441230160831621e-05, "loss": 0.3334, "step": 4362 }, { "epoch": 0.37, "learning_rate": 1.4409810057682333e-05, "loss": 0.2635, "step": 4363 }, { "epoch": 0.37, "learning_rate": 1.4407318167162092e-05, "loss": 0.3124, "step": 4364 }, { "epoch": 0.37, "learning_rate": 1.4404825936947539e-05, "loss": 0.3156, "step": 4365 }, { "epoch": 0.37, "learning_rate": 1.440233336723077e-05, "loss": 0.3383, "step": 4366 }, { "epoch": 0.37, "learning_rate": 1.4399840458203896e-05, "loss": 0.2731, "step": 4367 }, { "epoch": 0.37, "learning_rate": 1.4397347210059059e-05, "loss": 0.2467, "step": 4368 }, { "epoch": 0.37, "learning_rate": 1.439485362298843e-05, "loss": 0.3231, "step": 4369 }, { "epoch": 0.37, "learning_rate": 1.4392359697184197e-05, "loss": 0.2736, "step": 4370 }, { "epoch": 0.37, "learning_rate": 1.4389865432838583e-05, "loss": 0.3091, "step": 4371 }, { "epoch": 0.37, "learning_rate": 1.4387370830143832e-05, "loss": 0.2951, "step": 4372 }, { "epoch": 0.37, "learning_rate": 1.4384875889292216e-05, "loss": 0.2574, "step": 4373 }, { "epoch": 0.37, "learning_rate": 1.4382380610476032e-05, "loss": 0.3156, "step": 4374 }, { "epoch": 0.38, "learning_rate": 1.4379884993887605e-05, "loss": 0.2957, "step": 4375 }, { "epoch": 0.38, "learning_rate": 1.4377389039719285e-05, "loss": 0.3148, "step": 4376 }, { "epoch": 0.38, "learning_rate": 1.4374892748163447e-05, "loss": 0.3152, "step": 4377 }, { "epoch": 0.38, "learning_rate": 1.4372396119412493e-05, "loss": 0.2703, "step": 4378 }, { "epoch": 0.38, "learning_rate": 1.4369899153658848e-05, "loss": 0.2795, "step": 4379 }, { "epoch": 0.38, "learning_rate": 1.436740185109497e-05, "loss": 0.3353, "step": 4380 }, { "epoch": 0.38, "learning_rate": 1.436490421191334e-05, "loss": 0.3352, "step": 4381 }, { "epoch": 0.38, "learning_rate": 1.436240623630646e-05, "loss": 0.3149, "step": 4382 }, { "epoch": 0.38, "learning_rate": 1.4359907924466863e-05, "loss": 0.2802, "step": 4383 }, { "epoch": 0.38, "learning_rate": 1.4357409276587105e-05, "loss": 0.2801, "step": 4384 }, { "epoch": 0.38, "learning_rate": 1.4354910292859769e-05, "loss": 0.2872, "step": 4385 }, { "epoch": 0.38, "learning_rate": 1.4352410973477466e-05, "loss": 0.2979, "step": 4386 }, { "epoch": 0.38, "learning_rate": 1.4349911318632832e-05, "loss": 0.3075, "step": 4387 }, { "epoch": 0.38, "learning_rate": 1.434741132851853e-05, "loss": 0.278, "step": 4388 }, { "epoch": 0.38, "learning_rate": 1.434491100332724e-05, "loss": 0.2346, "step": 4389 }, { "epoch": 0.38, "learning_rate": 1.4342410343251683e-05, "loss": 0.3038, "step": 4390 }, { "epoch": 0.38, "learning_rate": 1.4339909348484589e-05, "loss": 0.2928, "step": 4391 }, { "epoch": 0.38, "learning_rate": 1.4337408019218728e-05, "loss": 0.2984, "step": 4392 }, { "epoch": 0.38, "learning_rate": 1.4334906355646887e-05, "loss": 0.2754, "step": 4393 }, { "epoch": 0.38, "learning_rate": 1.4332404357961884e-05, "loss": 0.2935, "step": 4394 }, { "epoch": 0.38, "learning_rate": 1.4329902026356564e-05, "loss": 0.2723, "step": 4395 }, { "epoch": 0.38, "learning_rate": 1.4327399361023785e-05, "loss": 0.29, "step": 4396 }, { "epoch": 0.38, "learning_rate": 1.4324896362156451e-05, "loss": 0.2786, "step": 4397 }, { "epoch": 0.38, "learning_rate": 1.432239302994747e-05, "loss": 0.3416, "step": 4398 }, { "epoch": 0.38, "learning_rate": 1.4319889364589794e-05, "loss": 0.3063, "step": 4399 }, { "epoch": 0.38, "learning_rate": 1.4317385366276393e-05, "loss": 0.2725, "step": 4400 }, { "epoch": 0.38, "learning_rate": 1.4314881035200259e-05, "loss": 0.2886, "step": 4401 }, { "epoch": 0.38, "learning_rate": 1.4312376371554417e-05, "loss": 0.291, "step": 4402 }, { "epoch": 0.38, "learning_rate": 1.430987137553191e-05, "loss": 0.2755, "step": 4403 }, { "epoch": 0.38, "learning_rate": 1.4307366047325814e-05, "loss": 0.3079, "step": 4404 }, { "epoch": 0.38, "learning_rate": 1.4304860387129225e-05, "loss": 0.2746, "step": 4405 }, { "epoch": 0.38, "learning_rate": 1.4302354395135269e-05, "loss": 0.2892, "step": 4406 }, { "epoch": 0.38, "learning_rate": 1.4299848071537097e-05, "loss": 0.2817, "step": 4407 }, { "epoch": 0.38, "learning_rate": 1.4297341416527881e-05, "loss": 0.2639, "step": 4408 }, { "epoch": 0.38, "learning_rate": 1.4294834430300822e-05, "loss": 0.2954, "step": 4409 }, { "epoch": 0.38, "learning_rate": 1.4292327113049145e-05, "loss": 0.3021, "step": 4410 }, { "epoch": 0.38, "learning_rate": 1.4289819464966104e-05, "loss": 0.3441, "step": 4411 }, { "epoch": 0.38, "learning_rate": 1.4287311486244975e-05, "loss": 0.3043, "step": 4412 }, { "epoch": 0.38, "learning_rate": 1.428480317707906e-05, "loss": 0.2898, "step": 4413 }, { "epoch": 0.38, "learning_rate": 1.4282294537661692e-05, "loss": 0.3163, "step": 4414 }, { "epoch": 0.38, "learning_rate": 1.4279785568186217e-05, "loss": 0.2556, "step": 4415 }, { "epoch": 0.38, "learning_rate": 1.4277276268846017e-05, "loss": 0.3167, "step": 4416 }, { "epoch": 0.38, "learning_rate": 1.4274766639834498e-05, "loss": 0.2766, "step": 4417 }, { "epoch": 0.38, "learning_rate": 1.4272256681345087e-05, "loss": 0.2863, "step": 4418 }, { "epoch": 0.38, "learning_rate": 1.4269746393571244e-05, "loss": 0.2793, "step": 4419 }, { "epoch": 0.38, "learning_rate": 1.4267235776706445e-05, "loss": 0.3325, "step": 4420 }, { "epoch": 0.38, "learning_rate": 1.4264724830944198e-05, "loss": 0.2736, "step": 4421 }, { "epoch": 0.38, "learning_rate": 1.4262213556478033e-05, "loss": 0.2925, "step": 4422 }, { "epoch": 0.38, "learning_rate": 1.4259701953501509e-05, "loss": 0.2755, "step": 4423 }, { "epoch": 0.38, "learning_rate": 1.4257190022208203e-05, "loss": 0.2681, "step": 4424 }, { "epoch": 0.38, "learning_rate": 1.4254677762791727e-05, "loss": 0.3132, "step": 4425 }, { "epoch": 0.38, "learning_rate": 1.425216517544571e-05, "loss": 0.326, "step": 4426 }, { "epoch": 0.38, "learning_rate": 1.4249652260363815e-05, "loss": 0.6044, "step": 4427 }, { "epoch": 0.38, "learning_rate": 1.4247139017739722e-05, "loss": 0.3224, "step": 4428 }, { "epoch": 0.38, "learning_rate": 1.4244625447767138e-05, "loss": 0.311, "step": 4429 }, { "epoch": 0.38, "learning_rate": 1.4242111550639797e-05, "loss": 0.2689, "step": 4430 }, { "epoch": 0.38, "learning_rate": 1.4239597326551459e-05, "loss": 0.3358, "step": 4431 }, { "epoch": 0.38, "learning_rate": 1.4237082775695907e-05, "loss": 0.3406, "step": 4432 }, { "epoch": 0.38, "learning_rate": 1.4234567898266954e-05, "loss": 0.2959, "step": 4433 }, { "epoch": 0.38, "learning_rate": 1.423205269445843e-05, "loss": 0.3246, "step": 4434 }, { "epoch": 0.38, "learning_rate": 1.422953716446419e-05, "loss": 0.6313, "step": 4435 }, { "epoch": 0.38, "learning_rate": 1.4227021308478129e-05, "loss": 0.3161, "step": 4436 }, { "epoch": 0.38, "learning_rate": 1.4224505126694153e-05, "loss": 0.2886, "step": 4437 }, { "epoch": 0.38, "learning_rate": 1.4221988619306192e-05, "loss": 0.3649, "step": 4438 }, { "epoch": 0.38, "learning_rate": 1.4219471786508212e-05, "loss": 0.2962, "step": 4439 }, { "epoch": 0.38, "learning_rate": 1.4216954628494195e-05, "loss": 0.296, "step": 4440 }, { "epoch": 0.38, "learning_rate": 1.4214437145458153e-05, "loss": 0.2706, "step": 4441 }, { "epoch": 0.38, "learning_rate": 1.4211919337594118e-05, "loss": 0.2719, "step": 4442 }, { "epoch": 0.38, "learning_rate": 1.420940120509615e-05, "loss": 0.2982, "step": 4443 }, { "epoch": 0.38, "learning_rate": 1.4206882748158341e-05, "loss": 0.2864, "step": 4444 }, { "epoch": 0.38, "learning_rate": 1.4204363966974798e-05, "loss": 0.29, "step": 4445 }, { "epoch": 0.38, "learning_rate": 1.420184486173965e-05, "loss": 0.2858, "step": 4446 }, { "epoch": 0.38, "learning_rate": 1.4199325432647067e-05, "loss": 0.2722, "step": 4447 }, { "epoch": 0.38, "learning_rate": 1.4196805679891225e-05, "loss": 0.2625, "step": 4448 }, { "epoch": 0.38, "learning_rate": 1.4194285603666337e-05, "loss": 0.3297, "step": 4449 }, { "epoch": 0.38, "learning_rate": 1.4191765204166643e-05, "loss": 0.2593, "step": 4450 }, { "epoch": 0.38, "learning_rate": 1.4189244481586398e-05, "loss": 0.6046, "step": 4451 }, { "epoch": 0.38, "learning_rate": 1.4186723436119887e-05, "loss": 0.3358, "step": 4452 }, { "epoch": 0.38, "learning_rate": 1.4184202067961422e-05, "loss": 0.2955, "step": 4453 }, { "epoch": 0.38, "learning_rate": 1.4181680377305336e-05, "loss": 0.2835, "step": 4454 }, { "epoch": 0.38, "learning_rate": 1.4179158364345986e-05, "loss": 0.3147, "step": 4455 }, { "epoch": 0.38, "learning_rate": 1.4176636029277764e-05, "loss": 0.3148, "step": 4456 }, { "epoch": 0.38, "learning_rate": 1.4174113372295071e-05, "loss": 0.3585, "step": 4457 }, { "epoch": 0.38, "learning_rate": 1.4171590393592346e-05, "loss": 0.2871, "step": 4458 }, { "epoch": 0.38, "learning_rate": 1.4169067093364047e-05, "loss": 0.2687, "step": 4459 }, { "epoch": 0.38, "learning_rate": 1.4166543471804653e-05, "loss": 0.2679, "step": 4460 }, { "epoch": 0.38, "learning_rate": 1.4164019529108677e-05, "loss": 0.2631, "step": 4461 }, { "epoch": 0.38, "learning_rate": 1.4161495265470649e-05, "loss": 0.2864, "step": 4462 }, { "epoch": 0.38, "learning_rate": 1.415897068108513e-05, "loss": 0.3023, "step": 4463 }, { "epoch": 0.38, "learning_rate": 1.4156445776146703e-05, "loss": 0.2661, "step": 4464 }, { "epoch": 0.38, "learning_rate": 1.415392055084997e-05, "loss": 0.3284, "step": 4465 }, { "epoch": 0.38, "learning_rate": 1.415139500538957e-05, "loss": 0.3329, "step": 4466 }, { "epoch": 0.38, "learning_rate": 1.4148869139960151e-05, "loss": 0.3131, "step": 4467 }, { "epoch": 0.38, "learning_rate": 1.4146342954756402e-05, "loss": 0.253, "step": 4468 }, { "epoch": 0.38, "learning_rate": 1.4143816449973025e-05, "loss": 0.2968, "step": 4469 }, { "epoch": 0.38, "learning_rate": 1.4141289625804748e-05, "loss": 0.3346, "step": 4470 }, { "epoch": 0.38, "learning_rate": 1.4138762482446335e-05, "loss": 0.2781, "step": 4471 }, { "epoch": 0.38, "learning_rate": 1.4136235020092558e-05, "loss": 0.2485, "step": 4472 }, { "epoch": 0.38, "learning_rate": 1.4133707238938222e-05, "loss": 0.2839, "step": 4473 }, { "epoch": 0.38, "learning_rate": 1.4131179139178157e-05, "loss": 0.313, "step": 4474 }, { "epoch": 0.38, "learning_rate": 1.412865072100722e-05, "loss": 0.2728, "step": 4475 }, { "epoch": 0.38, "learning_rate": 1.4126121984620283e-05, "loss": 0.2833, "step": 4476 }, { "epoch": 0.38, "learning_rate": 1.4123592930212251e-05, "loss": 0.2365, "step": 4477 }, { "epoch": 0.38, "learning_rate": 1.4121063557978051e-05, "loss": 0.2928, "step": 4478 }, { "epoch": 0.38, "learning_rate": 1.4118533868112637e-05, "loss": 0.343, "step": 4479 }, { "epoch": 0.38, "learning_rate": 1.411600386081098e-05, "loss": 0.3048, "step": 4480 }, { "epoch": 0.38, "learning_rate": 1.4113473536268083e-05, "loss": 0.2992, "step": 4481 }, { "epoch": 0.38, "learning_rate": 1.4110942894678971e-05, "loss": 0.3042, "step": 4482 }, { "epoch": 0.38, "learning_rate": 1.41084119362387e-05, "loss": 0.2762, "step": 4483 }, { "epoch": 0.38, "learning_rate": 1.4105880661142331e-05, "loss": 0.2833, "step": 4484 }, { "epoch": 0.38, "learning_rate": 1.4103349069584971e-05, "loss": 0.2654, "step": 4485 }, { "epoch": 0.38, "learning_rate": 1.4100817161761738e-05, "loss": 0.3018, "step": 4486 }, { "epoch": 0.38, "learning_rate": 1.409828493786778e-05, "loss": 0.257, "step": 4487 }, { "epoch": 0.38, "learning_rate": 1.409575239809827e-05, "loss": 0.273, "step": 4488 }, { "epoch": 0.38, "learning_rate": 1.4093219542648405e-05, "loss": 0.2712, "step": 4489 }, { "epoch": 0.38, "learning_rate": 1.4090686371713403e-05, "loss": 0.2744, "step": 4490 }, { "epoch": 0.38, "learning_rate": 1.4088152885488504e-05, "loss": 0.2911, "step": 4491 }, { "epoch": 0.39, "learning_rate": 1.4085619084168983e-05, "loss": 0.2342, "step": 4492 }, { "epoch": 0.39, "learning_rate": 1.4083084967950131e-05, "loss": 0.2644, "step": 4493 }, { "epoch": 0.39, "learning_rate": 1.4080550537027264e-05, "loss": 0.2812, "step": 4494 }, { "epoch": 0.39, "learning_rate": 1.4078015791595724e-05, "loss": 0.2603, "step": 4495 }, { "epoch": 0.39, "learning_rate": 1.407548073185088e-05, "loss": 0.2922, "step": 4496 }, { "epoch": 0.39, "learning_rate": 1.4072945357988118e-05, "loss": 0.3272, "step": 4497 }, { "epoch": 0.39, "learning_rate": 1.4070409670202849e-05, "loss": 0.3045, "step": 4498 }, { "epoch": 0.39, "learning_rate": 1.4067873668690517e-05, "loss": 0.288, "step": 4499 }, { "epoch": 0.39, "learning_rate": 1.4065337353646583e-05, "loss": 0.3287, "step": 4500 }, { "epoch": 0.39, "learning_rate": 1.4062800725266532e-05, "loss": 0.2673, "step": 4501 }, { "epoch": 0.39, "learning_rate": 1.406026378374588e-05, "loss": 0.301, "step": 4502 }, { "epoch": 0.39, "learning_rate": 1.4057726529280154e-05, "loss": 0.2833, "step": 4503 }, { "epoch": 0.39, "learning_rate": 1.4055188962064918e-05, "loss": 0.3015, "step": 4504 }, { "epoch": 0.39, "learning_rate": 1.4052651082295754e-05, "loss": 0.2495, "step": 4505 }, { "epoch": 0.39, "learning_rate": 1.405011289016827e-05, "loss": 0.3339, "step": 4506 }, { "epoch": 0.39, "learning_rate": 1.4047574385878095e-05, "loss": 0.2712, "step": 4507 }, { "epoch": 0.39, "learning_rate": 1.4045035569620886e-05, "loss": 0.3035, "step": 4508 }, { "epoch": 0.39, "learning_rate": 1.4042496441592323e-05, "loss": 0.3135, "step": 4509 }, { "epoch": 0.39, "learning_rate": 1.4039957001988112e-05, "loss": 0.3093, "step": 4510 }, { "epoch": 0.39, "learning_rate": 1.4037417251003972e-05, "loss": 0.2986, "step": 4511 }, { "epoch": 0.39, "learning_rate": 1.4034877188835662e-05, "loss": 0.3021, "step": 4512 }, { "epoch": 0.39, "learning_rate": 1.4032336815678957e-05, "loss": 0.3362, "step": 4513 }, { "epoch": 0.39, "learning_rate": 1.4029796131729652e-05, "loss": 0.2817, "step": 4514 }, { "epoch": 0.39, "learning_rate": 1.4027255137183575e-05, "loss": 0.2631, "step": 4515 }, { "epoch": 0.39, "learning_rate": 1.4024713832236571e-05, "loss": 0.353, "step": 4516 }, { "epoch": 0.39, "learning_rate": 1.4022172217084512e-05, "loss": 0.2555, "step": 4517 }, { "epoch": 0.39, "learning_rate": 1.4019630291923289e-05, "loss": 0.2841, "step": 4518 }, { "epoch": 0.39, "learning_rate": 1.4017088056948826e-05, "loss": 0.2895, "step": 4519 }, { "epoch": 0.39, "learning_rate": 1.4014545512357068e-05, "loss": 0.2599, "step": 4520 }, { "epoch": 0.39, "learning_rate": 1.4012002658343976e-05, "loss": 0.2904, "step": 4521 }, { "epoch": 0.39, "learning_rate": 1.4009459495105542e-05, "loss": 0.2791, "step": 4522 }, { "epoch": 0.39, "learning_rate": 1.4006916022837784e-05, "loss": 0.2493, "step": 4523 }, { "epoch": 0.39, "learning_rate": 1.4004372241736736e-05, "loss": 0.2982, "step": 4524 }, { "epoch": 0.39, "learning_rate": 1.4001828151998462e-05, "loss": 0.289, "step": 4525 }, { "epoch": 0.39, "learning_rate": 1.3999283753819047e-05, "loss": 0.3278, "step": 4526 }, { "epoch": 0.39, "learning_rate": 1.3996739047394601e-05, "loss": 0.2567, "step": 4527 }, { "epoch": 0.39, "learning_rate": 1.399419403292126e-05, "loss": 0.2598, "step": 4528 }, { "epoch": 0.39, "learning_rate": 1.3991648710595179e-05, "loss": 0.2757, "step": 4529 }, { "epoch": 0.39, "learning_rate": 1.3989103080612533e-05, "loss": 0.2809, "step": 4530 }, { "epoch": 0.39, "learning_rate": 1.3986557143169539e-05, "loss": 0.2926, "step": 4531 }, { "epoch": 0.39, "learning_rate": 1.3984010898462417e-05, "loss": 0.2939, "step": 4532 }, { "epoch": 0.39, "learning_rate": 1.3981464346687419e-05, "loss": 0.2754, "step": 4533 }, { "epoch": 0.39, "learning_rate": 1.3978917488040822e-05, "loss": 0.2684, "step": 4534 }, { "epoch": 0.39, "learning_rate": 1.3976370322718928e-05, "loss": 0.6372, "step": 4535 }, { "epoch": 0.39, "learning_rate": 1.3973822850918055e-05, "loss": 0.2732, "step": 4536 }, { "epoch": 0.39, "learning_rate": 1.3971275072834552e-05, "loss": 0.2924, "step": 4537 }, { "epoch": 0.39, "learning_rate": 1.3968726988664788e-05, "loss": 0.277, "step": 4538 }, { "epoch": 0.39, "learning_rate": 1.396617859860516e-05, "loss": 0.3034, "step": 4539 }, { "epoch": 0.39, "learning_rate": 1.3963629902852082e-05, "loss": 0.2946, "step": 4540 }, { "epoch": 0.39, "learning_rate": 1.3961080901601996e-05, "loss": 0.2529, "step": 4541 }, { "epoch": 0.39, "learning_rate": 1.3958531595051367e-05, "loss": 0.2889, "step": 4542 }, { "epoch": 0.39, "learning_rate": 1.3955981983396683e-05, "loss": 0.2687, "step": 4543 }, { "epoch": 0.39, "learning_rate": 1.3953432066834454e-05, "loss": 0.282, "step": 4544 }, { "epoch": 0.39, "learning_rate": 1.3950881845561214e-05, "loss": 0.2836, "step": 4545 }, { "epoch": 0.39, "learning_rate": 1.3948331319773525e-05, "loss": 0.3462, "step": 4546 }, { "epoch": 0.39, "learning_rate": 1.3945780489667968e-05, "loss": 0.2766, "step": 4547 }, { "epoch": 0.39, "learning_rate": 1.3943229355441145e-05, "loss": 0.3083, "step": 4548 }, { "epoch": 0.39, "learning_rate": 1.3940677917289689e-05, "loss": 0.3028, "step": 4549 }, { "epoch": 0.39, "learning_rate": 1.393812617541025e-05, "loss": 0.2505, "step": 4550 }, { "epoch": 0.39, "learning_rate": 1.3935574129999504e-05, "loss": 0.2574, "step": 4551 }, { "epoch": 0.39, "learning_rate": 1.3933021781254152e-05, "loss": 0.2752, "step": 4552 }, { "epoch": 0.39, "learning_rate": 1.3930469129370913e-05, "loss": 0.3438, "step": 4553 }, { "epoch": 0.39, "learning_rate": 1.3927916174546536e-05, "loss": 0.2852, "step": 4554 }, { "epoch": 0.39, "learning_rate": 1.3925362916977787e-05, "loss": 0.3031, "step": 4555 }, { "epoch": 0.39, "learning_rate": 1.3922809356861462e-05, "loss": 0.3129, "step": 4556 }, { "epoch": 0.39, "learning_rate": 1.3920255494394373e-05, "loss": 0.3022, "step": 4557 }, { "epoch": 0.39, "learning_rate": 1.3917701329773364e-05, "loss": 0.2629, "step": 4558 }, { "epoch": 0.39, "learning_rate": 1.3915146863195292e-05, "loss": 0.2819, "step": 4559 }, { "epoch": 0.39, "learning_rate": 1.3912592094857044e-05, "loss": 0.2679, "step": 4560 }, { "epoch": 0.39, "learning_rate": 1.3910037024955534e-05, "loss": 0.2628, "step": 4561 }, { "epoch": 0.39, "learning_rate": 1.3907481653687687e-05, "loss": 0.2759, "step": 4562 }, { "epoch": 0.39, "learning_rate": 1.390492598125046e-05, "loss": 0.3445, "step": 4563 }, { "epoch": 0.39, "learning_rate": 1.3902370007840835e-05, "loss": 0.2772, "step": 4564 }, { "epoch": 0.39, "learning_rate": 1.3899813733655814e-05, "loss": 0.3097, "step": 4565 }, { "epoch": 0.39, "learning_rate": 1.389725715889242e-05, "loss": 0.2908, "step": 4566 }, { "epoch": 0.39, "learning_rate": 1.3894700283747697e-05, "loss": 0.3004, "step": 4567 }, { "epoch": 0.39, "learning_rate": 1.3892143108418723e-05, "loss": 0.6012, "step": 4568 }, { "epoch": 0.39, "learning_rate": 1.388958563310259e-05, "loss": 0.3231, "step": 4569 }, { "epoch": 0.39, "learning_rate": 1.3887027857996416e-05, "loss": 0.3204, "step": 4570 }, { "epoch": 0.39, "learning_rate": 1.3884469783297339e-05, "loss": 0.6304, "step": 4571 }, { "epoch": 0.39, "learning_rate": 1.3881911409202525e-05, "loss": 0.255, "step": 4572 }, { "epoch": 0.39, "learning_rate": 1.3879352735909163e-05, "loss": 0.3339, "step": 4573 }, { "epoch": 0.39, "learning_rate": 1.387679376361446e-05, "loss": 0.315, "step": 4574 }, { "epoch": 0.39, "learning_rate": 1.3874234492515649e-05, "loss": 0.3082, "step": 4575 }, { "epoch": 0.39, "learning_rate": 1.3871674922809985e-05, "loss": 0.2715, "step": 4576 }, { "epoch": 0.39, "learning_rate": 1.386911505469475e-05, "loss": 0.2662, "step": 4577 }, { "epoch": 0.39, "learning_rate": 1.3866554888367243e-05, "loss": 0.2839, "step": 4578 }, { "epoch": 0.39, "learning_rate": 1.3863994424024792e-05, "loss": 0.2803, "step": 4579 }, { "epoch": 0.39, "learning_rate": 1.3861433661864744e-05, "loss": 0.2603, "step": 4580 }, { "epoch": 0.39, "learning_rate": 1.3858872602084467e-05, "loss": 0.2668, "step": 4581 }, { "epoch": 0.39, "learning_rate": 1.385631124488136e-05, "loss": 0.2881, "step": 4582 }, { "epoch": 0.39, "learning_rate": 1.3853749590452834e-05, "loss": 0.28, "step": 4583 }, { "epoch": 0.39, "learning_rate": 1.3851187638996331e-05, "loss": 0.2723, "step": 4584 }, { "epoch": 0.39, "learning_rate": 1.3848625390709315e-05, "loss": 0.2834, "step": 4585 }, { "epoch": 0.39, "learning_rate": 1.3846062845789275e-05, "loss": 0.2627, "step": 4586 }, { "epoch": 0.39, "learning_rate": 1.3843500004433708e-05, "loss": 0.3146, "step": 4587 }, { "epoch": 0.39, "learning_rate": 1.3840936866840155e-05, "loss": 0.2851, "step": 4588 }, { "epoch": 0.39, "learning_rate": 1.3838373433206167e-05, "loss": 0.3091, "step": 4589 }, { "epoch": 0.39, "learning_rate": 1.3835809703729322e-05, "loss": 0.3173, "step": 4590 }, { "epoch": 0.39, "learning_rate": 1.3833245678607215e-05, "loss": 0.2961, "step": 4591 }, { "epoch": 0.39, "learning_rate": 1.3830681358037477e-05, "loss": 0.2628, "step": 4592 }, { "epoch": 0.39, "learning_rate": 1.3828116742217744e-05, "loss": 0.3027, "step": 4593 }, { "epoch": 0.39, "learning_rate": 1.3825551831345685e-05, "loss": 0.2852, "step": 4594 }, { "epoch": 0.39, "learning_rate": 1.3822986625618997e-05, "loss": 0.2632, "step": 4595 }, { "epoch": 0.39, "learning_rate": 1.382042112523539e-05, "loss": 0.2593, "step": 4596 }, { "epoch": 0.39, "learning_rate": 1.38178553303926e-05, "loss": 0.3315, "step": 4597 }, { "epoch": 0.39, "learning_rate": 1.3815289241288383e-05, "loss": 0.2971, "step": 4598 }, { "epoch": 0.39, "learning_rate": 1.3812722858120528e-05, "loss": 0.2962, "step": 4599 }, { "epoch": 0.39, "learning_rate": 1.3810156181086832e-05, "loss": 0.2778, "step": 4600 }, { "epoch": 0.39, "learning_rate": 1.3807589210385123e-05, "loss": 0.3069, "step": 4601 }, { "epoch": 0.39, "learning_rate": 1.3805021946213251e-05, "loss": 0.2755, "step": 4602 }, { "epoch": 0.39, "learning_rate": 1.3802454388769091e-05, "loss": 0.3104, "step": 4603 }, { "epoch": 0.39, "learning_rate": 1.3799886538250534e-05, "loss": 0.3015, "step": 4604 }, { "epoch": 0.39, "learning_rate": 1.3797318394855496e-05, "loss": 0.2944, "step": 4605 }, { "epoch": 0.39, "learning_rate": 1.3794749958781924e-05, "loss": 0.2846, "step": 4606 }, { "epoch": 0.39, "learning_rate": 1.3792181230227773e-05, "loss": 0.3088, "step": 4607 }, { "epoch": 0.39, "learning_rate": 1.3789612209391031e-05, "loss": 0.3127, "step": 4608 }, { "epoch": 0.4, "learning_rate": 1.3787042896469705e-05, "loss": 0.2922, "step": 4609 }, { "epoch": 0.4, "learning_rate": 1.3784473291661824e-05, "loss": 0.2703, "step": 4610 }, { "epoch": 0.4, "learning_rate": 1.3781903395165441e-05, "loss": 0.309, "step": 4611 }, { "epoch": 0.4, "learning_rate": 1.3779333207178632e-05, "loss": 0.3019, "step": 4612 }, { "epoch": 0.4, "learning_rate": 1.3776762727899494e-05, "loss": 0.2759, "step": 4613 }, { "epoch": 0.4, "learning_rate": 1.3774191957526144e-05, "loss": 0.2824, "step": 4614 }, { "epoch": 0.4, "learning_rate": 1.3771620896256732e-05, "loss": 0.2808, "step": 4615 }, { "epoch": 0.4, "learning_rate": 1.3769049544289415e-05, "loss": 0.2684, "step": 4616 }, { "epoch": 0.4, "learning_rate": 1.3766477901822379e-05, "loss": 0.2996, "step": 4617 }, { "epoch": 0.4, "learning_rate": 1.3763905969053841e-05, "loss": 0.2628, "step": 4618 }, { "epoch": 0.4, "learning_rate": 1.3761333746182028e-05, "loss": 0.2499, "step": 4619 }, { "epoch": 0.4, "learning_rate": 1.3758761233405195e-05, "loss": 0.2328, "step": 4620 }, { "epoch": 0.4, "learning_rate": 1.3756188430921618e-05, "loss": 0.234, "step": 4621 }, { "epoch": 0.4, "learning_rate": 1.3753615338929598e-05, "loss": 0.35, "step": 4622 }, { "epoch": 0.4, "learning_rate": 1.3751041957627456e-05, "loss": 0.3224, "step": 4623 }, { "epoch": 0.4, "learning_rate": 1.374846828721353e-05, "loss": 0.3289, "step": 4624 }, { "epoch": 0.4, "learning_rate": 1.3745894327886192e-05, "loss": 0.2624, "step": 4625 }, { "epoch": 0.4, "learning_rate": 1.3743320079843828e-05, "loss": 0.2701, "step": 4626 }, { "epoch": 0.4, "learning_rate": 1.3740745543284852e-05, "loss": 0.2567, "step": 4627 }, { "epoch": 0.4, "learning_rate": 1.3738170718407689e-05, "loss": 0.3145, "step": 4628 }, { "epoch": 0.4, "learning_rate": 1.37355956054108e-05, "loss": 0.3188, "step": 4629 }, { "epoch": 0.4, "learning_rate": 1.373302020449266e-05, "loss": 0.3017, "step": 4630 }, { "epoch": 0.4, "learning_rate": 1.3730444515851766e-05, "loss": 0.2667, "step": 4631 }, { "epoch": 0.4, "learning_rate": 1.3727868539686641e-05, "loss": 0.3004, "step": 4632 }, { "epoch": 0.4, "learning_rate": 1.3725292276195832e-05, "loss": 0.3397, "step": 4633 }, { "epoch": 0.4, "learning_rate": 1.3722715725577902e-05, "loss": 0.3406, "step": 4634 }, { "epoch": 0.4, "learning_rate": 1.3720138888031436e-05, "loss": 0.2543, "step": 4635 }, { "epoch": 0.4, "learning_rate": 1.3717561763755045e-05, "loss": 0.2812, "step": 4636 }, { "epoch": 0.4, "learning_rate": 1.3714984352947365e-05, "loss": 0.3057, "step": 4637 }, { "epoch": 0.4, "learning_rate": 1.3712406655807047e-05, "loss": 0.3049, "step": 4638 }, { "epoch": 0.4, "learning_rate": 1.3709828672532766e-05, "loss": 0.2825, "step": 4639 }, { "epoch": 0.4, "learning_rate": 1.3707250403323222e-05, "loss": 0.2918, "step": 4640 }, { "epoch": 0.4, "learning_rate": 1.3704671848377136e-05, "loss": 0.272, "step": 4641 }, { "epoch": 0.4, "learning_rate": 1.3702093007893249e-05, "loss": 0.2676, "step": 4642 }, { "epoch": 0.4, "learning_rate": 1.3699513882070323e-05, "loss": 0.2919, "step": 4643 }, { "epoch": 0.4, "learning_rate": 1.369693447110715e-05, "loss": 0.277, "step": 4644 }, { "epoch": 0.4, "learning_rate": 1.3694354775202534e-05, "loss": 0.6479, "step": 4645 }, { "epoch": 0.4, "learning_rate": 1.3691774794555306e-05, "loss": 0.3091, "step": 4646 }, { "epoch": 0.4, "learning_rate": 1.368919452936432e-05, "loss": 0.309, "step": 4647 }, { "epoch": 0.4, "learning_rate": 1.3686613979828444e-05, "loss": 0.2812, "step": 4648 }, { "epoch": 0.4, "learning_rate": 1.3684033146146585e-05, "loss": 0.3076, "step": 4649 }, { "epoch": 0.4, "learning_rate": 1.368145202851765e-05, "loss": 0.2703, "step": 4650 }, { "epoch": 0.4, "learning_rate": 1.3678870627140585e-05, "loss": 0.2853, "step": 4651 }, { "epoch": 0.4, "learning_rate": 1.3676288942214348e-05, "loss": 0.3292, "step": 4652 }, { "epoch": 0.4, "learning_rate": 1.3673706973937928e-05, "loss": 0.3247, "step": 4653 }, { "epoch": 0.4, "learning_rate": 1.3671124722510325e-05, "loss": 0.2856, "step": 4654 }, { "epoch": 0.4, "learning_rate": 1.3668542188130567e-05, "loss": 0.253, "step": 4655 }, { "epoch": 0.4, "learning_rate": 1.3665959370997706e-05, "loss": 0.2986, "step": 4656 }, { "epoch": 0.4, "learning_rate": 1.3663376271310809e-05, "loss": 0.3129, "step": 4657 }, { "epoch": 0.4, "learning_rate": 1.3660792889268967e-05, "loss": 0.2698, "step": 4658 }, { "epoch": 0.4, "learning_rate": 1.3658209225071301e-05, "loss": 0.2264, "step": 4659 }, { "epoch": 0.4, "learning_rate": 1.3655625278916947e-05, "loss": 0.3402, "step": 4660 }, { "epoch": 0.4, "learning_rate": 1.3653041051005056e-05, "loss": 0.2313, "step": 4661 }, { "epoch": 0.4, "learning_rate": 1.3650456541534811e-05, "loss": 0.2849, "step": 4662 }, { "epoch": 0.4, "learning_rate": 1.3647871750705412e-05, "loss": 0.2709, "step": 4663 }, { "epoch": 0.4, "learning_rate": 1.3645286678716084e-05, "loss": 0.2778, "step": 4664 }, { "epoch": 0.4, "learning_rate": 1.3642701325766073e-05, "loss": 0.2761, "step": 4665 }, { "epoch": 0.4, "learning_rate": 1.364011569205464e-05, "loss": 0.2565, "step": 4666 }, { "epoch": 0.4, "learning_rate": 1.3637529777781077e-05, "loss": 0.2918, "step": 4667 }, { "epoch": 0.4, "learning_rate": 1.3634943583144693e-05, "loss": 0.2708, "step": 4668 }, { "epoch": 0.4, "learning_rate": 1.3632357108344819e-05, "loss": 0.2958, "step": 4669 }, { "epoch": 0.4, "learning_rate": 1.3629770353580804e-05, "loss": 0.2606, "step": 4670 }, { "epoch": 0.4, "learning_rate": 1.3627183319052026e-05, "loss": 0.2998, "step": 4671 }, { "epoch": 0.4, "learning_rate": 1.3624596004957884e-05, "loss": 0.2653, "step": 4672 }, { "epoch": 0.4, "learning_rate": 1.3622008411497787e-05, "loss": 0.2742, "step": 4673 }, { "epoch": 0.4, "learning_rate": 1.361942053887118e-05, "loss": 0.3118, "step": 4674 }, { "epoch": 0.4, "learning_rate": 1.3616832387277525e-05, "loss": 0.239, "step": 4675 }, { "epoch": 0.4, "learning_rate": 1.3614243956916297e-05, "loss": 0.2817, "step": 4676 }, { "epoch": 0.4, "learning_rate": 1.3611655247987004e-05, "loss": 0.2899, "step": 4677 }, { "epoch": 0.4, "learning_rate": 1.360906626068917e-05, "loss": 0.2753, "step": 4678 }, { "epoch": 0.4, "learning_rate": 1.3606476995222344e-05, "loss": 0.2676, "step": 4679 }, { "epoch": 0.4, "learning_rate": 1.3603887451786088e-05, "loss": 0.3101, "step": 4680 }, { "epoch": 0.4, "learning_rate": 1.3601297630579996e-05, "loss": 0.3201, "step": 4681 }, { "epoch": 0.4, "learning_rate": 1.359870753180368e-05, "loss": 0.2611, "step": 4682 }, { "epoch": 0.4, "learning_rate": 1.3596117155656763e-05, "loss": 0.2714, "step": 4683 }, { "epoch": 0.4, "learning_rate": 1.3593526502338909e-05, "loss": 0.2728, "step": 4684 }, { "epoch": 0.4, "learning_rate": 1.3590935572049787e-05, "loss": 0.3516, "step": 4685 }, { "epoch": 0.4, "learning_rate": 1.3588344364989096e-05, "loss": 0.2943, "step": 4686 }, { "epoch": 0.4, "learning_rate": 1.358575288135655e-05, "loss": 0.2857, "step": 4687 }, { "epoch": 0.4, "learning_rate": 1.358316112135189e-05, "loss": 0.3317, "step": 4688 }, { "epoch": 0.4, "learning_rate": 1.3580569085174877e-05, "loss": 0.2747, "step": 4689 }, { "epoch": 0.4, "learning_rate": 1.357797677302529e-05, "loss": 0.2361, "step": 4690 }, { "epoch": 0.4, "learning_rate": 1.3575384185102933e-05, "loss": 0.2667, "step": 4691 }, { "epoch": 0.4, "learning_rate": 1.357279132160763e-05, "loss": 0.2779, "step": 4692 }, { "epoch": 0.4, "learning_rate": 1.3570198182739222e-05, "loss": 0.2538, "step": 4693 }, { "epoch": 0.4, "learning_rate": 1.3567604768697585e-05, "loss": 0.2997, "step": 4694 }, { "epoch": 0.4, "learning_rate": 1.3565011079682597e-05, "loss": 0.2556, "step": 4695 }, { "epoch": 0.4, "learning_rate": 1.356241711589417e-05, "loss": 0.2922, "step": 4696 }, { "epoch": 0.4, "learning_rate": 1.3559822877532234e-05, "loss": 0.6287, "step": 4697 }, { "epoch": 0.4, "learning_rate": 1.3557228364796742e-05, "loss": 0.3558, "step": 4698 }, { "epoch": 0.4, "learning_rate": 1.3554633577887663e-05, "loss": 0.2886, "step": 4699 }, { "epoch": 0.4, "learning_rate": 1.3552038517004991e-05, "loss": 0.3183, "step": 4700 }, { "epoch": 0.4, "learning_rate": 1.3549443182348743e-05, "loss": 0.2421, "step": 4701 }, { "epoch": 0.4, "learning_rate": 1.3546847574118951e-05, "loss": 0.3222, "step": 4702 }, { "epoch": 0.4, "learning_rate": 1.3544251692515675e-05, "loss": 0.3259, "step": 4703 }, { "epoch": 0.4, "learning_rate": 1.3541655537738992e-05, "loss": 0.2913, "step": 4704 }, { "epoch": 0.4, "learning_rate": 1.3539059109988999e-05, "loss": 0.2619, "step": 4705 }, { "epoch": 0.4, "learning_rate": 1.3536462409465816e-05, "loss": 0.3214, "step": 4706 }, { "epoch": 0.4, "learning_rate": 1.3533865436369584e-05, "loss": 0.3145, "step": 4707 }, { "epoch": 0.4, "learning_rate": 1.3531268190900467e-05, "loss": 0.3072, "step": 4708 }, { "epoch": 0.4, "learning_rate": 1.3528670673258645e-05, "loss": 0.269, "step": 4709 }, { "epoch": 0.4, "learning_rate": 1.3526072883644326e-05, "loss": 0.2829, "step": 4710 }, { "epoch": 0.4, "learning_rate": 1.3523474822257729e-05, "loss": 0.3028, "step": 4711 }, { "epoch": 0.4, "learning_rate": 1.3520876489299104e-05, "loss": 0.2947, "step": 4712 }, { "epoch": 0.4, "learning_rate": 1.3518277884968718e-05, "loss": 0.2807, "step": 4713 }, { "epoch": 0.4, "learning_rate": 1.3515679009466856e-05, "loss": 0.3173, "step": 4714 }, { "epoch": 0.4, "learning_rate": 1.3513079862993825e-05, "loss": 0.2651, "step": 4715 }, { "epoch": 0.4, "learning_rate": 1.3510480445749958e-05, "loss": 0.2939, "step": 4716 }, { "epoch": 0.4, "learning_rate": 1.3507880757935605e-05, "loss": 0.3118, "step": 4717 }, { "epoch": 0.4, "learning_rate": 1.3505280799751134e-05, "loss": 0.2829, "step": 4718 }, { "epoch": 0.4, "learning_rate": 1.3502680571396943e-05, "loss": 0.3098, "step": 4719 }, { "epoch": 0.4, "learning_rate": 1.3500080073073436e-05, "loss": 0.3193, "step": 4720 }, { "epoch": 0.4, "learning_rate": 1.3497479304981053e-05, "loss": 0.2904, "step": 4721 }, { "epoch": 0.4, "learning_rate": 1.349487826732025e-05, "loss": 0.2912, "step": 4722 }, { "epoch": 0.4, "learning_rate": 1.3492276960291495e-05, "loss": 0.3058, "step": 4723 }, { "epoch": 0.4, "learning_rate": 1.3489675384095291e-05, "loss": 0.2505, "step": 4724 }, { "epoch": 0.41, "learning_rate": 1.3487073538932149e-05, "loss": 0.3296, "step": 4725 }, { "epoch": 0.41, "learning_rate": 1.348447142500261e-05, "loss": 0.5804, "step": 4726 }, { "epoch": 0.41, "learning_rate": 1.348186904250723e-05, "loss": 0.3107, "step": 4727 }, { "epoch": 0.41, "learning_rate": 1.3479266391646588e-05, "loss": 0.2668, "step": 4728 }, { "epoch": 0.41, "learning_rate": 1.3476663472621286e-05, "loss": 0.2903, "step": 4729 }, { "epoch": 0.41, "learning_rate": 1.347406028563194e-05, "loss": 0.2913, "step": 4730 }, { "epoch": 0.41, "learning_rate": 1.3471456830879195e-05, "loss": 0.3231, "step": 4731 }, { "epoch": 0.41, "learning_rate": 1.3468853108563709e-05, "loss": 0.2511, "step": 4732 }, { "epoch": 0.41, "learning_rate": 1.3466249118886165e-05, "loss": 0.2686, "step": 4733 }, { "epoch": 0.41, "learning_rate": 1.3463644862047267e-05, "loss": 0.2515, "step": 4734 }, { "epoch": 0.41, "learning_rate": 1.3461040338247737e-05, "loss": 0.3395, "step": 4735 }, { "epoch": 0.41, "learning_rate": 1.3458435547688323e-05, "loss": 0.2753, "step": 4736 }, { "epoch": 0.41, "learning_rate": 1.3455830490569782e-05, "loss": 0.2785, "step": 4737 }, { "epoch": 0.41, "learning_rate": 1.3453225167092902e-05, "loss": 0.2432, "step": 4738 }, { "epoch": 0.41, "learning_rate": 1.3450619577458488e-05, "loss": 0.2924, "step": 4739 }, { "epoch": 0.41, "learning_rate": 1.344801372186737e-05, "loss": 0.3099, "step": 4740 }, { "epoch": 0.41, "learning_rate": 1.344540760052039e-05, "loss": 0.299, "step": 4741 }, { "epoch": 0.41, "learning_rate": 1.3442801213618417e-05, "loss": 0.3189, "step": 4742 }, { "epoch": 0.41, "learning_rate": 1.344019456136234e-05, "loss": 0.2726, "step": 4743 }, { "epoch": 0.41, "learning_rate": 1.3437587643953062e-05, "loss": 0.3393, "step": 4744 }, { "epoch": 0.41, "learning_rate": 1.3434980461591514e-05, "loss": 0.2943, "step": 4745 }, { "epoch": 0.41, "learning_rate": 1.3432373014478644e-05, "loss": 0.3002, "step": 4746 }, { "epoch": 0.41, "learning_rate": 1.3429765302815423e-05, "loss": 0.2913, "step": 4747 }, { "epoch": 0.41, "learning_rate": 1.3427157326802843e-05, "loss": 0.2818, "step": 4748 }, { "epoch": 0.41, "learning_rate": 1.3424549086641905e-05, "loss": 0.2925, "step": 4749 }, { "epoch": 0.41, "learning_rate": 1.3421940582533645e-05, "loss": 0.3036, "step": 4750 }, { "epoch": 0.41, "learning_rate": 1.3419331814679115e-05, "loss": 0.3173, "step": 4751 }, { "epoch": 0.41, "learning_rate": 1.3416722783279386e-05, "loss": 0.2527, "step": 4752 }, { "epoch": 0.41, "learning_rate": 1.3414113488535542e-05, "loss": 0.343, "step": 4753 }, { "epoch": 0.41, "learning_rate": 1.3411503930648704e-05, "loss": 0.2633, "step": 4754 }, { "epoch": 0.41, "learning_rate": 1.3408894109820002e-05, "loss": 0.2786, "step": 4755 }, { "epoch": 0.41, "learning_rate": 1.340628402625058e-05, "loss": 0.2847, "step": 4756 }, { "epoch": 0.41, "learning_rate": 1.340367368014162e-05, "loss": 0.2754, "step": 4757 }, { "epoch": 0.41, "learning_rate": 1.3401063071694309e-05, "loss": 0.3082, "step": 4758 }, { "epoch": 0.41, "learning_rate": 1.339845220110986e-05, "loss": 0.3127, "step": 4759 }, { "epoch": 0.41, "learning_rate": 1.3395841068589513e-05, "loss": 0.2743, "step": 4760 }, { "epoch": 0.41, "learning_rate": 1.3393229674334512e-05, "loss": 0.2956, "step": 4761 }, { "epoch": 0.41, "learning_rate": 1.3390618018546135e-05, "loss": 0.3106, "step": 4762 }, { "epoch": 0.41, "learning_rate": 1.3388006101425674e-05, "loss": 0.2979, "step": 4763 }, { "epoch": 0.41, "learning_rate": 1.3385393923174443e-05, "loss": 0.3387, "step": 4764 }, { "epoch": 0.41, "learning_rate": 1.3382781483993777e-05, "loss": 0.3207, "step": 4765 }, { "epoch": 0.41, "learning_rate": 1.3380168784085028e-05, "loss": 0.2396, "step": 4766 }, { "epoch": 0.41, "learning_rate": 1.3377555823649573e-05, "loss": 0.3193, "step": 4767 }, { "epoch": 0.41, "learning_rate": 1.3374942602888803e-05, "loss": 0.3192, "step": 4768 }, { "epoch": 0.41, "learning_rate": 1.3372329122004135e-05, "loss": 0.2903, "step": 4769 }, { "epoch": 0.41, "learning_rate": 1.3369715381197e-05, "loss": 0.3156, "step": 4770 }, { "epoch": 0.41, "learning_rate": 1.3367101380668852e-05, "loss": 0.3001, "step": 4771 }, { "epoch": 0.41, "learning_rate": 1.3364487120621168e-05, "loss": 0.2541, "step": 4772 }, { "epoch": 0.41, "learning_rate": 1.3361872601255441e-05, "loss": 0.3005, "step": 4773 }, { "epoch": 0.41, "learning_rate": 1.3359257822773187e-05, "loss": 0.2616, "step": 4774 }, { "epoch": 0.41, "learning_rate": 1.3356642785375937e-05, "loss": 0.2515, "step": 4775 }, { "epoch": 0.41, "learning_rate": 1.3354027489265246e-05, "loss": 0.2652, "step": 4776 }, { "epoch": 0.41, "learning_rate": 1.335141193464269e-05, "loss": 0.2809, "step": 4777 }, { "epoch": 0.41, "learning_rate": 1.3348796121709862e-05, "loss": 0.2805, "step": 4778 }, { "epoch": 0.41, "learning_rate": 1.3346180050668376e-05, "loss": 0.2776, "step": 4779 }, { "epoch": 0.41, "learning_rate": 1.3343563721719865e-05, "loss": 0.2867, "step": 4780 }, { "epoch": 0.41, "learning_rate": 1.3340947135065986e-05, "loss": 0.3054, "step": 4781 }, { "epoch": 0.41, "learning_rate": 1.3338330290908408e-05, "loss": 0.2798, "step": 4782 }, { "epoch": 0.41, "learning_rate": 1.3335713189448824e-05, "loss": 0.3165, "step": 4783 }, { "epoch": 0.41, "learning_rate": 1.3333095830888954e-05, "loss": 0.2949, "step": 4784 }, { "epoch": 0.41, "learning_rate": 1.3330478215430523e-05, "loss": 0.2692, "step": 4785 }, { "epoch": 0.41, "learning_rate": 1.332786034327529e-05, "loss": 0.317, "step": 4786 }, { "epoch": 0.41, "learning_rate": 1.3325242214625022e-05, "loss": 0.2678, "step": 4787 }, { "epoch": 0.41, "learning_rate": 1.332262382968152e-05, "loss": 0.3068, "step": 4788 }, { "epoch": 0.41, "learning_rate": 1.3320005188646587e-05, "loss": 0.3077, "step": 4789 }, { "epoch": 0.41, "learning_rate": 1.331738629172206e-05, "loss": 0.2463, "step": 4790 }, { "epoch": 0.41, "learning_rate": 1.3314767139109786e-05, "loss": 0.2664, "step": 4791 }, { "epoch": 0.41, "learning_rate": 1.3312147731011642e-05, "loss": 0.3241, "step": 4792 }, { "epoch": 0.41, "learning_rate": 1.3309528067629518e-05, "loss": 0.2742, "step": 4793 }, { "epoch": 0.41, "learning_rate": 1.330690814916532e-05, "loss": 0.2962, "step": 4794 }, { "epoch": 0.41, "learning_rate": 1.3304287975820985e-05, "loss": 0.3509, "step": 4795 }, { "epoch": 0.41, "learning_rate": 1.3301667547798458e-05, "loss": 0.2891, "step": 4796 }, { "epoch": 0.41, "learning_rate": 1.3299046865299713e-05, "loss": 0.2622, "step": 4797 }, { "epoch": 0.41, "learning_rate": 1.3296425928526735e-05, "loss": 0.2841, "step": 4798 }, { "epoch": 0.41, "learning_rate": 1.3293804737681533e-05, "loss": 0.3107, "step": 4799 }, { "epoch": 0.41, "learning_rate": 1.3291183292966141e-05, "loss": 0.3146, "step": 4800 }, { "epoch": 0.41, "learning_rate": 1.3288561594582599e-05, "loss": 0.2556, "step": 4801 }, { "epoch": 0.41, "learning_rate": 1.3285939642732979e-05, "loss": 0.3171, "step": 4802 }, { "epoch": 0.41, "learning_rate": 1.3283317437619369e-05, "loss": 0.27, "step": 4803 }, { "epoch": 0.41, "learning_rate": 1.3280694979443873e-05, "loss": 0.3049, "step": 4804 }, { "epoch": 0.41, "learning_rate": 1.3278072268408621e-05, "loss": 0.2706, "step": 4805 }, { "epoch": 0.41, "learning_rate": 1.3275449304715753e-05, "loss": 0.2852, "step": 4806 }, { "epoch": 0.41, "learning_rate": 1.3272826088567441e-05, "loss": 0.2865, "step": 4807 }, { "epoch": 0.41, "learning_rate": 1.3270202620165861e-05, "loss": 0.3076, "step": 4808 }, { "epoch": 0.41, "learning_rate": 1.3267578899713223e-05, "loss": 0.3184, "step": 4809 }, { "epoch": 0.41, "learning_rate": 1.3264954927411751e-05, "loss": 0.2737, "step": 4810 }, { "epoch": 0.41, "learning_rate": 1.3262330703463682e-05, "loss": 0.291, "step": 4811 }, { "epoch": 0.41, "learning_rate": 1.3259706228071286e-05, "loss": 0.2676, "step": 4812 }, { "epoch": 0.41, "learning_rate": 1.3257081501436839e-05, "loss": 0.313, "step": 4813 }, { "epoch": 0.41, "learning_rate": 1.3254456523762643e-05, "loss": 0.2609, "step": 4814 }, { "epoch": 0.41, "learning_rate": 1.3251831295251019e-05, "loss": 0.2407, "step": 4815 }, { "epoch": 0.41, "learning_rate": 1.3249205816104307e-05, "loss": 0.2679, "step": 4816 }, { "epoch": 0.41, "learning_rate": 1.3246580086524868e-05, "loss": 0.3162, "step": 4817 }, { "epoch": 0.41, "learning_rate": 1.3243954106715074e-05, "loss": 0.3313, "step": 4818 }, { "epoch": 0.41, "learning_rate": 1.3241327876877328e-05, "loss": 0.2509, "step": 4819 }, { "epoch": 0.41, "learning_rate": 1.3238701397214044e-05, "loss": 0.6091, "step": 4820 }, { "epoch": 0.41, "learning_rate": 1.3236074667927659e-05, "loss": 0.3187, "step": 4821 }, { "epoch": 0.41, "learning_rate": 1.3233447689220629e-05, "loss": 0.2988, "step": 4822 }, { "epoch": 0.41, "learning_rate": 1.3230820461295429e-05, "loss": 0.2989, "step": 4823 }, { "epoch": 0.41, "learning_rate": 1.3228192984354552e-05, "loss": 0.2506, "step": 4824 }, { "epoch": 0.41, "learning_rate": 1.3225565258600507e-05, "loss": 0.2597, "step": 4825 }, { "epoch": 0.41, "learning_rate": 1.3222937284235835e-05, "loss": 0.2708, "step": 4826 }, { "epoch": 0.41, "learning_rate": 1.3220309061463081e-05, "loss": 0.3536, "step": 4827 }, { "epoch": 0.41, "learning_rate": 1.3217680590484813e-05, "loss": 0.2786, "step": 4828 }, { "epoch": 0.41, "learning_rate": 1.3215051871503628e-05, "loss": 0.2586, "step": 4829 }, { "epoch": 0.41, "learning_rate": 1.321242290472213e-05, "loss": 0.2776, "step": 4830 }, { "epoch": 0.41, "learning_rate": 1.3209793690342947e-05, "loss": 0.2984, "step": 4831 }, { "epoch": 0.41, "learning_rate": 1.3207164228568725e-05, "loss": 0.2921, "step": 4832 }, { "epoch": 0.41, "learning_rate": 1.3204534519602133e-05, "loss": 0.6091, "step": 4833 }, { "epoch": 0.41, "learning_rate": 1.3201904563645853e-05, "loss": 0.2847, "step": 4834 }, { "epoch": 0.41, "learning_rate": 1.319927436090259e-05, "loss": 0.2813, "step": 4835 }, { "epoch": 0.41, "learning_rate": 1.3196643911575072e-05, "loss": 0.2755, "step": 4836 }, { "epoch": 0.41, "learning_rate": 1.319401321586603e-05, "loss": 0.5737, "step": 4837 }, { "epoch": 0.41, "learning_rate": 1.3191382273978237e-05, "loss": 0.3258, "step": 4838 }, { "epoch": 0.41, "learning_rate": 1.3188751086114464e-05, "loss": 0.2497, "step": 4839 }, { "epoch": 0.41, "learning_rate": 1.3186119652477514e-05, "loss": 0.3023, "step": 4840 }, { "epoch": 0.41, "learning_rate": 1.3183487973270204e-05, "loss": 0.2723, "step": 4841 }, { "epoch": 0.42, "learning_rate": 1.318085604869537e-05, "loss": 0.2432, "step": 4842 }, { "epoch": 0.42, "learning_rate": 1.3178223878955874e-05, "loss": 0.254, "step": 4843 }, { "epoch": 0.42, "learning_rate": 1.3175591464254581e-05, "loss": 0.2863, "step": 4844 }, { "epoch": 0.42, "learning_rate": 1.317295880479439e-05, "loss": 0.336, "step": 4845 }, { "epoch": 0.42, "learning_rate": 1.3170325900778211e-05, "loss": 0.3009, "step": 4846 }, { "epoch": 0.42, "learning_rate": 1.3167692752408978e-05, "loss": 0.3116, "step": 4847 }, { "epoch": 0.42, "learning_rate": 1.3165059359889639e-05, "loss": 0.3154, "step": 4848 }, { "epoch": 0.42, "learning_rate": 1.3162425723423162e-05, "loss": 0.31, "step": 4849 }, { "epoch": 0.42, "learning_rate": 1.3159791843212542e-05, "loss": 0.2816, "step": 4850 }, { "epoch": 0.42, "learning_rate": 1.3157157719460774e-05, "loss": 0.3444, "step": 4851 }, { "epoch": 0.42, "learning_rate": 1.3154523352370894e-05, "loss": 0.6122, "step": 4852 }, { "epoch": 0.42, "learning_rate": 1.3151888742145932e-05, "loss": 0.306, "step": 4853 }, { "epoch": 0.42, "learning_rate": 1.3149253888988967e-05, "loss": 0.2873, "step": 4854 }, { "epoch": 0.42, "learning_rate": 1.3146618793103074e-05, "loss": 0.2795, "step": 4855 }, { "epoch": 0.42, "learning_rate": 1.3143983454691348e-05, "loss": 0.3004, "step": 4856 }, { "epoch": 0.42, "learning_rate": 1.3141347873956915e-05, "loss": 0.2885, "step": 4857 }, { "epoch": 0.42, "learning_rate": 1.3138712051102908e-05, "loss": 0.2515, "step": 4858 }, { "epoch": 0.42, "learning_rate": 1.3136075986332485e-05, "loss": 0.3378, "step": 4859 }, { "epoch": 0.42, "learning_rate": 1.3133439679848824e-05, "loss": 0.277, "step": 4860 }, { "epoch": 0.42, "learning_rate": 1.3130803131855113e-05, "loss": 0.3376, "step": 4861 }, { "epoch": 0.42, "learning_rate": 1.3128166342554567e-05, "loss": 0.297, "step": 4862 }, { "epoch": 0.42, "learning_rate": 1.3125529312150414e-05, "loss": 0.3082, "step": 4863 }, { "epoch": 0.42, "learning_rate": 1.312289204084591e-05, "loss": 0.3073, "step": 4864 }, { "epoch": 0.42, "learning_rate": 1.3120254528844312e-05, "loss": 0.3297, "step": 4865 }, { "epoch": 0.42, "learning_rate": 1.3117616776348915e-05, "loss": 0.2859, "step": 4866 }, { "epoch": 0.42, "learning_rate": 1.3114978783563022e-05, "loss": 0.3062, "step": 4867 }, { "epoch": 0.42, "learning_rate": 1.3112340550689955e-05, "loss": 0.2731, "step": 4868 }, { "epoch": 0.42, "learning_rate": 1.310970207793306e-05, "loss": 0.2795, "step": 4869 }, { "epoch": 0.42, "learning_rate": 1.3107063365495692e-05, "loss": 0.2925, "step": 4870 }, { "epoch": 0.42, "learning_rate": 1.3104424413581231e-05, "loss": 0.2915, "step": 4871 }, { "epoch": 0.42, "learning_rate": 1.3101785222393075e-05, "loss": 0.2886, "step": 4872 }, { "epoch": 0.42, "learning_rate": 1.3099145792134642e-05, "loss": 0.3122, "step": 4873 }, { "epoch": 0.42, "learning_rate": 1.3096506123009368e-05, "loss": 0.2537, "step": 4874 }, { "epoch": 0.42, "learning_rate": 1.3093866215220698e-05, "loss": 0.2968, "step": 4875 }, { "epoch": 0.42, "learning_rate": 1.309122606897211e-05, "loss": 0.3199, "step": 4876 }, { "epoch": 0.42, "learning_rate": 1.3088585684467088e-05, "loss": 0.2534, "step": 4877 }, { "epoch": 0.42, "learning_rate": 1.3085945061909144e-05, "loss": 0.2666, "step": 4878 }, { "epoch": 0.42, "learning_rate": 1.3083304201501803e-05, "loss": 0.2632, "step": 4879 }, { "epoch": 0.42, "learning_rate": 1.3080663103448607e-05, "loss": 0.2697, "step": 4880 }, { "epoch": 0.42, "learning_rate": 1.3078021767953125e-05, "loss": 0.256, "step": 4881 }, { "epoch": 0.42, "learning_rate": 1.3075380195218931e-05, "loss": 0.2749, "step": 4882 }, { "epoch": 0.42, "learning_rate": 1.307273838544963e-05, "loss": 0.2484, "step": 4883 }, { "epoch": 0.42, "learning_rate": 1.3070096338848835e-05, "loss": 0.316, "step": 4884 }, { "epoch": 0.42, "learning_rate": 1.3067454055620184e-05, "loss": 0.2657, "step": 4885 }, { "epoch": 0.42, "learning_rate": 1.306481153596733e-05, "loss": 0.2841, "step": 4886 }, { "epoch": 0.42, "learning_rate": 1.3062168780093949e-05, "loss": 0.2512, "step": 4887 }, { "epoch": 0.42, "learning_rate": 1.3059525788203728e-05, "loss": 0.2742, "step": 4888 }, { "epoch": 0.42, "learning_rate": 1.3056882560500378e-05, "loss": 0.2917, "step": 4889 }, { "epoch": 0.42, "learning_rate": 1.3054239097187625e-05, "loss": 0.2507, "step": 4890 }, { "epoch": 0.42, "learning_rate": 1.305159539846921e-05, "loss": 0.3232, "step": 4891 }, { "epoch": 0.42, "learning_rate": 1.3048951464548902e-05, "loss": 0.6477, "step": 4892 }, { "epoch": 0.42, "learning_rate": 1.3046307295630482e-05, "loss": 0.295, "step": 4893 }, { "epoch": 0.42, "learning_rate": 1.3043662891917748e-05, "loss": 0.2845, "step": 4894 }, { "epoch": 0.42, "learning_rate": 1.3041018253614518e-05, "loss": 0.3442, "step": 4895 }, { "epoch": 0.42, "learning_rate": 1.3038373380924623e-05, "loss": 0.3137, "step": 4896 }, { "epoch": 0.42, "learning_rate": 1.3035728274051924e-05, "loss": 0.2985, "step": 4897 }, { "epoch": 0.42, "learning_rate": 1.3033082933200287e-05, "loss": 0.2945, "step": 4898 }, { "epoch": 0.42, "learning_rate": 1.3030437358573606e-05, "loss": 0.2918, "step": 4899 }, { "epoch": 0.42, "learning_rate": 1.302779155037579e-05, "loss": 0.3008, "step": 4900 }, { "epoch": 0.42, "learning_rate": 1.302514550881076e-05, "loss": 0.2714, "step": 4901 }, { "epoch": 0.42, "learning_rate": 1.3022499234082463e-05, "loss": 0.3484, "step": 4902 }, { "epoch": 0.42, "learning_rate": 1.3019852726394857e-05, "loss": 0.3029, "step": 4903 }, { "epoch": 0.42, "learning_rate": 1.3017205985951926e-05, "loss": 0.2746, "step": 4904 }, { "epoch": 0.42, "learning_rate": 1.3014559012957665e-05, "loss": 0.2798, "step": 4905 }, { "epoch": 0.42, "learning_rate": 1.3011911807616091e-05, "loss": 0.2724, "step": 4906 }, { "epoch": 0.42, "learning_rate": 1.3009264370131239e-05, "loss": 0.3561, "step": 4907 }, { "epoch": 0.42, "learning_rate": 1.3006616700707156e-05, "loss": 0.2535, "step": 4908 }, { "epoch": 0.42, "learning_rate": 1.3003968799547915e-05, "loss": 0.291, "step": 4909 }, { "epoch": 0.42, "learning_rate": 1.30013206668576e-05, "loss": 0.3071, "step": 4910 }, { "epoch": 0.42, "learning_rate": 1.2998672302840318e-05, "loss": 0.2776, "step": 4911 }, { "epoch": 0.42, "learning_rate": 1.2996023707700197e-05, "loss": 0.2747, "step": 4912 }, { "epoch": 0.42, "learning_rate": 1.2993374881641367e-05, "loss": 0.2949, "step": 4913 }, { "epoch": 0.42, "learning_rate": 1.2990725824867995e-05, "loss": 0.3315, "step": 4914 }, { "epoch": 0.42, "learning_rate": 1.2988076537584254e-05, "loss": 0.2838, "step": 4915 }, { "epoch": 0.42, "learning_rate": 1.2985427019994335e-05, "loss": 0.2912, "step": 4916 }, { "epoch": 0.42, "learning_rate": 1.2982777272302454e-05, "loss": 0.3068, "step": 4917 }, { "epoch": 0.42, "learning_rate": 1.2980127294712839e-05, "loss": 0.6093, "step": 4918 }, { "epoch": 0.42, "learning_rate": 1.2977477087429739e-05, "loss": 0.2781, "step": 4919 }, { "epoch": 0.42, "learning_rate": 1.2974826650657418e-05, "loss": 0.2736, "step": 4920 }, { "epoch": 0.42, "learning_rate": 1.2972175984600157e-05, "loss": 0.3271, "step": 4921 }, { "epoch": 0.42, "learning_rate": 1.2969525089462253e-05, "loss": 0.3005, "step": 4922 }, { "epoch": 0.42, "learning_rate": 1.2966873965448032e-05, "loss": 0.2636, "step": 4923 }, { "epoch": 0.42, "learning_rate": 1.2964222612761825e-05, "loss": 0.2827, "step": 4924 }, { "epoch": 0.42, "learning_rate": 1.2961571031607985e-05, "loss": 0.3504, "step": 4925 }, { "epoch": 0.42, "learning_rate": 1.2958919222190885e-05, "loss": 0.3324, "step": 4926 }, { "epoch": 0.42, "learning_rate": 1.295626718471491e-05, "loss": 0.3079, "step": 4927 }, { "epoch": 0.42, "learning_rate": 1.295361491938447e-05, "loss": 0.2971, "step": 4928 }, { "epoch": 0.42, "learning_rate": 1.2950962426403981e-05, "loss": 0.3065, "step": 4929 }, { "epoch": 0.42, "learning_rate": 1.2948309705977893e-05, "loss": 0.6127, "step": 4930 }, { "epoch": 0.42, "learning_rate": 1.2945656758310663e-05, "loss": 0.2986, "step": 4931 }, { "epoch": 0.42, "learning_rate": 1.294300358360676e-05, "loss": 0.3096, "step": 4932 }, { "epoch": 0.42, "learning_rate": 1.294035018207069e-05, "loss": 0.3211, "step": 4933 }, { "epoch": 0.42, "learning_rate": 1.2937696553906949e-05, "loss": 0.3181, "step": 4934 }, { "epoch": 0.42, "learning_rate": 1.2935042699320075e-05, "loss": 0.2803, "step": 4935 }, { "epoch": 0.42, "learning_rate": 1.2932388618514616e-05, "loss": 0.2709, "step": 4936 }, { "epoch": 0.42, "learning_rate": 1.2929734311695125e-05, "loss": 0.2866, "step": 4937 }, { "epoch": 0.42, "learning_rate": 1.2927079779066196e-05, "loss": 0.3434, "step": 4938 }, { "epoch": 0.42, "learning_rate": 1.2924425020832419e-05, "loss": 0.3034, "step": 4939 }, { "epoch": 0.42, "learning_rate": 1.292177003719841e-05, "loss": 0.2884, "step": 4940 }, { "epoch": 0.42, "learning_rate": 1.2919114828368806e-05, "loss": 0.2755, "step": 4941 }, { "epoch": 0.42, "learning_rate": 1.291645939454825e-05, "loss": 0.2856, "step": 4942 }, { "epoch": 0.42, "learning_rate": 1.2913803735941417e-05, "loss": 0.2612, "step": 4943 }, { "epoch": 0.42, "learning_rate": 1.291114785275299e-05, "loss": 0.3066, "step": 4944 }, { "epoch": 0.42, "learning_rate": 1.2908491745187673e-05, "loss": 0.269, "step": 4945 }, { "epoch": 0.42, "learning_rate": 1.290583541345018e-05, "loss": 0.3151, "step": 4946 }, { "epoch": 0.42, "learning_rate": 1.2903178857745256e-05, "loss": 0.3021, "step": 4947 }, { "epoch": 0.42, "learning_rate": 1.2900522078277645e-05, "loss": 0.3053, "step": 4948 }, { "epoch": 0.42, "learning_rate": 1.2897865075252125e-05, "loss": 0.2658, "step": 4949 }, { "epoch": 0.42, "learning_rate": 1.2895207848873488e-05, "loss": 0.2978, "step": 4950 }, { "epoch": 0.42, "learning_rate": 1.2892550399346533e-05, "loss": 0.2809, "step": 4951 }, { "epoch": 0.42, "learning_rate": 1.2889892726876085e-05, "loss": 0.2707, "step": 4952 }, { "epoch": 0.42, "learning_rate": 1.2887234831666987e-05, "loss": 0.299, "step": 4953 }, { "epoch": 0.42, "learning_rate": 1.2884576713924093e-05, "loss": 0.3063, "step": 4954 }, { "epoch": 0.42, "learning_rate": 1.2881918373852278e-05, "loss": 0.2911, "step": 4955 }, { "epoch": 0.42, "learning_rate": 1.2879259811656435e-05, "loss": 0.2944, "step": 4956 }, { "epoch": 0.42, "learning_rate": 1.2876601027541475e-05, "loss": 0.3289, "step": 4957 }, { "epoch": 0.42, "learning_rate": 1.287394202171232e-05, "loss": 0.2614, "step": 4958 }, { "epoch": 0.43, "learning_rate": 1.2871282794373916e-05, "loss": 0.2828, "step": 4959 }, { "epoch": 0.43, "learning_rate": 1.286862334573122e-05, "loss": 0.3165, "step": 4960 }, { "epoch": 0.43, "learning_rate": 1.286596367598921e-05, "loss": 0.238, "step": 4961 }, { "epoch": 0.43, "learning_rate": 1.2863303785352883e-05, "loss": 0.2675, "step": 4962 }, { "epoch": 0.43, "learning_rate": 1.2860643674027246e-05, "loss": 0.2759, "step": 4963 }, { "epoch": 0.43, "learning_rate": 1.2857983342217333e-05, "loss": 0.2704, "step": 4964 }, { "epoch": 0.43, "learning_rate": 1.2855322790128182e-05, "loss": 0.3199, "step": 4965 }, { "epoch": 0.43, "learning_rate": 1.2852662017964863e-05, "loss": 0.2951, "step": 4966 }, { "epoch": 0.43, "learning_rate": 1.2850001025932444e-05, "loss": 0.2819, "step": 4967 }, { "epoch": 0.43, "learning_rate": 1.2847339814236033e-05, "loss": 0.2407, "step": 4968 }, { "epoch": 0.43, "learning_rate": 1.284467838308074e-05, "loss": 0.2733, "step": 4969 }, { "epoch": 0.43, "learning_rate": 1.2842016732671689e-05, "loss": 0.2523, "step": 4970 }, { "epoch": 0.43, "learning_rate": 1.2839354863214035e-05, "loss": 0.2656, "step": 4971 }, { "epoch": 0.43, "learning_rate": 1.2836692774912935e-05, "loss": 0.2934, "step": 4972 }, { "epoch": 0.43, "learning_rate": 1.2834030467973572e-05, "loss": 0.3209, "step": 4973 }, { "epoch": 0.43, "learning_rate": 1.2831367942601146e-05, "loss": 0.2899, "step": 4974 }, { "epoch": 0.43, "learning_rate": 1.2828705199000869e-05, "loss": 0.2903, "step": 4975 }, { "epoch": 0.43, "learning_rate": 1.2826042237377972e-05, "loss": 0.3071, "step": 4976 }, { "epoch": 0.43, "learning_rate": 1.2823379057937702e-05, "loss": 0.3071, "step": 4977 }, { "epoch": 0.43, "learning_rate": 1.2820715660885328e-05, "loss": 0.2733, "step": 4978 }, { "epoch": 0.43, "learning_rate": 1.2818052046426125e-05, "loss": 0.3013, "step": 4979 }, { "epoch": 0.43, "learning_rate": 1.2815388214765397e-05, "loss": 0.2896, "step": 4980 }, { "epoch": 0.43, "learning_rate": 1.2812724166108454e-05, "loss": 0.6068, "step": 4981 }, { "epoch": 0.43, "learning_rate": 1.281005990066063e-05, "loss": 0.2501, "step": 4982 }, { "epoch": 0.43, "learning_rate": 1.2807395418627278e-05, "loss": 0.3109, "step": 4983 }, { "epoch": 0.43, "learning_rate": 1.2804730720213756e-05, "loss": 0.3362, "step": 4984 }, { "epoch": 0.43, "learning_rate": 1.280206580562545e-05, "loss": 0.2627, "step": 4985 }, { "epoch": 0.43, "learning_rate": 1.2799400675067754e-05, "loss": 0.3087, "step": 4986 }, { "epoch": 0.43, "learning_rate": 1.2796735328746089e-05, "loss": 0.2274, "step": 4987 }, { "epoch": 0.43, "learning_rate": 1.2794069766865884e-05, "loss": 0.2737, "step": 4988 }, { "epoch": 0.43, "learning_rate": 1.2791403989632586e-05, "loss": 0.3082, "step": 4989 }, { "epoch": 0.43, "learning_rate": 1.2788737997251665e-05, "loss": 0.2284, "step": 4990 }, { "epoch": 0.43, "learning_rate": 1.2786071789928593e-05, "loss": 0.3211, "step": 4991 }, { "epoch": 0.43, "learning_rate": 1.2783405367868878e-05, "loss": 0.3515, "step": 4992 }, { "epoch": 0.43, "learning_rate": 1.2780738731278028e-05, "loss": 0.2344, "step": 4993 }, { "epoch": 0.43, "learning_rate": 1.2778071880361577e-05, "loss": 0.2803, "step": 4994 }, { "epoch": 0.43, "learning_rate": 1.2775404815325074e-05, "loss": 0.2524, "step": 4995 }, { "epoch": 0.43, "learning_rate": 1.277273753637408e-05, "loss": 0.3319, "step": 4996 }, { "epoch": 0.43, "learning_rate": 1.277007004371418e-05, "loss": 0.2653, "step": 4997 }, { "epoch": 0.43, "learning_rate": 1.2767402337550966e-05, "loss": 0.3116, "step": 4998 }, { "epoch": 0.43, "learning_rate": 1.2764734418090052e-05, "loss": 0.3344, "step": 4999 }, { "epoch": 0.43, "learning_rate": 1.2762066285537071e-05, "loss": 0.3002, "step": 5000 }, { "epoch": 0.43, "learning_rate": 1.275939794009767e-05, "loss": 0.267, "step": 5001 }, { "epoch": 0.43, "learning_rate": 1.275672938197751e-05, "loss": 0.2889, "step": 5002 }, { "epoch": 0.43, "learning_rate": 1.275406061138227e-05, "loss": 0.2973, "step": 5003 }, { "epoch": 0.43, "learning_rate": 1.2751391628517649e-05, "loss": 0.3354, "step": 5004 }, { "epoch": 0.43, "learning_rate": 1.2748722433589351e-05, "loss": 0.3162, "step": 5005 }, { "epoch": 0.43, "learning_rate": 1.2746053026803114e-05, "loss": 0.2952, "step": 5006 }, { "epoch": 0.43, "learning_rate": 1.2743383408364678e-05, "loss": 0.3312, "step": 5007 }, { "epoch": 0.43, "learning_rate": 1.2740713578479802e-05, "loss": 0.2976, "step": 5008 }, { "epoch": 0.43, "learning_rate": 1.2738043537354268e-05, "loss": 0.2833, "step": 5009 }, { "epoch": 0.43, "learning_rate": 1.2735373285193867e-05, "loss": 0.2736, "step": 5010 }, { "epoch": 0.43, "learning_rate": 1.2732702822204406e-05, "loss": 0.2896, "step": 5011 }, { "epoch": 0.43, "learning_rate": 1.2730032148591716e-05, "loss": 0.6, "step": 5012 }, { "epoch": 0.43, "learning_rate": 1.2727361264561637e-05, "loss": 0.3231, "step": 5013 }, { "epoch": 0.43, "learning_rate": 1.2724690170320031e-05, "loss": 0.2505, "step": 5014 }, { "epoch": 0.43, "learning_rate": 1.2722018866072768e-05, "loss": 0.2782, "step": 5015 }, { "epoch": 0.43, "learning_rate": 1.2719347352025741e-05, "loss": 0.2721, "step": 5016 }, { "epoch": 0.43, "learning_rate": 1.2716675628384856e-05, "loss": 0.3256, "step": 5017 }, { "epoch": 0.43, "learning_rate": 1.2714003695356037e-05, "loss": 0.3231, "step": 5018 }, { "epoch": 0.43, "learning_rate": 1.2711331553145223e-05, "loss": 0.2431, "step": 5019 }, { "epoch": 0.43, "learning_rate": 1.270865920195837e-05, "loss": 0.3104, "step": 5020 }, { "epoch": 0.43, "learning_rate": 1.2705986642001451e-05, "loss": 0.2838, "step": 5021 }, { "epoch": 0.43, "learning_rate": 1.2703313873480451e-05, "loss": 0.3663, "step": 5022 }, { "epoch": 0.43, "learning_rate": 1.2700640896601377e-05, "loss": 0.3014, "step": 5023 }, { "epoch": 0.43, "learning_rate": 1.2697967711570243e-05, "loss": 0.3015, "step": 5024 }, { "epoch": 0.43, "learning_rate": 1.2695294318593092e-05, "loss": 0.2964, "step": 5025 }, { "epoch": 0.43, "learning_rate": 1.2692620717875972e-05, "loss": 0.2776, "step": 5026 }, { "epoch": 0.43, "learning_rate": 1.2689946909624951e-05, "loss": 0.3105, "step": 5027 }, { "epoch": 0.43, "learning_rate": 1.2687272894046116e-05, "loss": 0.251, "step": 5028 }, { "epoch": 0.43, "learning_rate": 1.2684598671345563e-05, "loss": 0.2552, "step": 5029 }, { "epoch": 0.43, "learning_rate": 1.2681924241729409e-05, "loss": 0.2737, "step": 5030 }, { "epoch": 0.43, "learning_rate": 1.2679249605403786e-05, "loss": 0.2887, "step": 5031 }, { "epoch": 0.43, "learning_rate": 1.2676574762574842e-05, "loss": 0.2668, "step": 5032 }, { "epoch": 0.43, "learning_rate": 1.2673899713448743e-05, "loss": 0.288, "step": 5033 }, { "epoch": 0.43, "learning_rate": 1.2671224458231664e-05, "loss": 0.3062, "step": 5034 }, { "epoch": 0.43, "learning_rate": 1.2668548997129807e-05, "loss": 0.2898, "step": 5035 }, { "epoch": 0.43, "learning_rate": 1.2665873330349371e-05, "loss": 0.2919, "step": 5036 }, { "epoch": 0.43, "learning_rate": 1.2663197458096597e-05, "loss": 0.2697, "step": 5037 }, { "epoch": 0.43, "learning_rate": 1.266052138057772e-05, "loss": 0.2736, "step": 5038 }, { "epoch": 0.43, "learning_rate": 1.2657845097999e-05, "loss": 0.2621, "step": 5039 }, { "epoch": 0.43, "learning_rate": 1.2655168610566715e-05, "loss": 0.287, "step": 5040 }, { "epoch": 0.43, "learning_rate": 1.265249191848715e-05, "loss": 0.2751, "step": 5041 }, { "epoch": 0.43, "learning_rate": 1.264981502196662e-05, "loss": 0.311, "step": 5042 }, { "epoch": 0.43, "learning_rate": 1.2647137921211435e-05, "loss": 0.2762, "step": 5043 }, { "epoch": 0.43, "learning_rate": 1.264446061642794e-05, "loss": 0.3188, "step": 5044 }, { "epoch": 0.43, "learning_rate": 1.2641783107822491e-05, "loss": 0.2769, "step": 5045 }, { "epoch": 0.43, "learning_rate": 1.2639105395601452e-05, "loss": 0.297, "step": 5046 }, { "epoch": 0.43, "learning_rate": 1.263642747997121e-05, "loss": 0.2922, "step": 5047 }, { "epoch": 0.43, "learning_rate": 1.2633749361138162e-05, "loss": 0.2924, "step": 5048 }, { "epoch": 0.43, "learning_rate": 1.2631071039308728e-05, "loss": 0.2505, "step": 5049 }, { "epoch": 0.43, "learning_rate": 1.2628392514689339e-05, "loss": 0.317, "step": 5050 }, { "epoch": 0.43, "learning_rate": 1.2625713787486442e-05, "loss": 0.2849, "step": 5051 }, { "epoch": 0.43, "learning_rate": 1.2623034857906501e-05, "loss": 0.2952, "step": 5052 }, { "epoch": 0.43, "learning_rate": 1.2620355726155995e-05, "loss": 0.5823, "step": 5053 }, { "epoch": 0.43, "learning_rate": 1.2617676392441419e-05, "loss": 0.3296, "step": 5054 }, { "epoch": 0.43, "learning_rate": 1.2614996856969275e-05, "loss": 0.2885, "step": 5055 }, { "epoch": 0.43, "learning_rate": 1.2612317119946099e-05, "loss": 0.2552, "step": 5056 }, { "epoch": 0.43, "learning_rate": 1.2609637181578424e-05, "loss": 0.3136, "step": 5057 }, { "epoch": 0.43, "learning_rate": 1.260695704207281e-05, "loss": 0.3046, "step": 5058 }, { "epoch": 0.43, "learning_rate": 1.2604276701635832e-05, "loss": 0.2936, "step": 5059 }, { "epoch": 0.43, "learning_rate": 1.260159616047407e-05, "loss": 0.325, "step": 5060 }, { "epoch": 0.43, "learning_rate": 1.2598915418794136e-05, "loss": 0.3215, "step": 5061 }, { "epoch": 0.43, "learning_rate": 1.2596234476802636e-05, "loss": 0.2955, "step": 5062 }, { "epoch": 0.43, "learning_rate": 1.2593553334706212e-05, "loss": 0.2825, "step": 5063 }, { "epoch": 0.43, "learning_rate": 1.2590871992711517e-05, "loss": 0.3372, "step": 5064 }, { "epoch": 0.43, "learning_rate": 1.2588190451025209e-05, "loss": 0.2682, "step": 5065 }, { "epoch": 0.43, "learning_rate": 1.2585508709853971e-05, "loss": 0.2754, "step": 5066 }, { "epoch": 0.43, "learning_rate": 1.2582826769404492e-05, "loss": 0.3116, "step": 5067 }, { "epoch": 0.43, "learning_rate": 1.2580144629883494e-05, "loss": 0.3187, "step": 5068 }, { "epoch": 0.43, "learning_rate": 1.257746229149769e-05, "loss": 0.3624, "step": 5069 }, { "epoch": 0.43, "learning_rate": 1.2574779754453831e-05, "loss": 0.3005, "step": 5070 }, { "epoch": 0.43, "learning_rate": 1.2572097018958674e-05, "loss": 0.3066, "step": 5071 }, { "epoch": 0.43, "learning_rate": 1.2569414085218986e-05, "loss": 0.2709, "step": 5072 }, { "epoch": 0.43, "learning_rate": 1.2566730953441554e-05, "loss": 0.2873, "step": 5073 }, { "epoch": 0.43, "learning_rate": 1.2564047623833186e-05, "loss": 0.5846, "step": 5074 }, { "epoch": 0.44, "learning_rate": 1.2561364096600694e-05, "loss": 0.2693, "step": 5075 }, { "epoch": 0.44, "learning_rate": 1.2558680371950913e-05, "loss": 0.2677, "step": 5076 }, { "epoch": 0.44, "learning_rate": 1.2555996450090693e-05, "loss": 0.2746, "step": 5077 }, { "epoch": 0.44, "learning_rate": 1.2553312331226896e-05, "loss": 0.5933, "step": 5078 }, { "epoch": 0.44, "learning_rate": 1.2550628015566402e-05, "loss": 0.2944, "step": 5079 }, { "epoch": 0.44, "learning_rate": 1.2547943503316105e-05, "loss": 0.3016, "step": 5080 }, { "epoch": 0.44, "learning_rate": 1.2545258794682906e-05, "loss": 0.3448, "step": 5081 }, { "epoch": 0.44, "learning_rate": 1.2542573889873741e-05, "loss": 0.2834, "step": 5082 }, { "epoch": 0.44, "learning_rate": 1.2539888789095542e-05, "loss": 0.2861, "step": 5083 }, { "epoch": 0.44, "learning_rate": 1.2537203492555265e-05, "loss": 0.3148, "step": 5084 }, { "epoch": 0.44, "learning_rate": 1.2534518000459884e-05, "loss": 0.319, "step": 5085 }, { "epoch": 0.44, "learning_rate": 1.2531832313016374e-05, "loss": 0.2494, "step": 5086 }, { "epoch": 0.44, "learning_rate": 1.252914643043174e-05, "loss": 0.6106, "step": 5087 }, { "epoch": 0.44, "learning_rate": 1.2526460352912994e-05, "loss": 0.3207, "step": 5088 }, { "epoch": 0.44, "learning_rate": 1.252377408066717e-05, "loss": 0.3076, "step": 5089 }, { "epoch": 0.44, "learning_rate": 1.2521087613901313e-05, "loss": 0.3112, "step": 5090 }, { "epoch": 0.44, "learning_rate": 1.2518400952822475e-05, "loss": 0.3275, "step": 5091 }, { "epoch": 0.44, "learning_rate": 1.251571409763774e-05, "loss": 0.3105, "step": 5092 }, { "epoch": 0.44, "learning_rate": 1.251302704855419e-05, "loss": 0.3038, "step": 5093 }, { "epoch": 0.44, "learning_rate": 1.2510339805778932e-05, "loss": 0.3319, "step": 5094 }, { "epoch": 0.44, "learning_rate": 1.2507652369519085e-05, "loss": 0.3226, "step": 5095 }, { "epoch": 0.44, "learning_rate": 1.2504964739981787e-05, "loss": 0.2773, "step": 5096 }, { "epoch": 0.44, "learning_rate": 1.2502276917374183e-05, "loss": 0.2769, "step": 5097 }, { "epoch": 0.44, "learning_rate": 1.2499588901903437e-05, "loss": 0.2853, "step": 5098 }, { "epoch": 0.44, "learning_rate": 1.2496900693776732e-05, "loss": 0.2924, "step": 5099 }, { "epoch": 0.44, "learning_rate": 1.2494212293201255e-05, "loss": 0.257, "step": 5100 }, { "epoch": 0.44, "learning_rate": 1.2491523700384222e-05, "loss": 0.3021, "step": 5101 }, { "epoch": 0.44, "learning_rate": 1.2488834915532852e-05, "loss": 0.3013, "step": 5102 }, { "epoch": 0.44, "learning_rate": 1.2486145938854384e-05, "loss": 0.2578, "step": 5103 }, { "epoch": 0.44, "learning_rate": 1.2483456770556073e-05, "loss": 0.2798, "step": 5104 }, { "epoch": 0.44, "learning_rate": 1.2480767410845185e-05, "loss": 0.326, "step": 5105 }, { "epoch": 0.44, "learning_rate": 1.2478077859929e-05, "loss": 0.3029, "step": 5106 }, { "epoch": 0.44, "learning_rate": 1.247538811801482e-05, "loss": 0.2978, "step": 5107 }, { "epoch": 0.44, "learning_rate": 1.2472698185309954e-05, "loss": 0.3493, "step": 5108 }, { "epoch": 0.44, "learning_rate": 1.2470008062021732e-05, "loss": 0.3455, "step": 5109 }, { "epoch": 0.44, "learning_rate": 1.2467317748357493e-05, "loss": 0.2995, "step": 5110 }, { "epoch": 0.44, "learning_rate": 1.2464627244524595e-05, "loss": 0.3005, "step": 5111 }, { "epoch": 0.44, "learning_rate": 1.2461936550730402e-05, "loss": 0.3138, "step": 5112 }, { "epoch": 0.44, "learning_rate": 1.2459245667182307e-05, "loss": 0.3345, "step": 5113 }, { "epoch": 0.44, "learning_rate": 1.2456554594087709e-05, "loss": 0.2744, "step": 5114 }, { "epoch": 0.44, "learning_rate": 1.2453863331654019e-05, "loss": 0.276, "step": 5115 }, { "epoch": 0.44, "learning_rate": 1.245117188008867e-05, "loss": 0.288, "step": 5116 }, { "epoch": 0.44, "learning_rate": 1.2448480239599103e-05, "loss": 0.2554, "step": 5117 }, { "epoch": 0.44, "learning_rate": 1.2445788410392778e-05, "loss": 0.2917, "step": 5118 }, { "epoch": 0.44, "learning_rate": 1.2443096392677165e-05, "loss": 0.3129, "step": 5119 }, { "epoch": 0.44, "learning_rate": 1.2440404186659757e-05, "loss": 0.2881, "step": 5120 }, { "epoch": 0.44, "learning_rate": 1.243771179254805e-05, "loss": 0.2708, "step": 5121 }, { "epoch": 0.44, "learning_rate": 1.2435019210549564e-05, "loss": 0.5685, "step": 5122 }, { "epoch": 0.44, "learning_rate": 1.2432326440871832e-05, "loss": 0.2533, "step": 5123 }, { "epoch": 0.44, "learning_rate": 1.2429633483722392e-05, "loss": 0.2368, "step": 5124 }, { "epoch": 0.44, "learning_rate": 1.242694033930881e-05, "loss": 0.2086, "step": 5125 }, { "epoch": 0.44, "learning_rate": 1.2424247007838659e-05, "loss": 0.2679, "step": 5126 }, { "epoch": 0.44, "learning_rate": 1.2421553489519527e-05, "loss": 0.2884, "step": 5127 }, { "epoch": 0.44, "learning_rate": 1.2418859784559016e-05, "loss": 0.2932, "step": 5128 }, { "epoch": 0.44, "learning_rate": 1.2416165893164746e-05, "loss": 0.3159, "step": 5129 }, { "epoch": 0.44, "learning_rate": 1.241347181554435e-05, "loss": 0.2684, "step": 5130 }, { "epoch": 0.44, "learning_rate": 1.2410777551905469e-05, "loss": 0.3151, "step": 5131 }, { "epoch": 0.44, "learning_rate": 1.2408083102455766e-05, "loss": 0.3011, "step": 5132 }, { "epoch": 0.44, "learning_rate": 1.2405388467402915e-05, "loss": 0.2405, "step": 5133 }, { "epoch": 0.44, "learning_rate": 1.2402693646954607e-05, "loss": 0.2706, "step": 5134 }, { "epoch": 0.44, "learning_rate": 1.2399998641318547e-05, "loss": 0.3016, "step": 5135 }, { "epoch": 0.44, "learning_rate": 1.2397303450702449e-05, "loss": 0.3594, "step": 5136 }, { "epoch": 0.44, "learning_rate": 1.2394608075314048e-05, "loss": 0.3301, "step": 5137 }, { "epoch": 0.44, "learning_rate": 1.2391912515361085e-05, "loss": 0.2711, "step": 5138 }, { "epoch": 0.44, "learning_rate": 1.238921677105133e-05, "loss": 0.2408, "step": 5139 }, { "epoch": 0.44, "learning_rate": 1.2386520842592544e-05, "loss": 0.2906, "step": 5140 }, { "epoch": 0.44, "learning_rate": 1.2383824730192529e-05, "loss": 0.3082, "step": 5141 }, { "epoch": 0.44, "learning_rate": 1.2381128434059082e-05, "loss": 0.2953, "step": 5142 }, { "epoch": 0.44, "learning_rate": 1.2378431954400021e-05, "loss": 0.3179, "step": 5143 }, { "epoch": 0.44, "learning_rate": 1.2375735291423174e-05, "loss": 0.3124, "step": 5144 }, { "epoch": 0.44, "learning_rate": 1.2373038445336392e-05, "loss": 0.256, "step": 5145 }, { "epoch": 0.44, "learning_rate": 1.237034141634753e-05, "loss": 0.2609, "step": 5146 }, { "epoch": 0.44, "learning_rate": 1.2367644204664468e-05, "loss": 0.2864, "step": 5147 }, { "epoch": 0.44, "learning_rate": 1.2364946810495088e-05, "loss": 0.2834, "step": 5148 }, { "epoch": 0.44, "learning_rate": 1.2362249234047295e-05, "loss": 0.2791, "step": 5149 }, { "epoch": 0.44, "learning_rate": 1.2359551475529e-05, "loss": 0.2488, "step": 5150 }, { "epoch": 0.44, "learning_rate": 1.2356853535148137e-05, "loss": 0.2633, "step": 5151 }, { "epoch": 0.44, "learning_rate": 1.2354155413112647e-05, "loss": 0.2738, "step": 5152 }, { "epoch": 0.44, "learning_rate": 1.2351457109630493e-05, "loss": 0.2678, "step": 5153 }, { "epoch": 0.44, "learning_rate": 1.2348758624909644e-05, "loss": 0.3131, "step": 5154 }, { "epoch": 0.44, "learning_rate": 1.2346059959158085e-05, "loss": 0.2427, "step": 5155 }, { "epoch": 0.44, "learning_rate": 1.2343361112583819e-05, "loss": 0.297, "step": 5156 }, { "epoch": 0.44, "learning_rate": 1.234066208539485e-05, "loss": 0.2605, "step": 5157 }, { "epoch": 0.44, "learning_rate": 1.233796287779922e-05, "loss": 0.265, "step": 5158 }, { "epoch": 0.44, "learning_rate": 1.2335263490004961e-05, "loss": 0.5809, "step": 5159 }, { "epoch": 0.44, "learning_rate": 1.2332563922220132e-05, "loss": 0.3103, "step": 5160 }, { "epoch": 0.44, "learning_rate": 1.2329864174652802e-05, "loss": 0.3275, "step": 5161 }, { "epoch": 0.44, "learning_rate": 1.2327164247511051e-05, "loss": 0.2773, "step": 5162 }, { "epoch": 0.44, "learning_rate": 1.2324464141002981e-05, "loss": 0.3287, "step": 5163 }, { "epoch": 0.44, "learning_rate": 1.23217638553367e-05, "loss": 0.2722, "step": 5164 }, { "epoch": 0.44, "learning_rate": 1.2319063390720331e-05, "loss": 0.2862, "step": 5165 }, { "epoch": 0.44, "learning_rate": 1.2316362747362019e-05, "loss": 0.2706, "step": 5166 }, { "epoch": 0.44, "learning_rate": 1.2313661925469908e-05, "loss": 0.27, "step": 5167 }, { "epoch": 0.44, "learning_rate": 1.2310960925252171e-05, "loss": 0.2825, "step": 5168 }, { "epoch": 0.44, "learning_rate": 1.2308259746916982e-05, "loss": 0.2767, "step": 5169 }, { "epoch": 0.44, "learning_rate": 1.2305558390672539e-05, "loss": 0.27, "step": 5170 }, { "epoch": 0.44, "learning_rate": 1.2302856856727043e-05, "loss": 0.2974, "step": 5171 }, { "epoch": 0.44, "learning_rate": 1.2300155145288724e-05, "loss": 0.2477, "step": 5172 }, { "epoch": 0.44, "learning_rate": 1.2297453256565812e-05, "loss": 0.2212, "step": 5173 }, { "epoch": 0.44, "learning_rate": 1.2294751190766552e-05, "loss": 0.2523, "step": 5174 }, { "epoch": 0.44, "learning_rate": 1.2292048948099214e-05, "loss": 0.2648, "step": 5175 }, { "epoch": 0.44, "learning_rate": 1.228934652877206e-05, "loss": 0.2862, "step": 5176 }, { "epoch": 0.44, "learning_rate": 1.2286643932993396e-05, "loss": 0.2982, "step": 5177 }, { "epoch": 0.44, "learning_rate": 1.2283941160971512e-05, "loss": 0.2551, "step": 5178 }, { "epoch": 0.44, "learning_rate": 1.2281238212914727e-05, "loss": 0.2878, "step": 5179 }, { "epoch": 0.44, "learning_rate": 1.2278535089031377e-05, "loss": 0.2994, "step": 5180 }, { "epoch": 0.44, "learning_rate": 1.22758317895298e-05, "loss": 0.2975, "step": 5181 }, { "epoch": 0.44, "learning_rate": 1.2273128314618353e-05, "loss": 0.2916, "step": 5182 }, { "epoch": 0.44, "learning_rate": 1.2270424664505405e-05, "loss": 0.2406, "step": 5183 }, { "epoch": 0.44, "learning_rate": 1.2267720839399347e-05, "loss": 0.2346, "step": 5184 }, { "epoch": 0.44, "learning_rate": 1.2265016839508568e-05, "loss": 0.3267, "step": 5185 }, { "epoch": 0.44, "learning_rate": 1.2262312665041482e-05, "loss": 0.3231, "step": 5186 }, { "epoch": 0.44, "learning_rate": 1.2259608316206519e-05, "loss": 0.2839, "step": 5187 }, { "epoch": 0.44, "learning_rate": 1.2256903793212107e-05, "loss": 0.2876, "step": 5188 }, { "epoch": 0.44, "learning_rate": 1.2254199096266705e-05, "loss": 0.2913, "step": 5189 }, { "epoch": 0.44, "learning_rate": 1.2251494225578775e-05, "loss": 0.29, "step": 5190 }, { "epoch": 0.44, "learning_rate": 1.2248789181356793e-05, "loss": 0.2804, "step": 5191 }, { "epoch": 0.45, "learning_rate": 1.2246083963809256e-05, "loss": 0.339, "step": 5192 }, { "epoch": 0.45, "learning_rate": 1.2243378573144663e-05, "loss": 0.3062, "step": 5193 }, { "epoch": 0.45, "learning_rate": 1.2240673009571536e-05, "loss": 0.2581, "step": 5194 }, { "epoch": 0.45, "learning_rate": 1.2237967273298401e-05, "loss": 0.2989, "step": 5195 }, { "epoch": 0.45, "learning_rate": 1.223526136453381e-05, "loss": 0.282, "step": 5196 }, { "epoch": 0.45, "learning_rate": 1.2232555283486319e-05, "loss": 0.2779, "step": 5197 }, { "epoch": 0.45, "learning_rate": 1.2229849030364496e-05, "loss": 0.3223, "step": 5198 }, { "epoch": 0.45, "learning_rate": 1.2227142605376928e-05, "loss": 0.3434, "step": 5199 }, { "epoch": 0.45, "learning_rate": 1.2224436008732213e-05, "loss": 0.2767, "step": 5200 }, { "epoch": 0.45, "learning_rate": 1.222172924063896e-05, "loss": 0.3041, "step": 5201 }, { "epoch": 0.45, "learning_rate": 1.2219022301305796e-05, "loss": 0.6394, "step": 5202 }, { "epoch": 0.45, "learning_rate": 1.221631519094136e-05, "loss": 0.2842, "step": 5203 }, { "epoch": 0.45, "learning_rate": 1.2213607909754297e-05, "loss": 0.2719, "step": 5204 }, { "epoch": 0.45, "learning_rate": 1.2210900457953274e-05, "loss": 0.2858, "step": 5205 }, { "epoch": 0.45, "learning_rate": 1.2208192835746973e-05, "loss": 0.3089, "step": 5206 }, { "epoch": 0.45, "learning_rate": 1.2205485043344074e-05, "loss": 0.2872, "step": 5207 }, { "epoch": 0.45, "learning_rate": 1.2202777080953285e-05, "loss": 0.3708, "step": 5208 }, { "epoch": 0.45, "learning_rate": 1.2200068948783325e-05, "loss": 0.2668, "step": 5209 }, { "epoch": 0.45, "learning_rate": 1.2197360647042922e-05, "loss": 0.2662, "step": 5210 }, { "epoch": 0.45, "learning_rate": 1.2194652175940817e-05, "loss": 0.264, "step": 5211 }, { "epoch": 0.45, "learning_rate": 1.2191943535685766e-05, "loss": 0.3219, "step": 5212 }, { "epoch": 0.45, "learning_rate": 1.218923472648654e-05, "loss": 0.3112, "step": 5213 }, { "epoch": 0.45, "learning_rate": 1.2186525748551914e-05, "loss": 0.2975, "step": 5214 }, { "epoch": 0.45, "learning_rate": 1.2183816602090693e-05, "loss": 0.3105, "step": 5215 }, { "epoch": 0.45, "learning_rate": 1.2181107287311675e-05, "loss": 0.2872, "step": 5216 }, { "epoch": 0.45, "learning_rate": 1.2178397804423685e-05, "loss": 0.284, "step": 5217 }, { "epoch": 0.45, "learning_rate": 1.217568815363556e-05, "loss": 0.2841, "step": 5218 }, { "epoch": 0.45, "learning_rate": 1.2172978335156136e-05, "loss": 0.2945, "step": 5219 }, { "epoch": 0.45, "learning_rate": 1.2170268349194281e-05, "loss": 0.3187, "step": 5220 }, { "epoch": 0.45, "learning_rate": 1.2167558195958867e-05, "loss": 0.2859, "step": 5221 }, { "epoch": 0.45, "learning_rate": 1.2164847875658776e-05, "loss": 0.3312, "step": 5222 }, { "epoch": 0.45, "learning_rate": 1.2162137388502908e-05, "loss": 0.2891, "step": 5223 }, { "epoch": 0.45, "learning_rate": 1.2159426734700175e-05, "loss": 0.2958, "step": 5224 }, { "epoch": 0.45, "learning_rate": 1.2156715914459498e-05, "loss": 0.2496, "step": 5225 }, { "epoch": 0.45, "learning_rate": 1.2154004927989815e-05, "loss": 0.2876, "step": 5226 }, { "epoch": 0.45, "learning_rate": 1.2151293775500076e-05, "loss": 0.337, "step": 5227 }, { "epoch": 0.45, "learning_rate": 1.214858245719924e-05, "loss": 0.2776, "step": 5228 }, { "epoch": 0.45, "learning_rate": 1.2145870973296288e-05, "loss": 0.2784, "step": 5229 }, { "epoch": 0.45, "learning_rate": 1.2143159324000204e-05, "loss": 0.2466, "step": 5230 }, { "epoch": 0.45, "learning_rate": 1.2140447509519988e-05, "loss": 0.272, "step": 5231 }, { "epoch": 0.45, "learning_rate": 1.2137735530064653e-05, "loss": 0.3425, "step": 5232 }, { "epoch": 0.45, "learning_rate": 1.2135023385843228e-05, "loss": 0.3345, "step": 5233 }, { "epoch": 0.45, "learning_rate": 1.2132311077064749e-05, "loss": 0.2472, "step": 5234 }, { "epoch": 0.45, "learning_rate": 1.2129598603938269e-05, "loss": 0.2385, "step": 5235 }, { "epoch": 0.45, "learning_rate": 1.212688596667285e-05, "loss": 0.2809, "step": 5236 }, { "epoch": 0.45, "learning_rate": 1.2124173165477572e-05, "loss": 0.3051, "step": 5237 }, { "epoch": 0.45, "learning_rate": 1.2121460200561521e-05, "loss": 0.3031, "step": 5238 }, { "epoch": 0.45, "learning_rate": 1.21187470721338e-05, "loss": 0.3035, "step": 5239 }, { "epoch": 0.45, "learning_rate": 1.2116033780403524e-05, "loss": 0.2922, "step": 5240 }, { "epoch": 0.45, "learning_rate": 1.211332032557982e-05, "loss": 0.311, "step": 5241 }, { "epoch": 0.45, "learning_rate": 1.2110606707871828e-05, "loss": 0.2844, "step": 5242 }, { "epoch": 0.45, "learning_rate": 1.2107892927488698e-05, "loss": 0.2643, "step": 5243 }, { "epoch": 0.45, "learning_rate": 1.2105178984639601e-05, "loss": 0.2689, "step": 5244 }, { "epoch": 0.45, "learning_rate": 1.2102464879533704e-05, "loss": 0.3397, "step": 5245 }, { "epoch": 0.45, "learning_rate": 1.2099750612380205e-05, "loss": 0.3127, "step": 5246 }, { "epoch": 0.45, "learning_rate": 1.2097036183388305e-05, "loss": 0.3026, "step": 5247 }, { "epoch": 0.45, "learning_rate": 1.2094321592767217e-05, "loss": 0.3033, "step": 5248 }, { "epoch": 0.45, "learning_rate": 1.209160684072617e-05, "loss": 0.2864, "step": 5249 }, { "epoch": 0.45, "learning_rate": 1.20888919274744e-05, "loss": 0.2635, "step": 5250 }, { "epoch": 0.45, "learning_rate": 1.2086176853221166e-05, "loss": 0.6339, "step": 5251 }, { "epoch": 0.45, "learning_rate": 1.2083461618175723e-05, "loss": 0.2958, "step": 5252 }, { "epoch": 0.45, "learning_rate": 1.2080746222547356e-05, "loss": 0.5901, "step": 5253 }, { "epoch": 0.45, "learning_rate": 1.2078030666545351e-05, "loss": 0.2417, "step": 5254 }, { "epoch": 0.45, "learning_rate": 1.207531495037901e-05, "loss": 0.281, "step": 5255 }, { "epoch": 0.45, "learning_rate": 1.207259907425765e-05, "loss": 0.276, "step": 5256 }, { "epoch": 0.45, "learning_rate": 1.206988303839059e-05, "loss": 0.2995, "step": 5257 }, { "epoch": 0.45, "learning_rate": 1.2067166842987175e-05, "loss": 0.2656, "step": 5258 }, { "epoch": 0.45, "learning_rate": 1.2064450488256751e-05, "loss": 0.2841, "step": 5259 }, { "epoch": 0.45, "learning_rate": 1.206173397440869e-05, "loss": 0.3264, "step": 5260 }, { "epoch": 0.45, "learning_rate": 1.2059017301652359e-05, "loss": 0.3846, "step": 5261 }, { "epoch": 0.45, "learning_rate": 1.2056300470197144e-05, "loss": 0.2714, "step": 5262 }, { "epoch": 0.45, "learning_rate": 1.2053583480252456e-05, "loss": 0.3253, "step": 5263 }, { "epoch": 0.45, "learning_rate": 1.2050866332027695e-05, "loss": 0.2585, "step": 5264 }, { "epoch": 0.45, "learning_rate": 1.2048149025732295e-05, "loss": 0.2858, "step": 5265 }, { "epoch": 0.45, "learning_rate": 1.2045431561575685e-05, "loss": 0.3005, "step": 5266 }, { "epoch": 0.45, "learning_rate": 1.2042713939767318e-05, "loss": 0.2812, "step": 5267 }, { "epoch": 0.45, "learning_rate": 1.2039996160516654e-05, "loss": 0.2631, "step": 5268 }, { "epoch": 0.45, "learning_rate": 1.2037278224033166e-05, "loss": 0.2798, "step": 5269 }, { "epoch": 0.45, "learning_rate": 1.2034560130526341e-05, "loss": 0.239, "step": 5270 }, { "epoch": 0.45, "learning_rate": 1.203184188020567e-05, "loss": 0.3223, "step": 5271 }, { "epoch": 0.45, "learning_rate": 1.2029123473280668e-05, "loss": 0.2745, "step": 5272 }, { "epoch": 0.45, "learning_rate": 1.2026404909960856e-05, "loss": 0.2731, "step": 5273 }, { "epoch": 0.45, "learning_rate": 1.2023686190455766e-05, "loss": 0.2752, "step": 5274 }, { "epoch": 0.45, "learning_rate": 1.2020967314974945e-05, "loss": 0.2837, "step": 5275 }, { "epoch": 0.45, "learning_rate": 1.2018248283727947e-05, "loss": 0.2931, "step": 5276 }, { "epoch": 0.45, "learning_rate": 1.2015529096924343e-05, "loss": 0.2755, "step": 5277 }, { "epoch": 0.45, "learning_rate": 1.2012809754773718e-05, "loss": 0.2768, "step": 5278 }, { "epoch": 0.45, "learning_rate": 1.2010090257485663e-05, "loss": 0.3058, "step": 5279 }, { "epoch": 0.45, "learning_rate": 1.2007370605269782e-05, "loss": 0.2674, "step": 5280 }, { "epoch": 0.45, "learning_rate": 1.200465079833569e-05, "loss": 0.2902, "step": 5281 }, { "epoch": 0.45, "learning_rate": 1.2001930836893026e-05, "loss": 0.3199, "step": 5282 }, { "epoch": 0.45, "learning_rate": 1.1999210721151421e-05, "loss": 0.3159, "step": 5283 }, { "epoch": 0.45, "learning_rate": 1.1996490451320532e-05, "loss": 0.3105, "step": 5284 }, { "epoch": 0.45, "learning_rate": 1.1993770027610023e-05, "loss": 0.61, "step": 5285 }, { "epoch": 0.45, "learning_rate": 1.1991049450229577e-05, "loss": 0.3182, "step": 5286 }, { "epoch": 0.45, "learning_rate": 1.1988328719388873e-05, "loss": 0.274, "step": 5287 }, { "epoch": 0.45, "learning_rate": 1.1985607835297618e-05, "loss": 0.3, "step": 5288 }, { "epoch": 0.45, "learning_rate": 1.1982886798165521e-05, "loss": 0.288, "step": 5289 }, { "epoch": 0.45, "learning_rate": 1.1980165608202303e-05, "loss": 0.2626, "step": 5290 }, { "epoch": 0.45, "learning_rate": 1.1977444265617713e-05, "loss": 0.3077, "step": 5291 }, { "epoch": 0.45, "learning_rate": 1.1974722770621483e-05, "loss": 0.26, "step": 5292 }, { "epoch": 0.45, "learning_rate": 1.1972001123423382e-05, "loss": 0.2897, "step": 5293 }, { "epoch": 0.45, "learning_rate": 1.1969279324233179e-05, "loss": 0.2702, "step": 5294 }, { "epoch": 0.45, "learning_rate": 1.1966557373260654e-05, "loss": 0.2867, "step": 5295 }, { "epoch": 0.45, "learning_rate": 1.1963835270715604e-05, "loss": 0.2562, "step": 5296 }, { "epoch": 0.45, "learning_rate": 1.1961113016807838e-05, "loss": 0.2557, "step": 5297 }, { "epoch": 0.45, "learning_rate": 1.1958390611747167e-05, "loss": 0.2822, "step": 5298 }, { "epoch": 0.45, "learning_rate": 1.1955668055743429e-05, "loss": 0.2919, "step": 5299 }, { "epoch": 0.45, "learning_rate": 1.1952945349006455e-05, "loss": 0.2864, "step": 5300 }, { "epoch": 0.45, "learning_rate": 1.1950222491746109e-05, "loss": 0.3005, "step": 5301 }, { "epoch": 0.45, "learning_rate": 1.1947499484172245e-05, "loss": 0.2961, "step": 5302 }, { "epoch": 0.45, "learning_rate": 1.1944776326494745e-05, "loss": 0.3578, "step": 5303 }, { "epoch": 0.45, "learning_rate": 1.1942053018923494e-05, "loss": 0.2657, "step": 5304 }, { "epoch": 0.45, "learning_rate": 1.1939329561668396e-05, "loss": 0.3127, "step": 5305 }, { "epoch": 0.45, "learning_rate": 1.1936605954939355e-05, "loss": 0.2883, "step": 5306 }, { "epoch": 0.45, "learning_rate": 1.1933882198946296e-05, "loss": 0.3423, "step": 5307 }, { "epoch": 0.45, "learning_rate": 1.1931158293899154e-05, "loss": 0.2751, "step": 5308 }, { "epoch": 0.46, "learning_rate": 1.1928434240007869e-05, "loss": 0.3028, "step": 5309 }, { "epoch": 0.46, "learning_rate": 1.1925710037482405e-05, "loss": 0.3023, "step": 5310 }, { "epoch": 0.46, "learning_rate": 1.1922985686532726e-05, "loss": 0.6001, "step": 5311 }, { "epoch": 0.46, "learning_rate": 1.192026118736881e-05, "loss": 0.28, "step": 5312 }, { "epoch": 0.46, "learning_rate": 1.1917536540200655e-05, "loss": 0.3248, "step": 5313 }, { "epoch": 0.46, "learning_rate": 1.1914811745238256e-05, "loss": 0.2647, "step": 5314 }, { "epoch": 0.46, "learning_rate": 1.1912086802691627e-05, "loss": 0.2952, "step": 5315 }, { "epoch": 0.46, "learning_rate": 1.1909361712770796e-05, "loss": 0.268, "step": 5316 }, { "epoch": 0.46, "learning_rate": 1.19066364756858e-05, "loss": 0.2814, "step": 5317 }, { "epoch": 0.46, "learning_rate": 1.1903911091646684e-05, "loss": 0.2876, "step": 5318 }, { "epoch": 0.46, "learning_rate": 1.190118556086351e-05, "loss": 0.3043, "step": 5319 }, { "epoch": 0.46, "learning_rate": 1.1898459883546346e-05, "loss": 0.3002, "step": 5320 }, { "epoch": 0.46, "learning_rate": 1.1895734059905275e-05, "loss": 0.2681, "step": 5321 }, { "epoch": 0.46, "learning_rate": 1.1893008090150389e-05, "loss": 0.2776, "step": 5322 }, { "epoch": 0.46, "learning_rate": 1.1890281974491794e-05, "loss": 0.2712, "step": 5323 }, { "epoch": 0.46, "learning_rate": 1.1887555713139605e-05, "loss": 0.3186, "step": 5324 }, { "epoch": 0.46, "learning_rate": 1.1884829306303947e-05, "loss": 0.2463, "step": 5325 }, { "epoch": 0.46, "learning_rate": 1.188210275419496e-05, "loss": 0.2753, "step": 5326 }, { "epoch": 0.46, "learning_rate": 1.1879376057022793e-05, "loss": 0.2542, "step": 5327 }, { "epoch": 0.46, "learning_rate": 1.1876649214997602e-05, "loss": 0.2787, "step": 5328 }, { "epoch": 0.46, "learning_rate": 1.187392222832957e-05, "loss": 0.2897, "step": 5329 }, { "epoch": 0.46, "learning_rate": 1.1871195097228864e-05, "loss": 0.2876, "step": 5330 }, { "epoch": 0.46, "learning_rate": 1.186846782190569e-05, "loss": 0.2892, "step": 5331 }, { "epoch": 0.46, "learning_rate": 1.186574040257025e-05, "loss": 0.3063, "step": 5332 }, { "epoch": 0.46, "learning_rate": 1.1863012839432755e-05, "loss": 0.2781, "step": 5333 }, { "epoch": 0.46, "learning_rate": 1.1860285132703435e-05, "loss": 0.256, "step": 5334 }, { "epoch": 0.46, "learning_rate": 1.1857557282592531e-05, "loss": 0.2599, "step": 5335 }, { "epoch": 0.46, "learning_rate": 1.1854829289310293e-05, "loss": 0.2654, "step": 5336 }, { "epoch": 0.46, "learning_rate": 1.1852101153066976e-05, "loss": 0.3451, "step": 5337 }, { "epoch": 0.46, "learning_rate": 1.1849372874072852e-05, "loss": 0.6012, "step": 5338 }, { "epoch": 0.46, "learning_rate": 1.1846644452538207e-05, "loss": 0.289, "step": 5339 }, { "epoch": 0.46, "learning_rate": 1.184391588867333e-05, "loss": 0.2855, "step": 5340 }, { "epoch": 0.46, "learning_rate": 1.1841187182688527e-05, "loss": 0.2746, "step": 5341 }, { "epoch": 0.46, "learning_rate": 1.1838458334794116e-05, "loss": 0.291, "step": 5342 }, { "epoch": 0.46, "learning_rate": 1.1835729345200422e-05, "loss": 0.3144, "step": 5343 }, { "epoch": 0.46, "learning_rate": 1.1833000214117776e-05, "loss": 0.3074, "step": 5344 }, { "epoch": 0.46, "learning_rate": 1.1830270941756532e-05, "loss": 0.2659, "step": 5345 }, { "epoch": 0.46, "learning_rate": 1.1827541528327052e-05, "loss": 0.2766, "step": 5346 }, { "epoch": 0.46, "learning_rate": 1.1824811974039694e-05, "loss": 0.2963, "step": 5347 }, { "epoch": 0.46, "learning_rate": 1.182208227910485e-05, "loss": 0.2395, "step": 5348 }, { "epoch": 0.46, "learning_rate": 1.1819352443732908e-05, "loss": 0.2582, "step": 5349 }, { "epoch": 0.46, "learning_rate": 1.181662246813427e-05, "loss": 0.2554, "step": 5350 }, { "epoch": 0.46, "learning_rate": 1.1813892352519343e-05, "loss": 0.2722, "step": 5351 }, { "epoch": 0.46, "learning_rate": 1.1811162097098559e-05, "loss": 0.2477, "step": 5352 }, { "epoch": 0.46, "learning_rate": 1.180843170208235e-05, "loss": 0.2471, "step": 5353 }, { "epoch": 0.46, "learning_rate": 1.180570116768116e-05, "loss": 0.2558, "step": 5354 }, { "epoch": 0.46, "learning_rate": 1.180297049410545e-05, "loss": 0.3052, "step": 5355 }, { "epoch": 0.46, "learning_rate": 1.1800239681565679e-05, "loss": 0.2889, "step": 5356 }, { "epoch": 0.46, "learning_rate": 1.1797508730272329e-05, "loss": 0.2632, "step": 5357 }, { "epoch": 0.46, "learning_rate": 1.179477764043589e-05, "loss": 0.2872, "step": 5358 }, { "epoch": 0.46, "learning_rate": 1.1792046412266857e-05, "loss": 0.2975, "step": 5359 }, { "epoch": 0.46, "learning_rate": 1.1789315045975742e-05, "loss": 0.2983, "step": 5360 }, { "epoch": 0.46, "learning_rate": 1.1786583541773064e-05, "loss": 0.2526, "step": 5361 }, { "epoch": 0.46, "learning_rate": 1.1783851899869357e-05, "loss": 0.3323, "step": 5362 }, { "epoch": 0.46, "learning_rate": 1.1781120120475156e-05, "loss": 0.2587, "step": 5363 }, { "epoch": 0.46, "learning_rate": 1.1778388203801019e-05, "loss": 0.3342, "step": 5364 }, { "epoch": 0.46, "learning_rate": 1.1775656150057507e-05, "loss": 0.3036, "step": 5365 }, { "epoch": 0.46, "learning_rate": 1.1772923959455188e-05, "loss": 0.266, "step": 5366 }, { "epoch": 0.46, "learning_rate": 1.1770191632204656e-05, "loss": 0.2959, "step": 5367 }, { "epoch": 0.46, "learning_rate": 1.1767459168516497e-05, "loss": 0.2777, "step": 5368 }, { "epoch": 0.46, "learning_rate": 1.176472656860132e-05, "loss": 0.338, "step": 5369 }, { "epoch": 0.46, "learning_rate": 1.176199383266974e-05, "loss": 0.3168, "step": 5370 }, { "epoch": 0.46, "learning_rate": 1.175926096093238e-05, "loss": 0.2654, "step": 5371 }, { "epoch": 0.46, "learning_rate": 1.1756527953599877e-05, "loss": 0.5797, "step": 5372 }, { "epoch": 0.46, "learning_rate": 1.175379481088288e-05, "loss": 0.2917, "step": 5373 }, { "epoch": 0.46, "learning_rate": 1.1751061532992045e-05, "loss": 0.6058, "step": 5374 }, { "epoch": 0.46, "learning_rate": 1.1748328120138038e-05, "loss": 0.3195, "step": 5375 }, { "epoch": 0.46, "learning_rate": 1.1745594572531538e-05, "loss": 0.3198, "step": 5376 }, { "epoch": 0.46, "learning_rate": 1.1742860890383234e-05, "loss": 0.2981, "step": 5377 }, { "epoch": 0.46, "learning_rate": 1.1740127073903826e-05, "loss": 0.3001, "step": 5378 }, { "epoch": 0.46, "learning_rate": 1.1737393123304019e-05, "loss": 0.343, "step": 5379 }, { "epoch": 0.46, "learning_rate": 1.1734659038794535e-05, "loss": 0.2911, "step": 5380 }, { "epoch": 0.46, "learning_rate": 1.1731924820586106e-05, "loss": 0.2665, "step": 5381 }, { "epoch": 0.46, "learning_rate": 1.1729190468889466e-05, "loss": 0.3011, "step": 5382 }, { "epoch": 0.46, "learning_rate": 1.172645598391537e-05, "loss": 0.3096, "step": 5383 }, { "epoch": 0.46, "learning_rate": 1.172372136587458e-05, "loss": 0.3055, "step": 5384 }, { "epoch": 0.46, "learning_rate": 1.1720986614977861e-05, "loss": 0.28, "step": 5385 }, { "epoch": 0.46, "learning_rate": 1.1718251731436001e-05, "loss": 0.2758, "step": 5386 }, { "epoch": 0.46, "learning_rate": 1.1715516715459784e-05, "loss": 0.2798, "step": 5387 }, { "epoch": 0.46, "learning_rate": 1.1712781567260018e-05, "loss": 0.2996, "step": 5388 }, { "epoch": 0.46, "learning_rate": 1.171004628704751e-05, "loss": 0.2979, "step": 5389 }, { "epoch": 0.46, "learning_rate": 1.1707310875033085e-05, "loss": 0.2941, "step": 5390 }, { "epoch": 0.46, "learning_rate": 1.170457533142757e-05, "loss": 0.2889, "step": 5391 }, { "epoch": 0.46, "learning_rate": 1.1701839656441813e-05, "loss": 0.2754, "step": 5392 }, { "epoch": 0.46, "learning_rate": 1.1699103850286668e-05, "loss": 0.274, "step": 5393 }, { "epoch": 0.46, "learning_rate": 1.169636791317299e-05, "loss": 0.3447, "step": 5394 }, { "epoch": 0.46, "learning_rate": 1.1693631845311657e-05, "loss": 0.2928, "step": 5395 }, { "epoch": 0.46, "learning_rate": 1.1690895646913551e-05, "loss": 0.293, "step": 5396 }, { "epoch": 0.46, "learning_rate": 1.168815931818956e-05, "loss": 0.2974, "step": 5397 }, { "epoch": 0.46, "learning_rate": 1.1685422859350592e-05, "loss": 0.3529, "step": 5398 }, { "epoch": 0.46, "learning_rate": 1.1682686270607558e-05, "loss": 0.2847, "step": 5399 }, { "epoch": 0.46, "learning_rate": 1.1679949552171382e-05, "loss": 0.3314, "step": 5400 }, { "epoch": 0.46, "learning_rate": 1.1677212704252994e-05, "loss": 0.285, "step": 5401 }, { "epoch": 0.46, "learning_rate": 1.1674475727063337e-05, "loss": 0.3358, "step": 5402 }, { "epoch": 0.46, "learning_rate": 1.1671738620813367e-05, "loss": 0.325, "step": 5403 }, { "epoch": 0.46, "learning_rate": 1.1669001385714041e-05, "loss": 0.2756, "step": 5404 }, { "epoch": 0.46, "learning_rate": 1.166626402197634e-05, "loss": 0.3076, "step": 5405 }, { "epoch": 0.46, "learning_rate": 1.1663526529811235e-05, "loss": 0.2558, "step": 5406 }, { "epoch": 0.46, "learning_rate": 1.166078890942973e-05, "loss": 0.2808, "step": 5407 }, { "epoch": 0.46, "learning_rate": 1.165805116104282e-05, "loss": 0.2755, "step": 5408 }, { "epoch": 0.46, "learning_rate": 1.1655313284861521e-05, "loss": 0.3037, "step": 5409 }, { "epoch": 0.46, "learning_rate": 1.165257528109685e-05, "loss": 0.298, "step": 5410 }, { "epoch": 0.46, "learning_rate": 1.1649837149959844e-05, "loss": 0.2918, "step": 5411 }, { "epoch": 0.46, "learning_rate": 1.1647098891661543e-05, "loss": 0.3099, "step": 5412 }, { "epoch": 0.46, "learning_rate": 1.1644360506412997e-05, "loss": 0.2764, "step": 5413 }, { "epoch": 0.46, "learning_rate": 1.164162199442527e-05, "loss": 0.2953, "step": 5414 }, { "epoch": 0.46, "learning_rate": 1.1638883355909429e-05, "loss": 0.2708, "step": 5415 }, { "epoch": 0.46, "learning_rate": 1.1636144591076557e-05, "loss": 0.288, "step": 5416 }, { "epoch": 0.46, "learning_rate": 1.1633405700137744e-05, "loss": 0.2725, "step": 5417 }, { "epoch": 0.46, "learning_rate": 1.163066668330409e-05, "loss": 0.2901, "step": 5418 }, { "epoch": 0.46, "learning_rate": 1.162792754078671e-05, "loss": 0.5836, "step": 5419 }, { "epoch": 0.46, "learning_rate": 1.1625188272796714e-05, "loss": 0.289, "step": 5420 }, { "epoch": 0.46, "learning_rate": 1.1622448879545238e-05, "loss": 0.2429, "step": 5421 }, { "epoch": 0.46, "learning_rate": 1.161970936124342e-05, "loss": 0.2787, "step": 5422 }, { "epoch": 0.46, "learning_rate": 1.1616969718102404e-05, "loss": 0.2667, "step": 5423 }, { "epoch": 0.46, "learning_rate": 1.1614229950333358e-05, "loss": 0.2722, "step": 5424 }, { "epoch": 0.47, "learning_rate": 1.161149005814744e-05, "loss": 0.2934, "step": 5425 }, { "epoch": 0.47, "learning_rate": 1.1608750041755832e-05, "loss": 0.249, "step": 5426 }, { "epoch": 0.47, "learning_rate": 1.1606009901369718e-05, "loss": 0.2656, "step": 5427 }, { "epoch": 0.47, "learning_rate": 1.1603269637200296e-05, "loss": 0.6384, "step": 5428 }, { "epoch": 0.47, "learning_rate": 1.1600529249458773e-05, "loss": 0.3127, "step": 5429 }, { "epoch": 0.47, "learning_rate": 1.1597788738356365e-05, "loss": 0.2834, "step": 5430 }, { "epoch": 0.47, "learning_rate": 1.1595048104104296e-05, "loss": 0.3002, "step": 5431 }, { "epoch": 0.47, "learning_rate": 1.15923073469138e-05, "loss": 0.2895, "step": 5432 }, { "epoch": 0.47, "learning_rate": 1.1589566466996124e-05, "loss": 0.6115, "step": 5433 }, { "epoch": 0.47, "learning_rate": 1.1586825464562515e-05, "loss": 0.311, "step": 5434 }, { "epoch": 0.47, "learning_rate": 1.1584084339824242e-05, "loss": 0.2893, "step": 5435 }, { "epoch": 0.47, "learning_rate": 1.1581343092992574e-05, "loss": 0.3311, "step": 5436 }, { "epoch": 0.47, "learning_rate": 1.1578601724278794e-05, "loss": 0.291, "step": 5437 }, { "epoch": 0.47, "learning_rate": 1.1575860233894195e-05, "loss": 0.2982, "step": 5438 }, { "epoch": 0.47, "learning_rate": 1.1573118622050075e-05, "loss": 0.3025, "step": 5439 }, { "epoch": 0.47, "learning_rate": 1.1570376888957742e-05, "loss": 0.3111, "step": 5440 }, { "epoch": 0.47, "learning_rate": 1.1567635034828521e-05, "loss": 0.2822, "step": 5441 }, { "epoch": 0.47, "learning_rate": 1.1564893059873734e-05, "loss": 0.2585, "step": 5442 }, { "epoch": 0.47, "learning_rate": 1.1562150964304727e-05, "loss": 0.2985, "step": 5443 }, { "epoch": 0.47, "learning_rate": 1.1559408748332841e-05, "loss": 0.3049, "step": 5444 }, { "epoch": 0.47, "learning_rate": 1.1556666412169435e-05, "loss": 0.3305, "step": 5445 }, { "epoch": 0.47, "learning_rate": 1.1553923956025871e-05, "loss": 0.2461, "step": 5446 }, { "epoch": 0.47, "learning_rate": 1.1551181380113528e-05, "loss": 0.287, "step": 5447 }, { "epoch": 0.47, "learning_rate": 1.1548438684643789e-05, "loss": 0.282, "step": 5448 }, { "epoch": 0.47, "learning_rate": 1.1545695869828044e-05, "loss": 0.3109, "step": 5449 }, { "epoch": 0.47, "learning_rate": 1.1542952935877703e-05, "loss": 0.2466, "step": 5450 }, { "epoch": 0.47, "learning_rate": 1.1540209883004171e-05, "loss": 0.2564, "step": 5451 }, { "epoch": 0.47, "learning_rate": 1.1537466711418874e-05, "loss": 0.2736, "step": 5452 }, { "epoch": 0.47, "learning_rate": 1.1534723421333239e-05, "loss": 0.2795, "step": 5453 }, { "epoch": 0.47, "learning_rate": 1.1531980012958706e-05, "loss": 0.2607, "step": 5454 }, { "epoch": 0.47, "learning_rate": 1.152923648650672e-05, "loss": 0.2598, "step": 5455 }, { "epoch": 0.47, "learning_rate": 1.1526492842188746e-05, "loss": 0.2688, "step": 5456 }, { "epoch": 0.47, "learning_rate": 1.1523749080216246e-05, "loss": 0.2988, "step": 5457 }, { "epoch": 0.47, "learning_rate": 1.1521005200800694e-05, "loss": 0.2971, "step": 5458 }, { "epoch": 0.47, "learning_rate": 1.1518261204153578e-05, "loss": 0.2606, "step": 5459 }, { "epoch": 0.47, "learning_rate": 1.151551709048639e-05, "loss": 0.2984, "step": 5460 }, { "epoch": 0.47, "learning_rate": 1.1512772860010633e-05, "loss": 0.3318, "step": 5461 }, { "epoch": 0.47, "learning_rate": 1.1510028512937818e-05, "loss": 0.3378, "step": 5462 }, { "epoch": 0.47, "learning_rate": 1.1507284049479467e-05, "loss": 0.2794, "step": 5463 }, { "epoch": 0.47, "learning_rate": 1.150453946984711e-05, "loss": 0.3212, "step": 5464 }, { "epoch": 0.47, "learning_rate": 1.1501794774252284e-05, "loss": 0.2617, "step": 5465 }, { "epoch": 0.47, "learning_rate": 1.149904996290654e-05, "loss": 0.6039, "step": 5466 }, { "epoch": 0.47, "learning_rate": 1.1496305036021427e-05, "loss": 0.2834, "step": 5467 }, { "epoch": 0.47, "learning_rate": 1.1493559993808518e-05, "loss": 0.3163, "step": 5468 }, { "epoch": 0.47, "learning_rate": 1.1490814836479384e-05, "loss": 0.2886, "step": 5469 }, { "epoch": 0.47, "learning_rate": 1.148806956424561e-05, "loss": 0.2926, "step": 5470 }, { "epoch": 0.47, "learning_rate": 1.148532417731879e-05, "loss": 0.3063, "step": 5471 }, { "epoch": 0.47, "learning_rate": 1.1482578675910514e-05, "loss": 0.2313, "step": 5472 }, { "epoch": 0.47, "learning_rate": 1.1479833060232401e-05, "loss": 0.2841, "step": 5473 }, { "epoch": 0.47, "learning_rate": 1.1477087330496071e-05, "loss": 0.2817, "step": 5474 }, { "epoch": 0.47, "learning_rate": 1.1474341486913146e-05, "loss": 0.2673, "step": 5475 }, { "epoch": 0.47, "learning_rate": 1.1471595529695266e-05, "loss": 0.2932, "step": 5476 }, { "epoch": 0.47, "learning_rate": 1.1468849459054073e-05, "loss": 0.2584, "step": 5477 }, { "epoch": 0.47, "learning_rate": 1.146610327520122e-05, "loss": 0.2826, "step": 5478 }, { "epoch": 0.47, "learning_rate": 1.1463356978348373e-05, "loss": 0.3217, "step": 5479 }, { "epoch": 0.47, "learning_rate": 1.14606105687072e-05, "loss": 0.2513, "step": 5480 }, { "epoch": 0.47, "learning_rate": 1.145786404648938e-05, "loss": 0.3086, "step": 5481 }, { "epoch": 0.47, "learning_rate": 1.1455117411906604e-05, "loss": 0.3024, "step": 5482 }, { "epoch": 0.47, "learning_rate": 1.145237066517057e-05, "loss": 0.3093, "step": 5483 }, { "epoch": 0.47, "learning_rate": 1.1449623806492977e-05, "loss": 0.296, "step": 5484 }, { "epoch": 0.47, "learning_rate": 1.1446876836085546e-05, "loss": 0.2818, "step": 5485 }, { "epoch": 0.47, "learning_rate": 1.1444129754159998e-05, "loss": 0.2956, "step": 5486 }, { "epoch": 0.47, "learning_rate": 1.1441382560928063e-05, "loss": 0.2988, "step": 5487 }, { "epoch": 0.47, "learning_rate": 1.1438635256601484e-05, "loss": 0.3042, "step": 5488 }, { "epoch": 0.47, "learning_rate": 1.1435887841392009e-05, "loss": 0.3234, "step": 5489 }, { "epoch": 0.47, "learning_rate": 1.1433140315511392e-05, "loss": 0.2617, "step": 5490 }, { "epoch": 0.47, "learning_rate": 1.14303926791714e-05, "loss": 0.3085, "step": 5491 }, { "epoch": 0.47, "learning_rate": 1.142764493258381e-05, "loss": 0.2785, "step": 5492 }, { "epoch": 0.47, "learning_rate": 1.1424897075960402e-05, "loss": 0.2516, "step": 5493 }, { "epoch": 0.47, "learning_rate": 1.1422149109512967e-05, "loss": 0.3103, "step": 5494 }, { "epoch": 0.47, "learning_rate": 1.1419401033453308e-05, "loss": 0.2817, "step": 5495 }, { "epoch": 0.47, "learning_rate": 1.1416652847993231e-05, "loss": 0.2701, "step": 5496 }, { "epoch": 0.47, "learning_rate": 1.1413904553344551e-05, "loss": 0.2699, "step": 5497 }, { "epoch": 0.47, "learning_rate": 1.1411156149719094e-05, "loss": 0.6191, "step": 5498 }, { "epoch": 0.47, "learning_rate": 1.1408407637328694e-05, "loss": 0.2776, "step": 5499 }, { "epoch": 0.47, "learning_rate": 1.1405659016385191e-05, "loss": 0.2715, "step": 5500 }, { "epoch": 0.47, "learning_rate": 1.1402910287100436e-05, "loss": 0.2797, "step": 5501 }, { "epoch": 0.47, "learning_rate": 1.1400161449686293e-05, "loss": 0.2726, "step": 5502 }, { "epoch": 0.47, "learning_rate": 1.1397412504354621e-05, "loss": 0.3041, "step": 5503 }, { "epoch": 0.47, "learning_rate": 1.1394663451317296e-05, "loss": 0.3093, "step": 5504 }, { "epoch": 0.47, "learning_rate": 1.1391914290786206e-05, "loss": 0.2958, "step": 5505 }, { "epoch": 0.47, "learning_rate": 1.1389165022973238e-05, "loss": 0.2703, "step": 5506 }, { "epoch": 0.47, "learning_rate": 1.1386415648090296e-05, "loss": 0.2881, "step": 5507 }, { "epoch": 0.47, "learning_rate": 1.1383666166349286e-05, "loss": 0.3035, "step": 5508 }, { "epoch": 0.47, "learning_rate": 1.1380916577962127e-05, "loss": 0.3101, "step": 5509 }, { "epoch": 0.47, "learning_rate": 1.1378166883140738e-05, "loss": 0.3297, "step": 5510 }, { "epoch": 0.47, "learning_rate": 1.1375417082097055e-05, "loss": 0.2722, "step": 5511 }, { "epoch": 0.47, "learning_rate": 1.1372667175043021e-05, "loss": 0.3062, "step": 5512 }, { "epoch": 0.47, "learning_rate": 1.1369917162190585e-05, "loss": 0.2759, "step": 5513 }, { "epoch": 0.47, "learning_rate": 1.1367167043751701e-05, "loss": 0.2617, "step": 5514 }, { "epoch": 0.47, "learning_rate": 1.1364416819938338e-05, "loss": 0.2924, "step": 5515 }, { "epoch": 0.47, "learning_rate": 1.1361666490962468e-05, "loss": 0.2826, "step": 5516 }, { "epoch": 0.47, "learning_rate": 1.1358916057036074e-05, "loss": 0.2865, "step": 5517 }, { "epoch": 0.47, "learning_rate": 1.1356165518371142e-05, "loss": 0.2792, "step": 5518 }, { "epoch": 0.47, "learning_rate": 1.1353414875179673e-05, "loss": 0.2476, "step": 5519 }, { "epoch": 0.47, "learning_rate": 1.1350664127673675e-05, "loss": 0.3047, "step": 5520 }, { "epoch": 0.47, "learning_rate": 1.1347913276065162e-05, "loss": 0.2776, "step": 5521 }, { "epoch": 0.47, "learning_rate": 1.134516232056615e-05, "loss": 0.261, "step": 5522 }, { "epoch": 0.47, "learning_rate": 1.1342411261388672e-05, "loss": 0.3355, "step": 5523 }, { "epoch": 0.47, "learning_rate": 1.133966009874477e-05, "loss": 0.2893, "step": 5524 }, { "epoch": 0.47, "learning_rate": 1.1336908832846485e-05, "loss": 0.2431, "step": 5525 }, { "epoch": 0.47, "learning_rate": 1.1334157463905876e-05, "loss": 0.3019, "step": 5526 }, { "epoch": 0.47, "learning_rate": 1.1331405992134999e-05, "loss": 0.2971, "step": 5527 }, { "epoch": 0.47, "learning_rate": 1.1328654417745931e-05, "loss": 0.2949, "step": 5528 }, { "epoch": 0.47, "learning_rate": 1.132590274095074e-05, "loss": 0.5835, "step": 5529 }, { "epoch": 0.47, "learning_rate": 1.132315096196152e-05, "loss": 0.27, "step": 5530 }, { "epoch": 0.47, "learning_rate": 1.1320399080990361e-05, "loss": 0.3137, "step": 5531 }, { "epoch": 0.47, "learning_rate": 1.1317647098249364e-05, "loss": 0.2603, "step": 5532 }, { "epoch": 0.47, "learning_rate": 1.1314895013950645e-05, "loss": 0.2945, "step": 5533 }, { "epoch": 0.47, "learning_rate": 1.1312142828306309e-05, "loss": 0.2939, "step": 5534 }, { "epoch": 0.47, "learning_rate": 1.1309390541528492e-05, "loss": 0.3442, "step": 5535 }, { "epoch": 0.47, "learning_rate": 1.130663815382932e-05, "loss": 0.3185, "step": 5536 }, { "epoch": 0.47, "learning_rate": 1.1303885665420932e-05, "loss": 0.2907, "step": 5537 }, { "epoch": 0.47, "learning_rate": 1.1301133076515482e-05, "loss": 0.2894, "step": 5538 }, { "epoch": 0.47, "learning_rate": 1.1298380387325124e-05, "loss": 0.257, "step": 5539 }, { "epoch": 0.47, "learning_rate": 1.1295627598062025e-05, "loss": 0.2973, "step": 5540 }, { "epoch": 0.47, "learning_rate": 1.1292874708938346e-05, "loss": 0.2645, "step": 5541 }, { "epoch": 0.48, "learning_rate": 1.1290121720166277e-05, "loss": 0.2813, "step": 5542 }, { "epoch": 0.48, "learning_rate": 1.1287368631957998e-05, "loss": 0.2355, "step": 5543 }, { "epoch": 0.48, "learning_rate": 1.1284615444525706e-05, "loss": 0.2487, "step": 5544 }, { "epoch": 0.48, "learning_rate": 1.1281862158081605e-05, "loss": 0.2957, "step": 5545 }, { "epoch": 0.48, "learning_rate": 1.1279108772837901e-05, "loss": 0.3004, "step": 5546 }, { "epoch": 0.48, "learning_rate": 1.1276355289006818e-05, "loss": 0.3286, "step": 5547 }, { "epoch": 0.48, "learning_rate": 1.127360170680057e-05, "loss": 0.2992, "step": 5548 }, { "epoch": 0.48, "learning_rate": 1.1270848026431396e-05, "loss": 0.5975, "step": 5549 }, { "epoch": 0.48, "learning_rate": 1.1268094248111536e-05, "loss": 0.2502, "step": 5550 }, { "epoch": 0.48, "learning_rate": 1.1265340372053237e-05, "loss": 0.3278, "step": 5551 }, { "epoch": 0.48, "learning_rate": 1.1262586398468759e-05, "loss": 0.3055, "step": 5552 }, { "epoch": 0.48, "learning_rate": 1.1259832327570354e-05, "loss": 0.2346, "step": 5553 }, { "epoch": 0.48, "learning_rate": 1.1257078159570303e-05, "loss": 0.3109, "step": 5554 }, { "epoch": 0.48, "learning_rate": 1.1254323894680876e-05, "loss": 0.2964, "step": 5555 }, { "epoch": 0.48, "learning_rate": 1.125156953311436e-05, "loss": 0.2648, "step": 5556 }, { "epoch": 0.48, "learning_rate": 1.1248815075083051e-05, "loss": 0.269, "step": 5557 }, { "epoch": 0.48, "learning_rate": 1.1246060520799244e-05, "loss": 0.2689, "step": 5558 }, { "epoch": 0.48, "learning_rate": 1.1243305870475255e-05, "loss": 0.305, "step": 5559 }, { "epoch": 0.48, "learning_rate": 1.1240551124323386e-05, "loss": 0.2968, "step": 5560 }, { "epoch": 0.48, "learning_rate": 1.1237796282555968e-05, "loss": 0.2881, "step": 5561 }, { "epoch": 0.48, "learning_rate": 1.1235041345385328e-05, "loss": 0.2873, "step": 5562 }, { "epoch": 0.48, "learning_rate": 1.1232286313023807e-05, "loss": 0.3195, "step": 5563 }, { "epoch": 0.48, "learning_rate": 1.1229531185683743e-05, "loss": 0.3179, "step": 5564 }, { "epoch": 0.48, "learning_rate": 1.1226775963577492e-05, "loss": 0.2891, "step": 5565 }, { "epoch": 0.48, "learning_rate": 1.1224020646917413e-05, "loss": 0.3074, "step": 5566 }, { "epoch": 0.48, "learning_rate": 1.1221265235915865e-05, "loss": 0.2572, "step": 5567 }, { "epoch": 0.48, "learning_rate": 1.1218509730785231e-05, "loss": 0.2892, "step": 5568 }, { "epoch": 0.48, "learning_rate": 1.1215754131737889e-05, "loss": 0.303, "step": 5569 }, { "epoch": 0.48, "learning_rate": 1.1212998438986223e-05, "loss": 0.2982, "step": 5570 }, { "epoch": 0.48, "learning_rate": 1.1210242652742632e-05, "loss": 0.2475, "step": 5571 }, { "epoch": 0.48, "learning_rate": 1.1207486773219515e-05, "loss": 0.2832, "step": 5572 }, { "epoch": 0.48, "learning_rate": 1.1204730800629289e-05, "loss": 0.2927, "step": 5573 }, { "epoch": 0.48, "learning_rate": 1.1201974735184362e-05, "loss": 0.2658, "step": 5574 }, { "epoch": 0.48, "learning_rate": 1.1199218577097163e-05, "loss": 0.3198, "step": 5575 }, { "epoch": 0.48, "learning_rate": 1.119646232658012e-05, "loss": 0.2575, "step": 5576 }, { "epoch": 0.48, "learning_rate": 1.1193705983845673e-05, "loss": 0.3147, "step": 5577 }, { "epoch": 0.48, "learning_rate": 1.119094954910627e-05, "loss": 0.3093, "step": 5578 }, { "epoch": 0.48, "learning_rate": 1.1188193022574356e-05, "loss": 0.2626, "step": 5579 }, { "epoch": 0.48, "learning_rate": 1.1185436404462398e-05, "loss": 0.3241, "step": 5580 }, { "epoch": 0.48, "learning_rate": 1.1182679694982857e-05, "loss": 0.2479, "step": 5581 }, { "epoch": 0.48, "learning_rate": 1.1179922894348207e-05, "loss": 0.2788, "step": 5582 }, { "epoch": 0.48, "learning_rate": 1.1177166002770937e-05, "loss": 0.3054, "step": 5583 }, { "epoch": 0.48, "learning_rate": 1.1174409020463524e-05, "loss": 0.5701, "step": 5584 }, { "epoch": 0.48, "learning_rate": 1.1171651947638468e-05, "loss": 0.2875, "step": 5585 }, { "epoch": 0.48, "learning_rate": 1.1168894784508268e-05, "loss": 0.2573, "step": 5586 }, { "epoch": 0.48, "learning_rate": 1.1166137531285435e-05, "loss": 0.2822, "step": 5587 }, { "epoch": 0.48, "learning_rate": 1.1163380188182482e-05, "loss": 0.2219, "step": 5588 }, { "epoch": 0.48, "learning_rate": 1.1160622755411932e-05, "loss": 0.2692, "step": 5589 }, { "epoch": 0.48, "learning_rate": 1.1157865233186315e-05, "loss": 0.3212, "step": 5590 }, { "epoch": 0.48, "learning_rate": 1.1155107621718168e-05, "loss": 0.304, "step": 5591 }, { "epoch": 0.48, "learning_rate": 1.1152349921220036e-05, "loss": 0.302, "step": 5592 }, { "epoch": 0.48, "learning_rate": 1.114959213190446e-05, "loss": 0.2794, "step": 5593 }, { "epoch": 0.48, "learning_rate": 1.1146834253984008e-05, "loss": 0.2825, "step": 5594 }, { "epoch": 0.48, "learning_rate": 1.1144076287671232e-05, "loss": 0.2881, "step": 5595 }, { "epoch": 0.48, "learning_rate": 1.1141318233178713e-05, "loss": 0.6, "step": 5596 }, { "epoch": 0.48, "learning_rate": 1.1138560090719025e-05, "loss": 0.3169, "step": 5597 }, { "epoch": 0.48, "learning_rate": 1.113580186050475e-05, "loss": 0.3161, "step": 5598 }, { "epoch": 0.48, "learning_rate": 1.1133043542748481e-05, "loss": 0.3279, "step": 5599 }, { "epoch": 0.48, "learning_rate": 1.1130285137662811e-05, "loss": 0.2771, "step": 5600 }, { "epoch": 0.48, "learning_rate": 1.1127526645460347e-05, "loss": 0.3382, "step": 5601 }, { "epoch": 0.48, "learning_rate": 1.1124768066353705e-05, "loss": 0.3408, "step": 5602 }, { "epoch": 0.48, "learning_rate": 1.1122009400555495e-05, "loss": 0.3117, "step": 5603 }, { "epoch": 0.48, "learning_rate": 1.1119250648278345e-05, "loss": 0.2899, "step": 5604 }, { "epoch": 0.48, "learning_rate": 1.1116491809734886e-05, "loss": 0.3174, "step": 5605 }, { "epoch": 0.48, "learning_rate": 1.1113732885137755e-05, "loss": 0.2377, "step": 5606 }, { "epoch": 0.48, "learning_rate": 1.1110973874699595e-05, "loss": 0.2486, "step": 5607 }, { "epoch": 0.48, "learning_rate": 1.110821477863306e-05, "loss": 0.2451, "step": 5608 }, { "epoch": 0.48, "learning_rate": 1.1105455597150805e-05, "loss": 0.3058, "step": 5609 }, { "epoch": 0.48, "learning_rate": 1.1102696330465495e-05, "loss": 0.3296, "step": 5610 }, { "epoch": 0.48, "learning_rate": 1.10999369787898e-05, "loss": 0.2969, "step": 5611 }, { "epoch": 0.48, "learning_rate": 1.1097177542336399e-05, "loss": 0.2857, "step": 5612 }, { "epoch": 0.48, "learning_rate": 1.1094418021317972e-05, "loss": 0.2979, "step": 5613 }, { "epoch": 0.48, "learning_rate": 1.109165841594721e-05, "loss": 0.2902, "step": 5614 }, { "epoch": 0.48, "learning_rate": 1.1088898726436814e-05, "loss": 0.2621, "step": 5615 }, { "epoch": 0.48, "learning_rate": 1.1086138952999487e-05, "loss": 0.2905, "step": 5616 }, { "epoch": 0.48, "learning_rate": 1.1083379095847933e-05, "loss": 0.2766, "step": 5617 }, { "epoch": 0.48, "learning_rate": 1.1080619155194873e-05, "loss": 0.2778, "step": 5618 }, { "epoch": 0.48, "learning_rate": 1.1077859131253026e-05, "loss": 0.3027, "step": 5619 }, { "epoch": 0.48, "learning_rate": 1.1075099024235123e-05, "loss": 0.2944, "step": 5620 }, { "epoch": 0.48, "learning_rate": 1.1072338834353902e-05, "loss": 0.298, "step": 5621 }, { "epoch": 0.48, "learning_rate": 1.10695785618221e-05, "loss": 0.3042, "step": 5622 }, { "epoch": 0.48, "learning_rate": 1.1066818206852472e-05, "loss": 0.35, "step": 5623 }, { "epoch": 0.48, "learning_rate": 1.1064057769657764e-05, "loss": 0.2962, "step": 5624 }, { "epoch": 0.48, "learning_rate": 1.106129725045074e-05, "loss": 0.3158, "step": 5625 }, { "epoch": 0.48, "learning_rate": 1.1058536649444167e-05, "loss": 0.2773, "step": 5626 }, { "epoch": 0.48, "learning_rate": 1.1055775966850821e-05, "loss": 0.3075, "step": 5627 }, { "epoch": 0.48, "learning_rate": 1.1053015202883483e-05, "loss": 0.2625, "step": 5628 }, { "epoch": 0.48, "learning_rate": 1.1050254357754932e-05, "loss": 0.2972, "step": 5629 }, { "epoch": 0.48, "learning_rate": 1.104749343167797e-05, "loss": 0.2762, "step": 5630 }, { "epoch": 0.48, "learning_rate": 1.1044732424865386e-05, "loss": 0.2307, "step": 5631 }, { "epoch": 0.48, "learning_rate": 1.104197133752999e-05, "loss": 0.3394, "step": 5632 }, { "epoch": 0.48, "learning_rate": 1.1039210169884589e-05, "loss": 0.3195, "step": 5633 }, { "epoch": 0.48, "learning_rate": 1.1036448922142004e-05, "loss": 0.3192, "step": 5634 }, { "epoch": 0.48, "learning_rate": 1.1033687594515062e-05, "loss": 0.3105, "step": 5635 }, { "epoch": 0.48, "learning_rate": 1.1030926187216581e-05, "loss": 0.2896, "step": 5636 }, { "epoch": 0.48, "learning_rate": 1.1028164700459409e-05, "loss": 0.2466, "step": 5637 }, { "epoch": 0.48, "learning_rate": 1.1025403134456378e-05, "loss": 0.2784, "step": 5638 }, { "epoch": 0.48, "learning_rate": 1.1022641489420342e-05, "loss": 0.2577, "step": 5639 }, { "epoch": 0.48, "learning_rate": 1.1019879765564155e-05, "loss": 0.2577, "step": 5640 }, { "epoch": 0.48, "learning_rate": 1.1017117963100672e-05, "loss": 0.2607, "step": 5641 }, { "epoch": 0.48, "learning_rate": 1.1014356082242766e-05, "loss": 0.3171, "step": 5642 }, { "epoch": 0.48, "learning_rate": 1.1011594123203302e-05, "loss": 0.2711, "step": 5643 }, { "epoch": 0.48, "learning_rate": 1.100883208619516e-05, "loss": 0.2964, "step": 5644 }, { "epoch": 0.48, "learning_rate": 1.1006069971431228e-05, "loss": 0.282, "step": 5645 }, { "epoch": 0.48, "learning_rate": 1.1003307779124392e-05, "loss": 0.2993, "step": 5646 }, { "epoch": 0.48, "learning_rate": 1.1000545509487555e-05, "loss": 0.3268, "step": 5647 }, { "epoch": 0.48, "learning_rate": 1.0997783162733608e-05, "loss": 0.296, "step": 5648 }, { "epoch": 0.48, "learning_rate": 1.0995020739075468e-05, "loss": 0.2925, "step": 5649 }, { "epoch": 0.48, "learning_rate": 1.0992258238726046e-05, "loss": 0.2502, "step": 5650 }, { "epoch": 0.48, "learning_rate": 1.0989495661898259e-05, "loss": 0.3224, "step": 5651 }, { "epoch": 0.48, "learning_rate": 1.0986733008805038e-05, "loss": 0.27, "step": 5652 }, { "epoch": 0.48, "learning_rate": 1.0983970279659311e-05, "loss": 0.3111, "step": 5653 }, { "epoch": 0.48, "learning_rate": 1.0981207474674021e-05, "loss": 0.2794, "step": 5654 }, { "epoch": 0.48, "learning_rate": 1.0978444594062104e-05, "loss": 0.2609, "step": 5655 }, { "epoch": 0.48, "learning_rate": 1.0975681638036513e-05, "loss": 0.2902, "step": 5656 }, { "epoch": 0.48, "learning_rate": 1.0972918606810198e-05, "loss": 0.2939, "step": 5657 }, { "epoch": 0.48, "learning_rate": 1.0970155500596127e-05, "loss": 0.3184, "step": 5658 }, { "epoch": 0.49, "learning_rate": 1.0967392319607267e-05, "loss": 0.2737, "step": 5659 }, { "epoch": 0.49, "learning_rate": 1.0964629064056583e-05, "loss": 0.3092, "step": 5660 }, { "epoch": 0.49, "learning_rate": 1.096186573415706e-05, "loss": 0.3104, "step": 5661 }, { "epoch": 0.49, "learning_rate": 1.0959102330121676e-05, "loss": 0.2681, "step": 5662 }, { "epoch": 0.49, "learning_rate": 1.0956338852163424e-05, "loss": 0.2479, "step": 5663 }, { "epoch": 0.49, "learning_rate": 1.0953575300495299e-05, "loss": 0.3253, "step": 5664 }, { "epoch": 0.49, "learning_rate": 1.0950811675330303e-05, "loss": 0.3229, "step": 5665 }, { "epoch": 0.49, "learning_rate": 1.0948047976881439e-05, "loss": 0.2815, "step": 5666 }, { "epoch": 0.49, "learning_rate": 1.0945284205361723e-05, "loss": 0.2849, "step": 5667 }, { "epoch": 0.49, "learning_rate": 1.0942520360984172e-05, "loss": 0.2917, "step": 5668 }, { "epoch": 0.49, "learning_rate": 1.0939756443961809e-05, "loss": 0.3042, "step": 5669 }, { "epoch": 0.49, "learning_rate": 1.093699245450766e-05, "loss": 0.2601, "step": 5670 }, { "epoch": 0.49, "learning_rate": 1.0934228392834763e-05, "loss": 0.2871, "step": 5671 }, { "epoch": 0.49, "learning_rate": 1.0931464259156158e-05, "loss": 0.2992, "step": 5672 }, { "epoch": 0.49, "learning_rate": 1.0928700053684893e-05, "loss": 0.2832, "step": 5673 }, { "epoch": 0.49, "learning_rate": 1.0925935776634014e-05, "loss": 0.2737, "step": 5674 }, { "epoch": 0.49, "learning_rate": 1.0923171428216581e-05, "loss": 0.2444, "step": 5675 }, { "epoch": 0.49, "learning_rate": 1.0920407008645656e-05, "loss": 0.3256, "step": 5676 }, { "epoch": 0.49, "learning_rate": 1.0917642518134304e-05, "loss": 0.225, "step": 5677 }, { "epoch": 0.49, "learning_rate": 1.0914877956895604e-05, "loss": 0.2691, "step": 5678 }, { "epoch": 0.49, "learning_rate": 1.0912113325142632e-05, "loss": 0.2818, "step": 5679 }, { "epoch": 0.49, "learning_rate": 1.0909348623088472e-05, "loss": 0.3021, "step": 5680 }, { "epoch": 0.49, "learning_rate": 1.090658385094621e-05, "loss": 0.311, "step": 5681 }, { "epoch": 0.49, "learning_rate": 1.0903819008928948e-05, "loss": 0.2844, "step": 5682 }, { "epoch": 0.49, "learning_rate": 1.090105409724978e-05, "loss": 0.2791, "step": 5683 }, { "epoch": 0.49, "learning_rate": 1.0898289116121817e-05, "loss": 0.2706, "step": 5684 }, { "epoch": 0.49, "learning_rate": 1.0895524065758164e-05, "loss": 0.3007, "step": 5685 }, { "epoch": 0.49, "learning_rate": 1.0892758946371943e-05, "loss": 0.2937, "step": 5686 }, { "epoch": 0.49, "learning_rate": 1.0889993758176276e-05, "loss": 0.307, "step": 5687 }, { "epoch": 0.49, "learning_rate": 1.0887228501384287e-05, "loss": 0.3426, "step": 5688 }, { "epoch": 0.49, "learning_rate": 1.0884463176209105e-05, "loss": 0.2866, "step": 5689 }, { "epoch": 0.49, "learning_rate": 1.0881697782863874e-05, "loss": 0.2861, "step": 5690 }, { "epoch": 0.49, "learning_rate": 1.0878932321561734e-05, "loss": 0.2752, "step": 5691 }, { "epoch": 0.49, "learning_rate": 1.0876166792515836e-05, "loss": 0.2538, "step": 5692 }, { "epoch": 0.49, "learning_rate": 1.0873401195939328e-05, "loss": 0.2941, "step": 5693 }, { "epoch": 0.49, "learning_rate": 1.0870635532045375e-05, "loss": 0.3277, "step": 5694 }, { "epoch": 0.49, "learning_rate": 1.086786980104713e-05, "loss": 0.2917, "step": 5695 }, { "epoch": 0.49, "learning_rate": 1.0865104003157774e-05, "loss": 0.278, "step": 5696 }, { "epoch": 0.49, "learning_rate": 1.0862338138590479e-05, "loss": 0.2872, "step": 5697 }, { "epoch": 0.49, "learning_rate": 1.0859572207558416e-05, "loss": 0.3156, "step": 5698 }, { "epoch": 0.49, "learning_rate": 1.085680621027478e-05, "loss": 0.2991, "step": 5699 }, { "epoch": 0.49, "learning_rate": 1.085404014695275e-05, "loss": 0.2949, "step": 5700 }, { "epoch": 0.49, "learning_rate": 1.0851274017805525e-05, "loss": 0.2551, "step": 5701 }, { "epoch": 0.49, "learning_rate": 1.0848507823046306e-05, "loss": 0.3223, "step": 5702 }, { "epoch": 0.49, "learning_rate": 1.0845741562888297e-05, "loss": 0.292, "step": 5703 }, { "epoch": 0.49, "learning_rate": 1.084297523754471e-05, "loss": 0.2451, "step": 5704 }, { "epoch": 0.49, "learning_rate": 1.0840208847228753e-05, "loss": 0.3699, "step": 5705 }, { "epoch": 0.49, "learning_rate": 1.0837442392153651e-05, "loss": 0.2649, "step": 5706 }, { "epoch": 0.49, "learning_rate": 1.083467587253263e-05, "loss": 0.2656, "step": 5707 }, { "epoch": 0.49, "learning_rate": 1.0831909288578913e-05, "loss": 0.2858, "step": 5708 }, { "epoch": 0.49, "learning_rate": 1.082914264050574e-05, "loss": 0.314, "step": 5709 }, { "epoch": 0.49, "learning_rate": 1.082637592852635e-05, "loss": 0.2957, "step": 5710 }, { "epoch": 0.49, "learning_rate": 1.0823609152853987e-05, "loss": 0.2497, "step": 5711 }, { "epoch": 0.49, "learning_rate": 1.0820842313701898e-05, "loss": 0.3034, "step": 5712 }, { "epoch": 0.49, "learning_rate": 1.0818075411283341e-05, "loss": 0.2827, "step": 5713 }, { "epoch": 0.49, "learning_rate": 1.081530844581157e-05, "loss": 0.2714, "step": 5714 }, { "epoch": 0.49, "learning_rate": 1.0812541417499855e-05, "loss": 0.2974, "step": 5715 }, { "epoch": 0.49, "learning_rate": 1.0809774326561464e-05, "loss": 0.2956, "step": 5716 }, { "epoch": 0.49, "learning_rate": 1.080700717320967e-05, "loss": 0.2864, "step": 5717 }, { "epoch": 0.49, "learning_rate": 1.080423995765775e-05, "loss": 0.3052, "step": 5718 }, { "epoch": 0.49, "learning_rate": 1.0801472680118984e-05, "loss": 0.3207, "step": 5719 }, { "epoch": 0.49, "learning_rate": 1.0798705340806668e-05, "loss": 0.2517, "step": 5720 }, { "epoch": 0.49, "learning_rate": 1.0795937939934088e-05, "loss": 0.28, "step": 5721 }, { "epoch": 0.49, "learning_rate": 1.0793170477714546e-05, "loss": 0.2748, "step": 5722 }, { "epoch": 0.49, "learning_rate": 1.0790402954361344e-05, "loss": 0.2958, "step": 5723 }, { "epoch": 0.49, "learning_rate": 1.0787635370087786e-05, "loss": 0.2582, "step": 5724 }, { "epoch": 0.49, "learning_rate": 1.0784867725107187e-05, "loss": 0.3481, "step": 5725 }, { "epoch": 0.49, "learning_rate": 1.078210001963286e-05, "loss": 0.2842, "step": 5726 }, { "epoch": 0.49, "learning_rate": 1.0779332253878127e-05, "loss": 0.2576, "step": 5727 }, { "epoch": 0.49, "learning_rate": 1.0776564428056317e-05, "loss": 0.2343, "step": 5728 }, { "epoch": 0.49, "learning_rate": 1.0773796542380757e-05, "loss": 0.2874, "step": 5729 }, { "epoch": 0.49, "learning_rate": 1.0771028597064785e-05, "loss": 0.2637, "step": 5730 }, { "epoch": 0.49, "learning_rate": 1.0768260592321735e-05, "loss": 0.2996, "step": 5731 }, { "epoch": 0.49, "learning_rate": 1.076549252836496e-05, "loss": 0.3318, "step": 5732 }, { "epoch": 0.49, "learning_rate": 1.0762724405407795e-05, "loss": 0.5643, "step": 5733 }, { "epoch": 0.49, "learning_rate": 1.0759956223663608e-05, "loss": 0.2958, "step": 5734 }, { "epoch": 0.49, "learning_rate": 1.075718798334575e-05, "loss": 0.2444, "step": 5735 }, { "epoch": 0.49, "learning_rate": 1.075441968466758e-05, "loss": 0.6011, "step": 5736 }, { "epoch": 0.49, "learning_rate": 1.0751651327842474e-05, "loss": 0.2748, "step": 5737 }, { "epoch": 0.49, "learning_rate": 1.0748882913083794e-05, "loss": 0.2813, "step": 5738 }, { "epoch": 0.49, "learning_rate": 1.074611444060492e-05, "loss": 0.3068, "step": 5739 }, { "epoch": 0.49, "learning_rate": 1.074334591061923e-05, "loss": 0.2938, "step": 5740 }, { "epoch": 0.49, "learning_rate": 1.0740577323340112e-05, "loss": 0.2624, "step": 5741 }, { "epoch": 0.49, "learning_rate": 1.0737808678980954e-05, "loss": 0.3124, "step": 5742 }, { "epoch": 0.49, "learning_rate": 1.0735039977755147e-05, "loss": 0.2632, "step": 5743 }, { "epoch": 0.49, "learning_rate": 1.0732271219876092e-05, "loss": 0.2575, "step": 5744 }, { "epoch": 0.49, "learning_rate": 1.0729502405557188e-05, "loss": 0.3065, "step": 5745 }, { "epoch": 0.49, "learning_rate": 1.0726733535011844e-05, "loss": 0.2885, "step": 5746 }, { "epoch": 0.49, "learning_rate": 1.0723964608453467e-05, "loss": 0.2744, "step": 5747 }, { "epoch": 0.49, "learning_rate": 1.0721195626095477e-05, "loss": 0.2945, "step": 5748 }, { "epoch": 0.49, "learning_rate": 1.0718426588151296e-05, "loss": 0.2759, "step": 5749 }, { "epoch": 0.49, "learning_rate": 1.071565749483434e-05, "loss": 0.3277, "step": 5750 }, { "epoch": 0.49, "learning_rate": 1.0712888346358041e-05, "loss": 0.2669, "step": 5751 }, { "epoch": 0.49, "learning_rate": 1.0710119142935829e-05, "loss": 0.2228, "step": 5752 }, { "epoch": 0.49, "learning_rate": 1.0707349884781142e-05, "loss": 0.2715, "step": 5753 }, { "epoch": 0.49, "learning_rate": 1.0704580572107424e-05, "loss": 0.3269, "step": 5754 }, { "epoch": 0.49, "learning_rate": 1.0701811205128115e-05, "loss": 0.2429, "step": 5755 }, { "epoch": 0.49, "learning_rate": 1.0699041784056667e-05, "loss": 0.311, "step": 5756 }, { "epoch": 0.49, "learning_rate": 1.0696272309106532e-05, "loss": 0.2752, "step": 5757 }, { "epoch": 0.49, "learning_rate": 1.0693502780491168e-05, "loss": 0.2734, "step": 5758 }, { "epoch": 0.49, "learning_rate": 1.0690733198424035e-05, "loss": 0.3068, "step": 5759 }, { "epoch": 0.49, "learning_rate": 1.06879635631186e-05, "loss": 0.3011, "step": 5760 }, { "epoch": 0.49, "learning_rate": 1.0685193874788335e-05, "loss": 0.2622, "step": 5761 }, { "epoch": 0.49, "learning_rate": 1.0682424133646712e-05, "loss": 0.2632, "step": 5762 }, { "epoch": 0.49, "learning_rate": 1.0679654339907208e-05, "loss": 0.2817, "step": 5763 }, { "epoch": 0.49, "learning_rate": 1.0676884493783304e-05, "loss": 0.62, "step": 5764 }, { "epoch": 0.49, "learning_rate": 1.0674114595488489e-05, "loss": 0.2929, "step": 5765 }, { "epoch": 0.49, "learning_rate": 1.0671344645236253e-05, "loss": 0.3076, "step": 5766 }, { "epoch": 0.49, "learning_rate": 1.0668574643240087e-05, "loss": 0.2897, "step": 5767 }, { "epoch": 0.49, "learning_rate": 1.0665804589713494e-05, "loss": 0.3101, "step": 5768 }, { "epoch": 0.49, "learning_rate": 1.066303448486997e-05, "loss": 0.3101, "step": 5769 }, { "epoch": 0.49, "learning_rate": 1.0660264328923024e-05, "loss": 0.2764, "step": 5770 }, { "epoch": 0.49, "learning_rate": 1.0657494122086165e-05, "loss": 0.2588, "step": 5771 }, { "epoch": 0.49, "learning_rate": 1.0654723864572909e-05, "loss": 0.2718, "step": 5772 }, { "epoch": 0.49, "learning_rate": 1.0651953556596777e-05, "loss": 0.3024, "step": 5773 }, { "epoch": 0.49, "learning_rate": 1.064918319837128e-05, "loss": 0.2729, "step": 5774 }, { "epoch": 0.5, "learning_rate": 1.0646412790109954e-05, "loss": 0.2581, "step": 5775 }, { "epoch": 0.5, "learning_rate": 1.0643642332026323e-05, "loss": 0.3369, "step": 5776 }, { "epoch": 0.5, "learning_rate": 1.064087182433392e-05, "loss": 0.2566, "step": 5777 }, { "epoch": 0.5, "learning_rate": 1.0638101267246283e-05, "loss": 0.2369, "step": 5778 }, { "epoch": 0.5, "learning_rate": 1.0635330660976955e-05, "loss": 0.2759, "step": 5779 }, { "epoch": 0.5, "learning_rate": 1.0632560005739481e-05, "loss": 0.3203, "step": 5780 }, { "epoch": 0.5, "learning_rate": 1.0629789301747404e-05, "loss": 0.2769, "step": 5781 }, { "epoch": 0.5, "learning_rate": 1.0627018549214284e-05, "loss": 0.2582, "step": 5782 }, { "epoch": 0.5, "learning_rate": 1.0624247748353666e-05, "loss": 0.2404, "step": 5783 }, { "epoch": 0.5, "learning_rate": 1.062147689937912e-05, "loss": 0.3202, "step": 5784 }, { "epoch": 0.5, "learning_rate": 1.0618706002504202e-05, "loss": 0.2665, "step": 5785 }, { "epoch": 0.5, "learning_rate": 1.0615935057942485e-05, "loss": 0.3236, "step": 5786 }, { "epoch": 0.5, "learning_rate": 1.0613164065907539e-05, "loss": 0.2955, "step": 5787 }, { "epoch": 0.5, "learning_rate": 1.0610393026612933e-05, "loss": 0.6254, "step": 5788 }, { "epoch": 0.5, "learning_rate": 1.0607621940272253e-05, "loss": 0.2924, "step": 5789 }, { "epoch": 0.5, "learning_rate": 1.060485080709907e-05, "loss": 0.3092, "step": 5790 }, { "epoch": 0.5, "learning_rate": 1.0602079627306979e-05, "loss": 0.265, "step": 5791 }, { "epoch": 0.5, "learning_rate": 1.0599308401109564e-05, "loss": 0.2678, "step": 5792 }, { "epoch": 0.5, "learning_rate": 1.0596537128720421e-05, "loss": 0.3041, "step": 5793 }, { "epoch": 0.5, "learning_rate": 1.0593765810353142e-05, "loss": 0.2837, "step": 5794 }, { "epoch": 0.5, "learning_rate": 1.059099444622133e-05, "loss": 0.2853, "step": 5795 }, { "epoch": 0.5, "learning_rate": 1.0588223036538583e-05, "loss": 0.2825, "step": 5796 }, { "epoch": 0.5, "learning_rate": 1.0585451581518513e-05, "loss": 0.2611, "step": 5797 }, { "epoch": 0.5, "learning_rate": 1.0582680081374728e-05, "loss": 0.3231, "step": 5798 }, { "epoch": 0.5, "learning_rate": 1.0579908536320842e-05, "loss": 0.3279, "step": 5799 }, { "epoch": 0.5, "learning_rate": 1.057713694657047e-05, "loss": 0.2585, "step": 5800 }, { "epoch": 0.5, "learning_rate": 1.0574365312337235e-05, "loss": 0.2878, "step": 5801 }, { "epoch": 0.5, "learning_rate": 1.0571593633834758e-05, "loss": 0.2378, "step": 5802 }, { "epoch": 0.5, "learning_rate": 1.056882191127667e-05, "loss": 0.294, "step": 5803 }, { "epoch": 0.5, "learning_rate": 1.0566050144876599e-05, "loss": 0.296, "step": 5804 }, { "epoch": 0.5, "learning_rate": 1.0563278334848178e-05, "loss": 0.2853, "step": 5805 }, { "epoch": 0.5, "learning_rate": 1.0560506481405048e-05, "loss": 0.3073, "step": 5806 }, { "epoch": 0.5, "learning_rate": 1.0557734584760849e-05, "loss": 0.2751, "step": 5807 }, { "epoch": 0.5, "learning_rate": 1.0554962645129223e-05, "loss": 0.2862, "step": 5808 }, { "epoch": 0.5, "learning_rate": 1.0552190662723816e-05, "loss": 0.287, "step": 5809 }, { "epoch": 0.5, "learning_rate": 1.0549418637758284e-05, "loss": 0.2589, "step": 5810 }, { "epoch": 0.5, "learning_rate": 1.0546646570446277e-05, "loss": 0.3345, "step": 5811 }, { "epoch": 0.5, "learning_rate": 1.0543874461001456e-05, "loss": 0.36, "step": 5812 }, { "epoch": 0.5, "learning_rate": 1.0541102309637477e-05, "loss": 0.2893, "step": 5813 }, { "epoch": 0.5, "learning_rate": 1.0538330116568006e-05, "loss": 0.2719, "step": 5814 }, { "epoch": 0.5, "learning_rate": 1.0535557882006708e-05, "loss": 0.2906, "step": 5815 }, { "epoch": 0.5, "learning_rate": 1.0532785606167256e-05, "loss": 0.3034, "step": 5816 }, { "epoch": 0.5, "learning_rate": 1.0530013289263318e-05, "loss": 0.266, "step": 5817 }, { "epoch": 0.5, "learning_rate": 1.0527240931508582e-05, "loss": 0.278, "step": 5818 }, { "epoch": 0.5, "learning_rate": 1.0524468533116716e-05, "loss": 0.3, "step": 5819 }, { "epoch": 0.5, "learning_rate": 1.052169609430141e-05, "loss": 0.2811, "step": 5820 }, { "epoch": 0.5, "learning_rate": 1.0518923615276342e-05, "loss": 0.2853, "step": 5821 }, { "epoch": 0.5, "learning_rate": 1.051615109625521e-05, "loss": 0.2915, "step": 5822 }, { "epoch": 0.5, "learning_rate": 1.0513378537451697e-05, "loss": 0.3004, "step": 5823 }, { "epoch": 0.5, "learning_rate": 1.0510605939079505e-05, "loss": 0.2691, "step": 5824 }, { "epoch": 0.5, "learning_rate": 1.0507833301352335e-05, "loss": 0.2947, "step": 5825 }, { "epoch": 0.5, "learning_rate": 1.0505060624483878e-05, "loss": 0.2592, "step": 5826 }, { "epoch": 0.5, "learning_rate": 1.0502287908687847e-05, "loss": 0.2948, "step": 5827 }, { "epoch": 0.5, "learning_rate": 1.0499515154177941e-05, "loss": 0.2931, "step": 5828 }, { "epoch": 0.5, "learning_rate": 1.049674236116788e-05, "loss": 0.288, "step": 5829 }, { "epoch": 0.5, "learning_rate": 1.049396952987137e-05, "loss": 0.6166, "step": 5830 }, { "epoch": 0.5, "learning_rate": 1.049119666050213e-05, "loss": 0.31, "step": 5831 }, { "epoch": 0.5, "learning_rate": 1.048842375327388e-05, "loss": 0.277, "step": 5832 }, { "epoch": 0.5, "learning_rate": 1.0485650808400339e-05, "loss": 0.29, "step": 5833 }, { "epoch": 0.5, "learning_rate": 1.0482877826095233e-05, "loss": 0.2904, "step": 5834 }, { "epoch": 0.5, "learning_rate": 1.0480104806572288e-05, "loss": 0.337, "step": 5835 }, { "epoch": 0.5, "learning_rate": 1.0477331750045239e-05, "loss": 0.2444, "step": 5836 }, { "epoch": 0.5, "learning_rate": 1.047455865672782e-05, "loss": 0.2681, "step": 5837 }, { "epoch": 0.5, "learning_rate": 1.0471785526833762e-05, "loss": 0.2759, "step": 5838 }, { "epoch": 0.5, "learning_rate": 1.0469012360576807e-05, "loss": 0.2717, "step": 5839 }, { "epoch": 0.5, "learning_rate": 1.0466239158170697e-05, "loss": 0.3179, "step": 5840 }, { "epoch": 0.5, "learning_rate": 1.0463465919829175e-05, "loss": 0.3065, "step": 5841 }, { "epoch": 0.5, "learning_rate": 1.046069264576599e-05, "loss": 0.3027, "step": 5842 }, { "epoch": 0.5, "learning_rate": 1.0457919336194892e-05, "loss": 0.2836, "step": 5843 }, { "epoch": 0.5, "learning_rate": 1.0455145991329639e-05, "loss": 0.3084, "step": 5844 }, { "epoch": 0.5, "learning_rate": 1.0452372611383977e-05, "loss": 0.2818, "step": 5845 }, { "epoch": 0.5, "learning_rate": 1.0449599196571671e-05, "loss": 0.2429, "step": 5846 }, { "epoch": 0.5, "learning_rate": 1.044682574710648e-05, "loss": 0.2764, "step": 5847 }, { "epoch": 0.5, "learning_rate": 1.0444052263202169e-05, "loss": 0.2596, "step": 5848 }, { "epoch": 0.5, "learning_rate": 1.04412787450725e-05, "loss": 0.2999, "step": 5849 }, { "epoch": 0.5, "learning_rate": 1.043850519293125e-05, "loss": 0.3206, "step": 5850 }, { "epoch": 0.5, "learning_rate": 1.0435731606992188e-05, "loss": 0.2603, "step": 5851 }, { "epoch": 0.5, "learning_rate": 1.0432957987469081e-05, "loss": 0.2875, "step": 5852 }, { "epoch": 0.5, "learning_rate": 1.0430184334575715e-05, "loss": 0.2653, "step": 5853 }, { "epoch": 0.5, "learning_rate": 1.0427410648525863e-05, "loss": 0.2701, "step": 5854 }, { "epoch": 0.5, "learning_rate": 1.0424636929533315e-05, "loss": 0.3293, "step": 5855 }, { "epoch": 0.5, "learning_rate": 1.0421863177811848e-05, "loss": 0.2586, "step": 5856 }, { "epoch": 0.5, "learning_rate": 1.0419089393575253e-05, "loss": 0.2738, "step": 5857 }, { "epoch": 0.5, "learning_rate": 1.041631557703732e-05, "loss": 0.2709, "step": 5858 }, { "epoch": 0.5, "learning_rate": 1.0413541728411836e-05, "loss": 0.2313, "step": 5859 }, { "epoch": 0.5, "learning_rate": 1.04107678479126e-05, "loss": 0.2802, "step": 5860 }, { "epoch": 0.5, "learning_rate": 1.0407993935753406e-05, "loss": 0.2375, "step": 5861 }, { "epoch": 0.5, "learning_rate": 1.0405219992148057e-05, "loss": 0.3206, "step": 5862 }, { "epoch": 0.5, "learning_rate": 1.0402446017310355e-05, "loss": 0.2903, "step": 5863 }, { "epoch": 0.5, "learning_rate": 1.0399672011454101e-05, "loss": 0.2972, "step": 5864 }, { "epoch": 0.5, "learning_rate": 1.0396897974793102e-05, "loss": 0.2703, "step": 5865 }, { "epoch": 0.5, "learning_rate": 1.039412390754117e-05, "loss": 0.3087, "step": 5866 }, { "epoch": 0.5, "learning_rate": 1.0391349809912115e-05, "loss": 0.2697, "step": 5867 }, { "epoch": 0.5, "learning_rate": 1.0388575682119748e-05, "loss": 0.2578, "step": 5868 }, { "epoch": 0.5, "learning_rate": 1.0385801524377888e-05, "loss": 0.3052, "step": 5869 }, { "epoch": 0.5, "learning_rate": 1.0383027336900356e-05, "loss": 0.3104, "step": 5870 }, { "epoch": 0.5, "learning_rate": 1.0380253119900967e-05, "loss": 0.2667, "step": 5871 }, { "epoch": 0.5, "learning_rate": 1.0377478873593546e-05, "loss": 0.2794, "step": 5872 }, { "epoch": 0.5, "learning_rate": 1.0374704598191918e-05, "loss": 0.3147, "step": 5873 }, { "epoch": 0.5, "learning_rate": 1.0371930293909911e-05, "loss": 0.2961, "step": 5874 }, { "epoch": 0.5, "learning_rate": 1.0369155960961356e-05, "loss": 0.2938, "step": 5875 }, { "epoch": 0.5, "learning_rate": 1.0366381599560086e-05, "loss": 0.2693, "step": 5876 }, { "epoch": 0.5, "learning_rate": 1.036360720991993e-05, "loss": 0.2653, "step": 5877 }, { "epoch": 0.5, "learning_rate": 1.0360832792254727e-05, "loss": 0.3221, "step": 5878 }, { "epoch": 0.5, "learning_rate": 1.0358058346778314e-05, "loss": 0.2729, "step": 5879 }, { "epoch": 0.5, "learning_rate": 1.0355283873704533e-05, "loss": 0.2859, "step": 5880 }, { "epoch": 0.5, "learning_rate": 1.0352509373247227e-05, "loss": 0.2946, "step": 5881 }, { "epoch": 0.5, "learning_rate": 1.0349734845620244e-05, "loss": 0.2281, "step": 5882 }, { "epoch": 0.5, "learning_rate": 1.0346960291037424e-05, "loss": 0.3042, "step": 5883 }, { "epoch": 0.5, "learning_rate": 1.034418570971262e-05, "loss": 0.5502, "step": 5884 }, { "epoch": 0.5, "learning_rate": 1.034141110185968e-05, "loss": 0.2851, "step": 5885 }, { "epoch": 0.5, "learning_rate": 1.0338636467692462e-05, "loss": 0.328, "step": 5886 }, { "epoch": 0.5, "learning_rate": 1.0335861807424816e-05, "loss": 0.2556, "step": 5887 }, { "epoch": 0.5, "learning_rate": 1.0333087121270602e-05, "loss": 0.3177, "step": 5888 }, { "epoch": 0.5, "learning_rate": 1.0330312409443681e-05, "loss": 0.293, "step": 5889 }, { "epoch": 0.5, "learning_rate": 1.0327537672157908e-05, "loss": 0.2901, "step": 5890 }, { "epoch": 0.5, "learning_rate": 1.0324762909627151e-05, "loss": 0.2851, "step": 5891 }, { "epoch": 0.51, "learning_rate": 1.0321988122065274e-05, "loss": 0.2742, "step": 5892 }, { "epoch": 0.51, "learning_rate": 1.0319213309686145e-05, "loss": 0.3129, "step": 5893 }, { "epoch": 0.51, "learning_rate": 1.031643847270363e-05, "loss": 0.2683, "step": 5894 }, { "epoch": 0.51, "learning_rate": 1.03136636113316e-05, "loss": 0.2502, "step": 5895 }, { "epoch": 0.51, "learning_rate": 1.031088872578393e-05, "loss": 0.264, "step": 5896 }, { "epoch": 0.51, "learning_rate": 1.0308113816274492e-05, "loss": 0.2546, "step": 5897 }, { "epoch": 0.51, "learning_rate": 1.0305338883017163e-05, "loss": 0.2793, "step": 5898 }, { "epoch": 0.51, "learning_rate": 1.0302563926225824e-05, "loss": 0.2556, "step": 5899 }, { "epoch": 0.51, "learning_rate": 1.0299788946114352e-05, "loss": 0.3304, "step": 5900 }, { "epoch": 0.51, "learning_rate": 1.0297013942896628e-05, "loss": 0.2787, "step": 5901 }, { "epoch": 0.51, "learning_rate": 1.0294238916786537e-05, "loss": 0.3022, "step": 5902 }, { "epoch": 0.51, "learning_rate": 1.0291463867997967e-05, "loss": 0.2983, "step": 5903 }, { "epoch": 0.51, "learning_rate": 1.0288688796744797e-05, "loss": 0.3044, "step": 5904 }, { "epoch": 0.51, "learning_rate": 1.0285913703240927e-05, "loss": 0.2782, "step": 5905 }, { "epoch": 0.51, "learning_rate": 1.0283138587700236e-05, "loss": 0.2772, "step": 5906 }, { "epoch": 0.51, "learning_rate": 1.0280363450336623e-05, "loss": 0.2653, "step": 5907 }, { "epoch": 0.51, "learning_rate": 1.0277588291363984e-05, "loss": 0.2966, "step": 5908 }, { "epoch": 0.51, "learning_rate": 1.027481311099621e-05, "loss": 0.2208, "step": 5909 }, { "epoch": 0.51, "learning_rate": 1.0272037909447197e-05, "loss": 0.3234, "step": 5910 }, { "epoch": 0.51, "learning_rate": 1.0269262686930845e-05, "loss": 0.2675, "step": 5911 }, { "epoch": 0.51, "learning_rate": 1.026648744366106e-05, "loss": 0.248, "step": 5912 }, { "epoch": 0.51, "learning_rate": 1.0263712179851736e-05, "loss": 0.2637, "step": 5913 }, { "epoch": 0.51, "learning_rate": 1.0260936895716781e-05, "loss": 0.2807, "step": 5914 }, { "epoch": 0.51, "learning_rate": 1.0258161591470105e-05, "loss": 0.2745, "step": 5915 }, { "epoch": 0.51, "learning_rate": 1.0255386267325602e-05, "loss": 0.2509, "step": 5916 }, { "epoch": 0.51, "learning_rate": 1.0252610923497188e-05, "loss": 0.2764, "step": 5917 }, { "epoch": 0.51, "learning_rate": 1.0249835560198772e-05, "loss": 0.29, "step": 5918 }, { "epoch": 0.51, "learning_rate": 1.0247060177644267e-05, "loss": 0.2548, "step": 5919 }, { "epoch": 0.51, "learning_rate": 1.0244284776047589e-05, "loss": 0.2979, "step": 5920 }, { "epoch": 0.51, "learning_rate": 1.024150935562264e-05, "loss": 0.2521, "step": 5921 }, { "epoch": 0.51, "learning_rate": 1.023873391658335e-05, "loss": 0.2816, "step": 5922 }, { "epoch": 0.51, "learning_rate": 1.0235958459143623e-05, "loss": 0.2781, "step": 5923 }, { "epoch": 0.51, "learning_rate": 1.0233182983517387e-05, "loss": 0.2908, "step": 5924 }, { "epoch": 0.51, "learning_rate": 1.0230407489918556e-05, "loss": 0.2952, "step": 5925 }, { "epoch": 0.51, "learning_rate": 1.0227631978561057e-05, "loss": 0.2786, "step": 5926 }, { "epoch": 0.51, "learning_rate": 1.0224856449658811e-05, "loss": 0.2825, "step": 5927 }, { "epoch": 0.51, "learning_rate": 1.022208090342574e-05, "loss": 0.2765, "step": 5928 }, { "epoch": 0.51, "learning_rate": 1.0219305340075767e-05, "loss": 0.2783, "step": 5929 }, { "epoch": 0.51, "learning_rate": 1.0216529759822823e-05, "loss": 0.2986, "step": 5930 }, { "epoch": 0.51, "learning_rate": 1.021375416288084e-05, "loss": 0.3079, "step": 5931 }, { "epoch": 0.51, "learning_rate": 1.021097854946374e-05, "loss": 0.291, "step": 5932 }, { "epoch": 0.51, "learning_rate": 1.0208202919785453e-05, "loss": 0.2977, "step": 5933 }, { "epoch": 0.51, "learning_rate": 1.0205427274059915e-05, "loss": 0.2658, "step": 5934 }, { "epoch": 0.51, "learning_rate": 1.020265161250106e-05, "loss": 0.2443, "step": 5935 }, { "epoch": 0.51, "learning_rate": 1.0199875935322815e-05, "loss": 0.2748, "step": 5936 }, { "epoch": 0.51, "learning_rate": 1.0197100242739123e-05, "loss": 0.316, "step": 5937 }, { "epoch": 0.51, "learning_rate": 1.019432453496392e-05, "loss": 0.2752, "step": 5938 }, { "epoch": 0.51, "learning_rate": 1.0191548812211143e-05, "loss": 0.2738, "step": 5939 }, { "epoch": 0.51, "learning_rate": 1.0188773074694727e-05, "loss": 0.3325, "step": 5940 }, { "epoch": 0.51, "learning_rate": 1.0185997322628618e-05, "loss": 0.599, "step": 5941 }, { "epoch": 0.51, "learning_rate": 1.018322155622675e-05, "loss": 0.2682, "step": 5942 }, { "epoch": 0.51, "learning_rate": 1.0180445775703074e-05, "loss": 0.3058, "step": 5943 }, { "epoch": 0.51, "learning_rate": 1.0177669981271528e-05, "loss": 0.2665, "step": 5944 }, { "epoch": 0.51, "learning_rate": 1.0174894173146055e-05, "loss": 0.3224, "step": 5945 }, { "epoch": 0.51, "learning_rate": 1.0172118351540608e-05, "loss": 0.2926, "step": 5946 }, { "epoch": 0.51, "learning_rate": 1.0169342516669125e-05, "loss": 0.2706, "step": 5947 }, { "epoch": 0.51, "learning_rate": 1.0166566668745558e-05, "loss": 0.2663, "step": 5948 }, { "epoch": 0.51, "learning_rate": 1.0163790807983858e-05, "loss": 0.2739, "step": 5949 }, { "epoch": 0.51, "learning_rate": 1.016101493459797e-05, "loss": 0.254, "step": 5950 }, { "epoch": 0.51, "learning_rate": 1.0158239048801848e-05, "loss": 0.29, "step": 5951 }, { "epoch": 0.51, "learning_rate": 1.0155463150809439e-05, "loss": 0.2756, "step": 5952 }, { "epoch": 0.51, "learning_rate": 1.0152687240834702e-05, "loss": 0.3024, "step": 5953 }, { "epoch": 0.51, "learning_rate": 1.0149911319091583e-05, "loss": 0.2458, "step": 5954 }, { "epoch": 0.51, "learning_rate": 1.0147135385794043e-05, "loss": 0.2426, "step": 5955 }, { "epoch": 0.51, "learning_rate": 1.0144359441156033e-05, "loss": 0.2494, "step": 5956 }, { "epoch": 0.51, "learning_rate": 1.014158348539151e-05, "loss": 0.303, "step": 5957 }, { "epoch": 0.51, "learning_rate": 1.0138807518714435e-05, "loss": 0.2877, "step": 5958 }, { "epoch": 0.51, "learning_rate": 1.013603154133876e-05, "loss": 0.2545, "step": 5959 }, { "epoch": 0.51, "learning_rate": 1.0133255553478447e-05, "loss": 0.3298, "step": 5960 }, { "epoch": 0.51, "learning_rate": 1.013047955534745e-05, "loss": 0.2941, "step": 5961 }, { "epoch": 0.51, "learning_rate": 1.012770354715974e-05, "loss": 0.2681, "step": 5962 }, { "epoch": 0.51, "learning_rate": 1.0124927529129267e-05, "loss": 0.277, "step": 5963 }, { "epoch": 0.51, "learning_rate": 1.0122151501469999e-05, "loss": 0.3475, "step": 5964 }, { "epoch": 0.51, "learning_rate": 1.01193754643959e-05, "loss": 0.2628, "step": 5965 }, { "epoch": 0.51, "learning_rate": 1.0116599418120924e-05, "loss": 0.2421, "step": 5966 }, { "epoch": 0.51, "learning_rate": 1.0113823362859042e-05, "loss": 0.2729, "step": 5967 }, { "epoch": 0.51, "learning_rate": 1.0111047298824222e-05, "loss": 0.2359, "step": 5968 }, { "epoch": 0.51, "learning_rate": 1.0108271226230423e-05, "loss": 0.2687, "step": 5969 }, { "epoch": 0.51, "learning_rate": 1.0105495145291612e-05, "loss": 0.267, "step": 5970 }, { "epoch": 0.51, "learning_rate": 1.0102719056221757e-05, "loss": 0.2715, "step": 5971 }, { "epoch": 0.51, "learning_rate": 1.0099942959234826e-05, "loss": 0.3027, "step": 5972 }, { "epoch": 0.51, "learning_rate": 1.0097166854544782e-05, "loss": 0.3155, "step": 5973 }, { "epoch": 0.51, "learning_rate": 1.0094390742365598e-05, "loss": 0.2945, "step": 5974 }, { "epoch": 0.51, "learning_rate": 1.0091614622911243e-05, "loss": 0.2979, "step": 5975 }, { "epoch": 0.51, "learning_rate": 1.0088838496395688e-05, "loss": 0.3051, "step": 5976 }, { "epoch": 0.51, "learning_rate": 1.0086062363032896e-05, "loss": 0.2727, "step": 5977 }, { "epoch": 0.51, "learning_rate": 1.0083286223036845e-05, "loss": 0.2344, "step": 5978 }, { "epoch": 0.51, "learning_rate": 1.0080510076621503e-05, "loss": 0.3022, "step": 5979 }, { "epoch": 0.51, "learning_rate": 1.0077733924000841e-05, "loss": 0.2423, "step": 5980 }, { "epoch": 0.51, "learning_rate": 1.0074957765388832e-05, "loss": 0.2502, "step": 5981 }, { "epoch": 0.51, "learning_rate": 1.007218160099945e-05, "loss": 0.3272, "step": 5982 }, { "epoch": 0.51, "learning_rate": 1.0069405431046669e-05, "loss": 0.2727, "step": 5983 }, { "epoch": 0.51, "learning_rate": 1.0066629255744458e-05, "loss": 0.2729, "step": 5984 }, { "epoch": 0.51, "learning_rate": 1.0063853075306792e-05, "loss": 0.247, "step": 5985 }, { "epoch": 0.51, "learning_rate": 1.006107688994765e-05, "loss": 0.2372, "step": 5986 }, { "epoch": 0.51, "learning_rate": 1.0058300699880998e-05, "loss": 0.2842, "step": 5987 }, { "epoch": 0.51, "learning_rate": 1.0055524505320821e-05, "loss": 0.2956, "step": 5988 }, { "epoch": 0.51, "learning_rate": 1.0052748306481088e-05, "loss": 0.2763, "step": 5989 }, { "epoch": 0.51, "learning_rate": 1.0049972103575775e-05, "loss": 0.356, "step": 5990 }, { "epoch": 0.51, "learning_rate": 1.0047195896818863e-05, "loss": 0.259, "step": 5991 }, { "epoch": 0.51, "learning_rate": 1.004441968642432e-05, "loss": 0.2883, "step": 5992 }, { "epoch": 0.51, "learning_rate": 1.0041643472606129e-05, "loss": 0.2598, "step": 5993 }, { "epoch": 0.51, "learning_rate": 1.0038867255578261e-05, "loss": 0.2783, "step": 5994 }, { "epoch": 0.51, "learning_rate": 1.0036091035554703e-05, "loss": 0.2498, "step": 5995 }, { "epoch": 0.51, "learning_rate": 1.0033314812749423e-05, "loss": 0.324, "step": 5996 }, { "epoch": 0.51, "learning_rate": 1.0030538587376402e-05, "loss": 0.2609, "step": 5997 }, { "epoch": 0.51, "learning_rate": 1.002776235964962e-05, "loss": 0.2521, "step": 5998 }, { "epoch": 0.51, "learning_rate": 1.0024986129783047e-05, "loss": 0.2919, "step": 5999 }, { "epoch": 0.51, "learning_rate": 1.0022209897990673e-05, "loss": 0.2731, "step": 6000 }, { "epoch": 0.51, "learning_rate": 1.0019433664486466e-05, "loss": 0.2726, "step": 6001 }, { "epoch": 0.51, "learning_rate": 1.001665742948441e-05, "loss": 0.29, "step": 6002 }, { "epoch": 0.51, "learning_rate": 1.001388119319848e-05, "loss": 0.3542, "step": 6003 }, { "epoch": 0.51, "learning_rate": 1.0011104955842656e-05, "loss": 0.2651, "step": 6004 }, { "epoch": 0.51, "learning_rate": 1.000832871763092e-05, "loss": 0.2708, "step": 6005 }, { "epoch": 0.51, "learning_rate": 1.0005552478777244e-05, "loss": 0.2897, "step": 6006 }, { "epoch": 0.51, "learning_rate": 1.0002776239495613e-05, "loss": 0.2759, "step": 6007 }, { "epoch": 0.52, "learning_rate": 1e-05, "loss": 0.2853, "step": 6008 }, { "epoch": 0.52, "learning_rate": 9.997223760504392e-06, "loss": 0.3304, "step": 6009 }, { "epoch": 0.52, "learning_rate": 9.994447521222758e-06, "loss": 0.2999, "step": 6010 }, { "epoch": 0.52, "learning_rate": 9.991671282369084e-06, "loss": 0.2718, "step": 6011 }, { "epoch": 0.52, "learning_rate": 9.988895044157345e-06, "loss": 0.2897, "step": 6012 }, { "epoch": 0.52, "learning_rate": 9.986118806801525e-06, "loss": 0.2991, "step": 6013 }, { "epoch": 0.52, "learning_rate": 9.983342570515592e-06, "loss": 0.2459, "step": 6014 }, { "epoch": 0.52, "learning_rate": 9.980566335513538e-06, "loss": 0.2974, "step": 6015 }, { "epoch": 0.52, "learning_rate": 9.977790102009332e-06, "loss": 0.3033, "step": 6016 }, { "epoch": 0.52, "learning_rate": 9.975013870216953e-06, "loss": 0.3109, "step": 6017 }, { "epoch": 0.52, "learning_rate": 9.972237640350383e-06, "loss": 0.2469, "step": 6018 }, { "epoch": 0.52, "learning_rate": 9.969461412623601e-06, "loss": 0.2933, "step": 6019 }, { "epoch": 0.52, "learning_rate": 9.966685187250582e-06, "loss": 0.302, "step": 6020 }, { "epoch": 0.52, "learning_rate": 9.963908964445298e-06, "loss": 0.3113, "step": 6021 }, { "epoch": 0.52, "learning_rate": 9.96113274442174e-06, "loss": 0.2988, "step": 6022 }, { "epoch": 0.52, "learning_rate": 9.958356527393876e-06, "loss": 0.2715, "step": 6023 }, { "epoch": 0.52, "learning_rate": 9.955580313575683e-06, "loss": 0.3322, "step": 6024 }, { "epoch": 0.52, "learning_rate": 9.95280410318114e-06, "loss": 0.2742, "step": 6025 }, { "epoch": 0.52, "learning_rate": 9.950027896424228e-06, "loss": 0.2628, "step": 6026 }, { "epoch": 0.52, "learning_rate": 9.947251693518917e-06, "loss": 0.2476, "step": 6027 }, { "epoch": 0.52, "learning_rate": 9.94447549467918e-06, "loss": 0.2836, "step": 6028 }, { "epoch": 0.52, "learning_rate": 9.941699300119004e-06, "loss": 0.2685, "step": 6029 }, { "epoch": 0.52, "learning_rate": 9.938923110052353e-06, "loss": 0.3553, "step": 6030 }, { "epoch": 0.52, "learning_rate": 9.93614692469321e-06, "loss": 0.2778, "step": 6031 }, { "epoch": 0.52, "learning_rate": 9.933370744255543e-06, "loss": 0.2977, "step": 6032 }, { "epoch": 0.52, "learning_rate": 9.930594568953335e-06, "loss": 0.3034, "step": 6033 }, { "epoch": 0.52, "learning_rate": 9.92781839900055e-06, "loss": 0.2576, "step": 6034 }, { "epoch": 0.52, "learning_rate": 9.92504223461117e-06, "loss": 0.2761, "step": 6035 }, { "epoch": 0.52, "learning_rate": 9.922266075999162e-06, "loss": 0.2675, "step": 6036 }, { "epoch": 0.52, "learning_rate": 9.9194899233785e-06, "loss": 0.284, "step": 6037 }, { "epoch": 0.52, "learning_rate": 9.916713776963156e-06, "loss": 0.5876, "step": 6038 }, { "epoch": 0.52, "learning_rate": 9.913937636967106e-06, "loss": 0.3221, "step": 6039 }, { "epoch": 0.52, "learning_rate": 9.911161503604317e-06, "loss": 0.2642, "step": 6040 }, { "epoch": 0.52, "learning_rate": 9.908385377088757e-06, "loss": 0.2617, "step": 6041 }, { "epoch": 0.52, "learning_rate": 9.905609257634404e-06, "loss": 0.2643, "step": 6042 }, { "epoch": 0.52, "learning_rate": 9.902833145455221e-06, "loss": 0.2636, "step": 6043 }, { "epoch": 0.52, "learning_rate": 9.90005704076518e-06, "loss": 0.3041, "step": 6044 }, { "epoch": 0.52, "learning_rate": 9.897280943778245e-06, "loss": 0.2446, "step": 6045 }, { "epoch": 0.52, "learning_rate": 9.894504854708391e-06, "loss": 0.3168, "step": 6046 }, { "epoch": 0.52, "learning_rate": 9.891728773769582e-06, "loss": 0.2867, "step": 6047 }, { "epoch": 0.52, "learning_rate": 9.88895270117578e-06, "loss": 0.2738, "step": 6048 }, { "epoch": 0.52, "learning_rate": 9.886176637140959e-06, "loss": 0.2551, "step": 6049 }, { "epoch": 0.52, "learning_rate": 9.883400581879077e-06, "loss": 0.2531, "step": 6050 }, { "epoch": 0.52, "learning_rate": 9.880624535604107e-06, "loss": 0.2666, "step": 6051 }, { "epoch": 0.52, "learning_rate": 9.877848498530001e-06, "loss": 0.318, "step": 6052 }, { "epoch": 0.52, "learning_rate": 9.875072470870735e-06, "loss": 0.2891, "step": 6053 }, { "epoch": 0.52, "learning_rate": 9.872296452840266e-06, "loss": 0.3179, "step": 6054 }, { "epoch": 0.52, "learning_rate": 9.86952044465255e-06, "loss": 0.2405, "step": 6055 }, { "epoch": 0.52, "learning_rate": 9.866744446521555e-06, "loss": 0.2767, "step": 6056 }, { "epoch": 0.52, "learning_rate": 9.863968458661244e-06, "loss": 0.3212, "step": 6057 }, { "epoch": 0.52, "learning_rate": 9.86119248128557e-06, "loss": 0.2786, "step": 6058 }, { "epoch": 0.52, "learning_rate": 9.85841651460849e-06, "loss": 0.2606, "step": 6059 }, { "epoch": 0.52, "learning_rate": 9.85564055884397e-06, "loss": 0.2619, "step": 6060 }, { "epoch": 0.52, "learning_rate": 9.85286461420596e-06, "loss": 0.2863, "step": 6061 }, { "epoch": 0.52, "learning_rate": 9.85008868090842e-06, "loss": 0.3206, "step": 6062 }, { "epoch": 0.52, "learning_rate": 9.847312759165301e-06, "loss": 0.2991, "step": 6063 }, { "epoch": 0.52, "learning_rate": 9.844536849190564e-06, "loss": 0.2839, "step": 6064 }, { "epoch": 0.52, "learning_rate": 9.841760951198159e-06, "loss": 0.3005, "step": 6065 }, { "epoch": 0.52, "learning_rate": 9.838985065402032e-06, "loss": 0.2558, "step": 6066 }, { "epoch": 0.52, "learning_rate": 9.836209192016146e-06, "loss": 0.2752, "step": 6067 }, { "epoch": 0.52, "learning_rate": 9.833433331254443e-06, "loss": 0.2789, "step": 6068 }, { "epoch": 0.52, "learning_rate": 9.830657483330877e-06, "loss": 0.2729, "step": 6069 }, { "epoch": 0.52, "learning_rate": 9.827881648459396e-06, "loss": 0.2756, "step": 6070 }, { "epoch": 0.52, "learning_rate": 9.825105826853946e-06, "loss": 0.28, "step": 6071 }, { "epoch": 0.52, "learning_rate": 9.822330018728474e-06, "loss": 0.2957, "step": 6072 }, { "epoch": 0.52, "learning_rate": 9.819554224296929e-06, "loss": 0.251, "step": 6073 }, { "epoch": 0.52, "learning_rate": 9.816778443773253e-06, "loss": 0.2593, "step": 6074 }, { "epoch": 0.52, "learning_rate": 9.814002677371387e-06, "loss": 0.2833, "step": 6075 }, { "epoch": 0.52, "learning_rate": 9.811226925305273e-06, "loss": 0.2727, "step": 6076 }, { "epoch": 0.52, "learning_rate": 9.80845118778886e-06, "loss": 0.252, "step": 6077 }, { "epoch": 0.52, "learning_rate": 9.805675465036084e-06, "loss": 0.2828, "step": 6078 }, { "epoch": 0.52, "learning_rate": 9.802899757260875e-06, "loss": 0.2953, "step": 6079 }, { "epoch": 0.52, "learning_rate": 9.800124064677186e-06, "loss": 0.3062, "step": 6080 }, { "epoch": 0.52, "learning_rate": 9.797348387498944e-06, "loss": 0.2557, "step": 6081 }, { "epoch": 0.52, "learning_rate": 9.794572725940088e-06, "loss": 0.2673, "step": 6082 }, { "epoch": 0.52, "learning_rate": 9.791797080214547e-06, "loss": 0.2929, "step": 6083 }, { "epoch": 0.52, "learning_rate": 9.789021450536265e-06, "loss": 0.2668, "step": 6084 }, { "epoch": 0.52, "learning_rate": 9.786245837119165e-06, "loss": 0.2434, "step": 6085 }, { "epoch": 0.52, "learning_rate": 9.783470240177175e-06, "loss": 0.3055, "step": 6086 }, { "epoch": 0.52, "learning_rate": 9.780694659924235e-06, "loss": 0.3118, "step": 6087 }, { "epoch": 0.52, "learning_rate": 9.777919096574264e-06, "loss": 0.2756, "step": 6088 }, { "epoch": 0.52, "learning_rate": 9.775143550341194e-06, "loss": 0.3181, "step": 6089 }, { "epoch": 0.52, "learning_rate": 9.772368021438943e-06, "loss": 0.2761, "step": 6090 }, { "epoch": 0.52, "learning_rate": 9.769592510081445e-06, "loss": 0.2732, "step": 6091 }, { "epoch": 0.52, "learning_rate": 9.766817016482618e-06, "loss": 0.3576, "step": 6092 }, { "epoch": 0.52, "learning_rate": 9.764041540856379e-06, "loss": 0.3076, "step": 6093 }, { "epoch": 0.52, "learning_rate": 9.761266083416655e-06, "loss": 0.27, "step": 6094 }, { "epoch": 0.52, "learning_rate": 9.758490644377363e-06, "loss": 0.2689, "step": 6095 }, { "epoch": 0.52, "learning_rate": 9.755715223952418e-06, "loss": 0.2906, "step": 6096 }, { "epoch": 0.52, "learning_rate": 9.752939822355733e-06, "loss": 0.308, "step": 6097 }, { "epoch": 0.52, "learning_rate": 9.75016443980123e-06, "loss": 0.256, "step": 6098 }, { "epoch": 0.52, "learning_rate": 9.747389076502814e-06, "loss": 0.2881, "step": 6099 }, { "epoch": 0.52, "learning_rate": 9.744613732674401e-06, "loss": 0.2219, "step": 6100 }, { "epoch": 0.52, "learning_rate": 9.7418384085299e-06, "loss": 0.2842, "step": 6101 }, { "epoch": 0.52, "learning_rate": 9.73906310428322e-06, "loss": 0.2794, "step": 6102 }, { "epoch": 0.52, "learning_rate": 9.736287820148269e-06, "loss": 0.2455, "step": 6103 }, { "epoch": 0.52, "learning_rate": 9.733512556338941e-06, "loss": 0.278, "step": 6104 }, { "epoch": 0.52, "learning_rate": 9.730737313069157e-06, "loss": 0.2773, "step": 6105 }, { "epoch": 0.52, "learning_rate": 9.727962090552808e-06, "loss": 0.3433, "step": 6106 }, { "epoch": 0.52, "learning_rate": 9.725186889003795e-06, "loss": 0.2629, "step": 6107 }, { "epoch": 0.52, "learning_rate": 9.722411708636018e-06, "loss": 0.2755, "step": 6108 }, { "epoch": 0.52, "learning_rate": 9.719636549663379e-06, "loss": 0.6019, "step": 6109 }, { "epoch": 0.52, "learning_rate": 9.716861412299769e-06, "loss": 0.3077, "step": 6110 }, { "epoch": 0.52, "learning_rate": 9.714086296759078e-06, "loss": 0.263, "step": 6111 }, { "epoch": 0.52, "learning_rate": 9.711311203255207e-06, "loss": 0.3013, "step": 6112 }, { "epoch": 0.52, "learning_rate": 9.708536132002038e-06, "loss": 0.2523, "step": 6113 }, { "epoch": 0.52, "learning_rate": 9.705761083213463e-06, "loss": 0.2492, "step": 6114 }, { "epoch": 0.52, "learning_rate": 9.702986057103375e-06, "loss": 0.6278, "step": 6115 }, { "epoch": 0.52, "learning_rate": 9.700211053885653e-06, "loss": 0.3215, "step": 6116 }, { "epoch": 0.52, "learning_rate": 9.697436073774178e-06, "loss": 0.2739, "step": 6117 }, { "epoch": 0.52, "learning_rate": 9.694661116982838e-06, "loss": 0.2698, "step": 6118 }, { "epoch": 0.52, "learning_rate": 9.691886183725512e-06, "loss": 0.2839, "step": 6119 }, { "epoch": 0.52, "learning_rate": 9.689111274216075e-06, "loss": 0.3051, "step": 6120 }, { "epoch": 0.52, "learning_rate": 9.6863363886684e-06, "loss": 0.2514, "step": 6121 }, { "epoch": 0.52, "learning_rate": 9.683561527296375e-06, "loss": 0.2735, "step": 6122 }, { "epoch": 0.52, "learning_rate": 9.68078669031386e-06, "loss": 0.2988, "step": 6123 }, { "epoch": 0.52, "learning_rate": 9.678011877934728e-06, "loss": 0.2815, "step": 6124 }, { "epoch": 0.53, "learning_rate": 9.675237090372852e-06, "loss": 0.296, "step": 6125 }, { "epoch": 0.53, "learning_rate": 9.672462327842095e-06, "loss": 0.2869, "step": 6126 }, { "epoch": 0.53, "learning_rate": 9.669687590556325e-06, "loss": 0.2947, "step": 6127 }, { "epoch": 0.53, "learning_rate": 9.666912878729398e-06, "loss": 0.2599, "step": 6128 }, { "epoch": 0.53, "learning_rate": 9.664138192575187e-06, "loss": 0.264, "step": 6129 }, { "epoch": 0.53, "learning_rate": 9.661363532307543e-06, "loss": 0.2852, "step": 6130 }, { "epoch": 0.53, "learning_rate": 9.658588898140322e-06, "loss": 0.2942, "step": 6131 }, { "epoch": 0.53, "learning_rate": 9.655814290287381e-06, "loss": 0.3217, "step": 6132 }, { "epoch": 0.53, "learning_rate": 9.65303970896258e-06, "loss": 0.3466, "step": 6133 }, { "epoch": 0.53, "learning_rate": 9.650265154379761e-06, "loss": 0.3008, "step": 6134 }, { "epoch": 0.53, "learning_rate": 9.647490626752773e-06, "loss": 0.2971, "step": 6135 }, { "epoch": 0.53, "learning_rate": 9.644716126295468e-06, "loss": 0.3315, "step": 6136 }, { "epoch": 0.53, "learning_rate": 9.641941653221687e-06, "loss": 0.3229, "step": 6137 }, { "epoch": 0.53, "learning_rate": 9.639167207745276e-06, "loss": 0.3083, "step": 6138 }, { "epoch": 0.53, "learning_rate": 9.636392790080073e-06, "loss": 0.3123, "step": 6139 }, { "epoch": 0.53, "learning_rate": 9.633618400439918e-06, "loss": 0.2985, "step": 6140 }, { "epoch": 0.53, "learning_rate": 9.630844039038647e-06, "loss": 0.3315, "step": 6141 }, { "epoch": 0.53, "learning_rate": 9.628069706090089e-06, "loss": 0.2172, "step": 6142 }, { "epoch": 0.53, "learning_rate": 9.625295401808085e-06, "loss": 0.2834, "step": 6143 }, { "epoch": 0.53, "learning_rate": 9.622521126406458e-06, "loss": 0.2465, "step": 6144 }, { "epoch": 0.53, "learning_rate": 9.619746880099036e-06, "loss": 0.2546, "step": 6145 }, { "epoch": 0.53, "learning_rate": 9.616972663099648e-06, "loss": 0.2654, "step": 6146 }, { "epoch": 0.53, "learning_rate": 9.614198475622113e-06, "loss": 0.2982, "step": 6147 }, { "epoch": 0.53, "learning_rate": 9.611424317880257e-06, "loss": 0.2773, "step": 6148 }, { "epoch": 0.53, "learning_rate": 9.608650190087888e-06, "loss": 0.2869, "step": 6149 }, { "epoch": 0.53, "learning_rate": 9.605876092458835e-06, "loss": 0.3239, "step": 6150 }, { "epoch": 0.53, "learning_rate": 9.603102025206901e-06, "loss": 0.2952, "step": 6151 }, { "epoch": 0.53, "learning_rate": 9.6003279885459e-06, "loss": 0.28, "step": 6152 }, { "epoch": 0.53, "learning_rate": 9.597553982689649e-06, "loss": 0.3323, "step": 6153 }, { "epoch": 0.53, "learning_rate": 9.594780007851947e-06, "loss": 0.2784, "step": 6154 }, { "epoch": 0.53, "learning_rate": 9.592006064246596e-06, "loss": 0.2544, "step": 6155 }, { "epoch": 0.53, "learning_rate": 9.589232152087404e-06, "loss": 0.315, "step": 6156 }, { "epoch": 0.53, "learning_rate": 9.586458271588167e-06, "loss": 0.2506, "step": 6157 }, { "epoch": 0.53, "learning_rate": 9.583684422962686e-06, "loss": 0.2751, "step": 6158 }, { "epoch": 0.53, "learning_rate": 9.580910606424747e-06, "loss": 0.289, "step": 6159 }, { "epoch": 0.53, "learning_rate": 9.578136822188154e-06, "loss": 0.2582, "step": 6160 }, { "epoch": 0.53, "learning_rate": 9.575363070466689e-06, "loss": 0.2971, "step": 6161 }, { "epoch": 0.53, "learning_rate": 9.572589351474135e-06, "loss": 0.2603, "step": 6162 }, { "epoch": 0.53, "learning_rate": 9.569815665424288e-06, "loss": 0.2667, "step": 6163 }, { "epoch": 0.53, "learning_rate": 9.56704201253092e-06, "loss": 0.2909, "step": 6164 }, { "epoch": 0.53, "learning_rate": 9.564268393007819e-06, "loss": 0.28, "step": 6165 }, { "epoch": 0.53, "learning_rate": 9.56149480706875e-06, "loss": 0.2992, "step": 6166 }, { "epoch": 0.53, "learning_rate": 9.558721254927501e-06, "loss": 0.6023, "step": 6167 }, { "epoch": 0.53, "learning_rate": 9.555947736797836e-06, "loss": 0.2901, "step": 6168 }, { "epoch": 0.53, "learning_rate": 9.553174252893522e-06, "loss": 0.2755, "step": 6169 }, { "epoch": 0.53, "learning_rate": 9.55040080342833e-06, "loss": 0.3389, "step": 6170 }, { "epoch": 0.53, "learning_rate": 9.547627388616026e-06, "loss": 0.2678, "step": 6171 }, { "epoch": 0.53, "learning_rate": 9.544854008670366e-06, "loss": 0.2711, "step": 6172 }, { "epoch": 0.53, "learning_rate": 9.542080663805108e-06, "loss": 0.2574, "step": 6173 }, { "epoch": 0.53, "learning_rate": 9.539307354234013e-06, "loss": 0.2719, "step": 6174 }, { "epoch": 0.53, "learning_rate": 9.536534080170827e-06, "loss": 0.2614, "step": 6175 }, { "epoch": 0.53, "learning_rate": 9.533760841829306e-06, "loss": 0.2811, "step": 6176 }, { "epoch": 0.53, "learning_rate": 9.530987639423195e-06, "loss": 0.3169, "step": 6177 }, { "epoch": 0.53, "learning_rate": 9.528214473166241e-06, "loss": 0.2661, "step": 6178 }, { "epoch": 0.53, "learning_rate": 9.525441343272185e-06, "loss": 0.6213, "step": 6179 }, { "epoch": 0.53, "learning_rate": 9.522668249954761e-06, "loss": 0.3129, "step": 6180 }, { "epoch": 0.53, "learning_rate": 9.519895193427713e-06, "loss": 0.3321, "step": 6181 }, { "epoch": 0.53, "learning_rate": 9.51712217390477e-06, "loss": 0.5764, "step": 6182 }, { "epoch": 0.53, "learning_rate": 9.514349191599665e-06, "loss": 0.3439, "step": 6183 }, { "epoch": 0.53, "learning_rate": 9.511576246726123e-06, "loss": 0.2642, "step": 6184 }, { "epoch": 0.53, "learning_rate": 9.508803339497872e-06, "loss": 0.2975, "step": 6185 }, { "epoch": 0.53, "learning_rate": 9.506030470128635e-06, "loss": 0.3195, "step": 6186 }, { "epoch": 0.53, "learning_rate": 9.503257638832122e-06, "loss": 0.3178, "step": 6187 }, { "epoch": 0.53, "learning_rate": 9.50048484582206e-06, "loss": 0.2945, "step": 6188 }, { "epoch": 0.53, "learning_rate": 9.497712091312158e-06, "loss": 0.3174, "step": 6189 }, { "epoch": 0.53, "learning_rate": 9.494939375516122e-06, "loss": 0.293, "step": 6190 }, { "epoch": 0.53, "learning_rate": 9.49216669864767e-06, "loss": 0.2802, "step": 6191 }, { "epoch": 0.53, "learning_rate": 9.489394060920496e-06, "loss": 0.3081, "step": 6192 }, { "epoch": 0.53, "learning_rate": 9.486621462548307e-06, "loss": 0.308, "step": 6193 }, { "epoch": 0.53, "learning_rate": 9.483848903744795e-06, "loss": 0.2646, "step": 6194 }, { "epoch": 0.53, "learning_rate": 9.48107638472366e-06, "loss": 0.2584, "step": 6195 }, { "epoch": 0.53, "learning_rate": 9.478303905698595e-06, "loss": 0.2626, "step": 6196 }, { "epoch": 0.53, "learning_rate": 9.475531466883284e-06, "loss": 0.2735, "step": 6197 }, { "epoch": 0.53, "learning_rate": 9.472759068491421e-06, "loss": 0.2536, "step": 6198 }, { "epoch": 0.53, "learning_rate": 9.469986710736683e-06, "loss": 0.2912, "step": 6199 }, { "epoch": 0.53, "learning_rate": 9.467214393832746e-06, "loss": 0.2922, "step": 6200 }, { "epoch": 0.53, "learning_rate": 9.464442117993296e-06, "loss": 0.2674, "step": 6201 }, { "epoch": 0.53, "learning_rate": 9.461669883431997e-06, "loss": 0.2382, "step": 6202 }, { "epoch": 0.53, "learning_rate": 9.458897690362528e-06, "loss": 0.2496, "step": 6203 }, { "epoch": 0.53, "learning_rate": 9.456125538998546e-06, "loss": 0.2491, "step": 6204 }, { "epoch": 0.53, "learning_rate": 9.453353429553724e-06, "loss": 0.281, "step": 6205 }, { "epoch": 0.53, "learning_rate": 9.45058136224172e-06, "loss": 0.2861, "step": 6206 }, { "epoch": 0.53, "learning_rate": 9.447809337276184e-06, "loss": 0.2622, "step": 6207 }, { "epoch": 0.53, "learning_rate": 9.44503735487078e-06, "loss": 0.255, "step": 6208 }, { "epoch": 0.53, "learning_rate": 9.442265415239154e-06, "loss": 0.2674, "step": 6209 }, { "epoch": 0.53, "learning_rate": 9.439493518594957e-06, "loss": 0.2699, "step": 6210 }, { "epoch": 0.53, "learning_rate": 9.436721665151824e-06, "loss": 0.2976, "step": 6211 }, { "epoch": 0.53, "learning_rate": 9.433949855123405e-06, "loss": 0.3428, "step": 6212 }, { "epoch": 0.53, "learning_rate": 9.431178088723334e-06, "loss": 0.2438, "step": 6213 }, { "epoch": 0.53, "learning_rate": 9.428406366165244e-06, "loss": 0.2827, "step": 6214 }, { "epoch": 0.53, "learning_rate": 9.425634687662768e-06, "loss": 0.2614, "step": 6215 }, { "epoch": 0.53, "learning_rate": 9.422863053429534e-06, "loss": 0.2676, "step": 6216 }, { "epoch": 0.53, "learning_rate": 9.420091463679164e-06, "loss": 0.3492, "step": 6217 }, { "epoch": 0.53, "learning_rate": 9.417319918625274e-06, "loss": 0.2786, "step": 6218 }, { "epoch": 0.53, "learning_rate": 9.41454841848149e-06, "loss": 0.3083, "step": 6219 }, { "epoch": 0.53, "learning_rate": 9.41177696346142e-06, "loss": 0.3035, "step": 6220 }, { "epoch": 0.53, "learning_rate": 9.409005553778673e-06, "loss": 0.27, "step": 6221 }, { "epoch": 0.53, "learning_rate": 9.40623418964686e-06, "loss": 0.2797, "step": 6222 }, { "epoch": 0.53, "learning_rate": 9.403462871279582e-06, "loss": 0.2883, "step": 6223 }, { "epoch": 0.53, "learning_rate": 9.40069159889044e-06, "loss": 0.2992, "step": 6224 }, { "epoch": 0.53, "learning_rate": 9.397920372693023e-06, "loss": 0.2802, "step": 6225 }, { "epoch": 0.53, "learning_rate": 9.395149192900934e-06, "loss": 0.6013, "step": 6226 }, { "epoch": 0.53, "learning_rate": 9.392378059727752e-06, "loss": 0.2268, "step": 6227 }, { "epoch": 0.53, "learning_rate": 9.389606973387067e-06, "loss": 0.3, "step": 6228 }, { "epoch": 0.53, "learning_rate": 9.386835934092464e-06, "loss": 0.2955, "step": 6229 }, { "epoch": 0.53, "learning_rate": 9.384064942057518e-06, "loss": 0.3259, "step": 6230 }, { "epoch": 0.53, "learning_rate": 9.381293997495801e-06, "loss": 0.3099, "step": 6231 }, { "epoch": 0.53, "learning_rate": 9.378523100620883e-06, "loss": 0.2547, "step": 6232 }, { "epoch": 0.53, "learning_rate": 9.375752251646336e-06, "loss": 0.2751, "step": 6233 }, { "epoch": 0.53, "learning_rate": 9.372981450785723e-06, "loss": 0.2932, "step": 6234 }, { "epoch": 0.53, "learning_rate": 9.370210698252597e-06, "loss": 0.3113, "step": 6235 }, { "epoch": 0.53, "learning_rate": 9.367439994260522e-06, "loss": 0.3331, "step": 6236 }, { "epoch": 0.53, "learning_rate": 9.364669339023047e-06, "loss": 0.2291, "step": 6237 }, { "epoch": 0.53, "learning_rate": 9.361898732753715e-06, "loss": 0.2784, "step": 6238 }, { "epoch": 0.53, "learning_rate": 9.359128175666083e-06, "loss": 0.2957, "step": 6239 }, { "epoch": 0.53, "learning_rate": 9.356357667973679e-06, "loss": 0.262, "step": 6240 }, { "epoch": 0.53, "learning_rate": 9.353587209890049e-06, "loss": 0.3329, "step": 6241 }, { "epoch": 0.54, "learning_rate": 9.35081680162872e-06, "loss": 0.2634, "step": 6242 }, { "epoch": 0.54, "learning_rate": 9.348046443403227e-06, "loss": 0.3326, "step": 6243 }, { "epoch": 0.54, "learning_rate": 9.345276135427093e-06, "loss": 0.6267, "step": 6244 }, { "epoch": 0.54, "learning_rate": 9.342505877913835e-06, "loss": 0.2528, "step": 6245 }, { "epoch": 0.54, "learning_rate": 9.339735671076978e-06, "loss": 0.3056, "step": 6246 }, { "epoch": 0.54, "learning_rate": 9.336965515130034e-06, "loss": 0.2305, "step": 6247 }, { "epoch": 0.54, "learning_rate": 9.334195410286513e-06, "loss": 0.2986, "step": 6248 }, { "epoch": 0.54, "learning_rate": 9.331425356759915e-06, "loss": 0.2968, "step": 6249 }, { "epoch": 0.54, "learning_rate": 9.32865535476375e-06, "loss": 0.231, "step": 6250 }, { "epoch": 0.54, "learning_rate": 9.325885404511513e-06, "loss": 0.313, "step": 6251 }, { "epoch": 0.54, "learning_rate": 9.323115506216698e-06, "loss": 0.2585, "step": 6252 }, { "epoch": 0.54, "learning_rate": 9.320345660092794e-06, "loss": 0.2469, "step": 6253 }, { "epoch": 0.54, "learning_rate": 9.317575866353293e-06, "loss": 0.2513, "step": 6254 }, { "epoch": 0.54, "learning_rate": 9.314806125211669e-06, "loss": 0.2708, "step": 6255 }, { "epoch": 0.54, "learning_rate": 9.312036436881402e-06, "loss": 0.3019, "step": 6256 }, { "epoch": 0.54, "learning_rate": 9.309266801575968e-06, "loss": 0.2678, "step": 6257 }, { "epoch": 0.54, "learning_rate": 9.306497219508835e-06, "loss": 0.2935, "step": 6258 }, { "epoch": 0.54, "learning_rate": 9.30372769089347e-06, "loss": 0.2889, "step": 6259 }, { "epoch": 0.54, "learning_rate": 9.300958215943335e-06, "loss": 0.2858, "step": 6260 }, { "epoch": 0.54, "learning_rate": 9.298188794871888e-06, "loss": 0.3174, "step": 6261 }, { "epoch": 0.54, "learning_rate": 9.29541942789258e-06, "loss": 0.288, "step": 6262 }, { "epoch": 0.54, "learning_rate": 9.29265011521886e-06, "loss": 0.2868, "step": 6263 }, { "epoch": 0.54, "learning_rate": 9.289880857064175e-06, "loss": 0.2936, "step": 6264 }, { "epoch": 0.54, "learning_rate": 9.287111653641964e-06, "loss": 0.2488, "step": 6265 }, { "epoch": 0.54, "learning_rate": 9.28434250516566e-06, "loss": 0.322, "step": 6266 }, { "epoch": 0.54, "learning_rate": 9.281573411848707e-06, "loss": 0.2982, "step": 6267 }, { "epoch": 0.54, "learning_rate": 9.278804373904525e-06, "loss": 0.2462, "step": 6268 }, { "epoch": 0.54, "learning_rate": 9.276035391546538e-06, "loss": 0.2831, "step": 6269 }, { "epoch": 0.54, "learning_rate": 9.27326646498816e-06, "loss": 0.2899, "step": 6270 }, { "epoch": 0.54, "learning_rate": 9.270497594442815e-06, "loss": 0.2493, "step": 6271 }, { "epoch": 0.54, "learning_rate": 9.267728780123913e-06, "loss": 0.2828, "step": 6272 }, { "epoch": 0.54, "learning_rate": 9.264960022244855e-06, "loss": 0.248, "step": 6273 }, { "epoch": 0.54, "learning_rate": 9.262191321019049e-06, "loss": 0.2837, "step": 6274 }, { "epoch": 0.54, "learning_rate": 9.259422676659892e-06, "loss": 0.2541, "step": 6275 }, { "epoch": 0.54, "learning_rate": 9.256654089380773e-06, "loss": 0.2232, "step": 6276 }, { "epoch": 0.54, "learning_rate": 9.253885559395084e-06, "loss": 0.2437, "step": 6277 }, { "epoch": 0.54, "learning_rate": 9.251117086916209e-06, "loss": 0.3179, "step": 6278 }, { "epoch": 0.54, "learning_rate": 9.24834867215753e-06, "loss": 0.2921, "step": 6279 }, { "epoch": 0.54, "learning_rate": 9.245580315332418e-06, "loss": 0.3156, "step": 6280 }, { "epoch": 0.54, "learning_rate": 9.242812016654252e-06, "loss": 0.2936, "step": 6281 }, { "epoch": 0.54, "learning_rate": 9.240043776336397e-06, "loss": 0.3064, "step": 6282 }, { "epoch": 0.54, "learning_rate": 9.237275594592205e-06, "loss": 0.2792, "step": 6283 }, { "epoch": 0.54, "learning_rate": 9.234507471635043e-06, "loss": 0.2784, "step": 6284 }, { "epoch": 0.54, "learning_rate": 9.231739407678267e-06, "loss": 0.2673, "step": 6285 }, { "epoch": 0.54, "learning_rate": 9.22897140293522e-06, "loss": 0.2992, "step": 6286 }, { "epoch": 0.54, "learning_rate": 9.226203457619245e-06, "loss": 0.5927, "step": 6287 }, { "epoch": 0.54, "learning_rate": 9.223435571943685e-06, "loss": 0.2769, "step": 6288 }, { "epoch": 0.54, "learning_rate": 9.220667746121875e-06, "loss": 0.2463, "step": 6289 }, { "epoch": 0.54, "learning_rate": 9.217899980367142e-06, "loss": 0.3136, "step": 6290 }, { "epoch": 0.54, "learning_rate": 9.215132274892817e-06, "loss": 0.2569, "step": 6291 }, { "epoch": 0.54, "learning_rate": 9.212364629912218e-06, "loss": 0.2844, "step": 6292 }, { "epoch": 0.54, "learning_rate": 9.209597045638661e-06, "loss": 0.2471, "step": 6293 }, { "epoch": 0.54, "learning_rate": 9.206829522285456e-06, "loss": 0.3096, "step": 6294 }, { "epoch": 0.54, "learning_rate": 9.204062060065915e-06, "loss": 0.2534, "step": 6295 }, { "epoch": 0.54, "learning_rate": 9.201294659193337e-06, "loss": 0.2936, "step": 6296 }, { "epoch": 0.54, "learning_rate": 9.198527319881017e-06, "loss": 0.2944, "step": 6297 }, { "epoch": 0.54, "learning_rate": 9.195760042342254e-06, "loss": 0.2858, "step": 6298 }, { "epoch": 0.54, "learning_rate": 9.192992826790335e-06, "loss": 0.2599, "step": 6299 }, { "epoch": 0.54, "learning_rate": 9.19022567343854e-06, "loss": 0.2722, "step": 6300 }, { "epoch": 0.54, "learning_rate": 9.187458582500145e-06, "loss": 0.2665, "step": 6301 }, { "epoch": 0.54, "learning_rate": 9.184691554188432e-06, "loss": 0.2485, "step": 6302 }, { "epoch": 0.54, "learning_rate": 9.181924588716664e-06, "loss": 0.2971, "step": 6303 }, { "epoch": 0.54, "learning_rate": 9.179157686298104e-06, "loss": 0.2433, "step": 6304 }, { "epoch": 0.54, "learning_rate": 9.176390847146018e-06, "loss": 0.2442, "step": 6305 }, { "epoch": 0.54, "learning_rate": 9.173624071473655e-06, "loss": 0.2957, "step": 6306 }, { "epoch": 0.54, "learning_rate": 9.170857359494265e-06, "loss": 0.2916, "step": 6307 }, { "epoch": 0.54, "learning_rate": 9.168090711421089e-06, "loss": 0.2991, "step": 6308 }, { "epoch": 0.54, "learning_rate": 9.165324127467375e-06, "loss": 0.2511, "step": 6309 }, { "epoch": 0.54, "learning_rate": 9.162557607846352e-06, "loss": 0.2816, "step": 6310 }, { "epoch": 0.54, "learning_rate": 9.159791152771247e-06, "loss": 0.2364, "step": 6311 }, { "epoch": 0.54, "learning_rate": 9.157024762455292e-06, "loss": 0.2662, "step": 6312 }, { "epoch": 0.54, "learning_rate": 9.154258437111706e-06, "loss": 0.2716, "step": 6313 }, { "epoch": 0.54, "learning_rate": 9.151492176953697e-06, "loss": 0.267, "step": 6314 }, { "epoch": 0.54, "learning_rate": 9.148725982194477e-06, "loss": 0.2935, "step": 6315 }, { "epoch": 0.54, "learning_rate": 9.145959853047254e-06, "loss": 0.6002, "step": 6316 }, { "epoch": 0.54, "learning_rate": 9.143193789725227e-06, "loss": 0.2593, "step": 6317 }, { "epoch": 0.54, "learning_rate": 9.140427792441584e-06, "loss": 0.324, "step": 6318 }, { "epoch": 0.54, "learning_rate": 9.137661861409525e-06, "loss": 0.2525, "step": 6319 }, { "epoch": 0.54, "learning_rate": 9.134895996842228e-06, "loss": 0.3224, "step": 6320 }, { "epoch": 0.54, "learning_rate": 9.13213019895287e-06, "loss": 0.3298, "step": 6321 }, { "epoch": 0.54, "learning_rate": 9.129364467954628e-06, "loss": 0.2899, "step": 6322 }, { "epoch": 0.54, "learning_rate": 9.126598804060675e-06, "loss": 0.3036, "step": 6323 }, { "epoch": 0.54, "learning_rate": 9.123833207484169e-06, "loss": 0.2993, "step": 6324 }, { "epoch": 0.54, "learning_rate": 9.121067678438267e-06, "loss": 0.3029, "step": 6325 }, { "epoch": 0.54, "learning_rate": 9.11830221713613e-06, "loss": 0.2406, "step": 6326 }, { "epoch": 0.54, "learning_rate": 9.115536823790896e-06, "loss": 0.2467, "step": 6327 }, { "epoch": 0.54, "learning_rate": 9.112771498615717e-06, "loss": 0.3025, "step": 6328 }, { "epoch": 0.54, "learning_rate": 9.110006241823726e-06, "loss": 0.2587, "step": 6329 }, { "epoch": 0.54, "learning_rate": 9.107241053628058e-06, "loss": 0.348, "step": 6330 }, { "epoch": 0.54, "learning_rate": 9.104475934241839e-06, "loss": 0.2704, "step": 6331 }, { "epoch": 0.54, "learning_rate": 9.101710883878185e-06, "loss": 0.2437, "step": 6332 }, { "epoch": 0.54, "learning_rate": 9.098945902750222e-06, "loss": 0.2874, "step": 6333 }, { "epoch": 0.54, "learning_rate": 9.096180991071055e-06, "loss": 0.2757, "step": 6334 }, { "epoch": 0.54, "learning_rate": 9.093416149053791e-06, "loss": 0.3019, "step": 6335 }, { "epoch": 0.54, "learning_rate": 9.090651376911532e-06, "loss": 0.2715, "step": 6336 }, { "epoch": 0.54, "learning_rate": 9.087886674857371e-06, "loss": 0.2633, "step": 6337 }, { "epoch": 0.54, "learning_rate": 9.0851220431044e-06, "loss": 0.3062, "step": 6338 }, { "epoch": 0.54, "learning_rate": 9.082357481865697e-06, "loss": 0.2756, "step": 6339 }, { "epoch": 0.54, "learning_rate": 9.07959299135435e-06, "loss": 0.2933, "step": 6340 }, { "epoch": 0.54, "learning_rate": 9.076828571783422e-06, "loss": 0.2599, "step": 6341 }, { "epoch": 0.54, "learning_rate": 9.074064223365986e-06, "loss": 0.2508, "step": 6342 }, { "epoch": 0.54, "learning_rate": 9.07129994631511e-06, "loss": 0.2957, "step": 6343 }, { "epoch": 0.54, "learning_rate": 9.068535740843844e-06, "loss": 0.2809, "step": 6344 }, { "epoch": 0.54, "learning_rate": 9.065771607165242e-06, "loss": 0.2898, "step": 6345 }, { "epoch": 0.54, "learning_rate": 9.063007545492342e-06, "loss": 0.3552, "step": 6346 }, { "epoch": 0.54, "learning_rate": 9.060243556038195e-06, "loss": 0.2702, "step": 6347 }, { "epoch": 0.54, "learning_rate": 9.057479639015832e-06, "loss": 0.2979, "step": 6348 }, { "epoch": 0.54, "learning_rate": 9.054715794638277e-06, "loss": 0.3071, "step": 6349 }, { "epoch": 0.54, "learning_rate": 9.051952023118563e-06, "loss": 0.2598, "step": 6350 }, { "epoch": 0.54, "learning_rate": 9.0491883246697e-06, "loss": 0.2349, "step": 6351 }, { "epoch": 0.54, "learning_rate": 9.046424699504703e-06, "loss": 0.278, "step": 6352 }, { "epoch": 0.54, "learning_rate": 9.043661147836578e-06, "loss": 0.2801, "step": 6353 }, { "epoch": 0.54, "learning_rate": 9.040897669878327e-06, "loss": 0.2824, "step": 6354 }, { "epoch": 0.54, "learning_rate": 9.038134265842946e-06, "loss": 0.3063, "step": 6355 }, { "epoch": 0.54, "learning_rate": 9.035370935943419e-06, "loss": 0.2936, "step": 6356 }, { "epoch": 0.54, "learning_rate": 9.032607680392738e-06, "loss": 0.2664, "step": 6357 }, { "epoch": 0.55, "learning_rate": 9.029844499403876e-06, "loss": 0.3663, "step": 6358 }, { "epoch": 0.55, "learning_rate": 9.027081393189804e-06, "loss": 0.2704, "step": 6359 }, { "epoch": 0.55, "learning_rate": 9.02431836196349e-06, "loss": 0.2867, "step": 6360 }, { "epoch": 0.55, "learning_rate": 9.021555405937901e-06, "loss": 0.2859, "step": 6361 }, { "epoch": 0.55, "learning_rate": 9.018792525325986e-06, "loss": 0.2525, "step": 6362 }, { "epoch": 0.55, "learning_rate": 9.016029720340688e-06, "loss": 0.2542, "step": 6363 }, { "epoch": 0.55, "learning_rate": 9.013266991194964e-06, "loss": 0.3002, "step": 6364 }, { "epoch": 0.55, "learning_rate": 9.010504338101743e-06, "loss": 0.304, "step": 6365 }, { "epoch": 0.55, "learning_rate": 9.007741761273957e-06, "loss": 0.287, "step": 6366 }, { "epoch": 0.55, "learning_rate": 9.004979260924534e-06, "loss": 0.2683, "step": 6367 }, { "epoch": 0.55, "learning_rate": 9.002216837266394e-06, "loss": 0.2769, "step": 6368 }, { "epoch": 0.55, "learning_rate": 8.999454490512451e-06, "loss": 0.3268, "step": 6369 }, { "epoch": 0.55, "learning_rate": 8.996692220875608e-06, "loss": 0.2604, "step": 6370 }, { "epoch": 0.55, "learning_rate": 8.993930028568775e-06, "loss": 0.2933, "step": 6371 }, { "epoch": 0.55, "learning_rate": 8.991167913804842e-06, "loss": 0.2385, "step": 6372 }, { "epoch": 0.55, "learning_rate": 8.988405876796701e-06, "loss": 0.2835, "step": 6373 }, { "epoch": 0.55, "learning_rate": 8.985643917757237e-06, "loss": 0.2943, "step": 6374 }, { "epoch": 0.55, "learning_rate": 8.98288203689933e-06, "loss": 0.2914, "step": 6375 }, { "epoch": 0.55, "learning_rate": 8.98012023443585e-06, "loss": 0.2491, "step": 6376 }, { "epoch": 0.55, "learning_rate": 8.977358510579658e-06, "loss": 0.2793, "step": 6377 }, { "epoch": 0.55, "learning_rate": 8.974596865543624e-06, "loss": 0.267, "step": 6378 }, { "epoch": 0.55, "learning_rate": 8.971835299540595e-06, "loss": 0.3075, "step": 6379 }, { "epoch": 0.55, "learning_rate": 8.969073812783417e-06, "loss": 0.2628, "step": 6380 }, { "epoch": 0.55, "learning_rate": 8.966312405484942e-06, "loss": 0.3061, "step": 6381 }, { "epoch": 0.55, "learning_rate": 8.963551077857999e-06, "loss": 0.6271, "step": 6382 }, { "epoch": 0.55, "learning_rate": 8.960789830115416e-06, "loss": 0.3176, "step": 6383 }, { "epoch": 0.55, "learning_rate": 8.958028662470014e-06, "loss": 0.2692, "step": 6384 }, { "epoch": 0.55, "learning_rate": 8.955267575134618e-06, "loss": 0.2994, "step": 6385 }, { "epoch": 0.55, "learning_rate": 8.952506568322036e-06, "loss": 0.2919, "step": 6386 }, { "epoch": 0.55, "learning_rate": 8.949745642245066e-06, "loss": 0.262, "step": 6387 }, { "epoch": 0.55, "learning_rate": 8.94698479711652e-06, "loss": 0.2734, "step": 6388 }, { "epoch": 0.55, "learning_rate": 8.944224033149182e-06, "loss": 0.3241, "step": 6389 }, { "epoch": 0.55, "learning_rate": 8.941463350555835e-06, "loss": 0.3006, "step": 6390 }, { "epoch": 0.55, "learning_rate": 8.938702749549264e-06, "loss": 0.2688, "step": 6391 }, { "epoch": 0.55, "learning_rate": 8.93594223034224e-06, "loss": 0.2596, "step": 6392 }, { "epoch": 0.55, "learning_rate": 8.933181793147534e-06, "loss": 0.2806, "step": 6393 }, { "epoch": 0.55, "learning_rate": 8.9304214381779e-06, "loss": 0.2568, "step": 6394 }, { "epoch": 0.55, "learning_rate": 8.9276611656461e-06, "loss": 0.2725, "step": 6395 }, { "epoch": 0.55, "learning_rate": 8.924900975764879e-06, "loss": 0.269, "step": 6396 }, { "epoch": 0.55, "learning_rate": 8.922140868746977e-06, "loss": 0.2949, "step": 6397 }, { "epoch": 0.55, "learning_rate": 8.919380844805129e-06, "loss": 0.338, "step": 6398 }, { "epoch": 0.55, "learning_rate": 8.916620904152069e-06, "loss": 0.2419, "step": 6399 }, { "epoch": 0.55, "learning_rate": 8.913861047000518e-06, "loss": 0.2715, "step": 6400 }, { "epoch": 0.55, "learning_rate": 8.911101273563185e-06, "loss": 0.3061, "step": 6401 }, { "epoch": 0.55, "learning_rate": 8.908341584052791e-06, "loss": 0.2795, "step": 6402 }, { "epoch": 0.55, "learning_rate": 8.905581978682031e-06, "loss": 0.2607, "step": 6403 }, { "epoch": 0.55, "learning_rate": 8.902822457663605e-06, "loss": 0.2841, "step": 6404 }, { "epoch": 0.55, "learning_rate": 8.900063021210203e-06, "loss": 0.3083, "step": 6405 }, { "epoch": 0.55, "learning_rate": 8.897303669534508e-06, "loss": 0.2467, "step": 6406 }, { "epoch": 0.55, "learning_rate": 8.8945444028492e-06, "loss": 0.2706, "step": 6407 }, { "epoch": 0.55, "learning_rate": 8.891785221366942e-06, "loss": 0.2778, "step": 6408 }, { "epoch": 0.55, "learning_rate": 8.889026125300407e-06, "loss": 0.259, "step": 6409 }, { "epoch": 0.55, "learning_rate": 8.886267114862248e-06, "loss": 0.2808, "step": 6410 }, { "epoch": 0.55, "learning_rate": 8.883508190265117e-06, "loss": 0.277, "step": 6411 }, { "epoch": 0.55, "learning_rate": 8.880749351721656e-06, "loss": 0.6112, "step": 6412 }, { "epoch": 0.55, "learning_rate": 8.877990599444508e-06, "loss": 0.2902, "step": 6413 }, { "epoch": 0.55, "learning_rate": 8.8752319336463e-06, "loss": 0.3396, "step": 6414 }, { "epoch": 0.55, "learning_rate": 8.872473354539653e-06, "loss": 0.2559, "step": 6415 }, { "epoch": 0.55, "learning_rate": 8.869714862337194e-06, "loss": 0.3209, "step": 6416 }, { "epoch": 0.55, "learning_rate": 8.866956457251524e-06, "loss": 0.2571, "step": 6417 }, { "epoch": 0.55, "learning_rate": 8.86419813949525e-06, "loss": 0.311, "step": 6418 }, { "epoch": 0.55, "learning_rate": 8.861439909280976e-06, "loss": 0.265, "step": 6419 }, { "epoch": 0.55, "learning_rate": 8.858681766821289e-06, "loss": 0.2789, "step": 6420 }, { "epoch": 0.55, "learning_rate": 8.855923712328771e-06, "loss": 0.2816, "step": 6421 }, { "epoch": 0.55, "learning_rate": 8.853165746015997e-06, "loss": 0.25, "step": 6422 }, { "epoch": 0.55, "learning_rate": 8.850407868095541e-06, "loss": 0.2814, "step": 6423 }, { "epoch": 0.55, "learning_rate": 8.84765007877997e-06, "loss": 0.2615, "step": 6424 }, { "epoch": 0.55, "learning_rate": 8.844892378281831e-06, "loss": 0.3049, "step": 6425 }, { "epoch": 0.55, "learning_rate": 8.842134766813687e-06, "loss": 0.3261, "step": 6426 }, { "epoch": 0.55, "learning_rate": 8.839377244588073e-06, "loss": 0.2766, "step": 6427 }, { "epoch": 0.55, "learning_rate": 8.836619811817522e-06, "loss": 0.2922, "step": 6428 }, { "epoch": 0.55, "learning_rate": 8.83386246871457e-06, "loss": 0.3375, "step": 6429 }, { "epoch": 0.55, "learning_rate": 8.831105215491734e-06, "loss": 0.314, "step": 6430 }, { "epoch": 0.55, "learning_rate": 8.828348052361537e-06, "loss": 0.2966, "step": 6431 }, { "epoch": 0.55, "learning_rate": 8.825590979536476e-06, "loss": 0.2787, "step": 6432 }, { "epoch": 0.55, "learning_rate": 8.822833997229065e-06, "loss": 0.2784, "step": 6433 }, { "epoch": 0.55, "learning_rate": 8.820077105651794e-06, "loss": 0.3019, "step": 6434 }, { "epoch": 0.55, "learning_rate": 8.817320305017148e-06, "loss": 0.2717, "step": 6435 }, { "epoch": 0.55, "learning_rate": 8.814563595537604e-06, "loss": 0.2824, "step": 6436 }, { "epoch": 0.55, "learning_rate": 8.811806977425647e-06, "loss": 0.2773, "step": 6437 }, { "epoch": 0.55, "learning_rate": 8.809050450893737e-06, "loss": 0.2692, "step": 6438 }, { "epoch": 0.55, "learning_rate": 8.806294016154329e-06, "loss": 0.2727, "step": 6439 }, { "epoch": 0.55, "learning_rate": 8.803537673419885e-06, "loss": 0.3005, "step": 6440 }, { "epoch": 0.55, "learning_rate": 8.80078142290284e-06, "loss": 0.3063, "step": 6441 }, { "epoch": 0.55, "learning_rate": 8.798025264815643e-06, "loss": 0.2875, "step": 6442 }, { "epoch": 0.55, "learning_rate": 8.795269199370714e-06, "loss": 0.2943, "step": 6443 }, { "epoch": 0.55, "learning_rate": 8.792513226780486e-06, "loss": 0.2686, "step": 6444 }, { "epoch": 0.55, "learning_rate": 8.789757347257373e-06, "loss": 0.3112, "step": 6445 }, { "epoch": 0.55, "learning_rate": 8.787001561013779e-06, "loss": 0.2678, "step": 6446 }, { "epoch": 0.55, "learning_rate": 8.784245868262117e-06, "loss": 0.2623, "step": 6447 }, { "epoch": 0.55, "learning_rate": 8.781490269214772e-06, "loss": 0.3001, "step": 6448 }, { "epoch": 0.55, "learning_rate": 8.778734764084136e-06, "loss": 0.2631, "step": 6449 }, { "epoch": 0.55, "learning_rate": 8.77597935308259e-06, "loss": 0.3328, "step": 6450 }, { "epoch": 0.55, "learning_rate": 8.773224036422512e-06, "loss": 0.2712, "step": 6451 }, { "epoch": 0.55, "learning_rate": 8.770468814316262e-06, "loss": 0.2569, "step": 6452 }, { "epoch": 0.55, "learning_rate": 8.767713686976195e-06, "loss": 0.2609, "step": 6453 }, { "epoch": 0.55, "learning_rate": 8.764958654614673e-06, "loss": 0.2792, "step": 6454 }, { "epoch": 0.55, "learning_rate": 8.762203717444034e-06, "loss": 0.2847, "step": 6455 }, { "epoch": 0.55, "learning_rate": 8.759448875676616e-06, "loss": 0.2769, "step": 6456 }, { "epoch": 0.55, "learning_rate": 8.75669412952475e-06, "loss": 0.2563, "step": 6457 }, { "epoch": 0.55, "learning_rate": 8.753939479200758e-06, "loss": 0.2811, "step": 6458 }, { "epoch": 0.55, "learning_rate": 8.751184924916954e-06, "loss": 0.2651, "step": 6459 }, { "epoch": 0.55, "learning_rate": 8.748430466885643e-06, "loss": 0.2924, "step": 6460 }, { "epoch": 0.55, "learning_rate": 8.745676105319126e-06, "loss": 0.3098, "step": 6461 }, { "epoch": 0.55, "learning_rate": 8.742921840429702e-06, "loss": 0.3028, "step": 6462 }, { "epoch": 0.55, "learning_rate": 8.740167672429646e-06, "loss": 0.2536, "step": 6463 }, { "epoch": 0.55, "learning_rate": 8.737413601531245e-06, "loss": 0.2317, "step": 6464 }, { "epoch": 0.55, "learning_rate": 8.734659627946765e-06, "loss": 0.2729, "step": 6465 }, { "epoch": 0.55, "learning_rate": 8.731905751888466e-06, "loss": 0.292, "step": 6466 }, { "epoch": 0.55, "learning_rate": 8.729151973568608e-06, "loss": 0.2749, "step": 6467 }, { "epoch": 0.55, "learning_rate": 8.726398293199434e-06, "loss": 0.2852, "step": 6468 }, { "epoch": 0.55, "learning_rate": 8.723644710993189e-06, "loss": 0.5852, "step": 6469 }, { "epoch": 0.55, "learning_rate": 8.720891227162099e-06, "loss": 0.2871, "step": 6470 }, { "epoch": 0.55, "learning_rate": 8.718137841918396e-06, "loss": 0.2972, "step": 6471 }, { "epoch": 0.55, "learning_rate": 8.715384555474297e-06, "loss": 0.2758, "step": 6472 }, { "epoch": 0.55, "learning_rate": 8.712631368042006e-06, "loss": 0.2944, "step": 6473 }, { "epoch": 0.55, "learning_rate": 8.709878279833725e-06, "loss": 0.2717, "step": 6474 }, { "epoch": 0.56, "learning_rate": 8.707125291061656e-06, "loss": 0.278, "step": 6475 }, { "epoch": 0.56, "learning_rate": 8.704372401937982e-06, "loss": 0.289, "step": 6476 }, { "epoch": 0.56, "learning_rate": 8.701619612674877e-06, "loss": 0.28, "step": 6477 }, { "epoch": 0.56, "learning_rate": 8.698866923484521e-06, "loss": 0.3257, "step": 6478 }, { "epoch": 0.56, "learning_rate": 8.69611433457907e-06, "loss": 0.2626, "step": 6479 }, { "epoch": 0.56, "learning_rate": 8.693361846170687e-06, "loss": 0.2358, "step": 6480 }, { "epoch": 0.56, "learning_rate": 8.690609458471512e-06, "loss": 0.3358, "step": 6481 }, { "epoch": 0.56, "learning_rate": 8.687857171693693e-06, "loss": 0.5938, "step": 6482 }, { "epoch": 0.56, "learning_rate": 8.685104986049362e-06, "loss": 0.2931, "step": 6483 }, { "epoch": 0.56, "learning_rate": 8.682352901750636e-06, "loss": 0.2775, "step": 6484 }, { "epoch": 0.56, "learning_rate": 8.679600919009642e-06, "loss": 0.2616, "step": 6485 }, { "epoch": 0.56, "learning_rate": 8.676849038038483e-06, "loss": 0.2341, "step": 6486 }, { "epoch": 0.56, "learning_rate": 8.674097259049263e-06, "loss": 0.2853, "step": 6487 }, { "epoch": 0.56, "learning_rate": 8.671345582254072e-06, "loss": 0.2344, "step": 6488 }, { "epoch": 0.56, "learning_rate": 8.668594007865003e-06, "loss": 0.2463, "step": 6489 }, { "epoch": 0.56, "learning_rate": 8.66584253609413e-06, "loss": 0.5797, "step": 6490 }, { "epoch": 0.56, "learning_rate": 8.663091167153516e-06, "loss": 0.2997, "step": 6491 }, { "epoch": 0.56, "learning_rate": 8.660339901255234e-06, "loss": 0.3104, "step": 6492 }, { "epoch": 0.56, "learning_rate": 8.65758873861133e-06, "loss": 0.2821, "step": 6493 }, { "epoch": 0.56, "learning_rate": 8.654837679433852e-06, "loss": 0.2617, "step": 6494 }, { "epoch": 0.56, "learning_rate": 8.652086723934841e-06, "loss": 0.291, "step": 6495 }, { "epoch": 0.56, "learning_rate": 8.649335872326328e-06, "loss": 0.275, "step": 6496 }, { "epoch": 0.56, "learning_rate": 8.64658512482033e-06, "loss": 0.2801, "step": 6497 }, { "epoch": 0.56, "learning_rate": 8.643834481628861e-06, "loss": 0.2419, "step": 6498 }, { "epoch": 0.56, "learning_rate": 8.641083942963929e-06, "loss": 0.2874, "step": 6499 }, { "epoch": 0.56, "learning_rate": 8.638333509037537e-06, "loss": 0.2811, "step": 6500 }, { "epoch": 0.56, "learning_rate": 8.635583180061664e-06, "loss": 0.2966, "step": 6501 }, { "epoch": 0.56, "learning_rate": 8.6328329562483e-06, "loss": 0.3436, "step": 6502 }, { "epoch": 0.56, "learning_rate": 8.630082837809419e-06, "loss": 0.3058, "step": 6503 }, { "epoch": 0.56, "learning_rate": 8.62733282495698e-06, "loss": 0.236, "step": 6504 }, { "epoch": 0.56, "learning_rate": 8.624582917902947e-06, "loss": 0.2879, "step": 6505 }, { "epoch": 0.56, "learning_rate": 8.621833116859264e-06, "loss": 0.2872, "step": 6506 }, { "epoch": 0.56, "learning_rate": 8.619083422037878e-06, "loss": 0.3298, "step": 6507 }, { "epoch": 0.56, "learning_rate": 8.616333833650714e-06, "loss": 0.2851, "step": 6508 }, { "epoch": 0.56, "learning_rate": 8.613584351909705e-06, "loss": 0.3307, "step": 6509 }, { "epoch": 0.56, "learning_rate": 8.610834977026765e-06, "loss": 0.2833, "step": 6510 }, { "epoch": 0.56, "learning_rate": 8.608085709213797e-06, "loss": 0.2859, "step": 6511 }, { "epoch": 0.56, "learning_rate": 8.605336548682704e-06, "loss": 0.3217, "step": 6512 }, { "epoch": 0.56, "learning_rate": 8.602587495645382e-06, "loss": 0.322, "step": 6513 }, { "epoch": 0.56, "learning_rate": 8.599838550313714e-06, "loss": 0.2433, "step": 6514 }, { "epoch": 0.56, "learning_rate": 8.597089712899564e-06, "loss": 0.3004, "step": 6515 }, { "epoch": 0.56, "learning_rate": 8.594340983614812e-06, "loss": 0.3048, "step": 6516 }, { "epoch": 0.56, "learning_rate": 8.591592362671311e-06, "loss": 0.2905, "step": 6517 }, { "epoch": 0.56, "learning_rate": 8.588843850280911e-06, "loss": 0.2921, "step": 6518 }, { "epoch": 0.56, "learning_rate": 8.586095446655452e-06, "loss": 0.2819, "step": 6519 }, { "epoch": 0.56, "learning_rate": 8.583347152006774e-06, "loss": 0.2658, "step": 6520 }, { "epoch": 0.56, "learning_rate": 8.580598966546697e-06, "loss": 0.2552, "step": 6521 }, { "epoch": 0.56, "learning_rate": 8.577850890487035e-06, "loss": 0.2912, "step": 6522 }, { "epoch": 0.56, "learning_rate": 8.575102924039602e-06, "loss": 0.2716, "step": 6523 }, { "epoch": 0.56, "learning_rate": 8.572355067416194e-06, "loss": 0.6274, "step": 6524 }, { "epoch": 0.56, "learning_rate": 8.569607320828604e-06, "loss": 0.2807, "step": 6525 }, { "epoch": 0.56, "learning_rate": 8.566859684488611e-06, "loss": 0.2546, "step": 6526 }, { "epoch": 0.56, "learning_rate": 8.564112158607996e-06, "loss": 0.2858, "step": 6527 }, { "epoch": 0.56, "learning_rate": 8.561364743398521e-06, "loss": 0.2894, "step": 6528 }, { "epoch": 0.56, "learning_rate": 8.558617439071938e-06, "loss": 0.2871, "step": 6529 }, { "epoch": 0.56, "learning_rate": 8.555870245840005e-06, "loss": 0.3452, "step": 6530 }, { "epoch": 0.56, "learning_rate": 8.553123163914456e-06, "loss": 0.3228, "step": 6531 }, { "epoch": 0.56, "learning_rate": 8.550376193507023e-06, "loss": 0.2663, "step": 6532 }, { "epoch": 0.56, "learning_rate": 8.547629334829434e-06, "loss": 0.2761, "step": 6533 }, { "epoch": 0.56, "learning_rate": 8.544882588093399e-06, "loss": 0.2941, "step": 6534 }, { "epoch": 0.56, "learning_rate": 8.542135953510625e-06, "loss": 0.2724, "step": 6535 }, { "epoch": 0.56, "learning_rate": 8.539389431292803e-06, "loss": 0.2615, "step": 6536 }, { "epoch": 0.56, "learning_rate": 8.536643021651629e-06, "loss": 0.2693, "step": 6537 }, { "epoch": 0.56, "learning_rate": 8.533896724798784e-06, "loss": 0.6083, "step": 6538 }, { "epoch": 0.56, "learning_rate": 8.531150540945929e-06, "loss": 0.2809, "step": 6539 }, { "epoch": 0.56, "learning_rate": 8.528404470304736e-06, "loss": 0.2509, "step": 6540 }, { "epoch": 0.56, "learning_rate": 8.525658513086857e-06, "loss": 0.3068, "step": 6541 }, { "epoch": 0.56, "learning_rate": 8.522912669503932e-06, "loss": 0.2273, "step": 6542 }, { "epoch": 0.56, "learning_rate": 8.5201669397676e-06, "loss": 0.2704, "step": 6543 }, { "epoch": 0.56, "learning_rate": 8.517421324089488e-06, "loss": 0.301, "step": 6544 }, { "epoch": 0.56, "learning_rate": 8.514675822681218e-06, "loss": 0.314, "step": 6545 }, { "epoch": 0.56, "learning_rate": 8.511930435754391e-06, "loss": 0.2621, "step": 6546 }, { "epoch": 0.56, "learning_rate": 8.509185163520617e-06, "loss": 0.2892, "step": 6547 }, { "epoch": 0.56, "learning_rate": 8.506440006191485e-06, "loss": 0.2894, "step": 6548 }, { "epoch": 0.56, "learning_rate": 8.503694963978576e-06, "loss": 0.3623, "step": 6549 }, { "epoch": 0.56, "learning_rate": 8.500950037093462e-06, "loss": 0.3357, "step": 6550 }, { "epoch": 0.56, "learning_rate": 8.498205225747717e-06, "loss": 0.6342, "step": 6551 }, { "epoch": 0.56, "learning_rate": 8.495460530152893e-06, "loss": 0.2982, "step": 6552 }, { "epoch": 0.56, "learning_rate": 8.492715950520534e-06, "loss": 0.2757, "step": 6553 }, { "epoch": 0.56, "learning_rate": 8.489971487062184e-06, "loss": 0.2718, "step": 6554 }, { "epoch": 0.56, "learning_rate": 8.487227139989372e-06, "loss": 0.318, "step": 6555 }, { "epoch": 0.56, "learning_rate": 8.484482909513613e-06, "loss": 0.2923, "step": 6556 }, { "epoch": 0.56, "learning_rate": 8.481738795846424e-06, "loss": 0.2532, "step": 6557 }, { "epoch": 0.56, "learning_rate": 8.478994799199308e-06, "loss": 0.2848, "step": 6558 }, { "epoch": 0.56, "learning_rate": 8.476250919783759e-06, "loss": 0.2385, "step": 6559 }, { "epoch": 0.56, "learning_rate": 8.473507157811254e-06, "loss": 0.31, "step": 6560 }, { "epoch": 0.56, "learning_rate": 8.470763513493281e-06, "loss": 0.3047, "step": 6561 }, { "epoch": 0.56, "learning_rate": 8.468019987041298e-06, "loss": 0.2968, "step": 6562 }, { "epoch": 0.56, "learning_rate": 8.465276578666766e-06, "loss": 0.2969, "step": 6563 }, { "epoch": 0.56, "learning_rate": 8.462533288581128e-06, "loss": 0.2549, "step": 6564 }, { "epoch": 0.56, "learning_rate": 8.45979011699583e-06, "loss": 0.2863, "step": 6565 }, { "epoch": 0.56, "learning_rate": 8.4570470641223e-06, "loss": 0.2489, "step": 6566 }, { "epoch": 0.56, "learning_rate": 8.454304130171956e-06, "loss": 0.2518, "step": 6567 }, { "epoch": 0.56, "learning_rate": 8.451561315356216e-06, "loss": 0.5439, "step": 6568 }, { "epoch": 0.56, "learning_rate": 8.448818619886477e-06, "loss": 0.2722, "step": 6569 }, { "epoch": 0.56, "learning_rate": 8.44607604397413e-06, "loss": 0.2662, "step": 6570 }, { "epoch": 0.56, "learning_rate": 8.443333587830568e-06, "loss": 0.2607, "step": 6571 }, { "epoch": 0.56, "learning_rate": 8.440591251667164e-06, "loss": 0.2697, "step": 6572 }, { "epoch": 0.56, "learning_rate": 8.437849035695278e-06, "loss": 0.2972, "step": 6573 }, { "epoch": 0.56, "learning_rate": 8.435106940126266e-06, "loss": 0.303, "step": 6574 }, { "epoch": 0.56, "learning_rate": 8.43236496517148e-06, "loss": 0.2728, "step": 6575 }, { "epoch": 0.56, "learning_rate": 8.42962311104226e-06, "loss": 0.3177, "step": 6576 }, { "epoch": 0.56, "learning_rate": 8.426881377949927e-06, "loss": 0.2699, "step": 6577 }, { "epoch": 0.56, "learning_rate": 8.424139766105808e-06, "loss": 0.2378, "step": 6578 }, { "epoch": 0.56, "learning_rate": 8.421398275721208e-06, "loss": 0.278, "step": 6579 }, { "epoch": 0.56, "learning_rate": 8.41865690700743e-06, "loss": 0.2327, "step": 6580 }, { "epoch": 0.56, "learning_rate": 8.415915660175763e-06, "loss": 0.2849, "step": 6581 }, { "epoch": 0.56, "learning_rate": 8.413174535437486e-06, "loss": 0.2592, "step": 6582 }, { "epoch": 0.56, "learning_rate": 8.410433533003881e-06, "loss": 0.285, "step": 6583 }, { "epoch": 0.56, "learning_rate": 8.4076926530862e-06, "loss": 0.2789, "step": 6584 }, { "epoch": 0.56, "learning_rate": 8.404951895895706e-06, "loss": 0.3026, "step": 6585 }, { "epoch": 0.56, "learning_rate": 8.402211261643638e-06, "loss": 0.2661, "step": 6586 }, { "epoch": 0.56, "learning_rate": 8.399470750541228e-06, "loss": 0.2972, "step": 6587 }, { "epoch": 0.56, "learning_rate": 8.396730362799704e-06, "loss": 0.302, "step": 6588 }, { "epoch": 0.56, "learning_rate": 8.393990098630284e-06, "loss": 0.2769, "step": 6589 }, { "epoch": 0.56, "learning_rate": 8.391249958244173e-06, "loss": 0.2604, "step": 6590 }, { "epoch": 0.56, "learning_rate": 8.388509941852562e-06, "loss": 0.2522, "step": 6591 }, { "epoch": 0.57, "learning_rate": 8.385770049666646e-06, "loss": 0.2576, "step": 6592 }, { "epoch": 0.57, "learning_rate": 8.383030281897598e-06, "loss": 0.2767, "step": 6593 }, { "epoch": 0.57, "learning_rate": 8.380290638756584e-06, "loss": 0.3027, "step": 6594 }, { "epoch": 0.57, "learning_rate": 8.377551120454762e-06, "loss": 0.2668, "step": 6595 }, { "epoch": 0.57, "learning_rate": 8.374811727203288e-06, "loss": 0.2876, "step": 6596 }, { "epoch": 0.57, "learning_rate": 8.372072459213296e-06, "loss": 0.252, "step": 6597 }, { "epoch": 0.57, "learning_rate": 8.369333316695909e-06, "loss": 0.278, "step": 6598 }, { "epoch": 0.57, "learning_rate": 8.366594299862258e-06, "loss": 0.2847, "step": 6599 }, { "epoch": 0.57, "learning_rate": 8.363855408923446e-06, "loss": 0.2445, "step": 6600 }, { "epoch": 0.57, "learning_rate": 8.361116644090576e-06, "loss": 0.2927, "step": 6601 }, { "epoch": 0.57, "learning_rate": 8.358378005574731e-06, "loss": 0.2589, "step": 6602 }, { "epoch": 0.57, "learning_rate": 8.355639493587005e-06, "loss": 0.2673, "step": 6603 }, { "epoch": 0.57, "learning_rate": 8.352901108338462e-06, "loss": 0.251, "step": 6604 }, { "epoch": 0.57, "learning_rate": 8.350162850040158e-06, "loss": 0.2782, "step": 6605 }, { "epoch": 0.57, "learning_rate": 8.347424718903152e-06, "loss": 0.2795, "step": 6606 }, { "epoch": 0.57, "learning_rate": 8.344686715138482e-06, "loss": 0.2858, "step": 6607 }, { "epoch": 0.57, "learning_rate": 8.341948838957185e-06, "loss": 0.2376, "step": 6608 }, { "epoch": 0.57, "learning_rate": 8.339211090570272e-06, "loss": 0.2654, "step": 6609 }, { "epoch": 0.57, "learning_rate": 8.336473470188767e-06, "loss": 0.2508, "step": 6610 }, { "epoch": 0.57, "learning_rate": 8.333735978023667e-06, "loss": 0.254, "step": 6611 }, { "epoch": 0.57, "learning_rate": 8.33099861428596e-06, "loss": 0.276, "step": 6612 }, { "epoch": 0.57, "learning_rate": 8.328261379186636e-06, "loss": 0.3005, "step": 6613 }, { "epoch": 0.57, "learning_rate": 8.325524272936668e-06, "loss": 0.3035, "step": 6614 }, { "epoch": 0.57, "learning_rate": 8.322787295747007e-06, "loss": 0.2884, "step": 6615 }, { "epoch": 0.57, "learning_rate": 8.320050447828622e-06, "loss": 0.2643, "step": 6616 }, { "epoch": 0.57, "learning_rate": 8.317313729392446e-06, "loss": 0.2758, "step": 6617 }, { "epoch": 0.57, "learning_rate": 8.31457714064941e-06, "loss": 0.2894, "step": 6618 }, { "epoch": 0.57, "learning_rate": 8.311840681810441e-06, "loss": 0.3061, "step": 6619 }, { "epoch": 0.57, "learning_rate": 8.309104353086452e-06, "loss": 0.2903, "step": 6620 }, { "epoch": 0.57, "learning_rate": 8.306368154688347e-06, "loss": 0.2937, "step": 6621 }, { "epoch": 0.57, "learning_rate": 8.30363208682701e-06, "loss": 0.3031, "step": 6622 }, { "epoch": 0.57, "learning_rate": 8.300896149713334e-06, "loss": 0.3235, "step": 6623 }, { "epoch": 0.57, "learning_rate": 8.298160343558188e-06, "loss": 0.329, "step": 6624 }, { "epoch": 0.57, "learning_rate": 8.295424668572432e-06, "loss": 0.2549, "step": 6625 }, { "epoch": 0.57, "learning_rate": 8.292689124966917e-06, "loss": 0.3129, "step": 6626 }, { "epoch": 0.57, "learning_rate": 8.289953712952494e-06, "loss": 0.2688, "step": 6627 }, { "epoch": 0.57, "learning_rate": 8.287218432739987e-06, "loss": 0.2543, "step": 6628 }, { "epoch": 0.57, "learning_rate": 8.284483284540217e-06, "loss": 0.2755, "step": 6629 }, { "epoch": 0.57, "learning_rate": 8.281748268564002e-06, "loss": 0.3182, "step": 6630 }, { "epoch": 0.57, "learning_rate": 8.279013385022142e-06, "loss": 0.3085, "step": 6631 }, { "epoch": 0.57, "learning_rate": 8.276278634125424e-06, "loss": 0.2692, "step": 6632 }, { "epoch": 0.57, "learning_rate": 8.273544016084629e-06, "loss": 0.2648, "step": 6633 }, { "epoch": 0.57, "learning_rate": 8.270809531110536e-06, "loss": 0.2913, "step": 6634 }, { "epoch": 0.57, "learning_rate": 8.268075179413899e-06, "loss": 0.2413, "step": 6635 }, { "epoch": 0.57, "learning_rate": 8.265340961205467e-06, "loss": 0.2721, "step": 6636 }, { "epoch": 0.57, "learning_rate": 8.262606876695984e-06, "loss": 0.2873, "step": 6637 }, { "epoch": 0.57, "learning_rate": 8.259872926096177e-06, "loss": 0.2712, "step": 6638 }, { "epoch": 0.57, "learning_rate": 8.257139109616769e-06, "loss": 0.2597, "step": 6639 }, { "epoch": 0.57, "learning_rate": 8.254405427468464e-06, "loss": 0.2756, "step": 6640 }, { "epoch": 0.57, "learning_rate": 8.251671879861966e-06, "loss": 0.2834, "step": 6641 }, { "epoch": 0.57, "learning_rate": 8.24893846700796e-06, "loss": 0.2936, "step": 6642 }, { "epoch": 0.57, "learning_rate": 8.246205189117122e-06, "loss": 0.3012, "step": 6643 }, { "epoch": 0.57, "learning_rate": 8.243472046400126e-06, "loss": 0.274, "step": 6644 }, { "epoch": 0.57, "learning_rate": 8.240739039067623e-06, "loss": 0.274, "step": 6645 }, { "epoch": 0.57, "learning_rate": 8.238006167330266e-06, "loss": 0.2891, "step": 6646 }, { "epoch": 0.57, "learning_rate": 8.235273431398681e-06, "loss": 0.2826, "step": 6647 }, { "epoch": 0.57, "learning_rate": 8.232540831483505e-06, "loss": 0.2602, "step": 6648 }, { "epoch": 0.57, "learning_rate": 8.229808367795349e-06, "loss": 0.3093, "step": 6649 }, { "epoch": 0.57, "learning_rate": 8.227076040544813e-06, "loss": 0.2997, "step": 6650 }, { "epoch": 0.57, "learning_rate": 8.224343849942496e-06, "loss": 0.2674, "step": 6651 }, { "epoch": 0.57, "learning_rate": 8.221611796198984e-06, "loss": 0.3036, "step": 6652 }, { "epoch": 0.57, "learning_rate": 8.218879879524844e-06, "loss": 0.2232, "step": 6653 }, { "epoch": 0.57, "learning_rate": 8.216148100130647e-06, "loss": 0.5638, "step": 6654 }, { "epoch": 0.57, "learning_rate": 8.213416458226939e-06, "loss": 0.3172, "step": 6655 }, { "epoch": 0.57, "learning_rate": 8.210684954024261e-06, "loss": 0.244, "step": 6656 }, { "epoch": 0.57, "learning_rate": 8.207953587733145e-06, "loss": 0.2872, "step": 6657 }, { "epoch": 0.57, "learning_rate": 8.205222359564113e-06, "loss": 0.2779, "step": 6658 }, { "epoch": 0.57, "learning_rate": 8.202491269727674e-06, "loss": 0.3121, "step": 6659 }, { "epoch": 0.57, "learning_rate": 8.199760318434323e-06, "loss": 0.2393, "step": 6660 }, { "epoch": 0.57, "learning_rate": 8.197029505894553e-06, "loss": 0.2648, "step": 6661 }, { "epoch": 0.57, "learning_rate": 8.194298832318843e-06, "loss": 0.3467, "step": 6662 }, { "epoch": 0.57, "learning_rate": 8.191568297917654e-06, "loss": 0.2256, "step": 6663 }, { "epoch": 0.57, "learning_rate": 8.188837902901441e-06, "loss": 0.3017, "step": 6664 }, { "epoch": 0.57, "learning_rate": 8.186107647480659e-06, "loss": 0.3411, "step": 6665 }, { "epoch": 0.57, "learning_rate": 8.183377531865737e-06, "loss": 0.256, "step": 6666 }, { "epoch": 0.57, "learning_rate": 8.180647556267093e-06, "loss": 0.2773, "step": 6667 }, { "epoch": 0.57, "learning_rate": 8.177917720895152e-06, "loss": 0.2856, "step": 6668 }, { "epoch": 0.57, "learning_rate": 8.17518802596031e-06, "loss": 0.3012, "step": 6669 }, { "epoch": 0.57, "learning_rate": 8.172458471672953e-06, "loss": 0.2455, "step": 6670 }, { "epoch": 0.57, "learning_rate": 8.169729058243468e-06, "loss": 0.2645, "step": 6671 }, { "epoch": 0.57, "learning_rate": 8.166999785882226e-06, "loss": 0.2902, "step": 6672 }, { "epoch": 0.57, "learning_rate": 8.164270654799584e-06, "loss": 0.2939, "step": 6673 }, { "epoch": 0.57, "learning_rate": 8.161541665205885e-06, "loss": 0.2874, "step": 6674 }, { "epoch": 0.57, "learning_rate": 8.158812817311474e-06, "loss": 0.3354, "step": 6675 }, { "epoch": 0.57, "learning_rate": 8.156084111326673e-06, "loss": 0.3188, "step": 6676 }, { "epoch": 0.57, "learning_rate": 8.1533555474618e-06, "loss": 0.3161, "step": 6677 }, { "epoch": 0.57, "learning_rate": 8.15062712592715e-06, "loss": 0.2739, "step": 6678 }, { "epoch": 0.57, "learning_rate": 8.14789884693303e-06, "loss": 0.2619, "step": 6679 }, { "epoch": 0.57, "learning_rate": 8.145170710689712e-06, "loss": 0.2914, "step": 6680 }, { "epoch": 0.57, "learning_rate": 8.142442717407469e-06, "loss": 0.2628, "step": 6681 }, { "epoch": 0.57, "learning_rate": 8.139714867296567e-06, "loss": 0.3356, "step": 6682 }, { "epoch": 0.57, "learning_rate": 8.13698716056725e-06, "loss": 0.2923, "step": 6683 }, { "epoch": 0.57, "learning_rate": 8.134259597429757e-06, "loss": 0.3265, "step": 6684 }, { "epoch": 0.57, "learning_rate": 8.13153217809431e-06, "loss": 0.2641, "step": 6685 }, { "epoch": 0.57, "learning_rate": 8.128804902771137e-06, "loss": 0.275, "step": 6686 }, { "epoch": 0.57, "learning_rate": 8.126077771670438e-06, "loss": 0.3011, "step": 6687 }, { "epoch": 0.57, "learning_rate": 8.123350785002398e-06, "loss": 0.2404, "step": 6688 }, { "epoch": 0.57, "learning_rate": 8.120623942977209e-06, "loss": 0.2519, "step": 6689 }, { "epoch": 0.57, "learning_rate": 8.117897245805044e-06, "loss": 0.2847, "step": 6690 }, { "epoch": 0.57, "learning_rate": 8.115170693696058e-06, "loss": 0.3049, "step": 6691 }, { "epoch": 0.57, "learning_rate": 8.112444286860397e-06, "loss": 0.2795, "step": 6692 }, { "epoch": 0.57, "learning_rate": 8.109718025508208e-06, "loss": 0.3232, "step": 6693 }, { "epoch": 0.57, "learning_rate": 8.106991909849613e-06, "loss": 0.2578, "step": 6694 }, { "epoch": 0.57, "learning_rate": 8.104265940094726e-06, "loss": 0.2699, "step": 6695 }, { "epoch": 0.57, "learning_rate": 8.101540116453655e-06, "loss": 0.6116, "step": 6696 }, { "epoch": 0.57, "learning_rate": 8.098814439136492e-06, "loss": 0.2864, "step": 6697 }, { "epoch": 0.57, "learning_rate": 8.096088908353316e-06, "loss": 0.3403, "step": 6698 }, { "epoch": 0.57, "learning_rate": 8.093363524314202e-06, "loss": 0.2563, "step": 6699 }, { "epoch": 0.57, "learning_rate": 8.090638287229207e-06, "loss": 0.3217, "step": 6700 }, { "epoch": 0.57, "learning_rate": 8.087913197308376e-06, "loss": 0.2843, "step": 6701 }, { "epoch": 0.57, "learning_rate": 8.085188254761744e-06, "loss": 0.2822, "step": 6702 }, { "epoch": 0.57, "learning_rate": 8.082463459799346e-06, "loss": 0.2934, "step": 6703 }, { "epoch": 0.57, "learning_rate": 8.07973881263119e-06, "loss": 0.2802, "step": 6704 }, { "epoch": 0.57, "learning_rate": 8.077014313467274e-06, "loss": 0.27, "step": 6705 }, { "epoch": 0.57, "learning_rate": 8.074289962517597e-06, "loss": 0.2915, "step": 6706 }, { "epoch": 0.57, "learning_rate": 8.071565759992133e-06, "loss": 0.2678, "step": 6707 }, { "epoch": 0.58, "learning_rate": 8.068841706100851e-06, "loss": 0.3052, "step": 6708 }, { "epoch": 0.58, "learning_rate": 8.066117801053706e-06, "loss": 0.2889, "step": 6709 }, { "epoch": 0.58, "learning_rate": 8.063394045060648e-06, "loss": 0.2965, "step": 6710 }, { "epoch": 0.58, "learning_rate": 8.06067043833161e-06, "loss": 0.2789, "step": 6711 }, { "epoch": 0.58, "learning_rate": 8.057946981076506e-06, "loss": 0.2478, "step": 6712 }, { "epoch": 0.58, "learning_rate": 8.055223673505258e-06, "loss": 0.2797, "step": 6713 }, { "epoch": 0.58, "learning_rate": 8.052500515827759e-06, "loss": 0.2854, "step": 6714 }, { "epoch": 0.58, "learning_rate": 8.049777508253898e-06, "loss": 0.2609, "step": 6715 }, { "epoch": 0.58, "learning_rate": 8.047054650993545e-06, "loss": 0.3015, "step": 6716 }, { "epoch": 0.58, "learning_rate": 8.044331944256576e-06, "loss": 0.261, "step": 6717 }, { "epoch": 0.58, "learning_rate": 8.041609388252836e-06, "loss": 0.2498, "step": 6718 }, { "epoch": 0.58, "learning_rate": 8.038886983192164e-06, "loss": 0.2546, "step": 6719 }, { "epoch": 0.58, "learning_rate": 8.036164729284398e-06, "loss": 0.2996, "step": 6720 }, { "epoch": 0.58, "learning_rate": 8.033442626739347e-06, "loss": 0.3373, "step": 6721 }, { "epoch": 0.58, "learning_rate": 8.030720675766825e-06, "loss": 0.2789, "step": 6722 }, { "epoch": 0.58, "learning_rate": 8.02799887657662e-06, "loss": 0.261, "step": 6723 }, { "epoch": 0.58, "learning_rate": 8.025277229378519e-06, "loss": 0.2935, "step": 6724 }, { "epoch": 0.58, "learning_rate": 8.022555734382294e-06, "loss": 0.2772, "step": 6725 }, { "epoch": 0.58, "learning_rate": 8.019834391797696e-06, "loss": 0.2841, "step": 6726 }, { "epoch": 0.58, "learning_rate": 8.017113201834482e-06, "loss": 0.2551, "step": 6727 }, { "epoch": 0.58, "learning_rate": 8.014392164702387e-06, "loss": 0.305, "step": 6728 }, { "epoch": 0.58, "learning_rate": 8.011671280611132e-06, "loss": 0.2997, "step": 6729 }, { "epoch": 0.58, "learning_rate": 8.008950549770426e-06, "loss": 0.2642, "step": 6730 }, { "epoch": 0.58, "learning_rate": 8.006229972389979e-06, "loss": 0.2518, "step": 6731 }, { "epoch": 0.58, "learning_rate": 8.003509548679471e-06, "loss": 0.3026, "step": 6732 }, { "epoch": 0.58, "learning_rate": 8.000789278848582e-06, "loss": 0.298, "step": 6733 }, { "epoch": 0.58, "learning_rate": 7.998069163106977e-06, "loss": 0.2606, "step": 6734 }, { "epoch": 0.58, "learning_rate": 7.995349201664311e-06, "loss": 0.2728, "step": 6735 }, { "epoch": 0.58, "learning_rate": 7.992629394730225e-06, "loss": 0.3022, "step": 6736 }, { "epoch": 0.58, "learning_rate": 7.98990974251434e-06, "loss": 0.2748, "step": 6737 }, { "epoch": 0.58, "learning_rate": 7.987190245226285e-06, "loss": 0.2649, "step": 6738 }, { "epoch": 0.58, "learning_rate": 7.984470903075658e-06, "loss": 0.2667, "step": 6739 }, { "epoch": 0.58, "learning_rate": 7.981751716272054e-06, "loss": 0.2701, "step": 6740 }, { "epoch": 0.58, "learning_rate": 7.979032685025057e-06, "loss": 0.288, "step": 6741 }, { "epoch": 0.58, "learning_rate": 7.976313809544237e-06, "loss": 0.3501, "step": 6742 }, { "epoch": 0.58, "learning_rate": 7.973595090039144e-06, "loss": 0.2673, "step": 6743 }, { "epoch": 0.58, "learning_rate": 7.970876526719333e-06, "loss": 0.274, "step": 6744 }, { "epoch": 0.58, "learning_rate": 7.968158119794334e-06, "loss": 0.2609, "step": 6745 }, { "epoch": 0.58, "learning_rate": 7.965439869473664e-06, "loss": 0.3143, "step": 6746 }, { "epoch": 0.58, "learning_rate": 7.962721775966836e-06, "loss": 0.2941, "step": 6747 }, { "epoch": 0.58, "learning_rate": 7.960003839483348e-06, "loss": 0.2725, "step": 6748 }, { "epoch": 0.58, "learning_rate": 7.957286060232687e-06, "loss": 0.6223, "step": 6749 }, { "epoch": 0.58, "learning_rate": 7.954568438424315e-06, "loss": 0.2475, "step": 6750 }, { "epoch": 0.58, "learning_rate": 7.95185097426771e-06, "loss": 0.2858, "step": 6751 }, { "epoch": 0.58, "learning_rate": 7.949133667972307e-06, "loss": 0.5742, "step": 6752 }, { "epoch": 0.58, "learning_rate": 7.946416519747549e-06, "loss": 0.6016, "step": 6753 }, { "epoch": 0.58, "learning_rate": 7.943699529802854e-06, "loss": 0.2668, "step": 6754 }, { "epoch": 0.58, "learning_rate": 7.940982698347646e-06, "loss": 0.2445, "step": 6755 }, { "epoch": 0.58, "learning_rate": 7.938266025591315e-06, "loss": 0.2624, "step": 6756 }, { "epoch": 0.58, "learning_rate": 7.935549511743249e-06, "loss": 0.2715, "step": 6757 }, { "epoch": 0.58, "learning_rate": 7.932833157012829e-06, "loss": 0.3148, "step": 6758 }, { "epoch": 0.58, "learning_rate": 7.930116961609413e-06, "loss": 0.2615, "step": 6759 }, { "epoch": 0.58, "learning_rate": 7.927400925742357e-06, "loss": 0.2903, "step": 6760 }, { "epoch": 0.58, "learning_rate": 7.92468504962099e-06, "loss": 0.2978, "step": 6761 }, { "epoch": 0.58, "learning_rate": 7.921969333454652e-06, "loss": 0.2657, "step": 6762 }, { "epoch": 0.58, "learning_rate": 7.919253777452649e-06, "loss": 0.2697, "step": 6763 }, { "epoch": 0.58, "learning_rate": 7.916538381824279e-06, "loss": 0.2648, "step": 6764 }, { "epoch": 0.58, "learning_rate": 7.913823146778838e-06, "loss": 0.2726, "step": 6765 }, { "epoch": 0.58, "learning_rate": 7.911108072525603e-06, "loss": 0.2791, "step": 6766 }, { "epoch": 0.58, "learning_rate": 7.908393159273835e-06, "loss": 0.3007, "step": 6767 }, { "epoch": 0.58, "learning_rate": 7.905678407232785e-06, "loss": 0.3189, "step": 6768 }, { "epoch": 0.58, "learning_rate": 7.902963816611699e-06, "loss": 0.2844, "step": 6769 }, { "epoch": 0.58, "learning_rate": 7.900249387619797e-06, "loss": 0.2731, "step": 6770 }, { "epoch": 0.58, "learning_rate": 7.897535120466297e-06, "loss": 0.312, "step": 6771 }, { "epoch": 0.58, "learning_rate": 7.894821015360404e-06, "loss": 0.275, "step": 6772 }, { "epoch": 0.58, "learning_rate": 7.892107072511303e-06, "loss": 0.2603, "step": 6773 }, { "epoch": 0.58, "learning_rate": 7.889393292128177e-06, "loss": 0.2852, "step": 6774 }, { "epoch": 0.58, "learning_rate": 7.886679674420182e-06, "loss": 0.2716, "step": 6775 }, { "epoch": 0.58, "learning_rate": 7.88396621959648e-06, "loss": 0.2916, "step": 6776 }, { "epoch": 0.58, "learning_rate": 7.881252927866203e-06, "loss": 0.2948, "step": 6777 }, { "epoch": 0.58, "learning_rate": 7.878539799438479e-06, "loss": 0.2668, "step": 6778 }, { "epoch": 0.58, "learning_rate": 7.87582683452243e-06, "loss": 0.2807, "step": 6779 }, { "epoch": 0.58, "learning_rate": 7.873114033327153e-06, "loss": 0.289, "step": 6780 }, { "epoch": 0.58, "learning_rate": 7.870401396061731e-06, "loss": 0.3131, "step": 6781 }, { "epoch": 0.58, "learning_rate": 7.867688922935253e-06, "loss": 0.2866, "step": 6782 }, { "epoch": 0.58, "learning_rate": 7.864976614156776e-06, "loss": 0.3062, "step": 6783 }, { "epoch": 0.58, "learning_rate": 7.86226446993535e-06, "loss": 0.27, "step": 6784 }, { "epoch": 0.58, "learning_rate": 7.859552490480014e-06, "loss": 0.2682, "step": 6785 }, { "epoch": 0.58, "learning_rate": 7.856840675999799e-06, "loss": 0.3036, "step": 6786 }, { "epoch": 0.58, "learning_rate": 7.854129026703716e-06, "loss": 0.3032, "step": 6787 }, { "epoch": 0.58, "learning_rate": 7.85141754280076e-06, "loss": 0.2999, "step": 6788 }, { "epoch": 0.58, "learning_rate": 7.848706224499928e-06, "loss": 0.2505, "step": 6789 }, { "epoch": 0.58, "learning_rate": 7.845995072010188e-06, "loss": 0.2905, "step": 6790 }, { "epoch": 0.58, "learning_rate": 7.843284085540505e-06, "loss": 0.308, "step": 6791 }, { "epoch": 0.58, "learning_rate": 7.840573265299827e-06, "loss": 0.283, "step": 6792 }, { "epoch": 0.58, "learning_rate": 7.837862611497094e-06, "loss": 0.2396, "step": 6793 }, { "epoch": 0.58, "learning_rate": 7.835152124341228e-06, "loss": 0.2682, "step": 6794 }, { "epoch": 0.58, "learning_rate": 7.832441804041135e-06, "loss": 0.3159, "step": 6795 }, { "epoch": 0.58, "learning_rate": 7.829731650805722e-06, "loss": 0.2722, "step": 6796 }, { "epoch": 0.58, "learning_rate": 7.827021664843867e-06, "loss": 0.2944, "step": 6797 }, { "epoch": 0.58, "learning_rate": 7.824311846364448e-06, "loss": 0.2573, "step": 6798 }, { "epoch": 0.58, "learning_rate": 7.821602195576316e-06, "loss": 0.2988, "step": 6799 }, { "epoch": 0.58, "learning_rate": 7.818892712688328e-06, "loss": 0.2433, "step": 6800 }, { "epoch": 0.58, "learning_rate": 7.816183397909312e-06, "loss": 0.3422, "step": 6801 }, { "epoch": 0.58, "learning_rate": 7.813474251448086e-06, "loss": 0.3299, "step": 6802 }, { "epoch": 0.58, "learning_rate": 7.810765273513463e-06, "loss": 0.2822, "step": 6803 }, { "epoch": 0.58, "learning_rate": 7.808056464314236e-06, "loss": 0.2115, "step": 6804 }, { "epoch": 0.58, "learning_rate": 7.805347824059188e-06, "loss": 0.3323, "step": 6805 }, { "epoch": 0.58, "learning_rate": 7.80263935295708e-06, "loss": 0.2448, "step": 6806 }, { "epoch": 0.58, "learning_rate": 7.799931051216677e-06, "loss": 0.3411, "step": 6807 }, { "epoch": 0.58, "learning_rate": 7.797222919046717e-06, "loss": 0.3226, "step": 6808 }, { "epoch": 0.58, "learning_rate": 7.794514956655929e-06, "loss": 0.265, "step": 6809 }, { "epoch": 0.58, "learning_rate": 7.79180716425303e-06, "loss": 0.2964, "step": 6810 }, { "epoch": 0.58, "learning_rate": 7.789099542046727e-06, "loss": 0.2639, "step": 6811 }, { "epoch": 0.58, "learning_rate": 7.786392090245708e-06, "loss": 0.2372, "step": 6812 }, { "epoch": 0.58, "learning_rate": 7.783684809058642e-06, "loss": 0.2568, "step": 6813 }, { "epoch": 0.58, "learning_rate": 7.780977698694206e-06, "loss": 0.2457, "step": 6814 }, { "epoch": 0.58, "learning_rate": 7.778270759361044e-06, "loss": 0.2863, "step": 6815 }, { "epoch": 0.58, "learning_rate": 7.775563991267789e-06, "loss": 0.3049, "step": 6816 }, { "epoch": 0.58, "learning_rate": 7.772857394623074e-06, "loss": 0.2921, "step": 6817 }, { "epoch": 0.58, "learning_rate": 7.770150969635509e-06, "loss": 0.2805, "step": 6818 }, { "epoch": 0.58, "learning_rate": 7.767444716513686e-06, "loss": 0.3014, "step": 6819 }, { "epoch": 0.58, "learning_rate": 7.764738635466192e-06, "loss": 0.2441, "step": 6820 }, { "epoch": 0.58, "learning_rate": 7.762032726701602e-06, "loss": 0.2899, "step": 6821 }, { "epoch": 0.58, "learning_rate": 7.759326990428468e-06, "loss": 0.2731, "step": 6822 }, { "epoch": 0.58, "learning_rate": 7.756621426855337e-06, "loss": 0.3071, "step": 6823 }, { "epoch": 0.58, "learning_rate": 7.753916036190747e-06, "loss": 0.2357, "step": 6824 }, { "epoch": 0.59, "learning_rate": 7.751210818643209e-06, "loss": 0.2946, "step": 6825 }, { "epoch": 0.59, "learning_rate": 7.748505774421227e-06, "loss": 0.2566, "step": 6826 }, { "epoch": 0.59, "learning_rate": 7.745800903733298e-06, "loss": 0.2599, "step": 6827 }, { "epoch": 0.59, "learning_rate": 7.743096206787894e-06, "loss": 0.2734, "step": 6828 }, { "epoch": 0.59, "learning_rate": 7.740391683793486e-06, "loss": 0.2442, "step": 6829 }, { "epoch": 0.59, "learning_rate": 7.737687334958518e-06, "loss": 0.2748, "step": 6830 }, { "epoch": 0.59, "learning_rate": 7.734983160491435e-06, "loss": 0.242, "step": 6831 }, { "epoch": 0.59, "learning_rate": 7.73227916060066e-06, "loss": 0.2683, "step": 6832 }, { "epoch": 0.59, "learning_rate": 7.729575335494595e-06, "loss": 0.2817, "step": 6833 }, { "epoch": 0.59, "learning_rate": 7.726871685381652e-06, "loss": 0.29, "step": 6834 }, { "epoch": 0.59, "learning_rate": 7.724168210470203e-06, "loss": 0.2422, "step": 6835 }, { "epoch": 0.59, "learning_rate": 7.721464910968628e-06, "loss": 0.2679, "step": 6836 }, { "epoch": 0.59, "learning_rate": 7.718761787085271e-06, "loss": 0.2892, "step": 6837 }, { "epoch": 0.59, "learning_rate": 7.71605883902849e-06, "loss": 0.2853, "step": 6838 }, { "epoch": 0.59, "learning_rate": 7.713356067006609e-06, "loss": 0.2867, "step": 6839 }, { "epoch": 0.59, "learning_rate": 7.710653471227939e-06, "loss": 0.3144, "step": 6840 }, { "epoch": 0.59, "learning_rate": 7.70795105190079e-06, "loss": 0.2573, "step": 6841 }, { "epoch": 0.59, "learning_rate": 7.70524880923345e-06, "loss": 0.2829, "step": 6842 }, { "epoch": 0.59, "learning_rate": 7.702546743434193e-06, "loss": 0.3174, "step": 6843 }, { "epoch": 0.59, "learning_rate": 7.699844854711276e-06, "loss": 0.2766, "step": 6844 }, { "epoch": 0.59, "learning_rate": 7.697143143272959e-06, "loss": 0.261, "step": 6845 }, { "epoch": 0.59, "learning_rate": 7.694441609327465e-06, "loss": 0.2397, "step": 6846 }, { "epoch": 0.59, "learning_rate": 7.691740253083022e-06, "loss": 0.264, "step": 6847 }, { "epoch": 0.59, "learning_rate": 7.689039074747832e-06, "loss": 0.2981, "step": 6848 }, { "epoch": 0.59, "learning_rate": 7.686338074530095e-06, "loss": 0.2701, "step": 6849 }, { "epoch": 0.59, "learning_rate": 7.683637252637988e-06, "loss": 0.3522, "step": 6850 }, { "epoch": 0.59, "learning_rate": 7.68093660927967e-06, "loss": 0.2596, "step": 6851 }, { "epoch": 0.59, "learning_rate": 7.678236144663304e-06, "loss": 0.2336, "step": 6852 }, { "epoch": 0.59, "learning_rate": 7.675535858997024e-06, "loss": 0.2643, "step": 6853 }, { "epoch": 0.59, "learning_rate": 7.67283575248895e-06, "loss": 0.2522, "step": 6854 }, { "epoch": 0.59, "learning_rate": 7.670135825347202e-06, "loss": 0.2463, "step": 6855 }, { "epoch": 0.59, "learning_rate": 7.667436077779872e-06, "loss": 0.2444, "step": 6856 }, { "epoch": 0.59, "learning_rate": 7.664736509995042e-06, "loss": 0.2952, "step": 6857 }, { "epoch": 0.59, "learning_rate": 7.662037122200783e-06, "loss": 0.2639, "step": 6858 }, { "epoch": 0.59, "learning_rate": 7.659337914605152e-06, "loss": 0.282, "step": 6859 }, { "epoch": 0.59, "learning_rate": 7.656638887416186e-06, "loss": 0.2481, "step": 6860 }, { "epoch": 0.59, "learning_rate": 7.653940040841917e-06, "loss": 0.2653, "step": 6861 }, { "epoch": 0.59, "learning_rate": 7.651241375090358e-06, "loss": 0.2657, "step": 6862 }, { "epoch": 0.59, "learning_rate": 7.64854289036951e-06, "loss": 0.2699, "step": 6863 }, { "epoch": 0.59, "learning_rate": 7.645844586887353e-06, "loss": 0.2776, "step": 6864 }, { "epoch": 0.59, "learning_rate": 7.643146464851867e-06, "loss": 0.2554, "step": 6865 }, { "epoch": 0.59, "learning_rate": 7.640448524471002e-06, "loss": 0.3297, "step": 6866 }, { "epoch": 0.59, "learning_rate": 7.63775076595271e-06, "loss": 0.2556, "step": 6867 }, { "epoch": 0.59, "learning_rate": 7.635053189504913e-06, "loss": 0.2626, "step": 6868 }, { "epoch": 0.59, "learning_rate": 7.632355795335533e-06, "loss": 0.2658, "step": 6869 }, { "epoch": 0.59, "learning_rate": 7.629658583652471e-06, "loss": 0.2875, "step": 6870 }, { "epoch": 0.59, "learning_rate": 7.626961554663609e-06, "loss": 0.241, "step": 6871 }, { "epoch": 0.59, "learning_rate": 7.624264708576827e-06, "loss": 0.6245, "step": 6872 }, { "epoch": 0.59, "learning_rate": 7.621568045599983e-06, "loss": 0.2819, "step": 6873 }, { "epoch": 0.59, "learning_rate": 7.6188715659409216e-06, "loss": 0.2404, "step": 6874 }, { "epoch": 0.59, "learning_rate": 7.616175269807472e-06, "loss": 0.3386, "step": 6875 }, { "epoch": 0.59, "learning_rate": 7.613479157407457e-06, "loss": 0.2772, "step": 6876 }, { "epoch": 0.59, "learning_rate": 7.6107832289486775e-06, "loss": 0.2893, "step": 6877 }, { "epoch": 0.59, "learning_rate": 7.608087484638915e-06, "loss": 0.2455, "step": 6878 }, { "epoch": 0.59, "learning_rate": 7.605391924685954e-06, "loss": 0.2654, "step": 6879 }, { "epoch": 0.59, "learning_rate": 7.6026965492975535e-06, "loss": 0.2864, "step": 6880 }, { "epoch": 0.59, "learning_rate": 7.600001358681457e-06, "loss": 0.2517, "step": 6881 }, { "epoch": 0.59, "learning_rate": 7.597306353045393e-06, "loss": 0.2388, "step": 6882 }, { "epoch": 0.59, "learning_rate": 7.594611532597087e-06, "loss": 0.3116, "step": 6883 }, { "epoch": 0.59, "learning_rate": 7.591916897544238e-06, "loss": 0.2532, "step": 6884 }, { "epoch": 0.59, "learning_rate": 7.589222448094535e-06, "loss": 0.2809, "step": 6885 }, { "epoch": 0.59, "learning_rate": 7.586528184455653e-06, "loss": 0.2706, "step": 6886 }, { "epoch": 0.59, "learning_rate": 7.583834106835256e-06, "loss": 0.6118, "step": 6887 }, { "epoch": 0.59, "learning_rate": 7.581140215440987e-06, "loss": 0.248, "step": 6888 }, { "epoch": 0.59, "learning_rate": 7.578446510480475e-06, "loss": 0.2797, "step": 6889 }, { "epoch": 0.59, "learning_rate": 7.575752992161345e-06, "loss": 0.2577, "step": 6890 }, { "epoch": 0.59, "learning_rate": 7.573059660691192e-06, "loss": 0.2634, "step": 6891 }, { "epoch": 0.59, "learning_rate": 7.570366516277607e-06, "loss": 0.288, "step": 6892 }, { "epoch": 0.59, "learning_rate": 7.567673559128171e-06, "loss": 0.2911, "step": 6893 }, { "epoch": 0.59, "learning_rate": 7.564980789450438e-06, "loss": 0.2475, "step": 6894 }, { "epoch": 0.59, "learning_rate": 7.5622882074519544e-06, "loss": 0.3536, "step": 6895 }, { "epoch": 0.59, "learning_rate": 7.559595813340246e-06, "loss": 0.2318, "step": 6896 }, { "epoch": 0.59, "learning_rate": 7.556903607322839e-06, "loss": 0.285, "step": 6897 }, { "epoch": 0.59, "learning_rate": 7.554211589607227e-06, "loss": 0.2857, "step": 6898 }, { "epoch": 0.59, "learning_rate": 7.551519760400898e-06, "loss": 0.2746, "step": 6899 }, { "epoch": 0.59, "learning_rate": 7.548828119911333e-06, "loss": 0.2625, "step": 6900 }, { "epoch": 0.59, "learning_rate": 7.546136668345985e-06, "loss": 0.2668, "step": 6901 }, { "epoch": 0.59, "learning_rate": 7.543445405912298e-06, "loss": 0.2488, "step": 6902 }, { "epoch": 0.59, "learning_rate": 7.540754332817695e-06, "loss": 0.2884, "step": 6903 }, { "epoch": 0.59, "learning_rate": 7.538063449269599e-06, "loss": 0.3168, "step": 6904 }, { "epoch": 0.59, "learning_rate": 7.535372755475411e-06, "loss": 0.2642, "step": 6905 }, { "epoch": 0.59, "learning_rate": 7.532682251642508e-06, "loss": 0.2701, "step": 6906 }, { "epoch": 0.59, "learning_rate": 7.5299919379782695e-06, "loss": 0.3134, "step": 6907 }, { "epoch": 0.59, "learning_rate": 7.527301814690048e-06, "loss": 0.2495, "step": 6908 }, { "epoch": 0.59, "learning_rate": 7.524611881985181e-06, "loss": 0.3099, "step": 6909 }, { "epoch": 0.59, "learning_rate": 7.521922140071003e-06, "loss": 0.2658, "step": 6910 }, { "epoch": 0.59, "learning_rate": 7.519232589154819e-06, "loss": 0.2795, "step": 6911 }, { "epoch": 0.59, "learning_rate": 7.516543229443931e-06, "loss": 0.2455, "step": 6912 }, { "epoch": 0.59, "learning_rate": 7.513854061145617e-06, "loss": 0.265, "step": 6913 }, { "epoch": 0.59, "learning_rate": 7.5111650844671515e-06, "loss": 0.3412, "step": 6914 }, { "epoch": 0.59, "learning_rate": 7.5084762996157835e-06, "loss": 0.2528, "step": 6915 }, { "epoch": 0.59, "learning_rate": 7.5057877067987464e-06, "loss": 0.2936, "step": 6916 }, { "epoch": 0.59, "learning_rate": 7.503099306223271e-06, "loss": 0.3046, "step": 6917 }, { "epoch": 0.59, "learning_rate": 7.5004110980965664e-06, "loss": 0.2598, "step": 6918 }, { "epoch": 0.59, "learning_rate": 7.4977230826258226e-06, "loss": 0.3021, "step": 6919 }, { "epoch": 0.59, "learning_rate": 7.495035260018215e-06, "loss": 0.3339, "step": 6920 }, { "epoch": 0.59, "learning_rate": 7.492347630480917e-06, "loss": 0.3074, "step": 6921 }, { "epoch": 0.59, "learning_rate": 7.489660194221071e-06, "loss": 0.2566, "step": 6922 }, { "epoch": 0.59, "learning_rate": 7.486972951445812e-06, "loss": 0.2756, "step": 6923 }, { "epoch": 0.59, "learning_rate": 7.484285902362263e-06, "loss": 0.2369, "step": 6924 }, { "epoch": 0.59, "learning_rate": 7.481599047177527e-06, "loss": 0.2578, "step": 6925 }, { "epoch": 0.59, "learning_rate": 7.478912386098692e-06, "loss": 0.3085, "step": 6926 }, { "epoch": 0.59, "learning_rate": 7.47622591933283e-06, "loss": 0.2516, "step": 6927 }, { "epoch": 0.59, "learning_rate": 7.473539647087007e-06, "loss": 0.2854, "step": 6928 }, { "epoch": 0.59, "learning_rate": 7.470853569568264e-06, "loss": 0.2673, "step": 6929 }, { "epoch": 0.59, "learning_rate": 7.468167686983627e-06, "loss": 0.2991, "step": 6930 }, { "epoch": 0.59, "learning_rate": 7.46548199954012e-06, "loss": 0.2335, "step": 6931 }, { "epoch": 0.59, "learning_rate": 7.462796507444736e-06, "loss": 0.2685, "step": 6932 }, { "epoch": 0.59, "learning_rate": 7.4601112109044615e-06, "loss": 0.3016, "step": 6933 }, { "epoch": 0.59, "learning_rate": 7.4574261101262604e-06, "loss": 0.3145, "step": 6934 }, { "epoch": 0.59, "learning_rate": 7.4547412053170955e-06, "loss": 0.2549, "step": 6935 }, { "epoch": 0.59, "learning_rate": 7.4520564966839e-06, "loss": 0.2838, "step": 6936 }, { "epoch": 0.59, "learning_rate": 7.449371984433598e-06, "loss": 0.2552, "step": 6937 }, { "epoch": 0.59, "learning_rate": 7.446687668773105e-06, "loss": 0.2852, "step": 6938 }, { "epoch": 0.59, "learning_rate": 7.44400354990931e-06, "loss": 0.2657, "step": 6939 }, { "epoch": 0.59, "learning_rate": 7.4413196280490905e-06, "loss": 0.2704, "step": 6940 }, { "epoch": 0.59, "learning_rate": 7.43863590339931e-06, "loss": 0.2847, "step": 6941 }, { "epoch": 0.6, "learning_rate": 7.435952376166818e-06, "loss": 0.3054, "step": 6942 }, { "epoch": 0.6, "learning_rate": 7.433269046558449e-06, "loss": 0.3448, "step": 6943 }, { "epoch": 0.6, "learning_rate": 7.430585914781017e-06, "loss": 0.2815, "step": 6944 }, { "epoch": 0.6, "learning_rate": 7.427902981041329e-06, "loss": 0.2787, "step": 6945 }, { "epoch": 0.6, "learning_rate": 7.425220245546172e-06, "loss": 0.2469, "step": 6946 }, { "epoch": 0.6, "learning_rate": 7.4225377085023105e-06, "loss": 0.2617, "step": 6947 }, { "epoch": 0.6, "learning_rate": 7.419855370116511e-06, "loss": 0.3333, "step": 6948 }, { "epoch": 0.6, "learning_rate": 7.4171732305955095e-06, "loss": 0.3027, "step": 6949 }, { "epoch": 0.6, "learning_rate": 7.4144912901460355e-06, "loss": 0.2921, "step": 6950 }, { "epoch": 0.6, "learning_rate": 7.411809548974792e-06, "loss": 0.289, "step": 6951 }, { "epoch": 0.6, "learning_rate": 7.4091280072884854e-06, "loss": 0.2651, "step": 6952 }, { "epoch": 0.6, "learning_rate": 7.406446665293789e-06, "loss": 0.262, "step": 6953 }, { "epoch": 0.6, "learning_rate": 7.403765523197365e-06, "loss": 0.2734, "step": 6954 }, { "epoch": 0.6, "learning_rate": 7.401084581205869e-06, "loss": 0.3026, "step": 6955 }, { "epoch": 0.6, "learning_rate": 7.3984038395259315e-06, "loss": 0.3073, "step": 6956 }, { "epoch": 0.6, "learning_rate": 7.395723298364174e-06, "loss": 0.3115, "step": 6957 }, { "epoch": 0.6, "learning_rate": 7.39304295792719e-06, "loss": 0.2886, "step": 6958 }, { "epoch": 0.6, "learning_rate": 7.390362818421579e-06, "loss": 0.2398, "step": 6959 }, { "epoch": 0.6, "learning_rate": 7.387682880053906e-06, "loss": 0.2817, "step": 6960 }, { "epoch": 0.6, "learning_rate": 7.385003143030727e-06, "loss": 0.2376, "step": 6961 }, { "epoch": 0.6, "learning_rate": 7.382323607558585e-06, "loss": 0.2755, "step": 6962 }, { "epoch": 0.6, "learning_rate": 7.379644273844008e-06, "loss": 0.2515, "step": 6963 }, { "epoch": 0.6, "learning_rate": 7.376965142093502e-06, "loss": 0.2986, "step": 6964 }, { "epoch": 0.6, "learning_rate": 7.374286212513558e-06, "loss": 0.3193, "step": 6965 }, { "epoch": 0.6, "learning_rate": 7.3716074853106635e-06, "loss": 0.2745, "step": 6966 }, { "epoch": 0.6, "learning_rate": 7.368928960691275e-06, "loss": 0.2859, "step": 6967 }, { "epoch": 0.6, "learning_rate": 7.366250638861838e-06, "loss": 0.2816, "step": 6968 }, { "epoch": 0.6, "learning_rate": 7.3635725200287936e-06, "loss": 0.3256, "step": 6969 }, { "epoch": 0.6, "learning_rate": 7.3608946043985515e-06, "loss": 0.2828, "step": 6970 }, { "epoch": 0.6, "learning_rate": 7.358216892177514e-06, "loss": 0.2491, "step": 6971 }, { "epoch": 0.6, "learning_rate": 7.355539383572059e-06, "loss": 0.239, "step": 6972 }, { "epoch": 0.6, "learning_rate": 7.3528620787885676e-06, "loss": 0.2664, "step": 6973 }, { "epoch": 0.6, "learning_rate": 7.350184978033386e-06, "loss": 0.2633, "step": 6974 }, { "epoch": 0.6, "learning_rate": 7.347508081512848e-06, "loss": 0.3063, "step": 6975 }, { "epoch": 0.6, "learning_rate": 7.344831389433287e-06, "loss": 0.2521, "step": 6976 }, { "epoch": 0.6, "learning_rate": 7.342154902001003e-06, "loss": 0.2835, "step": 6977 }, { "epoch": 0.6, "learning_rate": 7.339478619422287e-06, "loss": 0.2777, "step": 6978 }, { "epoch": 0.6, "learning_rate": 7.336802541903408e-06, "loss": 0.3508, "step": 6979 }, { "epoch": 0.6, "learning_rate": 7.3341266696506304e-06, "loss": 0.2777, "step": 6980 }, { "epoch": 0.6, "learning_rate": 7.3314510028702e-06, "loss": 0.2792, "step": 6981 }, { "epoch": 0.6, "learning_rate": 7.328775541768336e-06, "loss": 0.2884, "step": 6982 }, { "epoch": 0.6, "learning_rate": 7.32610028655126e-06, "loss": 0.2721, "step": 6983 }, { "epoch": 0.6, "learning_rate": 7.3234252374251614e-06, "loss": 0.2717, "step": 6984 }, { "epoch": 0.6, "learning_rate": 7.320750394596217e-06, "loss": 0.2684, "step": 6985 }, { "epoch": 0.6, "learning_rate": 7.318075758270593e-06, "loss": 0.2686, "step": 6986 }, { "epoch": 0.6, "learning_rate": 7.315401328654439e-06, "loss": 0.2759, "step": 6987 }, { "epoch": 0.6, "learning_rate": 7.312727105953888e-06, "loss": 0.3135, "step": 6988 }, { "epoch": 0.6, "learning_rate": 7.310053090375049e-06, "loss": 0.3065, "step": 6989 }, { "epoch": 0.6, "learning_rate": 7.30737928212403e-06, "loss": 0.2645, "step": 6990 }, { "epoch": 0.6, "learning_rate": 7.3047056814069115e-06, "loss": 0.2267, "step": 6991 }, { "epoch": 0.6, "learning_rate": 7.3020322884297565e-06, "loss": 0.2621, "step": 6992 }, { "epoch": 0.6, "learning_rate": 7.299359103398626e-06, "loss": 0.2657, "step": 6993 }, { "epoch": 0.6, "learning_rate": 7.296686126519552e-06, "loss": 0.241, "step": 6994 }, { "epoch": 0.6, "learning_rate": 7.294013357998554e-06, "loss": 0.3229, "step": 6995 }, { "epoch": 0.6, "learning_rate": 7.291340798041631e-06, "loss": 0.2821, "step": 6996 }, { "epoch": 0.6, "learning_rate": 7.288668446854781e-06, "loss": 0.3004, "step": 6997 }, { "epoch": 0.6, "learning_rate": 7.2859963046439665e-06, "loss": 0.3038, "step": 6998 }, { "epoch": 0.6, "learning_rate": 7.283324371615147e-06, "loss": 0.2745, "step": 6999 }, { "epoch": 0.6, "learning_rate": 7.280652647974263e-06, "loss": 0.2761, "step": 7000 }, { "epoch": 0.6, "learning_rate": 7.277981133927236e-06, "loss": 0.2981, "step": 7001 }, { "epoch": 0.6, "learning_rate": 7.275309829679973e-06, "loss": 0.2807, "step": 7002 }, { "epoch": 0.6, "learning_rate": 7.2726387354383625e-06, "loss": 0.2733, "step": 7003 }, { "epoch": 0.6, "learning_rate": 7.269967851408286e-06, "loss": 0.2708, "step": 7004 }, { "epoch": 0.6, "learning_rate": 7.267297177795596e-06, "loss": 0.285, "step": 7005 }, { "epoch": 0.6, "learning_rate": 7.264626714806135e-06, "loss": 0.2902, "step": 7006 }, { "epoch": 0.6, "learning_rate": 7.261956462645734e-06, "loss": 0.2422, "step": 7007 }, { "epoch": 0.6, "learning_rate": 7.259286421520201e-06, "loss": 0.278, "step": 7008 }, { "epoch": 0.6, "learning_rate": 7.256616591635328e-06, "loss": 0.2925, "step": 7009 }, { "epoch": 0.6, "learning_rate": 7.253946973196888e-06, "loss": 0.3397, "step": 7010 }, { "epoch": 0.6, "learning_rate": 7.251277566410651e-06, "loss": 0.3224, "step": 7011 }, { "epoch": 0.6, "learning_rate": 7.248608371482355e-06, "loss": 0.2954, "step": 7012 }, { "epoch": 0.6, "learning_rate": 7.24593938861773e-06, "loss": 0.2712, "step": 7013 }, { "epoch": 0.6, "learning_rate": 7.243270618022492e-06, "loss": 0.2603, "step": 7014 }, { "epoch": 0.6, "learning_rate": 7.240602059902333e-06, "loss": 0.3207, "step": 7015 }, { "epoch": 0.6, "learning_rate": 7.237933714462932e-06, "loss": 0.3207, "step": 7016 }, { "epoch": 0.6, "learning_rate": 7.23526558190995e-06, "loss": 0.28, "step": 7017 }, { "epoch": 0.6, "learning_rate": 7.232597662449038e-06, "loss": 0.2385, "step": 7018 }, { "epoch": 0.6, "learning_rate": 7.229929956285826e-06, "loss": 0.2546, "step": 7019 }, { "epoch": 0.6, "learning_rate": 7.22726246362592e-06, "loss": 0.2901, "step": 7020 }, { "epoch": 0.6, "learning_rate": 7.224595184674928e-06, "loss": 0.2338, "step": 7021 }, { "epoch": 0.6, "learning_rate": 7.221928119638426e-06, "loss": 0.3267, "step": 7022 }, { "epoch": 0.6, "learning_rate": 7.2192612687219755e-06, "loss": 0.2772, "step": 7023 }, { "epoch": 0.6, "learning_rate": 7.2165946321311254e-06, "loss": 0.2852, "step": 7024 }, { "epoch": 0.6, "learning_rate": 7.213928210071408e-06, "loss": 0.2764, "step": 7025 }, { "epoch": 0.6, "learning_rate": 7.211262002748341e-06, "loss": 0.3029, "step": 7026 }, { "epoch": 0.6, "learning_rate": 7.2085960103674146e-06, "loss": 0.2753, "step": 7027 }, { "epoch": 0.6, "learning_rate": 7.205930233134117e-06, "loss": 0.2527, "step": 7028 }, { "epoch": 0.6, "learning_rate": 7.203264671253915e-06, "loss": 0.2659, "step": 7029 }, { "epoch": 0.6, "learning_rate": 7.200599324932246e-06, "loss": 0.312, "step": 7030 }, { "epoch": 0.6, "learning_rate": 7.1979341943745515e-06, "loss": 0.3139, "step": 7031 }, { "epoch": 0.6, "learning_rate": 7.195269279786247e-06, "loss": 0.3, "step": 7032 }, { "epoch": 0.6, "learning_rate": 7.192604581372727e-06, "loss": 0.2886, "step": 7033 }, { "epoch": 0.6, "learning_rate": 7.18994009933937e-06, "loss": 0.2709, "step": 7034 }, { "epoch": 0.6, "learning_rate": 7.187275833891549e-06, "loss": 0.2518, "step": 7035 }, { "epoch": 0.6, "learning_rate": 7.1846117852346075e-06, "loss": 0.2731, "step": 7036 }, { "epoch": 0.6, "learning_rate": 7.181947953573878e-06, "loss": 0.2617, "step": 7037 }, { "epoch": 0.6, "learning_rate": 7.179284339114676e-06, "loss": 0.2572, "step": 7038 }, { "epoch": 0.6, "learning_rate": 7.1766209420623e-06, "loss": 0.2774, "step": 7039 }, { "epoch": 0.6, "learning_rate": 7.173957762622032e-06, "loss": 0.2656, "step": 7040 }, { "epoch": 0.6, "learning_rate": 7.171294800999134e-06, "loss": 0.2863, "step": 7041 }, { "epoch": 0.6, "learning_rate": 7.168632057398857e-06, "loss": 0.2738, "step": 7042 }, { "epoch": 0.6, "learning_rate": 7.16596953202643e-06, "loss": 0.3048, "step": 7043 }, { "epoch": 0.6, "learning_rate": 7.1633072250870665e-06, "loss": 0.2772, "step": 7044 }, { "epoch": 0.6, "learning_rate": 7.160645136785968e-06, "loss": 0.2604, "step": 7045 }, { "epoch": 0.6, "learning_rate": 7.157983267328314e-06, "loss": 0.2914, "step": 7046 }, { "epoch": 0.6, "learning_rate": 7.155321616919267e-06, "loss": 0.2443, "step": 7047 }, { "epoch": 0.6, "learning_rate": 7.152660185763969e-06, "loss": 0.283, "step": 7048 }, { "epoch": 0.6, "learning_rate": 7.149998974067558e-06, "loss": 0.2876, "step": 7049 }, { "epoch": 0.6, "learning_rate": 7.147337982035143e-06, "loss": 0.2795, "step": 7050 }, { "epoch": 0.6, "learning_rate": 7.144677209871819e-06, "loss": 0.2578, "step": 7051 }, { "epoch": 0.6, "learning_rate": 7.142016657782671e-06, "loss": 0.285, "step": 7052 }, { "epoch": 0.6, "learning_rate": 7.139356325972757e-06, "loss": 0.2953, "step": 7053 }, { "epoch": 0.6, "learning_rate": 7.136696214647123e-06, "loss": 0.2733, "step": 7054 }, { "epoch": 0.6, "learning_rate": 7.134036324010791e-06, "loss": 0.2737, "step": 7055 }, { "epoch": 0.6, "learning_rate": 7.1313766542687824e-06, "loss": 0.2746, "step": 7056 }, { "epoch": 0.6, "learning_rate": 7.1287172056260875e-06, "loss": 0.2905, "step": 7057 }, { "epoch": 0.61, "learning_rate": 7.12605797828768e-06, "loss": 0.2798, "step": 7058 }, { "epoch": 0.61, "learning_rate": 7.123398972458526e-06, "loss": 0.3279, "step": 7059 }, { "epoch": 0.61, "learning_rate": 7.120740188343567e-06, "loss": 0.2869, "step": 7060 }, { "epoch": 0.61, "learning_rate": 7.118081626147724e-06, "loss": 0.2475, "step": 7061 }, { "epoch": 0.61, "learning_rate": 7.11542328607591e-06, "loss": 0.2903, "step": 7062 }, { "epoch": 0.61, "learning_rate": 7.112765168333016e-06, "loss": 0.2856, "step": 7063 }, { "epoch": 0.61, "learning_rate": 7.110107273123919e-06, "loss": 0.2883, "step": 7064 }, { "epoch": 0.61, "learning_rate": 7.107449600653468e-06, "loss": 0.2548, "step": 7065 }, { "epoch": 0.61, "learning_rate": 7.104792151126515e-06, "loss": 0.2606, "step": 7066 }, { "epoch": 0.61, "learning_rate": 7.102134924747877e-06, "loss": 0.2535, "step": 7067 }, { "epoch": 0.61, "learning_rate": 7.099477921722358e-06, "loss": 0.6453, "step": 7068 }, { "epoch": 0.61, "learning_rate": 7.096821142254747e-06, "loss": 0.2961, "step": 7069 }, { "epoch": 0.61, "learning_rate": 7.094164586549821e-06, "loss": 0.2875, "step": 7070 }, { "epoch": 0.61, "learning_rate": 7.091508254812331e-06, "loss": 0.2695, "step": 7071 }, { "epoch": 0.61, "learning_rate": 7.088852147247009e-06, "loss": 0.2967, "step": 7072 }, { "epoch": 0.61, "learning_rate": 7.086196264058584e-06, "loss": 0.2778, "step": 7073 }, { "epoch": 0.61, "learning_rate": 7.0835406054517505e-06, "loss": 0.2772, "step": 7074 }, { "epoch": 0.61, "learning_rate": 7.080885171631198e-06, "loss": 0.264, "step": 7075 }, { "epoch": 0.61, "learning_rate": 7.078229962801592e-06, "loss": 0.2643, "step": 7076 }, { "epoch": 0.61, "learning_rate": 7.075574979167585e-06, "loss": 0.2778, "step": 7077 }, { "epoch": 0.61, "learning_rate": 7.072920220933808e-06, "loss": 0.298, "step": 7078 }, { "epoch": 0.61, "learning_rate": 7.070265688304873e-06, "loss": 0.6022, "step": 7079 }, { "epoch": 0.61, "learning_rate": 7.067611381485388e-06, "loss": 0.3085, "step": 7080 }, { "epoch": 0.61, "learning_rate": 7.064957300679927e-06, "loss": 0.2512, "step": 7081 }, { "epoch": 0.61, "learning_rate": 7.062303446093051e-06, "loss": 0.2683, "step": 7082 }, { "epoch": 0.61, "learning_rate": 7.059649817929315e-06, "loss": 0.2716, "step": 7083 }, { "epoch": 0.61, "learning_rate": 7.056996416393241e-06, "loss": 0.3134, "step": 7084 }, { "epoch": 0.61, "learning_rate": 7.054343241689343e-06, "loss": 0.2478, "step": 7085 }, { "epoch": 0.61, "learning_rate": 7.051690294022108e-06, "loss": 0.2875, "step": 7086 }, { "epoch": 0.61, "learning_rate": 7.049037573596021e-06, "loss": 0.2996, "step": 7087 }, { "epoch": 0.61, "learning_rate": 7.0463850806155355e-06, "loss": 0.2771, "step": 7088 }, { "epoch": 0.61, "learning_rate": 7.043732815285091e-06, "loss": 0.2462, "step": 7089 }, { "epoch": 0.61, "learning_rate": 7.041080777809118e-06, "loss": 0.3026, "step": 7090 }, { "epoch": 0.61, "learning_rate": 7.038428968392018e-06, "loss": 0.247, "step": 7091 }, { "epoch": 0.61, "learning_rate": 7.0357773872381804e-06, "loss": 0.2667, "step": 7092 }, { "epoch": 0.61, "learning_rate": 7.0331260345519705e-06, "loss": 0.2906, "step": 7093 }, { "epoch": 0.61, "learning_rate": 7.030474910537748e-06, "loss": 0.3019, "step": 7094 }, { "epoch": 0.61, "learning_rate": 7.027824015399849e-06, "loss": 0.3752, "step": 7095 }, { "epoch": 0.61, "learning_rate": 7.025173349342584e-06, "loss": 0.2483, "step": 7096 }, { "epoch": 0.61, "learning_rate": 7.022522912570262e-06, "loss": 0.2938, "step": 7097 }, { "epoch": 0.61, "learning_rate": 7.019872705287163e-06, "loss": 0.5854, "step": 7098 }, { "epoch": 0.61, "learning_rate": 7.017222727697548e-06, "loss": 0.5742, "step": 7099 }, { "epoch": 0.61, "learning_rate": 7.014572980005667e-06, "loss": 0.2359, "step": 7100 }, { "epoch": 0.61, "learning_rate": 7.01192346241575e-06, "loss": 0.2411, "step": 7101 }, { "epoch": 0.61, "learning_rate": 7.009274175132009e-06, "loss": 0.277, "step": 7102 }, { "epoch": 0.61, "learning_rate": 7.006625118358633e-06, "loss": 0.274, "step": 7103 }, { "epoch": 0.61, "learning_rate": 7.003976292299807e-06, "loss": 0.2981, "step": 7104 }, { "epoch": 0.61, "learning_rate": 7.001327697159684e-06, "loss": 0.2745, "step": 7105 }, { "epoch": 0.61, "learning_rate": 6.998679333142403e-06, "loss": 0.2916, "step": 7106 }, { "epoch": 0.61, "learning_rate": 6.996031200452087e-06, "loss": 0.2646, "step": 7107 }, { "epoch": 0.61, "learning_rate": 6.9933832992928476e-06, "loss": 0.269, "step": 7108 }, { "epoch": 0.61, "learning_rate": 6.990735629868768e-06, "loss": 0.27, "step": 7109 }, { "epoch": 0.61, "learning_rate": 6.9880881923839105e-06, "loss": 0.2866, "step": 7110 }, { "epoch": 0.61, "learning_rate": 6.985440987042339e-06, "loss": 0.272, "step": 7111 }, { "epoch": 0.61, "learning_rate": 6.9827940140480776e-06, "loss": 0.2966, "step": 7112 }, { "epoch": 0.61, "learning_rate": 6.980147273605146e-06, "loss": 0.2431, "step": 7113 }, { "epoch": 0.61, "learning_rate": 6.97750076591754e-06, "loss": 0.259, "step": 7114 }, { "epoch": 0.61, "learning_rate": 6.974854491189243e-06, "loss": 0.2808, "step": 7115 }, { "epoch": 0.61, "learning_rate": 6.9722084496242146e-06, "loss": 0.2999, "step": 7116 }, { "epoch": 0.61, "learning_rate": 6.969562641426394e-06, "loss": 0.2514, "step": 7117 }, { "epoch": 0.61, "learning_rate": 6.966917066799714e-06, "loss": 0.2651, "step": 7118 }, { "epoch": 0.61, "learning_rate": 6.96427172594808e-06, "loss": 0.2206, "step": 7119 }, { "epoch": 0.61, "learning_rate": 6.961626619075377e-06, "loss": 0.253, "step": 7120 }, { "epoch": 0.61, "learning_rate": 6.958981746385486e-06, "loss": 0.3116, "step": 7121 }, { "epoch": 0.61, "learning_rate": 6.956337108082256e-06, "loss": 0.332, "step": 7122 }, { "epoch": 0.61, "learning_rate": 6.953692704369522e-06, "loss": 0.261, "step": 7123 }, { "epoch": 0.61, "learning_rate": 6.951048535451099e-06, "loss": 0.2593, "step": 7124 }, { "epoch": 0.61, "learning_rate": 6.948404601530793e-06, "loss": 0.2974, "step": 7125 }, { "epoch": 0.61, "learning_rate": 6.9457609028123795e-06, "loss": 0.3338, "step": 7126 }, { "epoch": 0.61, "learning_rate": 6.943117439499622e-06, "loss": 0.3351, "step": 7127 }, { "epoch": 0.61, "learning_rate": 6.940474211796273e-06, "loss": 0.3165, "step": 7128 }, { "epoch": 0.61, "learning_rate": 6.937831219906055e-06, "loss": 0.2773, "step": 7129 }, { "epoch": 0.61, "learning_rate": 6.935188464032674e-06, "loss": 0.3149, "step": 7130 }, { "epoch": 0.61, "learning_rate": 6.932545944379818e-06, "loss": 0.2656, "step": 7131 }, { "epoch": 0.61, "learning_rate": 6.929903661151167e-06, "loss": 0.2434, "step": 7132 }, { "epoch": 0.61, "learning_rate": 6.927261614550375e-06, "loss": 0.3065, "step": 7133 }, { "epoch": 0.61, "learning_rate": 6.924619804781069e-06, "loss": 0.2697, "step": 7134 }, { "epoch": 0.61, "learning_rate": 6.921978232046878e-06, "loss": 0.3273, "step": 7135 }, { "epoch": 0.61, "learning_rate": 6.919336896551396e-06, "loss": 0.3043, "step": 7136 }, { "epoch": 0.61, "learning_rate": 6.916695798498201e-06, "loss": 0.2528, "step": 7137 }, { "epoch": 0.61, "learning_rate": 6.91405493809086e-06, "loss": 0.298, "step": 7138 }, { "epoch": 0.61, "learning_rate": 6.911414315532914e-06, "loss": 0.2842, "step": 7139 }, { "epoch": 0.61, "learning_rate": 6.9087739310278956e-06, "loss": 0.2744, "step": 7140 }, { "epoch": 0.61, "learning_rate": 6.906133784779303e-06, "loss": 0.2769, "step": 7141 }, { "epoch": 0.61, "learning_rate": 6.903493876990637e-06, "loss": 0.2692, "step": 7142 }, { "epoch": 0.61, "learning_rate": 6.900854207865361e-06, "loss": 0.5897, "step": 7143 }, { "epoch": 0.61, "learning_rate": 6.898214777606927e-06, "loss": 0.2576, "step": 7144 }, { "epoch": 0.61, "learning_rate": 6.89557558641877e-06, "loss": 0.2808, "step": 7145 }, { "epoch": 0.61, "learning_rate": 6.892936634504313e-06, "loss": 0.3228, "step": 7146 }, { "epoch": 0.61, "learning_rate": 6.890297922066947e-06, "loss": 0.2793, "step": 7147 }, { "epoch": 0.61, "learning_rate": 6.887659449310045e-06, "loss": 0.3448, "step": 7148 }, { "epoch": 0.61, "learning_rate": 6.885021216436981e-06, "loss": 0.2498, "step": 7149 }, { "epoch": 0.61, "learning_rate": 6.882383223651088e-06, "loss": 0.268, "step": 7150 }, { "epoch": 0.61, "learning_rate": 6.879745471155692e-06, "loss": 0.2971, "step": 7151 }, { "epoch": 0.61, "learning_rate": 6.877107959154094e-06, "loss": 0.275, "step": 7152 }, { "epoch": 0.61, "learning_rate": 6.8744706878495885e-06, "loss": 0.2622, "step": 7153 }, { "epoch": 0.61, "learning_rate": 6.871833657445438e-06, "loss": 0.235, "step": 7154 }, { "epoch": 0.61, "learning_rate": 6.8691968681448895e-06, "loss": 0.2673, "step": 7155 }, { "epoch": 0.61, "learning_rate": 6.866560320151179e-06, "loss": 0.2803, "step": 7156 }, { "epoch": 0.61, "learning_rate": 6.8639240136675156e-06, "loss": 0.3454, "step": 7157 }, { "epoch": 0.61, "learning_rate": 6.861287948897091e-06, "loss": 0.2888, "step": 7158 }, { "epoch": 0.61, "learning_rate": 6.858652126043086e-06, "loss": 0.3086, "step": 7159 }, { "epoch": 0.61, "learning_rate": 6.856016545308655e-06, "loss": 0.2526, "step": 7160 }, { "epoch": 0.61, "learning_rate": 6.853381206896932e-06, "loss": 0.2421, "step": 7161 }, { "epoch": 0.61, "learning_rate": 6.850746111011034e-06, "loss": 0.2998, "step": 7162 }, { "epoch": 0.61, "learning_rate": 6.848111257854069e-06, "loss": 0.2838, "step": 7163 }, { "epoch": 0.61, "learning_rate": 6.845476647629112e-06, "loss": 0.3314, "step": 7164 }, { "epoch": 0.61, "learning_rate": 6.842842280539226e-06, "loss": 0.2663, "step": 7165 }, { "epoch": 0.61, "learning_rate": 6.8402081567874625e-06, "loss": 0.2894, "step": 7166 }, { "epoch": 0.61, "learning_rate": 6.8375742765768394e-06, "loss": 0.3058, "step": 7167 }, { "epoch": 0.61, "learning_rate": 6.8349406401103655e-06, "loss": 0.2712, "step": 7168 }, { "epoch": 0.61, "learning_rate": 6.832307247591026e-06, "loss": 0.2624, "step": 7169 }, { "epoch": 0.61, "learning_rate": 6.8296740992217915e-06, "loss": 0.2549, "step": 7170 }, { "epoch": 0.61, "learning_rate": 6.827041195205615e-06, "loss": 0.2476, "step": 7171 }, { "epoch": 0.61, "learning_rate": 6.824408535745422e-06, "loss": 0.2554, "step": 7172 }, { "epoch": 0.61, "learning_rate": 6.82177612104413e-06, "loss": 0.2761, "step": 7173 }, { "epoch": 0.61, "learning_rate": 6.819143951304632e-06, "loss": 0.2582, "step": 7174 }, { "epoch": 0.62, "learning_rate": 6.8165120267297994e-06, "loss": 0.3132, "step": 7175 }, { "epoch": 0.62, "learning_rate": 6.813880347522489e-06, "loss": 0.2543, "step": 7176 }, { "epoch": 0.62, "learning_rate": 6.811248913885539e-06, "loss": 0.2748, "step": 7177 }, { "epoch": 0.62, "learning_rate": 6.8086177260217675e-06, "loss": 0.2635, "step": 7178 }, { "epoch": 0.62, "learning_rate": 6.80598678413397e-06, "loss": 0.2427, "step": 7179 }, { "epoch": 0.62, "learning_rate": 6.803356088424933e-06, "loss": 0.276, "step": 7180 }, { "epoch": 0.62, "learning_rate": 6.800725639097412e-06, "loss": 0.2837, "step": 7181 }, { "epoch": 0.62, "learning_rate": 6.7980954363541506e-06, "loss": 0.2981, "step": 7182 }, { "epoch": 0.62, "learning_rate": 6.795465480397868e-06, "loss": 0.2987, "step": 7183 }, { "epoch": 0.62, "learning_rate": 6.792835771431278e-06, "loss": 0.2744, "step": 7184 }, { "epoch": 0.62, "learning_rate": 6.790206309657058e-06, "loss": 0.2968, "step": 7185 }, { "epoch": 0.62, "learning_rate": 6.787577095277873e-06, "loss": 0.2891, "step": 7186 }, { "epoch": 0.62, "learning_rate": 6.784948128496376e-06, "loss": 0.2675, "step": 7187 }, { "epoch": 0.62, "learning_rate": 6.782319409515188e-06, "loss": 0.2592, "step": 7188 }, { "epoch": 0.62, "learning_rate": 6.7796909385369245e-06, "loss": 0.2728, "step": 7189 }, { "epoch": 0.62, "learning_rate": 6.777062715764166e-06, "loss": 0.2361, "step": 7190 }, { "epoch": 0.62, "learning_rate": 6.774434741399493e-06, "loss": 0.2347, "step": 7191 }, { "epoch": 0.62, "learning_rate": 6.771807015645453e-06, "loss": 0.2879, "step": 7192 }, { "epoch": 0.62, "learning_rate": 6.7691795387045735e-06, "loss": 0.2632, "step": 7193 }, { "epoch": 0.62, "learning_rate": 6.766552310779374e-06, "loss": 0.2704, "step": 7194 }, { "epoch": 0.62, "learning_rate": 6.763925332072343e-06, "loss": 0.2549, "step": 7195 }, { "epoch": 0.62, "learning_rate": 6.761298602785957e-06, "loss": 0.2593, "step": 7196 }, { "epoch": 0.62, "learning_rate": 6.758672123122675e-06, "loss": 0.2761, "step": 7197 }, { "epoch": 0.62, "learning_rate": 6.7560458932849306e-06, "loss": 0.2642, "step": 7198 }, { "epoch": 0.62, "learning_rate": 6.753419913475139e-06, "loss": 0.2402, "step": 7199 }, { "epoch": 0.62, "learning_rate": 6.7507941838956946e-06, "loss": 0.3021, "step": 7200 }, { "epoch": 0.62, "learning_rate": 6.748168704748984e-06, "loss": 0.2784, "step": 7201 }, { "epoch": 0.62, "learning_rate": 6.74554347623736e-06, "loss": 0.2567, "step": 7202 }, { "epoch": 0.62, "learning_rate": 6.742918498563163e-06, "loss": 0.3071, "step": 7203 }, { "epoch": 0.62, "learning_rate": 6.740293771928717e-06, "loss": 0.2748, "step": 7204 }, { "epoch": 0.62, "learning_rate": 6.7376692965363196e-06, "loss": 0.3159, "step": 7205 }, { "epoch": 0.62, "learning_rate": 6.735045072588256e-06, "loss": 0.2794, "step": 7206 }, { "epoch": 0.62, "learning_rate": 6.732421100286779e-06, "loss": 0.259, "step": 7207 }, { "epoch": 0.62, "learning_rate": 6.7297973798341405e-06, "loss": 0.2919, "step": 7208 }, { "epoch": 0.62, "learning_rate": 6.727173911432565e-06, "loss": 0.3078, "step": 7209 }, { "epoch": 0.62, "learning_rate": 6.724550695284247e-06, "loss": 0.2858, "step": 7210 }, { "epoch": 0.62, "learning_rate": 6.721927731591382e-06, "loss": 0.2748, "step": 7211 }, { "epoch": 0.62, "learning_rate": 6.71930502055613e-06, "loss": 0.2647, "step": 7212 }, { "epoch": 0.62, "learning_rate": 6.716682562380634e-06, "loss": 0.2778, "step": 7213 }, { "epoch": 0.62, "learning_rate": 6.714060357267023e-06, "loss": 0.2635, "step": 7214 }, { "epoch": 0.62, "learning_rate": 6.711438405417403e-06, "loss": 0.3018, "step": 7215 }, { "epoch": 0.62, "learning_rate": 6.708816707033865e-06, "loss": 0.5948, "step": 7216 }, { "epoch": 0.62, "learning_rate": 6.706195262318467e-06, "loss": 0.2814, "step": 7217 }, { "epoch": 0.62, "learning_rate": 6.703574071473269e-06, "loss": 0.2925, "step": 7218 }, { "epoch": 0.62, "learning_rate": 6.7009531347002924e-06, "loss": 0.298, "step": 7219 }, { "epoch": 0.62, "learning_rate": 6.698332452201545e-06, "loss": 0.2256, "step": 7220 }, { "epoch": 0.62, "learning_rate": 6.695712024179015e-06, "loss": 0.3104, "step": 7221 }, { "epoch": 0.62, "learning_rate": 6.693091850834681e-06, "loss": 0.264, "step": 7222 }, { "epoch": 0.62, "learning_rate": 6.690471932370487e-06, "loss": 0.2758, "step": 7223 }, { "epoch": 0.62, "learning_rate": 6.6878522689883596e-06, "loss": 0.2579, "step": 7224 }, { "epoch": 0.62, "learning_rate": 6.6852328608902165e-06, "loss": 0.2759, "step": 7225 }, { "epoch": 0.62, "learning_rate": 6.682613708277945e-06, "loss": 0.2706, "step": 7226 }, { "epoch": 0.62, "learning_rate": 6.679994811353419e-06, "loss": 0.2902, "step": 7227 }, { "epoch": 0.62, "learning_rate": 6.677376170318484e-06, "loss": 0.2663, "step": 7228 }, { "epoch": 0.62, "learning_rate": 6.674757785374979e-06, "loss": 0.2781, "step": 7229 }, { "epoch": 0.62, "learning_rate": 6.672139656724715e-06, "loss": 0.3488, "step": 7230 }, { "epoch": 0.62, "learning_rate": 6.669521784569479e-06, "loss": 0.3009, "step": 7231 }, { "epoch": 0.62, "learning_rate": 6.66690416911105e-06, "loss": 0.3086, "step": 7232 }, { "epoch": 0.62, "learning_rate": 6.664286810551177e-06, "loss": 0.2546, "step": 7233 }, { "epoch": 0.62, "learning_rate": 6.6616697090915975e-06, "loss": 0.277, "step": 7234 }, { "epoch": 0.62, "learning_rate": 6.659052864934017e-06, "loss": 0.2794, "step": 7235 }, { "epoch": 0.62, "learning_rate": 6.656436278280136e-06, "loss": 0.2183, "step": 7236 }, { "epoch": 0.62, "learning_rate": 6.653819949331628e-06, "loss": 0.3157, "step": 7237 }, { "epoch": 0.62, "learning_rate": 6.651203878290139e-06, "loss": 0.3036, "step": 7238 }, { "epoch": 0.62, "learning_rate": 6.648588065357313e-06, "loss": 0.3045, "step": 7239 }, { "epoch": 0.62, "learning_rate": 6.645972510734756e-06, "loss": 0.3218, "step": 7240 }, { "epoch": 0.62, "learning_rate": 6.643357214624064e-06, "loss": 0.2398, "step": 7241 }, { "epoch": 0.62, "learning_rate": 6.640742177226816e-06, "loss": 0.2417, "step": 7242 }, { "epoch": 0.62, "learning_rate": 6.638127398744563e-06, "loss": 0.2916, "step": 7243 }, { "epoch": 0.62, "learning_rate": 6.635512879378837e-06, "loss": 0.2627, "step": 7244 }, { "epoch": 0.62, "learning_rate": 6.632898619331151e-06, "loss": 0.3278, "step": 7245 }, { "epoch": 0.62, "learning_rate": 6.630284618803003e-06, "loss": 0.3138, "step": 7246 }, { "epoch": 0.62, "learning_rate": 6.6276708779958696e-06, "loss": 0.2462, "step": 7247 }, { "epoch": 0.62, "learning_rate": 6.6250573971111975e-06, "loss": 0.2772, "step": 7248 }, { "epoch": 0.62, "learning_rate": 6.622444176350429e-06, "loss": 0.2576, "step": 7249 }, { "epoch": 0.62, "learning_rate": 6.619831215914974e-06, "loss": 0.2695, "step": 7250 }, { "epoch": 0.62, "learning_rate": 6.6172185160062255e-06, "loss": 0.3051, "step": 7251 }, { "epoch": 0.62, "learning_rate": 6.6146060768255596e-06, "loss": 0.3267, "step": 7252 }, { "epoch": 0.62, "learning_rate": 6.611993898574329e-06, "loss": 0.3086, "step": 7253 }, { "epoch": 0.62, "learning_rate": 6.609381981453869e-06, "loss": 0.2958, "step": 7254 }, { "epoch": 0.62, "learning_rate": 6.60677032566549e-06, "loss": 0.2664, "step": 7255 }, { "epoch": 0.62, "learning_rate": 6.604158931410491e-06, "loss": 0.3224, "step": 7256 }, { "epoch": 0.62, "learning_rate": 6.601547798890142e-06, "loss": 0.2673, "step": 7257 }, { "epoch": 0.62, "learning_rate": 6.598936928305695e-06, "loss": 0.3295, "step": 7258 }, { "epoch": 0.62, "learning_rate": 6.596326319858382e-06, "loss": 0.2814, "step": 7259 }, { "epoch": 0.62, "learning_rate": 6.593715973749422e-06, "loss": 0.3036, "step": 7260 }, { "epoch": 0.62, "learning_rate": 6.591105890180005e-06, "loss": 0.3146, "step": 7261 }, { "epoch": 0.62, "learning_rate": 6.5884960693512965e-06, "loss": 0.2615, "step": 7262 }, { "epoch": 0.62, "learning_rate": 6.5858865114644584e-06, "loss": 0.269, "step": 7263 }, { "epoch": 0.62, "learning_rate": 6.583277216720618e-06, "loss": 0.2655, "step": 7264 }, { "epoch": 0.62, "learning_rate": 6.580668185320889e-06, "loss": 0.295, "step": 7265 }, { "epoch": 0.62, "learning_rate": 6.578059417466356e-06, "loss": 0.2618, "step": 7266 }, { "epoch": 0.62, "learning_rate": 6.575450913358099e-06, "loss": 0.2559, "step": 7267 }, { "epoch": 0.62, "learning_rate": 6.572842673197164e-06, "loss": 0.3317, "step": 7268 }, { "epoch": 0.62, "learning_rate": 6.570234697184578e-06, "loss": 0.2997, "step": 7269 }, { "epoch": 0.62, "learning_rate": 6.5676269855213585e-06, "loss": 0.3006, "step": 7270 }, { "epoch": 0.62, "learning_rate": 6.565019538408488e-06, "loss": 0.3048, "step": 7271 }, { "epoch": 0.62, "learning_rate": 6.562412356046943e-06, "loss": 0.3724, "step": 7272 }, { "epoch": 0.62, "learning_rate": 6.559805438637663e-06, "loss": 0.2789, "step": 7273 }, { "epoch": 0.62, "learning_rate": 6.557198786381584e-06, "loss": 0.2672, "step": 7274 }, { "epoch": 0.62, "learning_rate": 6.554592399479614e-06, "loss": 0.2433, "step": 7275 }, { "epoch": 0.62, "learning_rate": 6.5519862781326315e-06, "loss": 0.269, "step": 7276 }, { "epoch": 0.62, "learning_rate": 6.549380422541514e-06, "loss": 0.2672, "step": 7277 }, { "epoch": 0.62, "learning_rate": 6.546774832907101e-06, "loss": 0.274, "step": 7278 }, { "epoch": 0.62, "learning_rate": 6.544169509430219e-06, "loss": 0.2919, "step": 7279 }, { "epoch": 0.62, "learning_rate": 6.541564452311681e-06, "loss": 0.3206, "step": 7280 }, { "epoch": 0.62, "learning_rate": 6.538959661752264e-06, "loss": 0.3083, "step": 7281 }, { "epoch": 0.62, "learning_rate": 6.536355137952737e-06, "loss": 0.2836, "step": 7282 }, { "epoch": 0.62, "learning_rate": 6.533750881113836e-06, "loss": 0.2808, "step": 7283 }, { "epoch": 0.62, "learning_rate": 6.531146891436293e-06, "loss": 0.3727, "step": 7284 }, { "epoch": 0.62, "learning_rate": 6.528543169120809e-06, "loss": 0.2753, "step": 7285 }, { "epoch": 0.62, "learning_rate": 6.52593971436806e-06, "loss": 0.2754, "step": 7286 }, { "epoch": 0.62, "learning_rate": 6.523336527378716e-06, "loss": 0.2632, "step": 7287 }, { "epoch": 0.62, "learning_rate": 6.520733608353415e-06, "loss": 0.269, "step": 7288 }, { "epoch": 0.62, "learning_rate": 6.518130957492774e-06, "loss": 0.2343, "step": 7289 }, { "epoch": 0.62, "learning_rate": 6.515528574997394e-06, "loss": 0.3079, "step": 7290 }, { "epoch": 0.62, "learning_rate": 6.512926461067853e-06, "loss": 0.2656, "step": 7291 }, { "epoch": 0.63, "learning_rate": 6.510324615904713e-06, "loss": 0.2411, "step": 7292 }, { "epoch": 0.63, "learning_rate": 6.507723039708505e-06, "loss": 0.2785, "step": 7293 }, { "epoch": 0.63, "learning_rate": 6.5051217326797535e-06, "loss": 0.3478, "step": 7294 }, { "epoch": 0.63, "learning_rate": 6.5025206950189475e-06, "loss": 0.258, "step": 7295 }, { "epoch": 0.63, "learning_rate": 6.499919926926566e-06, "loss": 0.2908, "step": 7296 }, { "epoch": 0.63, "learning_rate": 6.49731942860306e-06, "loss": 0.2934, "step": 7297 }, { "epoch": 0.63, "learning_rate": 6.494719200248867e-06, "loss": 0.275, "step": 7298 }, { "epoch": 0.63, "learning_rate": 6.492119242064398e-06, "loss": 0.2389, "step": 7299 }, { "epoch": 0.63, "learning_rate": 6.489519554250043e-06, "loss": 0.2653, "step": 7300 }, { "epoch": 0.63, "learning_rate": 6.4869201370061785e-06, "loss": 0.2875, "step": 7301 }, { "epoch": 0.63, "learning_rate": 6.484320990533148e-06, "loss": 0.3289, "step": 7302 }, { "epoch": 0.63, "learning_rate": 6.481722115031287e-06, "loss": 0.3198, "step": 7303 }, { "epoch": 0.63, "learning_rate": 6.479123510700896e-06, "loss": 0.2668, "step": 7304 }, { "epoch": 0.63, "learning_rate": 6.476525177742272e-06, "loss": 0.2445, "step": 7305 }, { "epoch": 0.63, "learning_rate": 6.473927116355678e-06, "loss": 0.3007, "step": 7306 }, { "epoch": 0.63, "learning_rate": 6.4713293267413555e-06, "loss": 0.29, "step": 7307 }, { "epoch": 0.63, "learning_rate": 6.468731809099536e-06, "loss": 0.2662, "step": 7308 }, { "epoch": 0.63, "learning_rate": 6.466134563630418e-06, "loss": 0.2924, "step": 7309 }, { "epoch": 0.63, "learning_rate": 6.463537590534188e-06, "loss": 0.2774, "step": 7310 }, { "epoch": 0.63, "learning_rate": 6.460940890011004e-06, "loss": 0.2723, "step": 7311 }, { "epoch": 0.63, "learning_rate": 6.4583444622610126e-06, "loss": 0.3069, "step": 7312 }, { "epoch": 0.63, "learning_rate": 6.455748307484328e-06, "loss": 0.2598, "step": 7313 }, { "epoch": 0.63, "learning_rate": 6.453152425881051e-06, "loss": 0.3437, "step": 7314 }, { "epoch": 0.63, "learning_rate": 6.450556817651261e-06, "loss": 0.288, "step": 7315 }, { "epoch": 0.63, "learning_rate": 6.447961482995011e-06, "loss": 0.2327, "step": 7316 }, { "epoch": 0.63, "learning_rate": 6.4453664221123425e-06, "loss": 0.2458, "step": 7317 }, { "epoch": 0.63, "learning_rate": 6.44277163520326e-06, "loss": 0.2919, "step": 7318 }, { "epoch": 0.63, "learning_rate": 6.440177122467769e-06, "loss": 0.288, "step": 7319 }, { "epoch": 0.63, "learning_rate": 6.437582884105835e-06, "loss": 0.2764, "step": 7320 }, { "epoch": 0.63, "learning_rate": 6.434988920317407e-06, "loss": 0.2935, "step": 7321 }, { "epoch": 0.63, "learning_rate": 6.432395231302418e-06, "loss": 0.2794, "step": 7322 }, { "epoch": 0.63, "learning_rate": 6.429801817260779e-06, "loss": 0.2689, "step": 7323 }, { "epoch": 0.63, "learning_rate": 6.4272086783923715e-06, "loss": 0.2647, "step": 7324 }, { "epoch": 0.63, "learning_rate": 6.424615814897068e-06, "loss": 0.267, "step": 7325 }, { "epoch": 0.63, "learning_rate": 6.422023226974713e-06, "loss": 0.3537, "step": 7326 }, { "epoch": 0.63, "learning_rate": 6.419430914825125e-06, "loss": 0.5598, "step": 7327 }, { "epoch": 0.63, "learning_rate": 6.4168388786481106e-06, "loss": 0.5558, "step": 7328 }, { "epoch": 0.63, "learning_rate": 6.414247118643451e-06, "loss": 0.2962, "step": 7329 }, { "epoch": 0.63, "learning_rate": 6.411655635010907e-06, "loss": 0.2992, "step": 7330 }, { "epoch": 0.63, "learning_rate": 6.409064427950213e-06, "loss": 0.2709, "step": 7331 }, { "epoch": 0.63, "learning_rate": 6.406473497661092e-06, "loss": 0.2609, "step": 7332 }, { "epoch": 0.63, "learning_rate": 6.403882844343239e-06, "loss": 0.3163, "step": 7333 }, { "epoch": 0.63, "learning_rate": 6.4012924681963255e-06, "loss": 0.2516, "step": 7334 }, { "epoch": 0.63, "learning_rate": 6.3987023694200045e-06, "loss": 0.2801, "step": 7335 }, { "epoch": 0.63, "learning_rate": 6.396112548213913e-06, "loss": 0.2522, "step": 7336 }, { "epoch": 0.63, "learning_rate": 6.393523004777661e-06, "loss": 0.2338, "step": 7337 }, { "epoch": 0.63, "learning_rate": 6.39093373931083e-06, "loss": 0.2737, "step": 7338 }, { "epoch": 0.63, "learning_rate": 6.388344752012999e-06, "loss": 0.2892, "step": 7339 }, { "epoch": 0.63, "learning_rate": 6.385756043083706e-06, "loss": 0.3145, "step": 7340 }, { "epoch": 0.63, "learning_rate": 6.383167612722481e-06, "loss": 0.2906, "step": 7341 }, { "epoch": 0.63, "learning_rate": 6.38057946112882e-06, "loss": 0.3256, "step": 7342 }, { "epoch": 0.63, "learning_rate": 6.3779915885022145e-06, "loss": 0.2568, "step": 7343 }, { "epoch": 0.63, "learning_rate": 6.375403995042122e-06, "loss": 0.3112, "step": 7344 }, { "epoch": 0.63, "learning_rate": 6.3728166809479744e-06, "loss": 0.254, "step": 7345 }, { "epoch": 0.63, "learning_rate": 6.370229646419199e-06, "loss": 0.2955, "step": 7346 }, { "epoch": 0.63, "learning_rate": 6.3676428916551856e-06, "loss": 0.2528, "step": 7347 }, { "epoch": 0.63, "learning_rate": 6.365056416855311e-06, "loss": 0.2457, "step": 7348 }, { "epoch": 0.63, "learning_rate": 6.3624702222189235e-06, "loss": 0.2742, "step": 7349 }, { "epoch": 0.63, "learning_rate": 6.359884307945363e-06, "loss": 0.2371, "step": 7350 }, { "epoch": 0.63, "learning_rate": 6.357298674233931e-06, "loss": 0.2926, "step": 7351 }, { "epoch": 0.63, "learning_rate": 6.354713321283916e-06, "loss": 0.2774, "step": 7352 }, { "epoch": 0.63, "learning_rate": 6.352128249294591e-06, "loss": 0.2715, "step": 7353 }, { "epoch": 0.63, "learning_rate": 6.349543458465193e-06, "loss": 0.2508, "step": 7354 }, { "epoch": 0.63, "learning_rate": 6.3469589489949504e-06, "loss": 0.254, "step": 7355 }, { "epoch": 0.63, "learning_rate": 6.3443747210830565e-06, "loss": 0.2607, "step": 7356 }, { "epoch": 0.63, "learning_rate": 6.3417907749287e-06, "loss": 0.285, "step": 7357 }, { "epoch": 0.63, "learning_rate": 6.339207110731036e-06, "loss": 0.3003, "step": 7358 }, { "epoch": 0.63, "learning_rate": 6.336623728689195e-06, "loss": 0.2698, "step": 7359 }, { "epoch": 0.63, "learning_rate": 6.3340406290022986e-06, "loss": 0.2584, "step": 7360 }, { "epoch": 0.63, "learning_rate": 6.331457811869437e-06, "loss": 0.3047, "step": 7361 }, { "epoch": 0.63, "learning_rate": 6.328875277489677e-06, "loss": 0.2912, "step": 7362 }, { "epoch": 0.63, "learning_rate": 6.326293026062075e-06, "loss": 0.3234, "step": 7363 }, { "epoch": 0.63, "learning_rate": 6.3237110577856534e-06, "loss": 0.2498, "step": 7364 }, { "epoch": 0.63, "learning_rate": 6.321129372859418e-06, "loss": 0.2927, "step": 7365 }, { "epoch": 0.63, "learning_rate": 6.318547971482352e-06, "loss": 0.2851, "step": 7366 }, { "epoch": 0.63, "learning_rate": 6.315966853853417e-06, "loss": 0.2859, "step": 7367 }, { "epoch": 0.63, "learning_rate": 6.313386020171557e-06, "loss": 0.2474, "step": 7368 }, { "epoch": 0.63, "learning_rate": 6.310805470635682e-06, "loss": 0.2416, "step": 7369 }, { "epoch": 0.63, "learning_rate": 6.3082252054446955e-06, "loss": 0.3397, "step": 7370 }, { "epoch": 0.63, "learning_rate": 6.30564522479747e-06, "loss": 0.2685, "step": 7371 }, { "epoch": 0.63, "learning_rate": 6.303065528892853e-06, "loss": 0.2521, "step": 7372 }, { "epoch": 0.63, "learning_rate": 6.300486117929676e-06, "loss": 0.2812, "step": 7373 }, { "epoch": 0.63, "learning_rate": 6.297906992106755e-06, "loss": 0.644, "step": 7374 }, { "epoch": 0.63, "learning_rate": 6.295328151622868e-06, "loss": 0.3009, "step": 7375 }, { "epoch": 0.63, "learning_rate": 6.292749596676779e-06, "loss": 0.2661, "step": 7376 }, { "epoch": 0.63, "learning_rate": 6.290171327467238e-06, "loss": 0.2781, "step": 7377 }, { "epoch": 0.63, "learning_rate": 6.287593344192957e-06, "loss": 0.2819, "step": 7378 }, { "epoch": 0.63, "learning_rate": 6.285015647052639e-06, "loss": 0.2754, "step": 7379 }, { "epoch": 0.63, "learning_rate": 6.282438236244956e-06, "loss": 0.2919, "step": 7380 }, { "epoch": 0.63, "learning_rate": 6.2798611119685685e-06, "loss": 0.2772, "step": 7381 }, { "epoch": 0.63, "learning_rate": 6.277284274422104e-06, "loss": 0.3116, "step": 7382 }, { "epoch": 0.63, "learning_rate": 6.27470772380417e-06, "loss": 0.302, "step": 7383 }, { "epoch": 0.63, "learning_rate": 6.27213146031336e-06, "loss": 0.2711, "step": 7384 }, { "epoch": 0.63, "learning_rate": 6.269555484148237e-06, "loss": 0.2593, "step": 7385 }, { "epoch": 0.63, "learning_rate": 6.266979795507346e-06, "loss": 0.3012, "step": 7386 }, { "epoch": 0.63, "learning_rate": 6.264404394589202e-06, "loss": 0.2141, "step": 7387 }, { "epoch": 0.63, "learning_rate": 6.261829281592313e-06, "loss": 0.2646, "step": 7388 }, { "epoch": 0.63, "learning_rate": 6.259254456715154e-06, "loss": 0.2444, "step": 7389 }, { "epoch": 0.63, "learning_rate": 6.256679920156172e-06, "loss": 0.2916, "step": 7390 }, { "epoch": 0.63, "learning_rate": 6.25410567211381e-06, "loss": 0.2636, "step": 7391 }, { "epoch": 0.63, "learning_rate": 6.251531712786473e-06, "loss": 0.2279, "step": 7392 }, { "epoch": 0.63, "learning_rate": 6.24895804237255e-06, "loss": 0.2864, "step": 7393 }, { "epoch": 0.63, "learning_rate": 6.246384661070404e-06, "loss": 0.2712, "step": 7394 }, { "epoch": 0.63, "learning_rate": 6.243811569078384e-06, "loss": 0.2708, "step": 7395 }, { "epoch": 0.63, "learning_rate": 6.24123876659481e-06, "loss": 0.2595, "step": 7396 }, { "epoch": 0.63, "learning_rate": 6.238666253817974e-06, "loss": 0.2948, "step": 7397 }, { "epoch": 0.63, "learning_rate": 6.23609403094616e-06, "loss": 0.2607, "step": 7398 }, { "epoch": 0.63, "learning_rate": 6.233522098177622e-06, "loss": 0.2733, "step": 7399 }, { "epoch": 0.63, "learning_rate": 6.230950455710592e-06, "loss": 0.3316, "step": 7400 }, { "epoch": 0.63, "learning_rate": 6.228379103743272e-06, "loss": 0.2439, "step": 7401 }, { "epoch": 0.63, "learning_rate": 6.225808042473857e-06, "loss": 0.2556, "step": 7402 }, { "epoch": 0.63, "learning_rate": 6.223237272100509e-06, "loss": 0.2602, "step": 7403 }, { "epoch": 0.63, "learning_rate": 6.220666792821371e-06, "loss": 0.2871, "step": 7404 }, { "epoch": 0.63, "learning_rate": 6.21809660483456e-06, "loss": 0.2881, "step": 7405 }, { "epoch": 0.63, "learning_rate": 6.2155267083381795e-06, "loss": 0.2601, "step": 7406 }, { "epoch": 0.63, "learning_rate": 6.212957103530297e-06, "loss": 0.292, "step": 7407 }, { "epoch": 0.64, "learning_rate": 6.210387790608972e-06, "loss": 0.2257, "step": 7408 }, { "epoch": 0.64, "learning_rate": 6.207818769772231e-06, "loss": 0.2697, "step": 7409 }, { "epoch": 0.64, "learning_rate": 6.2052500412180805e-06, "loss": 0.3264, "step": 7410 }, { "epoch": 0.64, "learning_rate": 6.202681605144503e-06, "loss": 0.2346, "step": 7411 }, { "epoch": 0.64, "learning_rate": 6.200113461749469e-06, "loss": 0.2527, "step": 7412 }, { "epoch": 0.64, "learning_rate": 6.197545611230913e-06, "loss": 0.267, "step": 7413 }, { "epoch": 0.64, "learning_rate": 6.194978053786749e-06, "loss": 0.2754, "step": 7414 }, { "epoch": 0.64, "learning_rate": 6.19241078961488e-06, "loss": 0.2548, "step": 7415 }, { "epoch": 0.64, "learning_rate": 6.189843818913172e-06, "loss": 0.2764, "step": 7416 }, { "epoch": 0.64, "learning_rate": 6.187277141879476e-06, "loss": 0.271, "step": 7417 }, { "epoch": 0.64, "learning_rate": 6.184710758711616e-06, "loss": 0.2069, "step": 7418 }, { "epoch": 0.64, "learning_rate": 6.182144669607403e-06, "loss": 0.2579, "step": 7419 }, { "epoch": 0.64, "learning_rate": 6.179578874764614e-06, "loss": 0.2655, "step": 7420 }, { "epoch": 0.64, "learning_rate": 6.177013374381005e-06, "loss": 0.2969, "step": 7421 }, { "epoch": 0.64, "learning_rate": 6.174448168654317e-06, "loss": 0.3246, "step": 7422 }, { "epoch": 0.64, "learning_rate": 6.171883257782261e-06, "loss": 0.3112, "step": 7423 }, { "epoch": 0.64, "learning_rate": 6.16931864196253e-06, "loss": 0.2684, "step": 7424 }, { "epoch": 0.64, "learning_rate": 6.166754321392785e-06, "loss": 0.248, "step": 7425 }, { "epoch": 0.64, "learning_rate": 6.164190296270683e-06, "loss": 0.2527, "step": 7426 }, { "epoch": 0.64, "learning_rate": 6.161626566793837e-06, "loss": 0.2909, "step": 7427 }, { "epoch": 0.64, "learning_rate": 6.159063133159846e-06, "loss": 0.2526, "step": 7428 }, { "epoch": 0.64, "learning_rate": 6.156499995566294e-06, "loss": 0.2944, "step": 7429 }, { "epoch": 0.64, "learning_rate": 6.1539371542107295e-06, "loss": 0.2761, "step": 7430 }, { "epoch": 0.64, "learning_rate": 6.151374609290688e-06, "loss": 0.2386, "step": 7431 }, { "epoch": 0.64, "learning_rate": 6.148812361003669e-06, "loss": 0.309, "step": 7432 }, { "epoch": 0.64, "learning_rate": 6.14625040954717e-06, "loss": 0.2401, "step": 7433 }, { "epoch": 0.64, "learning_rate": 6.1436887551186466e-06, "loss": 0.2916, "step": 7434 }, { "epoch": 0.64, "learning_rate": 6.141127397915534e-06, "loss": 0.3116, "step": 7435 }, { "epoch": 0.64, "learning_rate": 6.138566338135259e-06, "loss": 0.2842, "step": 7436 }, { "epoch": 0.64, "learning_rate": 6.13600557597521e-06, "loss": 0.2538, "step": 7437 }, { "epoch": 0.64, "learning_rate": 6.133445111632761e-06, "loss": 0.283, "step": 7438 }, { "epoch": 0.64, "learning_rate": 6.130884945305252e-06, "loss": 0.2909, "step": 7439 }, { "epoch": 0.64, "learning_rate": 6.128325077190018e-06, "loss": 0.2332, "step": 7440 }, { "epoch": 0.64, "learning_rate": 6.125765507484356e-06, "loss": 0.2864, "step": 7441 }, { "epoch": 0.64, "learning_rate": 6.123206236385543e-06, "loss": 0.2509, "step": 7442 }, { "epoch": 0.64, "learning_rate": 6.120647264090839e-06, "loss": 0.2197, "step": 7443 }, { "epoch": 0.64, "learning_rate": 6.1180885907974775e-06, "loss": 0.3011, "step": 7444 }, { "epoch": 0.64, "learning_rate": 6.115530216702661e-06, "loss": 0.2401, "step": 7445 }, { "epoch": 0.64, "learning_rate": 6.112972142003587e-06, "loss": 0.3022, "step": 7446 }, { "epoch": 0.64, "learning_rate": 6.110414366897413e-06, "loss": 0.2604, "step": 7447 }, { "epoch": 0.64, "learning_rate": 6.107856891581281e-06, "loss": 0.285, "step": 7448 }, { "epoch": 0.64, "learning_rate": 6.105299716252303e-06, "loss": 0.2635, "step": 7449 }, { "epoch": 0.64, "learning_rate": 6.102742841107585e-06, "loss": 0.2951, "step": 7450 }, { "epoch": 0.64, "learning_rate": 6.1001862663441906e-06, "loss": 0.2982, "step": 7451 }, { "epoch": 0.64, "learning_rate": 6.0976299921591645e-06, "loss": 0.2589, "step": 7452 }, { "epoch": 0.64, "learning_rate": 6.095074018749542e-06, "loss": 0.2439, "step": 7453 }, { "epoch": 0.64, "learning_rate": 6.092518346312317e-06, "loss": 0.2491, "step": 7454 }, { "epoch": 0.64, "learning_rate": 6.089962975044472e-06, "loss": 0.2677, "step": 7455 }, { "epoch": 0.64, "learning_rate": 6.087407905142957e-06, "loss": 0.3097, "step": 7456 }, { "epoch": 0.64, "learning_rate": 6.084853136804711e-06, "loss": 0.2666, "step": 7457 }, { "epoch": 0.64, "learning_rate": 6.082298670226642e-06, "loss": 0.2827, "step": 7458 }, { "epoch": 0.64, "learning_rate": 6.079744505605628e-06, "loss": 0.2264, "step": 7459 }, { "epoch": 0.64, "learning_rate": 6.077190643138542e-06, "loss": 0.3216, "step": 7460 }, { "epoch": 0.64, "learning_rate": 6.0746370830222145e-06, "loss": 0.2244, "step": 7461 }, { "epoch": 0.64, "learning_rate": 6.0720838254534675e-06, "loss": 0.2762, "step": 7462 }, { "epoch": 0.64, "learning_rate": 6.069530870629088e-06, "loss": 0.3168, "step": 7463 }, { "epoch": 0.64, "learning_rate": 6.0669782187458515e-06, "loss": 0.306, "step": 7464 }, { "epoch": 0.64, "learning_rate": 6.064425870000499e-06, "loss": 0.2759, "step": 7465 }, { "epoch": 0.64, "learning_rate": 6.061873824589751e-06, "loss": 0.2559, "step": 7466 }, { "epoch": 0.64, "learning_rate": 6.059322082710315e-06, "loss": 0.2316, "step": 7467 }, { "epoch": 0.64, "learning_rate": 6.056770644558858e-06, "loss": 0.2603, "step": 7468 }, { "epoch": 0.64, "learning_rate": 6.054219510332038e-06, "loss": 0.2874, "step": 7469 }, { "epoch": 0.64, "learning_rate": 6.051668680226477e-06, "loss": 0.2875, "step": 7470 }, { "epoch": 0.64, "learning_rate": 6.049118154438789e-06, "loss": 0.281, "step": 7471 }, { "epoch": 0.64, "learning_rate": 6.046567933165552e-06, "loss": 0.2738, "step": 7472 }, { "epoch": 0.64, "learning_rate": 6.044018016603321e-06, "loss": 0.2448, "step": 7473 }, { "epoch": 0.64, "learning_rate": 6.0414684049486335e-06, "loss": 0.2523, "step": 7474 }, { "epoch": 0.64, "learning_rate": 6.038919098398006e-06, "loss": 0.2759, "step": 7475 }, { "epoch": 0.64, "learning_rate": 6.036370097147922e-06, "loss": 0.2946, "step": 7476 }, { "epoch": 0.64, "learning_rate": 6.033821401394842e-06, "loss": 0.2922, "step": 7477 }, { "epoch": 0.64, "learning_rate": 6.031273011335215e-06, "loss": 0.2779, "step": 7478 }, { "epoch": 0.64, "learning_rate": 6.028724927165452e-06, "loss": 0.2715, "step": 7479 }, { "epoch": 0.64, "learning_rate": 6.026177149081949e-06, "loss": 0.3035, "step": 7480 }, { "epoch": 0.64, "learning_rate": 6.023629677281075e-06, "loss": 0.2566, "step": 7481 }, { "epoch": 0.64, "learning_rate": 6.0210825119591806e-06, "loss": 0.2521, "step": 7482 }, { "epoch": 0.64, "learning_rate": 6.018535653312586e-06, "loss": 0.3024, "step": 7483 }, { "epoch": 0.64, "learning_rate": 6.015989101537586e-06, "loss": 0.2773, "step": 7484 }, { "epoch": 0.64, "learning_rate": 6.0134428568304645e-06, "loss": 0.2871, "step": 7485 }, { "epoch": 0.64, "learning_rate": 6.0108969193874675e-06, "loss": 0.2777, "step": 7486 }, { "epoch": 0.64, "learning_rate": 6.008351289404824e-06, "loss": 0.2424, "step": 7487 }, { "epoch": 0.64, "learning_rate": 6.005805967078741e-06, "loss": 0.3023, "step": 7488 }, { "epoch": 0.64, "learning_rate": 6.003260952605401e-06, "loss": 0.2704, "step": 7489 }, { "epoch": 0.64, "learning_rate": 6.000716246180953e-06, "loss": 0.2845, "step": 7490 }, { "epoch": 0.64, "learning_rate": 5.9981718480015416e-06, "loss": 0.3026, "step": 7491 }, { "epoch": 0.64, "learning_rate": 5.995627758263267e-06, "loss": 0.2785, "step": 7492 }, { "epoch": 0.64, "learning_rate": 5.9930839771622196e-06, "loss": 0.2854, "step": 7493 }, { "epoch": 0.64, "learning_rate": 5.9905405048944575e-06, "loss": 0.2971, "step": 7494 }, { "epoch": 0.64, "learning_rate": 5.987997341656027e-06, "loss": 0.2958, "step": 7495 }, { "epoch": 0.64, "learning_rate": 5.9854544876429364e-06, "loss": 0.2897, "step": 7496 }, { "epoch": 0.64, "learning_rate": 5.982911943051173e-06, "loss": 0.2921, "step": 7497 }, { "epoch": 0.64, "learning_rate": 5.980369708076713e-06, "loss": 0.288, "step": 7498 }, { "epoch": 0.64, "learning_rate": 5.977827782915493e-06, "loss": 0.3124, "step": 7499 }, { "epoch": 0.64, "learning_rate": 5.975286167763433e-06, "loss": 0.2576, "step": 7500 }, { "epoch": 0.64, "learning_rate": 5.972744862816426e-06, "loss": 0.2802, "step": 7501 }, { "epoch": 0.64, "learning_rate": 5.97020386827035e-06, "loss": 0.2531, "step": 7502 }, { "epoch": 0.64, "learning_rate": 5.967663184321047e-06, "loss": 0.2457, "step": 7503 }, { "epoch": 0.64, "learning_rate": 5.9651228111643385e-06, "loss": 0.2734, "step": 7504 }, { "epoch": 0.64, "learning_rate": 5.962582748996031e-06, "loss": 0.2679, "step": 7505 }, { "epoch": 0.64, "learning_rate": 5.960042998011892e-06, "loss": 0.3422, "step": 7506 }, { "epoch": 0.64, "learning_rate": 5.95750355840768e-06, "loss": 0.2347, "step": 7507 }, { "epoch": 0.64, "learning_rate": 5.954964430379115e-06, "loss": 0.2913, "step": 7508 }, { "epoch": 0.64, "learning_rate": 5.952425614121908e-06, "loss": 0.3139, "step": 7509 }, { "epoch": 0.64, "learning_rate": 5.949887109831736e-06, "loss": 0.3038, "step": 7510 }, { "epoch": 0.64, "learning_rate": 5.947348917704248e-06, "loss": 0.3193, "step": 7511 }, { "epoch": 0.64, "learning_rate": 5.944811037935083e-06, "loss": 0.2969, "step": 7512 }, { "epoch": 0.64, "learning_rate": 5.94227347071985e-06, "loss": 0.2983, "step": 7513 }, { "epoch": 0.64, "learning_rate": 5.939736216254126e-06, "loss": 0.2701, "step": 7514 }, { "epoch": 0.64, "learning_rate": 5.937199274733468e-06, "loss": 0.2819, "step": 7515 }, { "epoch": 0.64, "learning_rate": 5.93466264635342e-06, "loss": 0.3, "step": 7516 }, { "epoch": 0.64, "learning_rate": 5.932126331309486e-06, "loss": 0.2663, "step": 7517 }, { "epoch": 0.64, "learning_rate": 5.929590329797154e-06, "loss": 0.2562, "step": 7518 }, { "epoch": 0.64, "learning_rate": 5.9270546420118855e-06, "loss": 0.2665, "step": 7519 }, { "epoch": 0.64, "learning_rate": 5.924519268149123e-06, "loss": 0.2967, "step": 7520 }, { "epoch": 0.64, "learning_rate": 5.921984208404279e-06, "loss": 0.3071, "step": 7521 }, { "epoch": 0.64, "learning_rate": 5.919449462972737e-06, "loss": 0.2625, "step": 7522 }, { "epoch": 0.64, "learning_rate": 5.916915032049873e-06, "loss": 0.2537, "step": 7523 }, { "epoch": 0.64, "learning_rate": 5.91438091583102e-06, "loss": 0.2715, "step": 7524 }, { "epoch": 0.65, "learning_rate": 5.911847114511497e-06, "loss": 0.3113, "step": 7525 }, { "epoch": 0.65, "learning_rate": 5.9093136282866014e-06, "loss": 0.3052, "step": 7526 }, { "epoch": 0.65, "learning_rate": 5.9067804573516e-06, "loss": 0.2943, "step": 7527 }, { "epoch": 0.65, "learning_rate": 5.9042476019017304e-06, "loss": 0.2795, "step": 7528 }, { "epoch": 0.65, "learning_rate": 5.901715062132223e-06, "loss": 0.2703, "step": 7529 }, { "epoch": 0.65, "learning_rate": 5.899182838238265e-06, "loss": 0.2491, "step": 7530 }, { "epoch": 0.65, "learning_rate": 5.8966509304150354e-06, "loss": 0.597, "step": 7531 }, { "epoch": 0.65, "learning_rate": 5.894119338857671e-06, "loss": 0.2837, "step": 7532 }, { "epoch": 0.65, "learning_rate": 5.891588063761304e-06, "loss": 0.2944, "step": 7533 }, { "epoch": 0.65, "learning_rate": 5.8890571053210295e-06, "loss": 0.259, "step": 7534 }, { "epoch": 0.65, "learning_rate": 5.886526463731916e-06, "loss": 0.3053, "step": 7535 }, { "epoch": 0.65, "learning_rate": 5.8839961391890234e-06, "loss": 0.252, "step": 7536 }, { "epoch": 0.65, "learning_rate": 5.8814661318873665e-06, "loss": 0.6045, "step": 7537 }, { "epoch": 0.65, "learning_rate": 5.878936442021952e-06, "loss": 0.3126, "step": 7538 }, { "epoch": 0.65, "learning_rate": 5.87640706978775e-06, "loss": 0.2875, "step": 7539 }, { "epoch": 0.65, "learning_rate": 5.873878015379722e-06, "loss": 0.2729, "step": 7540 }, { "epoch": 0.65, "learning_rate": 5.871349278992786e-06, "loss": 0.2519, "step": 7541 }, { "epoch": 0.65, "learning_rate": 5.868820860821844e-06, "loss": 0.2601, "step": 7542 }, { "epoch": 0.65, "learning_rate": 5.866292761061781e-06, "loss": 0.2795, "step": 7543 }, { "epoch": 0.65, "learning_rate": 5.863764979907446e-06, "loss": 0.2809, "step": 7544 }, { "epoch": 0.65, "learning_rate": 5.861237517553669e-06, "loss": 0.2676, "step": 7545 }, { "epoch": 0.65, "learning_rate": 5.858710374195251e-06, "loss": 0.311, "step": 7546 }, { "epoch": 0.65, "learning_rate": 5.8561835500269795e-06, "loss": 0.2461, "step": 7547 }, { "epoch": 0.65, "learning_rate": 5.853657045243604e-06, "loss": 0.3074, "step": 7548 }, { "epoch": 0.65, "learning_rate": 5.851130860039851e-06, "loss": 0.2747, "step": 7549 }, { "epoch": 0.65, "learning_rate": 5.848604994610434e-06, "loss": 0.351, "step": 7550 }, { "epoch": 0.65, "learning_rate": 5.8460794491500325e-06, "loss": 0.2626, "step": 7551 }, { "epoch": 0.65, "learning_rate": 5.843554223853303e-06, "loss": 0.3003, "step": 7552 }, { "epoch": 0.65, "learning_rate": 5.8410293189148704e-06, "loss": 0.299, "step": 7553 }, { "epoch": 0.65, "learning_rate": 5.838504734529353e-06, "loss": 0.2704, "step": 7554 }, { "epoch": 0.65, "learning_rate": 5.835980470891327e-06, "loss": 0.2344, "step": 7555 }, { "epoch": 0.65, "learning_rate": 5.83345652819535e-06, "loss": 0.2782, "step": 7556 }, { "epoch": 0.65, "learning_rate": 5.8309329066359575e-06, "loss": 0.288, "step": 7557 }, { "epoch": 0.65, "learning_rate": 5.828409606407659e-06, "loss": 0.2661, "step": 7558 }, { "epoch": 0.65, "learning_rate": 5.825886627704933e-06, "loss": 0.2749, "step": 7559 }, { "epoch": 0.65, "learning_rate": 5.823363970722237e-06, "loss": 0.258, "step": 7560 }, { "epoch": 0.65, "learning_rate": 5.820841635654015e-06, "loss": 0.3162, "step": 7561 }, { "epoch": 0.65, "learning_rate": 5.818319622694668e-06, "loss": 0.5818, "step": 7562 }, { "epoch": 0.65, "learning_rate": 5.815797932038581e-06, "loss": 0.2512, "step": 7563 }, { "epoch": 0.65, "learning_rate": 5.813276563880114e-06, "loss": 0.2894, "step": 7564 }, { "epoch": 0.65, "learning_rate": 5.810755518413605e-06, "loss": 0.2413, "step": 7565 }, { "epoch": 0.65, "learning_rate": 5.8082347958333625e-06, "loss": 0.2911, "step": 7566 }, { "epoch": 0.65, "learning_rate": 5.805714396333662e-06, "loss": 0.2607, "step": 7567 }, { "epoch": 0.65, "learning_rate": 5.8031943201087805e-06, "loss": 0.2923, "step": 7568 }, { "epoch": 0.65, "learning_rate": 5.800674567352938e-06, "loss": 0.3039, "step": 7569 }, { "epoch": 0.65, "learning_rate": 5.798155138260352e-06, "loss": 0.2632, "step": 7570 }, { "epoch": 0.65, "learning_rate": 5.795636033025205e-06, "loss": 0.2619, "step": 7571 }, { "epoch": 0.65, "learning_rate": 5.793117251841659e-06, "loss": 0.2997, "step": 7572 }, { "epoch": 0.65, "learning_rate": 5.790598794903847e-06, "loss": 0.3199, "step": 7573 }, { "epoch": 0.65, "learning_rate": 5.788080662405881e-06, "loss": 0.2531, "step": 7574 }, { "epoch": 0.65, "learning_rate": 5.7855628545418515e-06, "loss": 0.2776, "step": 7575 }, { "epoch": 0.65, "learning_rate": 5.783045371505809e-06, "loss": 0.295, "step": 7576 }, { "epoch": 0.65, "learning_rate": 5.780528213491792e-06, "loss": 0.2551, "step": 7577 }, { "epoch": 0.65, "learning_rate": 5.7780113806938095e-06, "loss": 0.2497, "step": 7578 }, { "epoch": 0.65, "learning_rate": 5.77549487330585e-06, "loss": 0.2665, "step": 7579 }, { "epoch": 0.65, "learning_rate": 5.772978691521871e-06, "loss": 0.2389, "step": 7580 }, { "epoch": 0.65, "learning_rate": 5.770462835535809e-06, "loss": 0.3396, "step": 7581 }, { "epoch": 0.65, "learning_rate": 5.767947305541577e-06, "loss": 0.2678, "step": 7582 }, { "epoch": 0.65, "learning_rate": 5.7654321017330505e-06, "loss": 0.3085, "step": 7583 }, { "epoch": 0.65, "learning_rate": 5.762917224304094e-06, "loss": 0.2903, "step": 7584 }, { "epoch": 0.65, "learning_rate": 5.760402673448544e-06, "loss": 0.2639, "step": 7585 }, { "epoch": 0.65, "learning_rate": 5.757888449360205e-06, "loss": 0.3151, "step": 7586 }, { "epoch": 0.65, "learning_rate": 5.755374552232864e-06, "loss": 0.2701, "step": 7587 }, { "epoch": 0.65, "learning_rate": 5.75286098226028e-06, "loss": 0.3142, "step": 7588 }, { "epoch": 0.65, "learning_rate": 5.750347739636188e-06, "loss": 0.3275, "step": 7589 }, { "epoch": 0.65, "learning_rate": 5.747834824554293e-06, "loss": 0.2758, "step": 7590 }, { "epoch": 0.65, "learning_rate": 5.745322237208273e-06, "loss": 0.3183, "step": 7591 }, { "epoch": 0.65, "learning_rate": 5.7428099777918e-06, "loss": 0.3133, "step": 7592 }, { "epoch": 0.65, "learning_rate": 5.740298046498496e-06, "loss": 0.2865, "step": 7593 }, { "epoch": 0.65, "learning_rate": 5.737786443521968e-06, "loss": 0.3123, "step": 7594 }, { "epoch": 0.65, "learning_rate": 5.7352751690558025e-06, "loss": 0.2478, "step": 7595 }, { "epoch": 0.65, "learning_rate": 5.732764223293559e-06, "loss": 0.2595, "step": 7596 }, { "epoch": 0.65, "learning_rate": 5.730253606428759e-06, "loss": 0.2635, "step": 7597 }, { "epoch": 0.65, "learning_rate": 5.727743318654911e-06, "loss": 0.2667, "step": 7598 }, { "epoch": 0.65, "learning_rate": 5.725233360165505e-06, "loss": 0.2758, "step": 7599 }, { "epoch": 0.65, "learning_rate": 5.722723731153986e-06, "loss": 0.2652, "step": 7600 }, { "epoch": 0.65, "learning_rate": 5.720214431813786e-06, "loss": 0.3037, "step": 7601 }, { "epoch": 0.65, "learning_rate": 5.717705462338311e-06, "loss": 0.2795, "step": 7602 }, { "epoch": 0.65, "learning_rate": 5.71519682292094e-06, "loss": 0.2213, "step": 7603 }, { "epoch": 0.65, "learning_rate": 5.71268851375503e-06, "loss": 0.2542, "step": 7604 }, { "epoch": 0.65, "learning_rate": 5.710180535033897e-06, "loss": 0.3026, "step": 7605 }, { "epoch": 0.65, "learning_rate": 5.707672886950859e-06, "loss": 0.3079, "step": 7606 }, { "epoch": 0.65, "learning_rate": 5.7051655696991825e-06, "loss": 0.2803, "step": 7607 }, { "epoch": 0.65, "learning_rate": 5.7026585834721225e-06, "loss": 0.2959, "step": 7608 }, { "epoch": 0.65, "learning_rate": 5.7001519284629045e-06, "loss": 0.2888, "step": 7609 }, { "epoch": 0.65, "learning_rate": 5.697645604864732e-06, "loss": 0.2676, "step": 7610 }, { "epoch": 0.65, "learning_rate": 5.6951396128707745e-06, "loss": 0.6045, "step": 7611 }, { "epoch": 0.65, "learning_rate": 5.692633952674187e-06, "loss": 0.2894, "step": 7612 }, { "epoch": 0.65, "learning_rate": 5.6901286244680946e-06, "loss": 0.2874, "step": 7613 }, { "epoch": 0.65, "learning_rate": 5.687623628445588e-06, "loss": 0.2729, "step": 7614 }, { "epoch": 0.65, "learning_rate": 5.685118964799743e-06, "loss": 0.2735, "step": 7615 }, { "epoch": 0.65, "learning_rate": 5.682614633723609e-06, "loss": 0.2899, "step": 7616 }, { "epoch": 0.65, "learning_rate": 5.680110635410205e-06, "loss": 0.2418, "step": 7617 }, { "epoch": 0.65, "learning_rate": 5.67760697005253e-06, "loss": 0.2729, "step": 7618 }, { "epoch": 0.65, "learning_rate": 5.675103637843551e-06, "loss": 0.2515, "step": 7619 }, { "epoch": 0.65, "learning_rate": 5.672600638976218e-06, "loss": 0.2402, "step": 7620 }, { "epoch": 0.65, "learning_rate": 5.67009797364344e-06, "loss": 0.2917, "step": 7621 }, { "epoch": 0.65, "learning_rate": 5.667595642038117e-06, "loss": 0.5909, "step": 7622 }, { "epoch": 0.65, "learning_rate": 5.665093644353115e-06, "loss": 0.2448, "step": 7623 }, { "epoch": 0.65, "learning_rate": 5.662591980781276e-06, "loss": 0.2631, "step": 7624 }, { "epoch": 0.65, "learning_rate": 5.660090651515413e-06, "loss": 0.2591, "step": 7625 }, { "epoch": 0.65, "learning_rate": 5.657589656748321e-06, "loss": 0.2198, "step": 7626 }, { "epoch": 0.65, "learning_rate": 5.655088996672764e-06, "loss": 0.2787, "step": 7627 }, { "epoch": 0.65, "learning_rate": 5.652588671481475e-06, "loss": 0.2795, "step": 7628 }, { "epoch": 0.65, "learning_rate": 5.650088681367166e-06, "loss": 0.2906, "step": 7629 }, { "epoch": 0.65, "learning_rate": 5.647589026522535e-06, "loss": 0.3565, "step": 7630 }, { "epoch": 0.65, "learning_rate": 5.645089707140234e-06, "loss": 0.2897, "step": 7631 }, { "epoch": 0.65, "learning_rate": 5.642590723412898e-06, "loss": 0.3113, "step": 7632 }, { "epoch": 0.65, "learning_rate": 5.64009207553314e-06, "loss": 0.6165, "step": 7633 }, { "epoch": 0.65, "learning_rate": 5.637593763693545e-06, "loss": 0.298, "step": 7634 }, { "epoch": 0.65, "learning_rate": 5.635095788086664e-06, "loss": 0.2778, "step": 7635 }, { "epoch": 0.65, "learning_rate": 5.632598148905027e-06, "loss": 0.3043, "step": 7636 }, { "epoch": 0.65, "learning_rate": 5.630100846341153e-06, "loss": 0.2673, "step": 7637 }, { "epoch": 0.65, "learning_rate": 5.627603880587511e-06, "loss": 0.2756, "step": 7638 }, { "epoch": 0.65, "learning_rate": 5.625107251836556e-06, "loss": 0.2678, "step": 7639 }, { "epoch": 0.65, "learning_rate": 5.622610960280717e-06, "loss": 0.3, "step": 7640 }, { "epoch": 0.65, "learning_rate": 5.620115006112396e-06, "loss": 0.2802, "step": 7641 }, { "epoch": 0.66, "learning_rate": 5.617619389523973e-06, "loss": 0.2824, "step": 7642 }, { "epoch": 0.66, "learning_rate": 5.615124110707786e-06, "loss": 0.3045, "step": 7643 }, { "epoch": 0.66, "learning_rate": 5.612629169856172e-06, "loss": 0.2823, "step": 7644 }, { "epoch": 0.66, "learning_rate": 5.61013456716142e-06, "loss": 0.243, "step": 7645 }, { "epoch": 0.66, "learning_rate": 5.607640302815806e-06, "loss": 0.2424, "step": 7646 }, { "epoch": 0.66, "learning_rate": 5.605146377011572e-06, "loss": 0.2728, "step": 7647 }, { "epoch": 0.66, "learning_rate": 5.602652789940941e-06, "loss": 0.3058, "step": 7648 }, { "epoch": 0.66, "learning_rate": 5.60015954179611e-06, "loss": 0.3168, "step": 7649 }, { "epoch": 0.66, "learning_rate": 5.597666632769232e-06, "loss": 0.2645, "step": 7650 }, { "epoch": 0.66, "learning_rate": 5.595174063052465e-06, "loss": 0.2794, "step": 7651 }, { "epoch": 0.66, "learning_rate": 5.592681832837913e-06, "loss": 0.303, "step": 7652 }, { "epoch": 0.66, "learning_rate": 5.5901899423176674e-06, "loss": 0.2482, "step": 7653 }, { "epoch": 0.66, "learning_rate": 5.587698391683792e-06, "loss": 0.2643, "step": 7654 }, { "epoch": 0.66, "learning_rate": 5.585207181128323e-06, "loss": 0.2599, "step": 7655 }, { "epoch": 0.66, "learning_rate": 5.5827163108432704e-06, "loss": 0.247, "step": 7656 }, { "epoch": 0.66, "learning_rate": 5.580225781020618e-06, "loss": 0.3073, "step": 7657 }, { "epoch": 0.66, "learning_rate": 5.577735591852327e-06, "loss": 0.2609, "step": 7658 }, { "epoch": 0.66, "learning_rate": 5.575245743530322e-06, "loss": 0.2872, "step": 7659 }, { "epoch": 0.66, "learning_rate": 5.572756236246512e-06, "loss": 0.3033, "step": 7660 }, { "epoch": 0.66, "learning_rate": 5.570267070192776e-06, "loss": 0.2974, "step": 7661 }, { "epoch": 0.66, "learning_rate": 5.567778245560966e-06, "loss": 0.2669, "step": 7662 }, { "epoch": 0.66, "learning_rate": 5.565289762542908e-06, "loss": 0.2764, "step": 7663 }, { "epoch": 0.66, "learning_rate": 5.562801621330402e-06, "loss": 0.2768, "step": 7664 }, { "epoch": 0.66, "learning_rate": 5.560313822115229e-06, "loss": 0.3415, "step": 7665 }, { "epoch": 0.66, "learning_rate": 5.5578263650891225e-06, "loss": 0.2858, "step": 7666 }, { "epoch": 0.66, "learning_rate": 5.555339250443808e-06, "loss": 0.2777, "step": 7667 }, { "epoch": 0.66, "learning_rate": 5.552852478370989e-06, "loss": 0.2498, "step": 7668 }, { "epoch": 0.66, "learning_rate": 5.550366049062323e-06, "loss": 0.2642, "step": 7669 }, { "epoch": 0.66, "learning_rate": 5.547879962709457e-06, "loss": 0.2096, "step": 7670 }, { "epoch": 0.66, "learning_rate": 5.545394219504005e-06, "loss": 0.281, "step": 7671 }, { "epoch": 0.66, "learning_rate": 5.542908819637558e-06, "loss": 0.308, "step": 7672 }, { "epoch": 0.66, "learning_rate": 5.540423763301674e-06, "loss": 0.2758, "step": 7673 }, { "epoch": 0.66, "learning_rate": 5.537939050687886e-06, "loss": 0.3063, "step": 7674 }, { "epoch": 0.66, "learning_rate": 5.535454681987715e-06, "loss": 0.2714, "step": 7675 }, { "epoch": 0.66, "learning_rate": 5.532970657392635e-06, "loss": 0.2983, "step": 7676 }, { "epoch": 0.66, "learning_rate": 5.530486977094104e-06, "loss": 0.2545, "step": 7677 }, { "epoch": 0.66, "learning_rate": 5.528003641283552e-06, "loss": 0.2699, "step": 7678 }, { "epoch": 0.66, "learning_rate": 5.525520650152383e-06, "loss": 0.2586, "step": 7679 }, { "epoch": 0.66, "learning_rate": 5.523038003891976e-06, "loss": 0.2681, "step": 7680 }, { "epoch": 0.66, "learning_rate": 5.5205557026936714e-06, "loss": 0.2833, "step": 7681 }, { "epoch": 0.66, "learning_rate": 5.5180737467488085e-06, "loss": 0.2584, "step": 7682 }, { "epoch": 0.66, "learning_rate": 5.51559213624867e-06, "loss": 0.2679, "step": 7683 }, { "epoch": 0.66, "learning_rate": 5.513110871384532e-06, "loss": 0.2529, "step": 7684 }, { "epoch": 0.66, "learning_rate": 5.510629952347637e-06, "loss": 0.2674, "step": 7685 }, { "epoch": 0.66, "learning_rate": 5.508149379329204e-06, "loss": 0.243, "step": 7686 }, { "epoch": 0.66, "learning_rate": 5.505669152520425e-06, "loss": 0.2602, "step": 7687 }, { "epoch": 0.66, "learning_rate": 5.503189272112452e-06, "loss": 0.2945, "step": 7688 }, { "epoch": 0.66, "learning_rate": 5.50070973829644e-06, "loss": 0.2533, "step": 7689 }, { "epoch": 0.66, "learning_rate": 5.4982305512634845e-06, "loss": 0.2582, "step": 7690 }, { "epoch": 0.66, "learning_rate": 5.495751711204675e-06, "loss": 0.2778, "step": 7691 }, { "epoch": 0.66, "learning_rate": 5.493273218311067e-06, "loss": 0.2935, "step": 7692 }, { "epoch": 0.66, "learning_rate": 5.490795072773692e-06, "loss": 0.283, "step": 7693 }, { "epoch": 0.66, "learning_rate": 5.48831727478355e-06, "loss": 0.262, "step": 7694 }, { "epoch": 0.66, "learning_rate": 5.485839824531621e-06, "loss": 0.3019, "step": 7695 }, { "epoch": 0.66, "learning_rate": 5.483362722208858e-06, "loss": 0.2448, "step": 7696 }, { "epoch": 0.66, "learning_rate": 5.4808859680061734e-06, "loss": 0.2444, "step": 7697 }, { "epoch": 0.66, "learning_rate": 5.478409562114469e-06, "loss": 0.2437, "step": 7698 }, { "epoch": 0.66, "learning_rate": 5.4759335047246154e-06, "loss": 0.2203, "step": 7699 }, { "epoch": 0.66, "learning_rate": 5.4734577960274515e-06, "loss": 0.2722, "step": 7700 }, { "epoch": 0.66, "learning_rate": 5.4709824362137945e-06, "loss": 0.2975, "step": 7701 }, { "epoch": 0.66, "learning_rate": 5.4685074254744346e-06, "loss": 0.3494, "step": 7702 }, { "epoch": 0.66, "learning_rate": 5.4660327640001335e-06, "loss": 0.2834, "step": 7703 }, { "epoch": 0.66, "learning_rate": 5.4635584519816195e-06, "loss": 0.2932, "step": 7704 }, { "epoch": 0.66, "learning_rate": 5.461084489609603e-06, "loss": 0.2642, "step": 7705 }, { "epoch": 0.66, "learning_rate": 5.458610877074773e-06, "loss": 0.2798, "step": 7706 }, { "epoch": 0.66, "learning_rate": 5.456137614567773e-06, "loss": 0.2859, "step": 7707 }, { "epoch": 0.66, "learning_rate": 5.453664702279235e-06, "loss": 0.2778, "step": 7708 }, { "epoch": 0.66, "learning_rate": 5.451192140399757e-06, "loss": 0.2631, "step": 7709 }, { "epoch": 0.66, "learning_rate": 5.448719929119916e-06, "loss": 0.3259, "step": 7710 }, { "epoch": 0.66, "learning_rate": 5.446248068630251e-06, "loss": 0.2751, "step": 7711 }, { "epoch": 0.66, "learning_rate": 5.443776559121279e-06, "loss": 0.2667, "step": 7712 }, { "epoch": 0.66, "learning_rate": 5.4413054007835055e-06, "loss": 0.2521, "step": 7713 }, { "epoch": 0.66, "learning_rate": 5.4388345938073824e-06, "loss": 0.2806, "step": 7714 }, { "epoch": 0.66, "learning_rate": 5.436364138383352e-06, "loss": 0.2941, "step": 7715 }, { "epoch": 0.66, "learning_rate": 5.433894034701824e-06, "loss": 0.319, "step": 7716 }, { "epoch": 0.66, "learning_rate": 5.431424282953181e-06, "loss": 0.277, "step": 7717 }, { "epoch": 0.66, "learning_rate": 5.4289548833277865e-06, "loss": 0.6084, "step": 7718 }, { "epoch": 0.66, "learning_rate": 5.426485836015953e-06, "loss": 0.6002, "step": 7719 }, { "epoch": 0.66, "learning_rate": 5.424017141208002e-06, "loss": 0.2696, "step": 7720 }, { "epoch": 0.66, "learning_rate": 5.421548799094196e-06, "loss": 0.2503, "step": 7721 }, { "epoch": 0.66, "learning_rate": 5.419080809864785e-06, "loss": 0.3042, "step": 7722 }, { "epoch": 0.66, "learning_rate": 5.41661317370999e-06, "loss": 0.3444, "step": 7723 }, { "epoch": 0.66, "learning_rate": 5.414145890820004e-06, "loss": 0.2747, "step": 7724 }, { "epoch": 0.66, "learning_rate": 5.411678961384998e-06, "loss": 0.2639, "step": 7725 }, { "epoch": 0.66, "learning_rate": 5.409212385595098e-06, "loss": 0.2527, "step": 7726 }, { "epoch": 0.66, "learning_rate": 5.406746163640432e-06, "loss": 0.2514, "step": 7727 }, { "epoch": 0.66, "learning_rate": 5.404280295711071e-06, "loss": 0.2802, "step": 7728 }, { "epoch": 0.66, "learning_rate": 5.401814781997077e-06, "loss": 0.2713, "step": 7729 }, { "epoch": 0.66, "learning_rate": 5.399349622688479e-06, "loss": 0.2701, "step": 7730 }, { "epoch": 0.66, "learning_rate": 5.396884817975281e-06, "loss": 0.2708, "step": 7731 }, { "epoch": 0.66, "learning_rate": 5.394420368047459e-06, "loss": 0.2332, "step": 7732 }, { "epoch": 0.66, "learning_rate": 5.391956273094952e-06, "loss": 0.308, "step": 7733 }, { "epoch": 0.66, "learning_rate": 5.389492533307692e-06, "loss": 0.28, "step": 7734 }, { "epoch": 0.66, "learning_rate": 5.387029148875563e-06, "loss": 0.2489, "step": 7735 }, { "epoch": 0.66, "learning_rate": 5.384566119988435e-06, "loss": 0.2212, "step": 7736 }, { "epoch": 0.66, "learning_rate": 5.382103446836144e-06, "loss": 0.2369, "step": 7737 }, { "epoch": 0.66, "learning_rate": 5.379641129608501e-06, "loss": 0.2687, "step": 7738 }, { "epoch": 0.66, "learning_rate": 5.377179168495292e-06, "loss": 0.2842, "step": 7739 }, { "epoch": 0.66, "learning_rate": 5.374717563686269e-06, "loss": 0.226, "step": 7740 }, { "epoch": 0.66, "learning_rate": 5.372256315371167e-06, "loss": 0.2617, "step": 7741 }, { "epoch": 0.66, "learning_rate": 5.3697954237396764e-06, "loss": 0.2941, "step": 7742 }, { "epoch": 0.66, "learning_rate": 5.367334888981474e-06, "loss": 0.269, "step": 7743 }, { "epoch": 0.66, "learning_rate": 5.3648747112862145e-06, "loss": 0.2697, "step": 7744 }, { "epoch": 0.66, "learning_rate": 5.362414890843504e-06, "loss": 0.3158, "step": 7745 }, { "epoch": 0.66, "learning_rate": 5.3599554278429415e-06, "loss": 0.3206, "step": 7746 }, { "epoch": 0.66, "learning_rate": 5.357496322474086e-06, "loss": 0.2864, "step": 7747 }, { "epoch": 0.66, "learning_rate": 5.355037574926478e-06, "loss": 0.2402, "step": 7748 }, { "epoch": 0.66, "learning_rate": 5.35257918538962e-06, "loss": 0.2917, "step": 7749 }, { "epoch": 0.66, "learning_rate": 5.35012115405299e-06, "loss": 0.2921, "step": 7750 }, { "epoch": 0.66, "learning_rate": 5.347663481106053e-06, "loss": 0.3192, "step": 7751 }, { "epoch": 0.66, "learning_rate": 5.345206166738225e-06, "loss": 0.3267, "step": 7752 }, { "epoch": 0.66, "learning_rate": 5.3427492111389045e-06, "loss": 0.2687, "step": 7753 }, { "epoch": 0.66, "learning_rate": 5.3402926144974625e-06, "loss": 0.2976, "step": 7754 }, { "epoch": 0.66, "learning_rate": 5.337836377003242e-06, "loss": 0.2578, "step": 7755 }, { "epoch": 0.66, "learning_rate": 5.335380498845559e-06, "loss": 0.2601, "step": 7756 }, { "epoch": 0.66, "learning_rate": 5.332924980213694e-06, "loss": 0.2804, "step": 7757 }, { "epoch": 0.67, "learning_rate": 5.330469821296916e-06, "loss": 0.2832, "step": 7758 }, { "epoch": 0.67, "learning_rate": 5.32801502228445e-06, "loss": 0.292, "step": 7759 }, { "epoch": 0.67, "learning_rate": 5.325560583365499e-06, "loss": 0.2629, "step": 7760 }, { "epoch": 0.67, "learning_rate": 5.323106504729241e-06, "loss": 0.2498, "step": 7761 }, { "epoch": 0.67, "learning_rate": 5.320652786564826e-06, "loss": 0.2895, "step": 7762 }, { "epoch": 0.67, "learning_rate": 5.318199429061375e-06, "loss": 0.2509, "step": 7763 }, { "epoch": 0.67, "learning_rate": 5.315746432407972e-06, "loss": 0.2601, "step": 7764 }, { "epoch": 0.67, "learning_rate": 5.313293796793696e-06, "loss": 0.2717, "step": 7765 }, { "epoch": 0.67, "learning_rate": 5.3108415224075725e-06, "loss": 0.2889, "step": 7766 }, { "epoch": 0.67, "learning_rate": 5.308389609438615e-06, "loss": 0.2663, "step": 7767 }, { "epoch": 0.67, "learning_rate": 5.305938058075804e-06, "loss": 0.3458, "step": 7768 }, { "epoch": 0.67, "learning_rate": 5.303486868508093e-06, "loss": 0.2789, "step": 7769 }, { "epoch": 0.67, "learning_rate": 5.301036040924412e-06, "loss": 0.26, "step": 7770 }, { "epoch": 0.67, "learning_rate": 5.298585575513649e-06, "loss": 0.3165, "step": 7771 }, { "epoch": 0.67, "learning_rate": 5.296135472464686e-06, "loss": 0.2643, "step": 7772 }, { "epoch": 0.67, "learning_rate": 5.293685731966353e-06, "loss": 0.2613, "step": 7773 }, { "epoch": 0.67, "learning_rate": 5.2912363542074695e-06, "loss": 0.2805, "step": 7774 }, { "epoch": 0.67, "learning_rate": 5.288787339376822e-06, "loss": 0.3005, "step": 7775 }, { "epoch": 0.67, "learning_rate": 5.2863386876631674e-06, "loss": 0.2213, "step": 7776 }, { "epoch": 0.67, "learning_rate": 5.2838903992552355e-06, "loss": 0.3118, "step": 7777 }, { "epoch": 0.67, "learning_rate": 5.281442474341729e-06, "loss": 0.2983, "step": 7778 }, { "epoch": 0.67, "learning_rate": 5.278994913111326e-06, "loss": 0.2526, "step": 7779 }, { "epoch": 0.67, "learning_rate": 5.276547715752663e-06, "loss": 0.2634, "step": 7780 }, { "epoch": 0.67, "learning_rate": 5.274100882454364e-06, "loss": 0.2818, "step": 7781 }, { "epoch": 0.67, "learning_rate": 5.271654413405016e-06, "loss": 0.3231, "step": 7782 }, { "epoch": 0.67, "learning_rate": 5.269208308793183e-06, "loss": 0.2935, "step": 7783 }, { "epoch": 0.67, "learning_rate": 5.2667625688074e-06, "loss": 0.2848, "step": 7784 }, { "epoch": 0.67, "learning_rate": 5.264317193636168e-06, "loss": 0.2892, "step": 7785 }, { "epoch": 0.67, "learning_rate": 5.261872183467972e-06, "loss": 0.2807, "step": 7786 }, { "epoch": 0.67, "learning_rate": 5.2594275384912526e-06, "loss": 0.2913, "step": 7787 }, { "epoch": 0.67, "learning_rate": 5.25698325889443e-06, "loss": 0.2629, "step": 7788 }, { "epoch": 0.67, "learning_rate": 5.25453934486591e-06, "loss": 0.2546, "step": 7789 }, { "epoch": 0.67, "learning_rate": 5.252095796594046e-06, "loss": 0.2908, "step": 7790 }, { "epoch": 0.67, "learning_rate": 5.249652614267178e-06, "loss": 0.2775, "step": 7791 }, { "epoch": 0.67, "learning_rate": 5.247209798073614e-06, "loss": 0.2923, "step": 7792 }, { "epoch": 0.67, "learning_rate": 5.2447673482016335e-06, "loss": 0.2929, "step": 7793 }, { "epoch": 0.67, "learning_rate": 5.242325264839494e-06, "loss": 0.2772, "step": 7794 }, { "epoch": 0.67, "learning_rate": 5.239883548175407e-06, "loss": 0.6228, "step": 7795 }, { "epoch": 0.67, "learning_rate": 5.2374421983975846e-06, "loss": 0.3563, "step": 7796 }, { "epoch": 0.67, "learning_rate": 5.23500121569418e-06, "loss": 0.2564, "step": 7797 }, { "epoch": 0.67, "learning_rate": 5.232560600253336e-06, "loss": 0.2518, "step": 7798 }, { "epoch": 0.67, "learning_rate": 5.230120352263166e-06, "loss": 0.2386, "step": 7799 }, { "epoch": 0.67, "learning_rate": 5.2276804719117504e-06, "loss": 0.3195, "step": 7800 }, { "epoch": 0.67, "learning_rate": 5.225240959387147e-06, "loss": 0.2998, "step": 7801 }, { "epoch": 0.67, "learning_rate": 5.22280181487737e-06, "loss": 0.2983, "step": 7802 }, { "epoch": 0.67, "learning_rate": 5.220363038570432e-06, "loss": 0.2548, "step": 7803 }, { "epoch": 0.67, "learning_rate": 5.21792463065429e-06, "loss": 0.2581, "step": 7804 }, { "epoch": 0.67, "learning_rate": 5.215486591316888e-06, "loss": 0.3034, "step": 7805 }, { "epoch": 0.67, "learning_rate": 5.21304892074614e-06, "loss": 0.2719, "step": 7806 }, { "epoch": 0.67, "learning_rate": 5.210611619129927e-06, "loss": 0.2829, "step": 7807 }, { "epoch": 0.67, "learning_rate": 5.20817468665611e-06, "loss": 0.2479, "step": 7808 }, { "epoch": 0.67, "learning_rate": 5.205738123512503e-06, "loss": 0.2667, "step": 7809 }, { "epoch": 0.67, "learning_rate": 5.20330192988692e-06, "loss": 0.2583, "step": 7810 }, { "epoch": 0.67, "learning_rate": 5.200866105967119e-06, "loss": 0.2859, "step": 7811 }, { "epoch": 0.67, "learning_rate": 5.198430651940846e-06, "loss": 0.2289, "step": 7812 }, { "epoch": 0.67, "learning_rate": 5.195995567995813e-06, "loss": 0.2433, "step": 7813 }, { "epoch": 0.67, "learning_rate": 5.1935608543197035e-06, "loss": 0.3417, "step": 7814 }, { "epoch": 0.67, "learning_rate": 5.191126511100179e-06, "loss": 0.2767, "step": 7815 }, { "epoch": 0.67, "learning_rate": 5.188692538524854e-06, "loss": 0.2842, "step": 7816 }, { "epoch": 0.67, "learning_rate": 5.186258936781341e-06, "loss": 0.3012, "step": 7817 }, { "epoch": 0.67, "learning_rate": 5.183825706057199e-06, "loss": 0.2468, "step": 7818 }, { "epoch": 0.67, "learning_rate": 5.1813928465399765e-06, "loss": 0.3082, "step": 7819 }, { "epoch": 0.67, "learning_rate": 5.178960358417184e-06, "loss": 0.2372, "step": 7820 }, { "epoch": 0.67, "learning_rate": 5.1765282418763045e-06, "loss": 0.3051, "step": 7821 }, { "epoch": 0.67, "learning_rate": 5.1740964971047945e-06, "loss": 0.3117, "step": 7822 }, { "epoch": 0.67, "learning_rate": 5.171665124290082e-06, "loss": 0.2623, "step": 7823 }, { "epoch": 0.67, "learning_rate": 5.169234123619569e-06, "loss": 0.2814, "step": 7824 }, { "epoch": 0.67, "learning_rate": 5.166803495280614e-06, "loss": 0.2733, "step": 7825 }, { "epoch": 0.67, "learning_rate": 5.164373239460561e-06, "loss": 0.2468, "step": 7826 }, { "epoch": 0.67, "learning_rate": 5.161943356346734e-06, "loss": 0.2553, "step": 7827 }, { "epoch": 0.67, "learning_rate": 5.159513846126403e-06, "loss": 0.2561, "step": 7828 }, { "epoch": 0.67, "learning_rate": 5.157084708986826e-06, "loss": 0.2724, "step": 7829 }, { "epoch": 0.67, "learning_rate": 5.154655945115233e-06, "loss": 0.2518, "step": 7830 }, { "epoch": 0.67, "learning_rate": 5.152227554698814e-06, "loss": 0.2718, "step": 7831 }, { "epoch": 0.67, "learning_rate": 5.149799537924749e-06, "loss": 0.2728, "step": 7832 }, { "epoch": 0.67, "learning_rate": 5.14737189498016e-06, "loss": 0.3082, "step": 7833 }, { "epoch": 0.67, "learning_rate": 5.144944626052178e-06, "loss": 0.2567, "step": 7834 }, { "epoch": 0.67, "learning_rate": 5.142517731327868e-06, "loss": 0.2644, "step": 7835 }, { "epoch": 0.67, "learning_rate": 5.1400912109942915e-06, "loss": 0.3019, "step": 7836 }, { "epoch": 0.67, "learning_rate": 5.137665065238471e-06, "loss": 0.2584, "step": 7837 }, { "epoch": 0.67, "learning_rate": 5.1352392942474005e-06, "loss": 0.2751, "step": 7838 }, { "epoch": 0.67, "learning_rate": 5.132813898208053e-06, "loss": 0.2803, "step": 7839 }, { "epoch": 0.67, "learning_rate": 5.130388877307353e-06, "loss": 0.2812, "step": 7840 }, { "epoch": 0.67, "learning_rate": 5.127964231732223e-06, "loss": 0.3066, "step": 7841 }, { "epoch": 0.67, "learning_rate": 5.1255399616695345e-06, "loss": 0.2782, "step": 7842 }, { "epoch": 0.67, "learning_rate": 5.12311606730614e-06, "loss": 0.2733, "step": 7843 }, { "epoch": 0.67, "learning_rate": 5.12069254882886e-06, "loss": 0.2774, "step": 7844 }, { "epoch": 0.67, "learning_rate": 5.118269406424492e-06, "loss": 0.275, "step": 7845 }, { "epoch": 0.67, "learning_rate": 5.115846640279798e-06, "loss": 0.2661, "step": 7846 }, { "epoch": 0.67, "learning_rate": 5.113424250581505e-06, "loss": 0.3121, "step": 7847 }, { "epoch": 0.67, "learning_rate": 5.111002237516334e-06, "loss": 0.3069, "step": 7848 }, { "epoch": 0.67, "learning_rate": 5.108580601270947e-06, "loss": 0.2819, "step": 7849 }, { "epoch": 0.67, "learning_rate": 5.106159342032e-06, "loss": 0.272, "step": 7850 }, { "epoch": 0.67, "learning_rate": 5.10373845998611e-06, "loss": 0.2482, "step": 7851 }, { "epoch": 0.67, "learning_rate": 5.101317955319866e-06, "loss": 0.2791, "step": 7852 }, { "epoch": 0.67, "learning_rate": 5.098897828219831e-06, "loss": 0.2651, "step": 7853 }, { "epoch": 0.67, "learning_rate": 5.096478078872528e-06, "loss": 0.2767, "step": 7854 }, { "epoch": 0.67, "learning_rate": 5.094058707464474e-06, "loss": 0.2966, "step": 7855 }, { "epoch": 0.67, "learning_rate": 5.091639714182129e-06, "loss": 0.2748, "step": 7856 }, { "epoch": 0.67, "learning_rate": 5.089221099211943e-06, "loss": 0.3063, "step": 7857 }, { "epoch": 0.67, "learning_rate": 5.08680286274033e-06, "loss": 0.253, "step": 7858 }, { "epoch": 0.67, "learning_rate": 5.084385004953674e-06, "loss": 0.2944, "step": 7859 }, { "epoch": 0.67, "learning_rate": 5.081967526038334e-06, "loss": 0.2981, "step": 7860 }, { "epoch": 0.67, "learning_rate": 5.079550426180635e-06, "loss": 0.2892, "step": 7861 }, { "epoch": 0.67, "learning_rate": 5.0771337055668826e-06, "loss": 0.287, "step": 7862 }, { "epoch": 0.67, "learning_rate": 5.074717364383335e-06, "loss": 0.2927, "step": 7863 }, { "epoch": 0.67, "learning_rate": 5.072301402816231e-06, "loss": 0.2622, "step": 7864 }, { "epoch": 0.67, "learning_rate": 5.069885821051796e-06, "loss": 0.2474, "step": 7865 }, { "epoch": 0.67, "learning_rate": 5.067470619276196e-06, "loss": 0.2961, "step": 7866 }, { "epoch": 0.67, "learning_rate": 5.06505579767559e-06, "loss": 0.2751, "step": 7867 }, { "epoch": 0.67, "learning_rate": 5.062641356436098e-06, "loss": 0.3049, "step": 7868 }, { "epoch": 0.67, "learning_rate": 5.060227295743813e-06, "loss": 0.2701, "step": 7869 }, { "epoch": 0.67, "learning_rate": 5.057813615784806e-06, "loss": 0.5499, "step": 7870 }, { "epoch": 0.67, "learning_rate": 5.055400316745096e-06, "loss": 0.2789, "step": 7871 }, { "epoch": 0.67, "learning_rate": 5.052987398810706e-06, "loss": 0.2699, "step": 7872 }, { "epoch": 0.67, "learning_rate": 5.0505748621676e-06, "loss": 0.2661, "step": 7873 }, { "epoch": 0.67, "learning_rate": 5.048162707001727e-06, "loss": 0.3229, "step": 7874 }, { "epoch": 0.68, "learning_rate": 5.045750933499005e-06, "loss": 0.2847, "step": 7875 }, { "epoch": 0.68, "learning_rate": 5.043339541845321e-06, "loss": 0.295, "step": 7876 }, { "epoch": 0.68, "learning_rate": 5.040928532226539e-06, "loss": 0.2955, "step": 7877 }, { "epoch": 0.68, "learning_rate": 5.038517904828473e-06, "loss": 0.2385, "step": 7878 }, { "epoch": 0.68, "learning_rate": 5.03610765983694e-06, "loss": 0.2499, "step": 7879 }, { "epoch": 0.68, "learning_rate": 5.033697797437695e-06, "loss": 0.2856, "step": 7880 }, { "epoch": 0.68, "learning_rate": 5.0312883178164875e-06, "loss": 0.3056, "step": 7881 }, { "epoch": 0.68, "learning_rate": 5.028879221159025e-06, "loss": 0.2779, "step": 7882 }, { "epoch": 0.68, "learning_rate": 5.026470507650988e-06, "loss": 0.2896, "step": 7883 }, { "epoch": 0.68, "learning_rate": 5.024062177478033e-06, "loss": 0.2896, "step": 7884 }, { "epoch": 0.68, "learning_rate": 5.021654230825772e-06, "loss": 0.2329, "step": 7885 }, { "epoch": 0.68, "learning_rate": 5.0192466678798116e-06, "loss": 0.2853, "step": 7886 }, { "epoch": 0.68, "learning_rate": 5.016839488825703e-06, "loss": 0.2591, "step": 7887 }, { "epoch": 0.68, "learning_rate": 5.014432693848985e-06, "loss": 0.2347, "step": 7888 }, { "epoch": 0.68, "learning_rate": 5.0120262831351595e-06, "loss": 0.2668, "step": 7889 }, { "epoch": 0.68, "learning_rate": 5.009620256869703e-06, "loss": 0.2458, "step": 7890 }, { "epoch": 0.68, "learning_rate": 5.007214615238061e-06, "loss": 0.2581, "step": 7891 }, { "epoch": 0.68, "learning_rate": 5.004809358425639e-06, "loss": 0.2809, "step": 7892 }, { "epoch": 0.68, "learning_rate": 5.002404486617839e-06, "loss": 0.2753, "step": 7893 }, { "epoch": 0.68, "learning_rate": 5.000000000000003e-06, "loss": 0.2739, "step": 7894 }, { "epoch": 0.68, "learning_rate": 4.9975958987574604e-06, "loss": 0.3065, "step": 7895 }, { "epoch": 0.68, "learning_rate": 4.995192183075509e-06, "loss": 0.3094, "step": 7896 }, { "epoch": 0.68, "learning_rate": 4.992788853139414e-06, "loss": 0.3174, "step": 7897 }, { "epoch": 0.68, "learning_rate": 4.9903859091344175e-06, "loss": 0.257, "step": 7898 }, { "epoch": 0.68, "learning_rate": 4.987983351245713e-06, "loss": 0.2749, "step": 7899 }, { "epoch": 0.68, "learning_rate": 4.985581179658495e-06, "loss": 0.2996, "step": 7900 }, { "epoch": 0.68, "learning_rate": 4.9831793945578995e-06, "loss": 0.3027, "step": 7901 }, { "epoch": 0.68, "learning_rate": 4.980777996129043e-06, "loss": 0.283, "step": 7902 }, { "epoch": 0.68, "learning_rate": 4.978376984557026e-06, "loss": 0.2543, "step": 7903 }, { "epoch": 0.68, "learning_rate": 4.975976360026894e-06, "loss": 0.5981, "step": 7904 }, { "epoch": 0.68, "learning_rate": 4.97357612272368e-06, "loss": 0.3047, "step": 7905 }, { "epoch": 0.68, "learning_rate": 4.971176272832382e-06, "loss": 0.2738, "step": 7906 }, { "epoch": 0.68, "learning_rate": 4.9687768105379685e-06, "loss": 0.2667, "step": 7907 }, { "epoch": 0.68, "learning_rate": 4.966377736025383e-06, "loss": 0.284, "step": 7908 }, { "epoch": 0.68, "learning_rate": 4.963979049479522e-06, "loss": 0.2688, "step": 7909 }, { "epoch": 0.68, "learning_rate": 4.9615807510852795e-06, "loss": 0.287, "step": 7910 }, { "epoch": 0.68, "learning_rate": 4.959182841027494e-06, "loss": 0.3025, "step": 7911 }, { "epoch": 0.68, "learning_rate": 4.956785319490986e-06, "loss": 0.2818, "step": 7912 }, { "epoch": 0.68, "learning_rate": 4.954388186660548e-06, "loss": 0.2675, "step": 7913 }, { "epoch": 0.68, "learning_rate": 4.951991442720937e-06, "loss": 0.2991, "step": 7914 }, { "epoch": 0.68, "learning_rate": 4.949595087856887e-06, "loss": 0.2449, "step": 7915 }, { "epoch": 0.68, "learning_rate": 4.947199122253083e-06, "loss": 0.3584, "step": 7916 }, { "epoch": 0.68, "learning_rate": 4.944803546094214e-06, "loss": 0.2966, "step": 7917 }, { "epoch": 0.68, "learning_rate": 4.942408359564906e-06, "loss": 0.2963, "step": 7918 }, { "epoch": 0.68, "learning_rate": 4.940013562849769e-06, "loss": 0.2566, "step": 7919 }, { "epoch": 0.68, "learning_rate": 4.937619156133385e-06, "loss": 0.2581, "step": 7920 }, { "epoch": 0.68, "learning_rate": 4.935225139600304e-06, "loss": 0.2724, "step": 7921 }, { "epoch": 0.68, "learning_rate": 4.932831513435045e-06, "loss": 0.5986, "step": 7922 }, { "epoch": 0.68, "learning_rate": 4.93043827782209e-06, "loss": 0.2997, "step": 7923 }, { "epoch": 0.68, "learning_rate": 4.928045432945909e-06, "loss": 0.2913, "step": 7924 }, { "epoch": 0.68, "learning_rate": 4.925652978990921e-06, "loss": 0.2437, "step": 7925 }, { "epoch": 0.68, "learning_rate": 4.92326091614153e-06, "loss": 0.576, "step": 7926 }, { "epoch": 0.68, "learning_rate": 4.920869244582102e-06, "loss": 0.2755, "step": 7927 }, { "epoch": 0.68, "learning_rate": 4.918477964496975e-06, "loss": 0.2682, "step": 7928 }, { "epoch": 0.68, "learning_rate": 4.916087076070462e-06, "loss": 0.2874, "step": 7929 }, { "epoch": 0.68, "learning_rate": 4.913696579486829e-06, "loss": 0.25, "step": 7930 }, { "epoch": 0.68, "learning_rate": 4.91130647493034e-06, "loss": 0.2695, "step": 7931 }, { "epoch": 0.68, "learning_rate": 4.9089167625852e-06, "loss": 0.2504, "step": 7932 }, { "epoch": 0.68, "learning_rate": 4.906527442635599e-06, "loss": 0.2741, "step": 7933 }, { "epoch": 0.68, "learning_rate": 4.904138515265696e-06, "loss": 0.2186, "step": 7934 }, { "epoch": 0.68, "learning_rate": 4.901749980659617e-06, "loss": 0.2565, "step": 7935 }, { "epoch": 0.68, "learning_rate": 4.899361839001462e-06, "loss": 0.2446, "step": 7936 }, { "epoch": 0.68, "learning_rate": 4.896974090475286e-06, "loss": 0.2684, "step": 7937 }, { "epoch": 0.68, "learning_rate": 4.89458673526514e-06, "loss": 0.2877, "step": 7938 }, { "epoch": 0.68, "learning_rate": 4.892199773555018e-06, "loss": 0.6144, "step": 7939 }, { "epoch": 0.68, "learning_rate": 4.889813205528895e-06, "loss": 0.2814, "step": 7940 }, { "epoch": 0.68, "learning_rate": 4.887427031370727e-06, "loss": 0.2676, "step": 7941 }, { "epoch": 0.68, "learning_rate": 4.885041251264419e-06, "loss": 0.2526, "step": 7942 }, { "epoch": 0.68, "learning_rate": 4.882655865393856e-06, "loss": 0.2839, "step": 7943 }, { "epoch": 0.68, "learning_rate": 4.880270873942895e-06, "loss": 0.2771, "step": 7944 }, { "epoch": 0.68, "learning_rate": 4.877886277095356e-06, "loss": 0.3147, "step": 7945 }, { "epoch": 0.68, "learning_rate": 4.875502075035039e-06, "loss": 0.3027, "step": 7946 }, { "epoch": 0.68, "learning_rate": 4.873118267945692e-06, "loss": 0.2527, "step": 7947 }, { "epoch": 0.68, "learning_rate": 4.870734856011066e-06, "loss": 0.3033, "step": 7948 }, { "epoch": 0.68, "learning_rate": 4.868351839414848e-06, "loss": 0.2677, "step": 7949 }, { "epoch": 0.68, "learning_rate": 4.8659692183407135e-06, "loss": 0.3055, "step": 7950 }, { "epoch": 0.68, "learning_rate": 4.863586992972304e-06, "loss": 0.2398, "step": 7951 }, { "epoch": 0.68, "learning_rate": 4.861205163493229e-06, "loss": 0.2754, "step": 7952 }, { "epoch": 0.68, "learning_rate": 4.858823730087072e-06, "loss": 0.3032, "step": 7953 }, { "epoch": 0.68, "learning_rate": 4.856442692937372e-06, "loss": 0.302, "step": 7954 }, { "epoch": 0.68, "learning_rate": 4.854062052227662e-06, "loss": 0.2488, "step": 7955 }, { "epoch": 0.68, "learning_rate": 4.851681808141418e-06, "loss": 0.2108, "step": 7956 }, { "epoch": 0.68, "learning_rate": 4.849301960862101e-06, "loss": 0.2521, "step": 7957 }, { "epoch": 0.68, "learning_rate": 4.846922510573139e-06, "loss": 0.2726, "step": 7958 }, { "epoch": 0.68, "learning_rate": 4.8445434574579275e-06, "loss": 0.3016, "step": 7959 }, { "epoch": 0.68, "learning_rate": 4.842164801699836e-06, "loss": 0.254, "step": 7960 }, { "epoch": 0.68, "learning_rate": 4.8397865434821886e-06, "loss": 0.294, "step": 7961 }, { "epoch": 0.68, "learning_rate": 4.837408682988305e-06, "loss": 0.2191, "step": 7962 }, { "epoch": 0.68, "learning_rate": 4.8350312204014475e-06, "loss": 0.3165, "step": 7963 }, { "epoch": 0.68, "learning_rate": 4.832654155904863e-06, "loss": 0.2407, "step": 7964 }, { "epoch": 0.68, "learning_rate": 4.830277489681762e-06, "loss": 0.2639, "step": 7965 }, { "epoch": 0.68, "learning_rate": 4.8279012219153284e-06, "loss": 0.3093, "step": 7966 }, { "epoch": 0.68, "learning_rate": 4.825525352788716e-06, "loss": 0.2513, "step": 7967 }, { "epoch": 0.68, "learning_rate": 4.8231498824850356e-06, "loss": 0.2851, "step": 7968 }, { "epoch": 0.68, "learning_rate": 4.820774811187389e-06, "loss": 0.2749, "step": 7969 }, { "epoch": 0.68, "learning_rate": 4.818400139078824e-06, "loss": 0.271, "step": 7970 }, { "epoch": 0.68, "learning_rate": 4.816025866342374e-06, "loss": 0.2677, "step": 7971 }, { "epoch": 0.68, "learning_rate": 4.813651993161036e-06, "loss": 0.2302, "step": 7972 }, { "epoch": 0.68, "learning_rate": 4.811278519717775e-06, "loss": 0.2811, "step": 7973 }, { "epoch": 0.68, "learning_rate": 4.808905446195532e-06, "loss": 0.2969, "step": 7974 }, { "epoch": 0.68, "learning_rate": 4.8065327727772005e-06, "loss": 0.3164, "step": 7975 }, { "epoch": 0.68, "learning_rate": 4.804160499645667e-06, "loss": 0.2782, "step": 7976 }, { "epoch": 0.68, "learning_rate": 4.8017886269837665e-06, "loss": 0.2904, "step": 7977 }, { "epoch": 0.68, "learning_rate": 4.7994171549743085e-06, "loss": 0.2961, "step": 7978 }, { "epoch": 0.68, "learning_rate": 4.797046083800087e-06, "loss": 0.3427, "step": 7979 }, { "epoch": 0.68, "learning_rate": 4.794675413643842e-06, "loss": 0.2944, "step": 7980 }, { "epoch": 0.68, "learning_rate": 4.7923051446883e-06, "loss": 0.278, "step": 7981 }, { "epoch": 0.68, "learning_rate": 4.7899352771161355e-06, "loss": 0.2711, "step": 7982 }, { "epoch": 0.68, "learning_rate": 4.787565811110022e-06, "loss": 0.306, "step": 7983 }, { "epoch": 0.68, "learning_rate": 4.785196746852584e-06, "loss": 0.2742, "step": 7984 }, { "epoch": 0.68, "learning_rate": 4.7828280845264056e-06, "loss": 0.2942, "step": 7985 }, { "epoch": 0.68, "learning_rate": 4.7804598243140664e-06, "loss": 0.2606, "step": 7986 }, { "epoch": 0.68, "learning_rate": 4.778091966398091e-06, "loss": 0.2525, "step": 7987 }, { "epoch": 0.68, "learning_rate": 4.775724510960984e-06, "loss": 0.2756, "step": 7988 }, { "epoch": 0.68, "learning_rate": 4.7733574581852185e-06, "loss": 0.274, "step": 7989 }, { "epoch": 0.68, "learning_rate": 4.770990808253234e-06, "loss": 0.2756, "step": 7990 }, { "epoch": 0.68, "learning_rate": 4.7686245613474445e-06, "loss": 0.2682, "step": 7991 }, { "epoch": 0.69, "learning_rate": 4.766258717650218e-06, "loss": 0.2888, "step": 7992 }, { "epoch": 0.69, "learning_rate": 4.763893277343915e-06, "loss": 0.2914, "step": 7993 }, { "epoch": 0.69, "learning_rate": 4.761528240610842e-06, "loss": 0.608, "step": 7994 }, { "epoch": 0.69, "learning_rate": 4.759163607633289e-06, "loss": 0.2693, "step": 7995 }, { "epoch": 0.69, "learning_rate": 4.756799378593509e-06, "loss": 0.2768, "step": 7996 }, { "epoch": 0.69, "learning_rate": 4.754435553673726e-06, "loss": 0.2953, "step": 7997 }, { "epoch": 0.69, "learning_rate": 4.752072133056135e-06, "loss": 0.2979, "step": 7998 }, { "epoch": 0.69, "learning_rate": 4.749709116922886e-06, "loss": 0.2753, "step": 7999 }, { "epoch": 0.69, "learning_rate": 4.747346505456123e-06, "loss": 0.2813, "step": 8000 }, { "epoch": 0.69, "learning_rate": 4.744984298837933e-06, "loss": 0.2518, "step": 8001 }, { "epoch": 0.69, "learning_rate": 4.742622497250389e-06, "loss": 0.3054, "step": 8002 }, { "epoch": 0.69, "learning_rate": 4.740261100875524e-06, "loss": 0.2849, "step": 8003 }, { "epoch": 0.69, "learning_rate": 4.737900109895345e-06, "loss": 0.2591, "step": 8004 }, { "epoch": 0.69, "learning_rate": 4.735539524491828e-06, "loss": 0.2394, "step": 8005 }, { "epoch": 0.69, "learning_rate": 4.7331793448469045e-06, "loss": 0.2416, "step": 8006 }, { "epoch": 0.69, "learning_rate": 4.7308195711425e-06, "loss": 0.2706, "step": 8007 }, { "epoch": 0.69, "learning_rate": 4.7284602035604845e-06, "loss": 0.2851, "step": 8008 }, { "epoch": 0.69, "learning_rate": 4.726101242282708e-06, "loss": 0.2498, "step": 8009 }, { "epoch": 0.69, "learning_rate": 4.723742687490988e-06, "loss": 0.3086, "step": 8010 }, { "epoch": 0.69, "learning_rate": 4.7213845393671136e-06, "loss": 0.2362, "step": 8011 }, { "epoch": 0.69, "learning_rate": 4.719026798092838e-06, "loss": 0.2952, "step": 8012 }, { "epoch": 0.69, "learning_rate": 4.7166694638498755e-06, "loss": 0.2143, "step": 8013 }, { "epoch": 0.69, "learning_rate": 4.7143125368199335e-06, "loss": 0.2373, "step": 8014 }, { "epoch": 0.69, "learning_rate": 4.71195601718466e-06, "loss": 0.2871, "step": 8015 }, { "epoch": 0.69, "learning_rate": 4.709599905125683e-06, "loss": 0.2692, "step": 8016 }, { "epoch": 0.69, "learning_rate": 4.7072442008246135e-06, "loss": 0.2756, "step": 8017 }, { "epoch": 0.69, "learning_rate": 4.704888904463003e-06, "loss": 0.252, "step": 8018 }, { "epoch": 0.69, "learning_rate": 4.702534016222398e-06, "loss": 0.2737, "step": 8019 }, { "epoch": 0.69, "learning_rate": 4.700179536284286e-06, "loss": 0.2926, "step": 8020 }, { "epoch": 0.69, "learning_rate": 4.697825464830153e-06, "loss": 0.2598, "step": 8021 }, { "epoch": 0.69, "learning_rate": 4.695471802041437e-06, "loss": 0.2375, "step": 8022 }, { "epoch": 0.69, "learning_rate": 4.693118548099538e-06, "loss": 0.2793, "step": 8023 }, { "epoch": 0.69, "learning_rate": 4.690765703185846e-06, "loss": 0.3109, "step": 8024 }, { "epoch": 0.69, "learning_rate": 4.6884132674816964e-06, "loss": 0.2486, "step": 8025 }, { "epoch": 0.69, "learning_rate": 4.686061241168406e-06, "loss": 0.2942, "step": 8026 }, { "epoch": 0.69, "learning_rate": 4.6837096244272586e-06, "loss": 0.2729, "step": 8027 }, { "epoch": 0.69, "learning_rate": 4.681358417439505e-06, "loss": 0.5831, "step": 8028 }, { "epoch": 0.69, "learning_rate": 4.679007620386369e-06, "loss": 0.3109, "step": 8029 }, { "epoch": 0.69, "learning_rate": 4.676657233449025e-06, "loss": 0.2721, "step": 8030 }, { "epoch": 0.69, "learning_rate": 4.674307256808646e-06, "loss": 0.2817, "step": 8031 }, { "epoch": 0.69, "learning_rate": 4.6719576906463445e-06, "loss": 0.2647, "step": 8032 }, { "epoch": 0.69, "learning_rate": 4.669608535143218e-06, "loss": 0.3099, "step": 8033 }, { "epoch": 0.69, "learning_rate": 4.667259790480327e-06, "loss": 0.3391, "step": 8034 }, { "epoch": 0.69, "learning_rate": 4.6649114568387024e-06, "loss": 0.2907, "step": 8035 }, { "epoch": 0.69, "learning_rate": 4.662563534399345e-06, "loss": 0.2653, "step": 8036 }, { "epoch": 0.69, "learning_rate": 4.6602160233432085e-06, "loss": 0.2228, "step": 8037 }, { "epoch": 0.69, "learning_rate": 4.657868923851244e-06, "loss": 0.252, "step": 8038 }, { "epoch": 0.69, "learning_rate": 4.655522236104344e-06, "loss": 0.2551, "step": 8039 }, { "epoch": 0.69, "learning_rate": 4.6531759602833815e-06, "loss": 0.345, "step": 8040 }, { "epoch": 0.69, "learning_rate": 4.650830096569196e-06, "loss": 0.2882, "step": 8041 }, { "epoch": 0.69, "learning_rate": 4.648484645142597e-06, "loss": 0.2986, "step": 8042 }, { "epoch": 0.69, "learning_rate": 4.646139606184362e-06, "loss": 0.2451, "step": 8043 }, { "epoch": 0.69, "learning_rate": 4.643794979875225e-06, "loss": 0.2682, "step": 8044 }, { "epoch": 0.69, "learning_rate": 4.6414507663959115e-06, "loss": 0.286, "step": 8045 }, { "epoch": 0.69, "learning_rate": 4.639106965927093e-06, "loss": 0.2797, "step": 8046 }, { "epoch": 0.69, "learning_rate": 4.636763578649419e-06, "loss": 0.3387, "step": 8047 }, { "epoch": 0.69, "learning_rate": 4.634420604743509e-06, "loss": 0.2574, "step": 8048 }, { "epoch": 0.69, "learning_rate": 4.632078044389947e-06, "loss": 0.2545, "step": 8049 }, { "epoch": 0.69, "learning_rate": 4.629735897769289e-06, "loss": 0.3049, "step": 8050 }, { "epoch": 0.69, "learning_rate": 4.627394165062045e-06, "loss": 0.2468, "step": 8051 }, { "epoch": 0.69, "learning_rate": 4.62505284644872e-06, "loss": 0.3224, "step": 8052 }, { "epoch": 0.69, "learning_rate": 4.622711942109759e-06, "loss": 0.2697, "step": 8053 }, { "epoch": 0.69, "learning_rate": 4.620371452225587e-06, "loss": 0.2632, "step": 8054 }, { "epoch": 0.69, "learning_rate": 4.61803137697661e-06, "loss": 0.2638, "step": 8055 }, { "epoch": 0.69, "learning_rate": 4.615691716543179e-06, "loss": 0.2703, "step": 8056 }, { "epoch": 0.69, "learning_rate": 4.613352471105627e-06, "loss": 0.2521, "step": 8057 }, { "epoch": 0.69, "learning_rate": 4.611013640844245e-06, "loss": 0.2441, "step": 8058 }, { "epoch": 0.69, "learning_rate": 4.608675225939308e-06, "loss": 0.2761, "step": 8059 }, { "epoch": 0.69, "learning_rate": 4.606337226571047e-06, "loss": 0.2935, "step": 8060 }, { "epoch": 0.69, "learning_rate": 4.6039996429196555e-06, "loss": 0.285, "step": 8061 }, { "epoch": 0.69, "learning_rate": 4.601662475165316e-06, "loss": 0.2796, "step": 8062 }, { "epoch": 0.69, "learning_rate": 4.5993257234881565e-06, "loss": 0.2687, "step": 8063 }, { "epoch": 0.69, "learning_rate": 4.596989388068283e-06, "loss": 0.2836, "step": 8064 }, { "epoch": 0.69, "learning_rate": 4.5946534690857705e-06, "loss": 0.2661, "step": 8065 }, { "epoch": 0.69, "learning_rate": 4.592317966720661e-06, "loss": 0.2847, "step": 8066 }, { "epoch": 0.69, "learning_rate": 4.589982881152966e-06, "loss": 0.3113, "step": 8067 }, { "epoch": 0.69, "learning_rate": 4.587648212562651e-06, "loss": 0.2533, "step": 8068 }, { "epoch": 0.69, "learning_rate": 4.585313961129676e-06, "loss": 0.2955, "step": 8069 }, { "epoch": 0.69, "learning_rate": 4.582980127033943e-06, "loss": 0.252, "step": 8070 }, { "epoch": 0.69, "learning_rate": 4.5806467104553345e-06, "loss": 0.2835, "step": 8071 }, { "epoch": 0.69, "learning_rate": 4.5783137115737e-06, "loss": 0.2668, "step": 8072 }, { "epoch": 0.69, "learning_rate": 4.575981130568856e-06, "loss": 0.3032, "step": 8073 }, { "epoch": 0.69, "learning_rate": 4.573648967620589e-06, "loss": 0.2485, "step": 8074 }, { "epoch": 0.69, "learning_rate": 4.57131722290864e-06, "loss": 0.312, "step": 8075 }, { "epoch": 0.69, "learning_rate": 4.568985896612742e-06, "loss": 0.2449, "step": 8076 }, { "epoch": 0.69, "learning_rate": 4.5666549889125726e-06, "loss": 0.2367, "step": 8077 }, { "epoch": 0.69, "learning_rate": 4.56432449998779e-06, "loss": 0.2734, "step": 8078 }, { "epoch": 0.69, "learning_rate": 4.561994430018016e-06, "loss": 0.2636, "step": 8079 }, { "epoch": 0.69, "learning_rate": 4.559664779182842e-06, "loss": 0.2562, "step": 8080 }, { "epoch": 0.69, "learning_rate": 4.557335547661828e-06, "loss": 0.2773, "step": 8081 }, { "epoch": 0.69, "learning_rate": 4.55500673563449e-06, "loss": 0.2729, "step": 8082 }, { "epoch": 0.69, "learning_rate": 4.552678343280337e-06, "loss": 0.2978, "step": 8083 }, { "epoch": 0.69, "learning_rate": 4.550350370778815e-06, "loss": 0.2931, "step": 8084 }, { "epoch": 0.69, "learning_rate": 4.548022818309361e-06, "loss": 0.2603, "step": 8085 }, { "epoch": 0.69, "learning_rate": 4.545695686051369e-06, "loss": 0.2605, "step": 8086 }, { "epoch": 0.69, "learning_rate": 4.5433689741842024e-06, "loss": 0.2934, "step": 8087 }, { "epoch": 0.69, "learning_rate": 4.5410426828871965e-06, "loss": 0.2527, "step": 8088 }, { "epoch": 0.69, "learning_rate": 4.5387168123396406e-06, "loss": 0.6045, "step": 8089 }, { "epoch": 0.69, "learning_rate": 4.536391362720816e-06, "loss": 0.2437, "step": 8090 }, { "epoch": 0.69, "learning_rate": 4.5340663342099435e-06, "loss": 0.2952, "step": 8091 }, { "epoch": 0.69, "learning_rate": 4.531741726986226e-06, "loss": 0.2439, "step": 8092 }, { "epoch": 0.69, "learning_rate": 4.529417541228846e-06, "loss": 0.2712, "step": 8093 }, { "epoch": 0.69, "learning_rate": 4.527093777116925e-06, "loss": 0.2842, "step": 8094 }, { "epoch": 0.69, "learning_rate": 4.5247704348295785e-06, "loss": 0.3024, "step": 8095 }, { "epoch": 0.69, "learning_rate": 4.522447514545865e-06, "loss": 0.2585, "step": 8096 }, { "epoch": 0.69, "learning_rate": 4.520125016444835e-06, "loss": 0.2802, "step": 8097 }, { "epoch": 0.69, "learning_rate": 4.5178029407054965e-06, "loss": 0.2325, "step": 8098 }, { "epoch": 0.69, "learning_rate": 4.515481287506811e-06, "loss": 0.3073, "step": 8099 }, { "epoch": 0.69, "learning_rate": 4.513160057027736e-06, "loss": 0.2603, "step": 8100 }, { "epoch": 0.69, "learning_rate": 4.510839249447169e-06, "loss": 0.3051, "step": 8101 }, { "epoch": 0.69, "learning_rate": 4.508518864943989e-06, "loss": 0.3133, "step": 8102 }, { "epoch": 0.69, "learning_rate": 4.506198903697041e-06, "loss": 0.2342, "step": 8103 }, { "epoch": 0.69, "learning_rate": 4.5038793658851365e-06, "loss": 0.3011, "step": 8104 }, { "epoch": 0.69, "learning_rate": 4.501560251687056e-06, "loss": 0.2777, "step": 8105 }, { "epoch": 0.69, "learning_rate": 4.4992415612815355e-06, "loss": 0.2684, "step": 8106 }, { "epoch": 0.69, "learning_rate": 4.496923294847303e-06, "loss": 0.2896, "step": 8107 }, { "epoch": 0.7, "learning_rate": 4.494605452563028e-06, "loss": 0.3073, "step": 8108 }, { "epoch": 0.7, "learning_rate": 4.492288034607361e-06, "loss": 0.2422, "step": 8109 }, { "epoch": 0.7, "learning_rate": 4.489971041158919e-06, "loss": 0.2971, "step": 8110 }, { "epoch": 0.7, "learning_rate": 4.487654472396284e-06, "loss": 0.251, "step": 8111 }, { "epoch": 0.7, "learning_rate": 4.4853383284980064e-06, "loss": 0.3059, "step": 8112 }, { "epoch": 0.7, "learning_rate": 4.483022609642596e-06, "loss": 0.2935, "step": 8113 }, { "epoch": 0.7, "learning_rate": 4.480707316008549e-06, "loss": 0.2644, "step": 8114 }, { "epoch": 0.7, "learning_rate": 4.478392447774307e-06, "loss": 0.2598, "step": 8115 }, { "epoch": 0.7, "learning_rate": 4.476078005118293e-06, "loss": 0.2622, "step": 8116 }, { "epoch": 0.7, "learning_rate": 4.473763988218891e-06, "loss": 0.299, "step": 8117 }, { "epoch": 0.7, "learning_rate": 4.4714503972544545e-06, "loss": 0.289, "step": 8118 }, { "epoch": 0.7, "learning_rate": 4.469137232403308e-06, "loss": 0.2335, "step": 8119 }, { "epoch": 0.7, "learning_rate": 4.466824493843728e-06, "loss": 0.2941, "step": 8120 }, { "epoch": 0.7, "learning_rate": 4.464512181753982e-06, "loss": 0.257, "step": 8121 }, { "epoch": 0.7, "learning_rate": 4.462200296312284e-06, "loss": 0.291, "step": 8122 }, { "epoch": 0.7, "learning_rate": 4.459888837696822e-06, "loss": 0.2661, "step": 8123 }, { "epoch": 0.7, "learning_rate": 4.457577806085754e-06, "loss": 0.2953, "step": 8124 }, { "epoch": 0.7, "learning_rate": 4.455267201657203e-06, "loss": 0.2645, "step": 8125 }, { "epoch": 0.7, "learning_rate": 4.4529570245892625e-06, "loss": 0.2548, "step": 8126 }, { "epoch": 0.7, "learning_rate": 4.450647275059979e-06, "loss": 0.2951, "step": 8127 }, { "epoch": 0.7, "learning_rate": 4.4483379532473906e-06, "loss": 0.2943, "step": 8128 }, { "epoch": 0.7, "learning_rate": 4.446029059329477e-06, "loss": 0.3195, "step": 8129 }, { "epoch": 0.7, "learning_rate": 4.443720593484198e-06, "loss": 0.2298, "step": 8130 }, { "epoch": 0.7, "learning_rate": 4.441412555889487e-06, "loss": 0.2728, "step": 8131 }, { "epoch": 0.7, "learning_rate": 4.439104946723228e-06, "loss": 0.2642, "step": 8132 }, { "epoch": 0.7, "learning_rate": 4.436797766163285e-06, "loss": 0.2825, "step": 8133 }, { "epoch": 0.7, "learning_rate": 4.4344910143874755e-06, "loss": 0.3176, "step": 8134 }, { "epoch": 0.7, "learning_rate": 4.432184691573602e-06, "loss": 0.2866, "step": 8135 }, { "epoch": 0.7, "learning_rate": 4.429878797899424e-06, "loss": 0.2526, "step": 8136 }, { "epoch": 0.7, "learning_rate": 4.42757333354266e-06, "loss": 0.2643, "step": 8137 }, { "epoch": 0.7, "learning_rate": 4.425268298681015e-06, "loss": 0.2716, "step": 8138 }, { "epoch": 0.7, "learning_rate": 4.422963693492141e-06, "loss": 0.2578, "step": 8139 }, { "epoch": 0.7, "learning_rate": 4.420659518153667e-06, "loss": 0.3008, "step": 8140 }, { "epoch": 0.7, "learning_rate": 4.41835577284319e-06, "loss": 0.2524, "step": 8141 }, { "epoch": 0.7, "learning_rate": 4.416052457738271e-06, "loss": 0.2606, "step": 8142 }, { "epoch": 0.7, "learning_rate": 4.41374957301644e-06, "loss": 0.2066, "step": 8143 }, { "epoch": 0.7, "learning_rate": 4.411447118855183e-06, "loss": 0.2757, "step": 8144 }, { "epoch": 0.7, "learning_rate": 4.409145095431976e-06, "loss": 0.2831, "step": 8145 }, { "epoch": 0.7, "learning_rate": 4.406843502924235e-06, "loss": 0.3225, "step": 8146 }, { "epoch": 0.7, "learning_rate": 4.40454234150936e-06, "loss": 0.5675, "step": 8147 }, { "epoch": 0.7, "learning_rate": 4.402241611364715e-06, "loss": 0.2445, "step": 8148 }, { "epoch": 0.7, "learning_rate": 4.399941312667626e-06, "loss": 0.2208, "step": 8149 }, { "epoch": 0.7, "learning_rate": 4.397641445595393e-06, "loss": 0.28, "step": 8150 }, { "epoch": 0.7, "learning_rate": 4.39534201032527e-06, "loss": 0.2852, "step": 8151 }, { "epoch": 0.7, "learning_rate": 4.393043007034496e-06, "loss": 0.3055, "step": 8152 }, { "epoch": 0.7, "learning_rate": 4.390744435900262e-06, "loss": 0.251, "step": 8153 }, { "epoch": 0.7, "learning_rate": 4.388446297099728e-06, "loss": 0.3126, "step": 8154 }, { "epoch": 0.7, "learning_rate": 4.386148590810027e-06, "loss": 0.3075, "step": 8155 }, { "epoch": 0.7, "learning_rate": 4.383851317208253e-06, "loss": 0.2547, "step": 8156 }, { "epoch": 0.7, "learning_rate": 4.381554476471473e-06, "loss": 0.2348, "step": 8157 }, { "epoch": 0.7, "learning_rate": 4.379258068776706e-06, "loss": 0.2521, "step": 8158 }, { "epoch": 0.7, "learning_rate": 4.3769620943009615e-06, "loss": 0.3021, "step": 8159 }, { "epoch": 0.7, "learning_rate": 4.374666553221191e-06, "loss": 0.2736, "step": 8160 }, { "epoch": 0.7, "learning_rate": 4.372371445714325e-06, "loss": 0.262, "step": 8161 }, { "epoch": 0.7, "learning_rate": 4.370076771957264e-06, "loss": 0.3145, "step": 8162 }, { "epoch": 0.7, "learning_rate": 4.367782532126864e-06, "loss": 0.3156, "step": 8163 }, { "epoch": 0.7, "learning_rate": 4.365488726399962e-06, "loss": 0.2725, "step": 8164 }, { "epoch": 0.7, "learning_rate": 4.363195354953341e-06, "loss": 0.2673, "step": 8165 }, { "epoch": 0.7, "learning_rate": 4.360902417963777e-06, "loss": 0.2756, "step": 8166 }, { "epoch": 0.7, "learning_rate": 4.358609915607987e-06, "loss": 0.2993, "step": 8167 }, { "epoch": 0.7, "learning_rate": 4.3563178480626665e-06, "loss": 0.2783, "step": 8168 }, { "epoch": 0.7, "learning_rate": 4.354026215504488e-06, "loss": 0.2764, "step": 8169 }, { "epoch": 0.7, "learning_rate": 4.351735018110066e-06, "loss": 0.2222, "step": 8170 }, { "epoch": 0.7, "learning_rate": 4.349444256056005e-06, "loss": 0.2797, "step": 8171 }, { "epoch": 0.7, "learning_rate": 4.347153929518852e-06, "loss": 0.244, "step": 8172 }, { "epoch": 0.7, "learning_rate": 4.344864038675147e-06, "loss": 0.2549, "step": 8173 }, { "epoch": 0.7, "learning_rate": 4.342574583701382e-06, "loss": 0.2751, "step": 8174 }, { "epoch": 0.7, "learning_rate": 4.340285564774007e-06, "loss": 0.2625, "step": 8175 }, { "epoch": 0.7, "learning_rate": 4.3379969820694636e-06, "loss": 0.2565, "step": 8176 }, { "epoch": 0.7, "learning_rate": 4.335708835764131e-06, "loss": 0.2946, "step": 8177 }, { "epoch": 0.7, "learning_rate": 4.333421126034374e-06, "loss": 0.2819, "step": 8178 }, { "epoch": 0.7, "learning_rate": 4.331133853056516e-06, "loss": 0.2598, "step": 8179 }, { "epoch": 0.7, "learning_rate": 4.3288470170068505e-06, "loss": 0.2549, "step": 8180 }, { "epoch": 0.7, "learning_rate": 4.326560618061639e-06, "loss": 0.2265, "step": 8181 }, { "epoch": 0.7, "learning_rate": 4.324274656397095e-06, "loss": 0.2296, "step": 8182 }, { "epoch": 0.7, "learning_rate": 4.321989132189422e-06, "loss": 0.2867, "step": 8183 }, { "epoch": 0.7, "learning_rate": 4.319704045614768e-06, "loss": 0.2393, "step": 8184 }, { "epoch": 0.7, "learning_rate": 4.317419396849258e-06, "loss": 0.304, "step": 8185 }, { "epoch": 0.7, "learning_rate": 4.315135186068984e-06, "loss": 0.2891, "step": 8186 }, { "epoch": 0.7, "learning_rate": 4.312851413449999e-06, "loss": 0.2687, "step": 8187 }, { "epoch": 0.7, "learning_rate": 4.31056807916833e-06, "loss": 0.2867, "step": 8188 }, { "epoch": 0.7, "learning_rate": 4.308285183399954e-06, "loss": 0.2703, "step": 8189 }, { "epoch": 0.7, "learning_rate": 4.306002726320839e-06, "loss": 0.3225, "step": 8190 }, { "epoch": 0.7, "learning_rate": 4.3037207081068965e-06, "loss": 0.2919, "step": 8191 }, { "epoch": 0.7, "learning_rate": 4.301439128934015e-06, "loss": 0.2985, "step": 8192 }, { "epoch": 0.7, "learning_rate": 4.299157988978048e-06, "loss": 0.297, "step": 8193 }, { "epoch": 0.7, "learning_rate": 4.296877288414815e-06, "loss": 0.6129, "step": 8194 }, { "epoch": 0.7, "learning_rate": 4.2945970274201045e-06, "loss": 0.2766, "step": 8195 }, { "epoch": 0.7, "learning_rate": 4.292317206169655e-06, "loss": 0.2724, "step": 8196 }, { "epoch": 0.7, "learning_rate": 4.290037824839202e-06, "loss": 0.2853, "step": 8197 }, { "epoch": 0.7, "learning_rate": 4.287758883604415e-06, "loss": 0.2875, "step": 8198 }, { "epoch": 0.7, "learning_rate": 4.285480382640947e-06, "loss": 0.2808, "step": 8199 }, { "epoch": 0.7, "learning_rate": 4.283202322124417e-06, "loss": 0.2679, "step": 8200 }, { "epoch": 0.7, "learning_rate": 4.280924702230403e-06, "loss": 0.2629, "step": 8201 }, { "epoch": 0.7, "learning_rate": 4.278647523134459e-06, "loss": 0.3494, "step": 8202 }, { "epoch": 0.7, "learning_rate": 4.276370785012086e-06, "loss": 0.2757, "step": 8203 }, { "epoch": 0.7, "learning_rate": 4.2740944880387795e-06, "loss": 0.262, "step": 8204 }, { "epoch": 0.7, "learning_rate": 4.271818632389973e-06, "loss": 0.28, "step": 8205 }, { "epoch": 0.7, "learning_rate": 4.269543218241079e-06, "loss": 0.2706, "step": 8206 }, { "epoch": 0.7, "learning_rate": 4.267268245767486e-06, "loss": 0.2501, "step": 8207 }, { "epoch": 0.7, "learning_rate": 4.2649937151445275e-06, "loss": 0.2593, "step": 8208 }, { "epoch": 0.7, "learning_rate": 4.262719626547519e-06, "loss": 0.2443, "step": 8209 }, { "epoch": 0.7, "learning_rate": 4.260445980151725e-06, "loss": 0.2626, "step": 8210 }, { "epoch": 0.7, "learning_rate": 4.2581727761324e-06, "loss": 0.2586, "step": 8211 }, { "epoch": 0.7, "learning_rate": 4.2559000146647485e-06, "loss": 0.2744, "step": 8212 }, { "epoch": 0.7, "learning_rate": 4.253627695923934e-06, "loss": 0.2627, "step": 8213 }, { "epoch": 0.7, "learning_rate": 4.2513558200851115e-06, "loss": 0.2453, "step": 8214 }, { "epoch": 0.7, "learning_rate": 4.249084387323373e-06, "loss": 0.3441, "step": 8215 }, { "epoch": 0.7, "learning_rate": 4.2468133978137945e-06, "loss": 0.5818, "step": 8216 }, { "epoch": 0.7, "learning_rate": 4.2445428517314116e-06, "loss": 0.2794, "step": 8217 }, { "epoch": 0.7, "learning_rate": 4.242272749251228e-06, "loss": 0.3177, "step": 8218 }, { "epoch": 0.7, "learning_rate": 4.240003090548213e-06, "loss": 0.304, "step": 8219 }, { "epoch": 0.7, "learning_rate": 4.237733875797293e-06, "loss": 0.2581, "step": 8220 }, { "epoch": 0.7, "learning_rate": 4.2354651051733795e-06, "loss": 0.3061, "step": 8221 }, { "epoch": 0.7, "learning_rate": 4.2331967788513295e-06, "loss": 0.3313, "step": 8222 }, { "epoch": 0.7, "learning_rate": 4.230928897005978e-06, "loss": 0.3079, "step": 8223 }, { "epoch": 0.7, "learning_rate": 4.22866145981212e-06, "loss": 0.2622, "step": 8224 }, { "epoch": 0.71, "learning_rate": 4.22639446744452e-06, "loss": 0.2564, "step": 8225 }, { "epoch": 0.71, "learning_rate": 4.2241279200779105e-06, "loss": 0.2591, "step": 8226 }, { "epoch": 0.71, "learning_rate": 4.221861817886973e-06, "loss": 0.2531, "step": 8227 }, { "epoch": 0.71, "learning_rate": 4.2195961610463845e-06, "loss": 0.2885, "step": 8228 }, { "epoch": 0.71, "learning_rate": 4.217330949730758e-06, "loss": 0.2784, "step": 8229 }, { "epoch": 0.71, "learning_rate": 4.215066184114689e-06, "loss": 0.3102, "step": 8230 }, { "epoch": 0.71, "learning_rate": 4.212801864372734e-06, "loss": 0.5885, "step": 8231 }, { "epoch": 0.71, "learning_rate": 4.210537990679417e-06, "loss": 0.2969, "step": 8232 }, { "epoch": 0.71, "learning_rate": 4.208274563209227e-06, "loss": 0.2432, "step": 8233 }, { "epoch": 0.71, "learning_rate": 4.2060115821366085e-06, "loss": 0.2612, "step": 8234 }, { "epoch": 0.71, "learning_rate": 4.203749047635998e-06, "loss": 0.5912, "step": 8235 }, { "epoch": 0.71, "learning_rate": 4.201486959881766e-06, "loss": 0.3296, "step": 8236 }, { "epoch": 0.71, "learning_rate": 4.199225319048267e-06, "loss": 0.2829, "step": 8237 }, { "epoch": 0.71, "learning_rate": 4.196964125309818e-06, "loss": 0.2664, "step": 8238 }, { "epoch": 0.71, "learning_rate": 4.194703378840701e-06, "loss": 0.257, "step": 8239 }, { "epoch": 0.71, "learning_rate": 4.192443079815166e-06, "loss": 0.2762, "step": 8240 }, { "epoch": 0.71, "learning_rate": 4.190183228407416e-06, "loss": 0.2682, "step": 8241 }, { "epoch": 0.71, "learning_rate": 4.187923824791642e-06, "loss": 0.2657, "step": 8242 }, { "epoch": 0.71, "learning_rate": 4.185664869141979e-06, "loss": 0.2876, "step": 8243 }, { "epoch": 0.71, "learning_rate": 4.183406361632534e-06, "loss": 0.2501, "step": 8244 }, { "epoch": 0.71, "learning_rate": 4.181148302437392e-06, "loss": 0.2637, "step": 8245 }, { "epoch": 0.71, "learning_rate": 4.178890691730585e-06, "loss": 0.2749, "step": 8246 }, { "epoch": 0.71, "learning_rate": 4.176633529686124e-06, "loss": 0.2677, "step": 8247 }, { "epoch": 0.71, "learning_rate": 4.174376816477969e-06, "loss": 0.2653, "step": 8248 }, { "epoch": 0.71, "learning_rate": 4.172120552280067e-06, "loss": 0.2803, "step": 8249 }, { "epoch": 0.71, "learning_rate": 4.169864737266321e-06, "loss": 0.2349, "step": 8250 }, { "epoch": 0.71, "learning_rate": 4.1676093716105845e-06, "loss": 0.2369, "step": 8251 }, { "epoch": 0.71, "learning_rate": 4.165354455486707e-06, "loss": 0.2574, "step": 8252 }, { "epoch": 0.71, "learning_rate": 4.163099989068476e-06, "loss": 0.234, "step": 8253 }, { "epoch": 0.71, "learning_rate": 4.160845972529656e-06, "loss": 0.2217, "step": 8254 }, { "epoch": 0.71, "learning_rate": 4.1585924060439755e-06, "loss": 0.2908, "step": 8255 }, { "epoch": 0.71, "learning_rate": 4.156339289785129e-06, "loss": 0.2633, "step": 8256 }, { "epoch": 0.71, "learning_rate": 4.154086623926781e-06, "loss": 0.2374, "step": 8257 }, { "epoch": 0.71, "learning_rate": 4.151834408642542e-06, "loss": 0.261, "step": 8258 }, { "epoch": 0.71, "learning_rate": 4.149582644106018e-06, "loss": 0.2505, "step": 8259 }, { "epoch": 0.71, "learning_rate": 4.147331330490752e-06, "loss": 0.2325, "step": 8260 }, { "epoch": 0.71, "learning_rate": 4.1450804679702685e-06, "loss": 0.2436, "step": 8261 }, { "epoch": 0.71, "learning_rate": 4.142830056718052e-06, "loss": 0.2469, "step": 8262 }, { "epoch": 0.71, "learning_rate": 4.140580096907554e-06, "loss": 0.2831, "step": 8263 }, { "epoch": 0.71, "learning_rate": 4.138330588712194e-06, "loss": 0.29, "step": 8264 }, { "epoch": 0.71, "learning_rate": 4.1360815323053406e-06, "loss": 0.306, "step": 8265 }, { "epoch": 0.71, "learning_rate": 4.133832927860356e-06, "loss": 0.2546, "step": 8266 }, { "epoch": 0.71, "learning_rate": 4.13158477555054e-06, "loss": 0.25, "step": 8267 }, { "epoch": 0.71, "learning_rate": 4.1293370755491725e-06, "loss": 0.2916, "step": 8268 }, { "epoch": 0.71, "learning_rate": 4.127089828029496e-06, "loss": 0.3303, "step": 8269 }, { "epoch": 0.71, "learning_rate": 4.124843033164716e-06, "loss": 0.3043, "step": 8270 }, { "epoch": 0.71, "learning_rate": 4.122596691128009e-06, "loss": 0.2563, "step": 8271 }, { "epoch": 0.71, "learning_rate": 4.120350802092501e-06, "loss": 0.3027, "step": 8272 }, { "epoch": 0.71, "learning_rate": 4.1181053662313075e-06, "loss": 0.2294, "step": 8273 }, { "epoch": 0.71, "learning_rate": 4.115860383717486e-06, "loss": 0.2878, "step": 8274 }, { "epoch": 0.71, "learning_rate": 4.113615854724071e-06, "loss": 0.2259, "step": 8275 }, { "epoch": 0.71, "learning_rate": 4.1113717794240615e-06, "loss": 0.2839, "step": 8276 }, { "epoch": 0.71, "learning_rate": 4.109128157990418e-06, "loss": 0.2674, "step": 8277 }, { "epoch": 0.71, "learning_rate": 4.106884990596073e-06, "loss": 0.2569, "step": 8278 }, { "epoch": 0.71, "learning_rate": 4.1046422774139065e-06, "loss": 0.2555, "step": 8279 }, { "epoch": 0.71, "learning_rate": 4.10240001861679e-06, "loss": 0.2634, "step": 8280 }, { "epoch": 0.71, "learning_rate": 4.100158214377536e-06, "loss": 0.2503, "step": 8281 }, { "epoch": 0.71, "learning_rate": 4.097916864868932e-06, "loss": 0.2673, "step": 8282 }, { "epoch": 0.71, "learning_rate": 4.095675970263738e-06, "loss": 0.265, "step": 8283 }, { "epoch": 0.71, "learning_rate": 4.093435530734664e-06, "loss": 0.2879, "step": 8284 }, { "epoch": 0.71, "learning_rate": 4.091195546454398e-06, "loss": 0.3255, "step": 8285 }, { "epoch": 0.71, "learning_rate": 4.088956017595575e-06, "loss": 0.3093, "step": 8286 }, { "epoch": 0.71, "learning_rate": 4.0867169443308196e-06, "loss": 0.2482, "step": 8287 }, { "epoch": 0.71, "learning_rate": 4.084478326832706e-06, "loss": 0.2867, "step": 8288 }, { "epoch": 0.71, "learning_rate": 4.082240165273767e-06, "loss": 0.2518, "step": 8289 }, { "epoch": 0.71, "learning_rate": 4.080002459826523e-06, "loss": 0.2887, "step": 8290 }, { "epoch": 0.71, "learning_rate": 4.0777652106634334e-06, "loss": 0.3162, "step": 8291 }, { "epoch": 0.71, "learning_rate": 4.07552841795694e-06, "loss": 0.2935, "step": 8292 }, { "epoch": 0.71, "learning_rate": 4.073292081879442e-06, "loss": 0.3007, "step": 8293 }, { "epoch": 0.71, "learning_rate": 4.071056202603305e-06, "loss": 0.2941, "step": 8294 }, { "epoch": 0.71, "learning_rate": 4.068820780300864e-06, "loss": 0.283, "step": 8295 }, { "epoch": 0.71, "learning_rate": 4.066585815144404e-06, "loss": 0.2927, "step": 8296 }, { "epoch": 0.71, "learning_rate": 4.0643513073061966e-06, "loss": 0.2428, "step": 8297 }, { "epoch": 0.71, "learning_rate": 4.06211725695846e-06, "loss": 0.2556, "step": 8298 }, { "epoch": 0.71, "learning_rate": 4.059883664273385e-06, "loss": 0.2646, "step": 8299 }, { "epoch": 0.71, "learning_rate": 4.057650529423126e-06, "loss": 0.2399, "step": 8300 }, { "epoch": 0.71, "learning_rate": 4.055417852579802e-06, "loss": 0.288, "step": 8301 }, { "epoch": 0.71, "learning_rate": 4.053185633915501e-06, "loss": 0.305, "step": 8302 }, { "epoch": 0.71, "learning_rate": 4.050953873602259e-06, "loss": 0.3251, "step": 8303 }, { "epoch": 0.71, "learning_rate": 4.048722571812105e-06, "loss": 0.2581, "step": 8304 }, { "epoch": 0.71, "learning_rate": 4.0464917287170055e-06, "loss": 0.2963, "step": 8305 }, { "epoch": 0.71, "learning_rate": 4.0442613444889065e-06, "loss": 0.2631, "step": 8306 }, { "epoch": 0.71, "learning_rate": 4.042031419299714e-06, "loss": 0.2841, "step": 8307 }, { "epoch": 0.71, "learning_rate": 4.039801953321302e-06, "loss": 0.3099, "step": 8308 }, { "epoch": 0.71, "learning_rate": 4.0375729467255074e-06, "loss": 0.2529, "step": 8309 }, { "epoch": 0.71, "learning_rate": 4.035344399684124e-06, "loss": 0.2891, "step": 8310 }, { "epoch": 0.71, "learning_rate": 4.033116312368926e-06, "loss": 0.2452, "step": 8311 }, { "epoch": 0.71, "learning_rate": 4.030888684951638e-06, "loss": 0.2714, "step": 8312 }, { "epoch": 0.71, "learning_rate": 4.028661517603956e-06, "loss": 0.2919, "step": 8313 }, { "epoch": 0.71, "learning_rate": 4.026434810497538e-06, "loss": 0.2911, "step": 8314 }, { "epoch": 0.71, "learning_rate": 4.024208563804008e-06, "loss": 0.2867, "step": 8315 }, { "epoch": 0.71, "learning_rate": 4.02198277769496e-06, "loss": 0.253, "step": 8316 }, { "epoch": 0.71, "learning_rate": 4.019757452341934e-06, "loss": 0.2496, "step": 8317 }, { "epoch": 0.71, "learning_rate": 4.017532587916461e-06, "loss": 0.2726, "step": 8318 }, { "epoch": 0.71, "learning_rate": 4.015308184590011e-06, "loss": 0.2758, "step": 8319 }, { "epoch": 0.71, "learning_rate": 4.013084242534032e-06, "loss": 0.2532, "step": 8320 }, { "epoch": 0.71, "learning_rate": 4.010860761919946e-06, "loss": 0.2753, "step": 8321 }, { "epoch": 0.71, "learning_rate": 4.008637742919114e-06, "loss": 0.2874, "step": 8322 }, { "epoch": 0.71, "learning_rate": 4.006415185702885e-06, "loss": 0.2992, "step": 8323 }, { "epoch": 0.71, "learning_rate": 4.004193090442551e-06, "loss": 0.2872, "step": 8324 }, { "epoch": 0.71, "learning_rate": 4.001971457309391e-06, "loss": 0.2856, "step": 8325 }, { "epoch": 0.71, "learning_rate": 3.999750286474637e-06, "loss": 0.2642, "step": 8326 }, { "epoch": 0.71, "learning_rate": 3.997529578109476e-06, "loss": 0.2352, "step": 8327 }, { "epoch": 0.71, "learning_rate": 3.995309332385083e-06, "loss": 0.2813, "step": 8328 }, { "epoch": 0.71, "learning_rate": 3.993089549472574e-06, "loss": 0.3192, "step": 8329 }, { "epoch": 0.71, "learning_rate": 3.99087022954304e-06, "loss": 0.3025, "step": 8330 }, { "epoch": 0.71, "learning_rate": 3.988651372767538e-06, "loss": 0.2973, "step": 8331 }, { "epoch": 0.71, "learning_rate": 3.986432979317085e-06, "loss": 0.3057, "step": 8332 }, { "epoch": 0.71, "learning_rate": 3.984215049362667e-06, "loss": 0.2794, "step": 8333 }, { "epoch": 0.71, "learning_rate": 3.981997583075222e-06, "loss": 0.3143, "step": 8334 }, { "epoch": 0.71, "learning_rate": 3.979780580625674e-06, "loss": 0.2734, "step": 8335 }, { "epoch": 0.71, "learning_rate": 3.977564042184888e-06, "loss": 0.2735, "step": 8336 }, { "epoch": 0.71, "learning_rate": 3.975347967923708e-06, "loss": 0.2598, "step": 8337 }, { "epoch": 0.71, "learning_rate": 3.973132358012939e-06, "loss": 0.3055, "step": 8338 }, { "epoch": 0.71, "learning_rate": 3.970917212623347e-06, "loss": 0.2514, "step": 8339 }, { "epoch": 0.71, "learning_rate": 3.96870253192567e-06, "loss": 0.2948, "step": 8340 }, { "epoch": 0.71, "learning_rate": 3.966488316090593e-06, "loss": 0.2532, "step": 8341 }, { "epoch": 0.72, "learning_rate": 3.964274565288792e-06, "loss": 0.283, "step": 8342 }, { "epoch": 0.72, "learning_rate": 3.9620612796908794e-06, "loss": 0.2628, "step": 8343 }, { "epoch": 0.72, "learning_rate": 3.95984845946745e-06, "loss": 0.3083, "step": 8344 }, { "epoch": 0.72, "learning_rate": 3.957636104789056e-06, "loss": 0.2852, "step": 8345 }, { "epoch": 0.72, "learning_rate": 3.9554242158262134e-06, "loss": 0.2607, "step": 8346 }, { "epoch": 0.72, "learning_rate": 3.95321279274941e-06, "loss": 0.312, "step": 8347 }, { "epoch": 0.72, "learning_rate": 3.951001835729079e-06, "loss": 0.2365, "step": 8348 }, { "epoch": 0.72, "learning_rate": 3.9487913449356454e-06, "loss": 0.2803, "step": 8349 }, { "epoch": 0.72, "learning_rate": 3.94658132053947e-06, "loss": 0.2827, "step": 8350 }, { "epoch": 0.72, "learning_rate": 3.944371762710897e-06, "loss": 0.3231, "step": 8351 }, { "epoch": 0.72, "learning_rate": 3.942162671620225e-06, "loss": 0.3166, "step": 8352 }, { "epoch": 0.72, "learning_rate": 3.939954047437723e-06, "loss": 0.2458, "step": 8353 }, { "epoch": 0.72, "learning_rate": 3.937745890333623e-06, "loss": 0.2772, "step": 8354 }, { "epoch": 0.72, "learning_rate": 3.935538200478108e-06, "loss": 0.2287, "step": 8355 }, { "epoch": 0.72, "learning_rate": 3.933330978041351e-06, "loss": 0.2947, "step": 8356 }, { "epoch": 0.72, "learning_rate": 3.931124223193461e-06, "loss": 0.2973, "step": 8357 }, { "epoch": 0.72, "learning_rate": 3.928917936104529e-06, "loss": 0.265, "step": 8358 }, { "epoch": 0.72, "learning_rate": 3.9267121169446056e-06, "loss": 0.2961, "step": 8359 }, { "epoch": 0.72, "learning_rate": 3.924506765883701e-06, "loss": 0.2796, "step": 8360 }, { "epoch": 0.72, "learning_rate": 3.9223018830918004e-06, "loss": 0.2588, "step": 8361 }, { "epoch": 0.72, "learning_rate": 3.920097468738833e-06, "loss": 0.3008, "step": 8362 }, { "epoch": 0.72, "learning_rate": 3.917893522994713e-06, "loss": 0.2415, "step": 8363 }, { "epoch": 0.72, "learning_rate": 3.9156900460293126e-06, "loss": 0.5614, "step": 8364 }, { "epoch": 0.72, "learning_rate": 3.913487038012451e-06, "loss": 0.2741, "step": 8365 }, { "epoch": 0.72, "learning_rate": 3.911284499113943e-06, "loss": 0.237, "step": 8366 }, { "epoch": 0.72, "learning_rate": 3.909082429503537e-06, "loss": 0.2568, "step": 8367 }, { "epoch": 0.72, "learning_rate": 3.906880829350961e-06, "loss": 0.2489, "step": 8368 }, { "epoch": 0.72, "learning_rate": 3.904679698825903e-06, "loss": 0.2836, "step": 8369 }, { "epoch": 0.72, "learning_rate": 3.902479038098017e-06, "loss": 0.2688, "step": 8370 }, { "epoch": 0.72, "learning_rate": 3.90027884733692e-06, "loss": 0.3223, "step": 8371 }, { "epoch": 0.72, "learning_rate": 3.898079126712184e-06, "loss": 0.2689, "step": 8372 }, { "epoch": 0.72, "learning_rate": 3.895879876393366e-06, "loss": 0.2696, "step": 8373 }, { "epoch": 0.72, "learning_rate": 3.893681096549961e-06, "loss": 0.2723, "step": 8374 }, { "epoch": 0.72, "learning_rate": 3.891482787351446e-06, "loss": 0.2868, "step": 8375 }, { "epoch": 0.72, "learning_rate": 3.889284948967253e-06, "loss": 0.2629, "step": 8376 }, { "epoch": 0.72, "learning_rate": 3.887087581566784e-06, "loss": 0.2762, "step": 8377 }, { "epoch": 0.72, "learning_rate": 3.884890685319402e-06, "loss": 0.2468, "step": 8378 }, { "epoch": 0.72, "learning_rate": 3.882694260394424e-06, "loss": 0.282, "step": 8379 }, { "epoch": 0.72, "learning_rate": 3.880498306961153e-06, "loss": 0.2477, "step": 8380 }, { "epoch": 0.72, "learning_rate": 3.87830282518883e-06, "loss": 0.2911, "step": 8381 }, { "epoch": 0.72, "learning_rate": 3.876107815246678e-06, "loss": 0.3041, "step": 8382 }, { "epoch": 0.72, "learning_rate": 3.8739132773038765e-06, "loss": 0.2927, "step": 8383 }, { "epoch": 0.72, "learning_rate": 3.871719211529571e-06, "loss": 0.2674, "step": 8384 }, { "epoch": 0.72, "learning_rate": 3.86952561809287e-06, "loss": 0.2781, "step": 8385 }, { "epoch": 0.72, "learning_rate": 3.867332497162836e-06, "loss": 0.2375, "step": 8386 }, { "epoch": 0.72, "learning_rate": 3.8651398489085176e-06, "loss": 0.278, "step": 8387 }, { "epoch": 0.72, "learning_rate": 3.862947673498904e-06, "loss": 0.3192, "step": 8388 }, { "epoch": 0.72, "learning_rate": 3.8607559711029586e-06, "loss": 0.2703, "step": 8389 }, { "epoch": 0.72, "learning_rate": 3.858564741889608e-06, "loss": 0.2401, "step": 8390 }, { "epoch": 0.72, "learning_rate": 3.8563739860277415e-06, "loss": 0.282, "step": 8391 }, { "epoch": 0.72, "learning_rate": 3.854183703686216e-06, "loss": 0.2543, "step": 8392 }, { "epoch": 0.72, "learning_rate": 3.851993895033836e-06, "loss": 0.2818, "step": 8393 }, { "epoch": 0.72, "learning_rate": 3.849804560239394e-06, "loss": 0.2881, "step": 8394 }, { "epoch": 0.72, "learning_rate": 3.847615699471625e-06, "loss": 0.2687, "step": 8395 }, { "epoch": 0.72, "learning_rate": 3.845427312899238e-06, "loss": 0.2484, "step": 8396 }, { "epoch": 0.72, "learning_rate": 3.843239400690903e-06, "loss": 0.5842, "step": 8397 }, { "epoch": 0.72, "learning_rate": 3.841051963015254e-06, "loss": 0.2821, "step": 8398 }, { "epoch": 0.72, "learning_rate": 3.83886500004089e-06, "loss": 0.2713, "step": 8399 }, { "epoch": 0.72, "learning_rate": 3.8366785119363624e-06, "loss": 0.2335, "step": 8400 }, { "epoch": 0.72, "learning_rate": 3.834492498870205e-06, "loss": 0.2289, "step": 8401 }, { "epoch": 0.72, "learning_rate": 3.8323069610109046e-06, "loss": 0.2355, "step": 8402 }, { "epoch": 0.72, "learning_rate": 3.830121898526901e-06, "loss": 0.3112, "step": 8403 }, { "epoch": 0.72, "learning_rate": 3.827937311586622e-06, "loss": 0.2593, "step": 8404 }, { "epoch": 0.72, "learning_rate": 3.825753200358434e-06, "loss": 0.3228, "step": 8405 }, { "epoch": 0.72, "learning_rate": 3.823569565010682e-06, "loss": 0.2648, "step": 8406 }, { "epoch": 0.72, "learning_rate": 3.821386405711669e-06, "loss": 0.2551, "step": 8407 }, { "epoch": 0.72, "learning_rate": 3.819203722629663e-06, "loss": 0.3141, "step": 8408 }, { "epoch": 0.72, "learning_rate": 3.817021515932897e-06, "loss": 0.2149, "step": 8409 }, { "epoch": 0.72, "learning_rate": 3.814839785789555e-06, "loss": 0.2427, "step": 8410 }, { "epoch": 0.72, "learning_rate": 3.8126585323678066e-06, "loss": 0.263, "step": 8411 }, { "epoch": 0.72, "learning_rate": 3.8104777558357632e-06, "loss": 0.2725, "step": 8412 }, { "epoch": 0.72, "learning_rate": 3.8082974563615104e-06, "loss": 0.2427, "step": 8413 }, { "epoch": 0.72, "learning_rate": 3.8061176341130955e-06, "loss": 0.2381, "step": 8414 }, { "epoch": 0.72, "learning_rate": 3.8039382892585297e-06, "loss": 0.2234, "step": 8415 }, { "epoch": 0.72, "learning_rate": 3.8017594219657872e-06, "loss": 0.2787, "step": 8416 }, { "epoch": 0.72, "learning_rate": 3.799581032402796e-06, "loss": 0.3348, "step": 8417 }, { "epoch": 0.72, "learning_rate": 3.7974031207374685e-06, "loss": 0.2542, "step": 8418 }, { "epoch": 0.72, "learning_rate": 3.795225687137657e-06, "loss": 0.2748, "step": 8419 }, { "epoch": 0.72, "learning_rate": 3.7930487317711907e-06, "loss": 0.2807, "step": 8420 }, { "epoch": 0.72, "learning_rate": 3.7908722548058586e-06, "loss": 0.2467, "step": 8421 }, { "epoch": 0.72, "learning_rate": 3.788696256409412e-06, "loss": 0.2634, "step": 8422 }, { "epoch": 0.72, "learning_rate": 3.7865207367495716e-06, "loss": 0.2833, "step": 8423 }, { "epoch": 0.72, "learning_rate": 3.784345695994004e-06, "loss": 0.3292, "step": 8424 }, { "epoch": 0.72, "learning_rate": 3.7821711343103652e-06, "loss": 0.3304, "step": 8425 }, { "epoch": 0.72, "learning_rate": 3.7799970518662477e-06, "loss": 0.3697, "step": 8426 }, { "epoch": 0.72, "learning_rate": 3.777823448829224e-06, "loss": 0.338, "step": 8427 }, { "epoch": 0.72, "learning_rate": 3.7756503253668244e-06, "loss": 0.2753, "step": 8428 }, { "epoch": 0.72, "learning_rate": 3.7734776816465413e-06, "loss": 0.2821, "step": 8429 }, { "epoch": 0.72, "learning_rate": 3.771305517835837e-06, "loss": 0.2437, "step": 8430 }, { "epoch": 0.72, "learning_rate": 3.769133834102119e-06, "loss": 0.2803, "step": 8431 }, { "epoch": 0.72, "learning_rate": 3.766962630612785e-06, "loss": 0.3024, "step": 8432 }, { "epoch": 0.72, "learning_rate": 3.764791907535168e-06, "loss": 0.3219, "step": 8433 }, { "epoch": 0.72, "learning_rate": 3.7626216650365833e-06, "loss": 0.2435, "step": 8434 }, { "epoch": 0.72, "learning_rate": 3.7604519032843e-06, "loss": 0.2917, "step": 8435 }, { "epoch": 0.72, "learning_rate": 3.7582826224455537e-06, "loss": 0.2458, "step": 8436 }, { "epoch": 0.72, "learning_rate": 3.756113822687546e-06, "loss": 0.2998, "step": 8437 }, { "epoch": 0.72, "learning_rate": 3.7539455041774255e-06, "loss": 0.2598, "step": 8438 }, { "epoch": 0.72, "learning_rate": 3.751777667082326e-06, "loss": 0.2447, "step": 8439 }, { "epoch": 0.72, "learning_rate": 3.749610311569334e-06, "loss": 0.2561, "step": 8440 }, { "epoch": 0.72, "learning_rate": 3.7474434378054913e-06, "loss": 0.2655, "step": 8441 }, { "epoch": 0.72, "learning_rate": 3.7452770459578134e-06, "loss": 0.3026, "step": 8442 }, { "epoch": 0.72, "learning_rate": 3.743111136193277e-06, "loss": 0.2927, "step": 8443 }, { "epoch": 0.72, "learning_rate": 3.740945708678817e-06, "loss": 0.2655, "step": 8444 }, { "epoch": 0.72, "learning_rate": 3.7387807635813343e-06, "loss": 0.275, "step": 8445 }, { "epoch": 0.72, "learning_rate": 3.7366163010676937e-06, "loss": 0.3058, "step": 8446 }, { "epoch": 0.72, "learning_rate": 3.7344523213047236e-06, "loss": 0.3077, "step": 8447 }, { "epoch": 0.72, "learning_rate": 3.7322888244592028e-06, "loss": 0.2726, "step": 8448 }, { "epoch": 0.72, "learning_rate": 3.7301258106978953e-06, "loss": 0.2393, "step": 8449 }, { "epoch": 0.72, "learning_rate": 3.7279632801875076e-06, "loss": 0.285, "step": 8450 }, { "epoch": 0.72, "learning_rate": 3.725801233094719e-06, "loss": 0.2836, "step": 8451 }, { "epoch": 0.72, "learning_rate": 3.7236396695861697e-06, "loss": 0.2501, "step": 8452 }, { "epoch": 0.72, "learning_rate": 3.7214785898284635e-06, "loss": 0.2289, "step": 8453 }, { "epoch": 0.72, "learning_rate": 3.7193179939881665e-06, "loss": 0.5929, "step": 8454 }, { "epoch": 0.72, "learning_rate": 3.717157882231798e-06, "loss": 0.2479, "step": 8455 }, { "epoch": 0.72, "learning_rate": 3.714998254725862e-06, "loss": 0.3182, "step": 8456 }, { "epoch": 0.72, "learning_rate": 3.7128391116368035e-06, "loss": 0.2921, "step": 8457 }, { "epoch": 0.73, "learning_rate": 3.710680453131039e-06, "loss": 0.2568, "step": 8458 }, { "epoch": 0.73, "learning_rate": 3.7085222793749486e-06, "loss": 0.2676, "step": 8459 }, { "epoch": 0.73, "learning_rate": 3.7063645905348753e-06, "loss": 0.2787, "step": 8460 }, { "epoch": 0.73, "learning_rate": 3.7042073867771243e-06, "loss": 0.3097, "step": 8461 }, { "epoch": 0.73, "learning_rate": 3.7020506682679524e-06, "loss": 0.2638, "step": 8462 }, { "epoch": 0.73, "learning_rate": 3.6998944351736034e-06, "loss": 0.3032, "step": 8463 }, { "epoch": 0.73, "learning_rate": 3.697738687660257e-06, "loss": 0.3312, "step": 8464 }, { "epoch": 0.73, "learning_rate": 3.6955834258940726e-06, "loss": 0.2757, "step": 8465 }, { "epoch": 0.73, "learning_rate": 3.6934286500411675e-06, "loss": 0.259, "step": 8466 }, { "epoch": 0.73, "learning_rate": 3.6912743602676195e-06, "loss": 0.2856, "step": 8467 }, { "epoch": 0.73, "learning_rate": 3.689120556739475e-06, "loss": 0.259, "step": 8468 }, { "epoch": 0.73, "learning_rate": 3.6869672396227283e-06, "loss": 0.2773, "step": 8469 }, { "epoch": 0.73, "learning_rate": 3.6848144090833602e-06, "loss": 0.2533, "step": 8470 }, { "epoch": 0.73, "learning_rate": 3.68266206528729e-06, "loss": 0.2699, "step": 8471 }, { "epoch": 0.73, "learning_rate": 3.680510208400413e-06, "loss": 0.2847, "step": 8472 }, { "epoch": 0.73, "learning_rate": 3.6783588385885826e-06, "loss": 0.2516, "step": 8473 }, { "epoch": 0.73, "learning_rate": 3.676207956017618e-06, "loss": 0.2451, "step": 8474 }, { "epoch": 0.73, "learning_rate": 3.6740575608533e-06, "loss": 0.2958, "step": 8475 }, { "epoch": 0.73, "learning_rate": 3.6719076532613606e-06, "loss": 0.2924, "step": 8476 }, { "epoch": 0.73, "learning_rate": 3.6697582334075156e-06, "loss": 0.329, "step": 8477 }, { "epoch": 0.73, "learning_rate": 3.667609301457431e-06, "loss": 0.3004, "step": 8478 }, { "epoch": 0.73, "learning_rate": 3.665460857576728e-06, "loss": 0.3032, "step": 8479 }, { "epoch": 0.73, "learning_rate": 3.663312901931002e-06, "loss": 0.3025, "step": 8480 }, { "epoch": 0.73, "learning_rate": 3.6611654346858074e-06, "loss": 0.3052, "step": 8481 }, { "epoch": 0.73, "learning_rate": 3.65901845600666e-06, "loss": 0.2354, "step": 8482 }, { "epoch": 0.73, "learning_rate": 3.656871966059038e-06, "loss": 0.2615, "step": 8483 }, { "epoch": 0.73, "learning_rate": 3.654725965008383e-06, "loss": 0.2446, "step": 8484 }, { "epoch": 0.73, "learning_rate": 3.6525804530201015e-06, "loss": 0.3176, "step": 8485 }, { "epoch": 0.73, "learning_rate": 3.650435430259548e-06, "loss": 0.2448, "step": 8486 }, { "epoch": 0.73, "learning_rate": 3.6482908968920636e-06, "loss": 0.2935, "step": 8487 }, { "epoch": 0.73, "learning_rate": 3.6461468530829293e-06, "loss": 0.2845, "step": 8488 }, { "epoch": 0.73, "learning_rate": 3.6440032989974004e-06, "loss": 0.2479, "step": 8489 }, { "epoch": 0.73, "learning_rate": 3.6418602348006903e-06, "loss": 0.2798, "step": 8490 }, { "epoch": 0.73, "learning_rate": 3.6397176606579775e-06, "loss": 0.2497, "step": 8491 }, { "epoch": 0.73, "learning_rate": 3.6375755767344047e-06, "loss": 0.3129, "step": 8492 }, { "epoch": 0.73, "learning_rate": 3.6354339831950603e-06, "loss": 0.2673, "step": 8493 }, { "epoch": 0.73, "learning_rate": 3.633292880205024e-06, "loss": 0.2958, "step": 8494 }, { "epoch": 0.73, "learning_rate": 3.6311522679293097e-06, "loss": 0.2579, "step": 8495 }, { "epoch": 0.73, "learning_rate": 3.6290121465329096e-06, "loss": 0.2701, "step": 8496 }, { "epoch": 0.73, "learning_rate": 3.626872516180774e-06, "loss": 0.2783, "step": 8497 }, { "epoch": 0.73, "learning_rate": 3.6247333770378133e-06, "loss": 0.2784, "step": 8498 }, { "epoch": 0.73, "learning_rate": 3.622594729268907e-06, "loss": 0.2903, "step": 8499 }, { "epoch": 0.73, "learning_rate": 3.6204565730388795e-06, "loss": 0.2931, "step": 8500 }, { "epoch": 0.73, "learning_rate": 3.618318908512545e-06, "loss": 0.2687, "step": 8501 }, { "epoch": 0.73, "learning_rate": 3.6161817358546513e-06, "loss": 0.2609, "step": 8502 }, { "epoch": 0.73, "learning_rate": 3.6140450552299268e-06, "loss": 0.2789, "step": 8503 }, { "epoch": 0.73, "learning_rate": 3.6119088668030557e-06, "loss": 0.3287, "step": 8504 }, { "epoch": 0.73, "learning_rate": 3.609773170738685e-06, "loss": 0.2912, "step": 8505 }, { "epoch": 0.73, "learning_rate": 3.6076379672014263e-06, "loss": 0.2817, "step": 8506 }, { "epoch": 0.73, "learning_rate": 3.6055032563558402e-06, "loss": 0.2711, "step": 8507 }, { "epoch": 0.73, "learning_rate": 3.6033690383664745e-06, "loss": 0.2748, "step": 8508 }, { "epoch": 0.73, "learning_rate": 3.601235313397813e-06, "loss": 0.2513, "step": 8509 }, { "epoch": 0.73, "learning_rate": 3.5991020816143164e-06, "loss": 0.2733, "step": 8510 }, { "epoch": 0.73, "learning_rate": 3.596969343180403e-06, "loss": 0.2764, "step": 8511 }, { "epoch": 0.73, "learning_rate": 3.594837098260454e-06, "loss": 0.264, "step": 8512 }, { "epoch": 0.73, "learning_rate": 3.5927053470188176e-06, "loss": 0.2702, "step": 8513 }, { "epoch": 0.73, "learning_rate": 3.590574089619786e-06, "loss": 0.2794, "step": 8514 }, { "epoch": 0.73, "learning_rate": 3.5884433262276376e-06, "loss": 0.2744, "step": 8515 }, { "epoch": 0.73, "learning_rate": 3.5863130570065998e-06, "loss": 0.2683, "step": 8516 }, { "epoch": 0.73, "learning_rate": 3.5841832821208577e-06, "loss": 0.27, "step": 8517 }, { "epoch": 0.73, "learning_rate": 3.5820540017345663e-06, "loss": 0.2721, "step": 8518 }, { "epoch": 0.73, "learning_rate": 3.5799252160118405e-06, "loss": 0.2531, "step": 8519 }, { "epoch": 0.73, "learning_rate": 3.5777969251167568e-06, "loss": 0.2673, "step": 8520 }, { "epoch": 0.73, "learning_rate": 3.575669129213353e-06, "loss": 0.2453, "step": 8521 }, { "epoch": 0.73, "learning_rate": 3.5735418284656287e-06, "loss": 0.2398, "step": 8522 }, { "epoch": 0.73, "learning_rate": 3.57141502303755e-06, "loss": 0.254, "step": 8523 }, { "epoch": 0.73, "learning_rate": 3.569288713093034e-06, "loss": 0.2614, "step": 8524 }, { "epoch": 0.73, "learning_rate": 3.5671628987959685e-06, "loss": 0.3077, "step": 8525 }, { "epoch": 0.73, "learning_rate": 3.565037580310201e-06, "loss": 0.2505, "step": 8526 }, { "epoch": 0.73, "learning_rate": 3.5629127577995405e-06, "loss": 0.2555, "step": 8527 }, { "epoch": 0.73, "learning_rate": 3.5607884314277583e-06, "loss": 0.2615, "step": 8528 }, { "epoch": 0.73, "learning_rate": 3.558664601358587e-06, "loss": 0.2531, "step": 8529 }, { "epoch": 0.73, "learning_rate": 3.5565412677557233e-06, "loss": 0.2532, "step": 8530 }, { "epoch": 0.73, "learning_rate": 3.5544184307828142e-06, "loss": 0.2345, "step": 8531 }, { "epoch": 0.73, "learning_rate": 3.552296090603491e-06, "loss": 0.2524, "step": 8532 }, { "epoch": 0.73, "learning_rate": 3.5501742473813226e-06, "loss": 0.2336, "step": 8533 }, { "epoch": 0.73, "learning_rate": 3.548052901279854e-06, "loss": 0.246, "step": 8534 }, { "epoch": 0.73, "learning_rate": 3.545932052462587e-06, "loss": 0.2652, "step": 8535 }, { "epoch": 0.73, "learning_rate": 3.5438117010929875e-06, "loss": 0.2462, "step": 8536 }, { "epoch": 0.73, "learning_rate": 3.541691847334484e-06, "loss": 0.2732, "step": 8537 }, { "epoch": 0.73, "learning_rate": 3.5395724913504546e-06, "loss": 0.3038, "step": 8538 }, { "epoch": 0.73, "learning_rate": 3.537453633304263e-06, "loss": 0.3066, "step": 8539 }, { "epoch": 0.73, "learning_rate": 3.5353352733592095e-06, "loss": 0.2835, "step": 8540 }, { "epoch": 0.73, "learning_rate": 3.53321741167857e-06, "loss": 0.2787, "step": 8541 }, { "epoch": 0.73, "learning_rate": 3.5311000484255796e-06, "loss": 0.5907, "step": 8542 }, { "epoch": 0.73, "learning_rate": 3.5289831837634334e-06, "loss": 0.2612, "step": 8543 }, { "epoch": 0.73, "learning_rate": 3.5268668178552922e-06, "loss": 0.3006, "step": 8544 }, { "epoch": 0.73, "learning_rate": 3.5247509508642653e-06, "loss": 0.2737, "step": 8545 }, { "epoch": 0.73, "learning_rate": 3.5226355829534475e-06, "loss": 0.2662, "step": 8546 }, { "epoch": 0.73, "learning_rate": 3.520520714285869e-06, "loss": 0.2731, "step": 8547 }, { "epoch": 0.73, "learning_rate": 3.5184063450245386e-06, "loss": 0.2498, "step": 8548 }, { "epoch": 0.73, "learning_rate": 3.5162924753324202e-06, "loss": 0.2605, "step": 8549 }, { "epoch": 0.73, "learning_rate": 3.5141791053724405e-06, "loss": 0.277, "step": 8550 }, { "epoch": 0.73, "learning_rate": 3.512066235307492e-06, "loss": 0.3588, "step": 8551 }, { "epoch": 0.73, "learning_rate": 3.509953865300414e-06, "loss": 0.2222, "step": 8552 }, { "epoch": 0.73, "learning_rate": 3.5078419955140263e-06, "loss": 0.2911, "step": 8553 }, { "epoch": 0.73, "learning_rate": 3.5057306261111024e-06, "loss": 0.2678, "step": 8554 }, { "epoch": 0.73, "learning_rate": 3.5036197572543697e-06, "loss": 0.3234, "step": 8555 }, { "epoch": 0.73, "learning_rate": 3.5015093891065253e-06, "loss": 0.2738, "step": 8556 }, { "epoch": 0.73, "learning_rate": 3.499399521830229e-06, "loss": 0.2598, "step": 8557 }, { "epoch": 0.73, "learning_rate": 3.4972901555880957e-06, "loss": 0.2654, "step": 8558 }, { "epoch": 0.73, "learning_rate": 3.4951812905427073e-06, "loss": 0.2504, "step": 8559 }, { "epoch": 0.73, "learning_rate": 3.4930729268566035e-06, "loss": 0.3011, "step": 8560 }, { "epoch": 0.73, "learning_rate": 3.49096506469229e-06, "loss": 0.2445, "step": 8561 }, { "epoch": 0.73, "learning_rate": 3.488857704212224e-06, "loss": 0.2291, "step": 8562 }, { "epoch": 0.73, "learning_rate": 3.4867508455788336e-06, "loss": 0.2383, "step": 8563 }, { "epoch": 0.73, "learning_rate": 3.484644488954505e-06, "loss": 0.2345, "step": 8564 }, { "epoch": 0.73, "learning_rate": 3.4825386345015865e-06, "loss": 0.2863, "step": 8565 }, { "epoch": 0.73, "learning_rate": 3.4804332823823862e-06, "loss": 0.2668, "step": 8566 }, { "epoch": 0.73, "learning_rate": 3.4783284327591736e-06, "loss": 0.5621, "step": 8567 }, { "epoch": 0.73, "learning_rate": 3.4762240857941843e-06, "loss": 0.5822, "step": 8568 }, { "epoch": 0.73, "learning_rate": 3.474120241649601e-06, "loss": 0.3455, "step": 8569 }, { "epoch": 0.73, "learning_rate": 3.4720169004875914e-06, "loss": 0.2673, "step": 8570 }, { "epoch": 0.73, "learning_rate": 3.46991406247026e-06, "loss": 0.2792, "step": 8571 }, { "epoch": 0.73, "learning_rate": 3.4678117277596856e-06, "loss": 0.2955, "step": 8572 }, { "epoch": 0.73, "learning_rate": 3.465709896517908e-06, "loss": 0.274, "step": 8573 }, { "epoch": 0.73, "learning_rate": 3.4636085689069244e-06, "loss": 0.3306, "step": 8574 }, { "epoch": 0.74, "learning_rate": 3.461507745088698e-06, "loss": 0.2629, "step": 8575 }, { "epoch": 0.74, "learning_rate": 3.4594074252251397e-06, "loss": 0.27, "step": 8576 }, { "epoch": 0.74, "learning_rate": 3.4573076094781466e-06, "loss": 0.2626, "step": 8577 }, { "epoch": 0.74, "learning_rate": 3.4552082980095514e-06, "loss": 0.3026, "step": 8578 }, { "epoch": 0.74, "learning_rate": 3.4531094909811614e-06, "loss": 0.2465, "step": 8579 }, { "epoch": 0.74, "learning_rate": 3.4510111885547426e-06, "loss": 0.2574, "step": 8580 }, { "epoch": 0.74, "learning_rate": 3.4489133908920225e-06, "loss": 0.2577, "step": 8581 }, { "epoch": 0.74, "learning_rate": 3.446816098154692e-06, "loss": 0.2302, "step": 8582 }, { "epoch": 0.74, "learning_rate": 3.4447193105043884e-06, "loss": 0.2762, "step": 8583 }, { "epoch": 0.74, "learning_rate": 3.4426230281027374e-06, "loss": 0.2634, "step": 8584 }, { "epoch": 0.74, "learning_rate": 3.4405272511112986e-06, "loss": 0.267, "step": 8585 }, { "epoch": 0.74, "learning_rate": 3.4384319796916075e-06, "loss": 0.2787, "step": 8586 }, { "epoch": 0.74, "learning_rate": 3.4363372140051586e-06, "loss": 0.247, "step": 8587 }, { "epoch": 0.74, "learning_rate": 3.4342429542134047e-06, "loss": 0.2725, "step": 8588 }, { "epoch": 0.74, "learning_rate": 3.432149200477766e-06, "loss": 0.2563, "step": 8589 }, { "epoch": 0.74, "learning_rate": 3.430055952959607e-06, "loss": 0.2482, "step": 8590 }, { "epoch": 0.74, "learning_rate": 3.4279632118202744e-06, "loss": 0.3, "step": 8591 }, { "epoch": 0.74, "learning_rate": 3.4258709772210686e-06, "loss": 0.2694, "step": 8592 }, { "epoch": 0.74, "learning_rate": 3.4237792493232402e-06, "loss": 0.2949, "step": 8593 }, { "epoch": 0.74, "learning_rate": 3.4216880282880128e-06, "loss": 0.2317, "step": 8594 }, { "epoch": 0.74, "learning_rate": 3.4195973142765694e-06, "loss": 0.3146, "step": 8595 }, { "epoch": 0.74, "learning_rate": 3.417507107450049e-06, "loss": 0.2536, "step": 8596 }, { "epoch": 0.74, "learning_rate": 3.4154174079695555e-06, "loss": 0.297, "step": 8597 }, { "epoch": 0.74, "learning_rate": 3.4133282159961535e-06, "loss": 0.2405, "step": 8598 }, { "epoch": 0.74, "learning_rate": 3.4112395316908697e-06, "loss": 0.2518, "step": 8599 }, { "epoch": 0.74, "learning_rate": 3.4091513552146836e-06, "loss": 0.2872, "step": 8600 }, { "epoch": 0.74, "learning_rate": 3.4070636867285455e-06, "loss": 0.3068, "step": 8601 }, { "epoch": 0.74, "learning_rate": 3.404976526393361e-06, "loss": 0.3159, "step": 8602 }, { "epoch": 0.74, "learning_rate": 3.4028898743699988e-06, "loss": 0.2369, "step": 8603 }, { "epoch": 0.74, "learning_rate": 3.4008037308192874e-06, "loss": 0.2592, "step": 8604 }, { "epoch": 0.74, "learning_rate": 3.3987180959020184e-06, "loss": 0.3264, "step": 8605 }, { "epoch": 0.74, "learning_rate": 3.3966329697789424e-06, "loss": 0.2519, "step": 8606 }, { "epoch": 0.74, "learning_rate": 3.394548352610767e-06, "loss": 0.2443, "step": 8607 }, { "epoch": 0.74, "learning_rate": 3.3924642445581647e-06, "loss": 0.2999, "step": 8608 }, { "epoch": 0.74, "learning_rate": 3.39038064578177e-06, "loss": 0.2908, "step": 8609 }, { "epoch": 0.74, "learning_rate": 3.3882975564421773e-06, "loss": 0.2714, "step": 8610 }, { "epoch": 0.74, "learning_rate": 3.3862149766999385e-06, "loss": 0.6063, "step": 8611 }, { "epoch": 0.74, "learning_rate": 3.3841329067155693e-06, "loss": 0.2453, "step": 8612 }, { "epoch": 0.74, "learning_rate": 3.38205134664955e-06, "loss": 0.2421, "step": 8613 }, { "epoch": 0.74, "learning_rate": 3.379970296662305e-06, "loss": 0.2287, "step": 8614 }, { "epoch": 0.74, "learning_rate": 3.3778897569142454e-06, "loss": 0.2653, "step": 8615 }, { "epoch": 0.74, "learning_rate": 3.3758097275657208e-06, "loss": 0.2332, "step": 8616 }, { "epoch": 0.74, "learning_rate": 3.3737302087770497e-06, "loss": 0.2803, "step": 8617 }, { "epoch": 0.74, "learning_rate": 3.3716512007085133e-06, "loss": 0.2875, "step": 8618 }, { "epoch": 0.74, "learning_rate": 3.369572703520352e-06, "loss": 0.2805, "step": 8619 }, { "epoch": 0.74, "learning_rate": 3.3674947173727667e-06, "loss": 0.2669, "step": 8620 }, { "epoch": 0.74, "learning_rate": 3.3654172424259103e-06, "loss": 0.3019, "step": 8621 }, { "epoch": 0.74, "learning_rate": 3.363340278839916e-06, "loss": 0.2605, "step": 8622 }, { "epoch": 0.74, "learning_rate": 3.3612638267748575e-06, "loss": 0.2546, "step": 8623 }, { "epoch": 0.74, "learning_rate": 3.359187886390781e-06, "loss": 0.2698, "step": 8624 }, { "epoch": 0.74, "learning_rate": 3.3571124578476888e-06, "loss": 0.2451, "step": 8625 }, { "epoch": 0.74, "learning_rate": 3.355037541305545e-06, "loss": 0.247, "step": 8626 }, { "epoch": 0.74, "learning_rate": 3.3529631369242764e-06, "loss": 0.2975, "step": 8627 }, { "epoch": 0.74, "learning_rate": 3.350889244863759e-06, "loss": 0.2687, "step": 8628 }, { "epoch": 0.74, "learning_rate": 3.348815865283848e-06, "loss": 0.2657, "step": 8629 }, { "epoch": 0.74, "learning_rate": 3.3467429983443477e-06, "loss": 0.2597, "step": 8630 }, { "epoch": 0.74, "learning_rate": 3.3446706442050203e-06, "loss": 0.2668, "step": 8631 }, { "epoch": 0.74, "learning_rate": 3.342598803025595e-06, "loss": 0.2994, "step": 8632 }, { "epoch": 0.74, "learning_rate": 3.3405274749657577e-06, "loss": 0.2716, "step": 8633 }, { "epoch": 0.74, "learning_rate": 3.3384566601851574e-06, "loss": 0.2849, "step": 8634 }, { "epoch": 0.74, "learning_rate": 3.336386358843403e-06, "loss": 0.3148, "step": 8635 }, { "epoch": 0.74, "learning_rate": 3.3343165711000613e-06, "loss": 0.2504, "step": 8636 }, { "epoch": 0.74, "learning_rate": 3.332247297114666e-06, "loss": 0.2939, "step": 8637 }, { "epoch": 0.74, "learning_rate": 3.330178537046699e-06, "loss": 0.2566, "step": 8638 }, { "epoch": 0.74, "learning_rate": 3.328110291055614e-06, "loss": 0.2408, "step": 8639 }, { "epoch": 0.74, "learning_rate": 3.3260425593008207e-06, "loss": 0.2856, "step": 8640 }, { "epoch": 0.74, "learning_rate": 3.323975341941691e-06, "loss": 0.2764, "step": 8641 }, { "epoch": 0.74, "learning_rate": 3.321908639137553e-06, "loss": 0.29, "step": 8642 }, { "epoch": 0.74, "learning_rate": 3.3198424510477014e-06, "loss": 0.2631, "step": 8643 }, { "epoch": 0.74, "learning_rate": 3.3177767778313884e-06, "loss": 0.2785, "step": 8644 }, { "epoch": 0.74, "learning_rate": 3.31571161964782e-06, "loss": 0.307, "step": 8645 }, { "epoch": 0.74, "learning_rate": 3.313646976656172e-06, "loss": 0.2286, "step": 8646 }, { "epoch": 0.74, "learning_rate": 3.3115828490155775e-06, "loss": 0.2911, "step": 8647 }, { "epoch": 0.74, "learning_rate": 3.309519236885128e-06, "loss": 0.3014, "step": 8648 }, { "epoch": 0.74, "learning_rate": 3.307456140423877e-06, "loss": 0.2965, "step": 8649 }, { "epoch": 0.74, "learning_rate": 3.305393559790838e-06, "loss": 0.2422, "step": 8650 }, { "epoch": 0.74, "learning_rate": 3.303331495144988e-06, "loss": 0.2131, "step": 8651 }, { "epoch": 0.74, "learning_rate": 3.3012699466452503e-06, "loss": 0.3192, "step": 8652 }, { "epoch": 0.74, "learning_rate": 3.299208914450532e-06, "loss": 0.2818, "step": 8653 }, { "epoch": 0.74, "learning_rate": 3.2971483987196783e-06, "loss": 0.289, "step": 8654 }, { "epoch": 0.74, "learning_rate": 3.295088399611507e-06, "loss": 0.33, "step": 8655 }, { "epoch": 0.74, "learning_rate": 3.2930289172847905e-06, "loss": 0.2634, "step": 8656 }, { "epoch": 0.74, "learning_rate": 3.290969951898265e-06, "loss": 0.2961, "step": 8657 }, { "epoch": 0.74, "learning_rate": 3.288911503610629e-06, "loss": 0.2738, "step": 8658 }, { "epoch": 0.74, "learning_rate": 3.286853572580527e-06, "loss": 0.3051, "step": 8659 }, { "epoch": 0.74, "learning_rate": 3.284796158966589e-06, "loss": 0.2639, "step": 8660 }, { "epoch": 0.74, "learning_rate": 3.282739262927377e-06, "loss": 0.2726, "step": 8661 }, { "epoch": 0.74, "learning_rate": 3.2806828846214324e-06, "loss": 0.2635, "step": 8662 }, { "epoch": 0.74, "learning_rate": 3.2786270242072504e-06, "loss": 0.2471, "step": 8663 }, { "epoch": 0.74, "learning_rate": 3.276571681843286e-06, "loss": 0.5651, "step": 8664 }, { "epoch": 0.74, "learning_rate": 3.274516857687957e-06, "loss": 0.2816, "step": 8665 }, { "epoch": 0.74, "learning_rate": 3.2724625518996322e-06, "loss": 0.3109, "step": 8666 }, { "epoch": 0.74, "learning_rate": 3.2704087646366546e-06, "loss": 0.2872, "step": 8667 }, { "epoch": 0.74, "learning_rate": 3.2683554960573207e-06, "loss": 0.2745, "step": 8668 }, { "epoch": 0.74, "learning_rate": 3.2663027463198794e-06, "loss": 0.3009, "step": 8669 }, { "epoch": 0.74, "learning_rate": 3.264250515582551e-06, "loss": 0.2886, "step": 8670 }, { "epoch": 0.74, "learning_rate": 3.2621988040035103e-06, "loss": 0.2413, "step": 8671 }, { "epoch": 0.74, "learning_rate": 3.2601476117408937e-06, "loss": 0.2578, "step": 8672 }, { "epoch": 0.74, "learning_rate": 3.258096938952796e-06, "loss": 0.2976, "step": 8673 }, { "epoch": 0.74, "learning_rate": 3.2560467857972744e-06, "loss": 0.5608, "step": 8674 }, { "epoch": 0.74, "learning_rate": 3.2539971524323455e-06, "loss": 0.2701, "step": 8675 }, { "epoch": 0.74, "learning_rate": 3.2519480390159806e-06, "loss": 0.2593, "step": 8676 }, { "epoch": 0.74, "learning_rate": 3.2498994457061184e-06, "loss": 0.2593, "step": 8677 }, { "epoch": 0.74, "learning_rate": 3.247851372660653e-06, "loss": 0.2738, "step": 8678 }, { "epoch": 0.74, "learning_rate": 3.2458038200374408e-06, "loss": 0.2813, "step": 8679 }, { "epoch": 0.74, "learning_rate": 3.2437567879942966e-06, "loss": 0.3186, "step": 8680 }, { "epoch": 0.74, "learning_rate": 3.241710276688995e-06, "loss": 0.2585, "step": 8681 }, { "epoch": 0.74, "learning_rate": 3.239664286279276e-06, "loss": 0.2809, "step": 8682 }, { "epoch": 0.74, "learning_rate": 3.237618816922826e-06, "loss": 0.2548, "step": 8683 }, { "epoch": 0.74, "learning_rate": 3.235573868777304e-06, "loss": 0.2615, "step": 8684 }, { "epoch": 0.74, "learning_rate": 3.233529442000324e-06, "loss": 0.5789, "step": 8685 }, { "epoch": 0.74, "learning_rate": 3.23148553674946e-06, "loss": 0.2833, "step": 8686 }, { "epoch": 0.74, "learning_rate": 3.2294421531822475e-06, "loss": 0.2785, "step": 8687 }, { "epoch": 0.74, "learning_rate": 3.227399291456179e-06, "loss": 0.2759, "step": 8688 }, { "epoch": 0.74, "learning_rate": 3.225356951728712e-06, "loss": 0.2632, "step": 8689 }, { "epoch": 0.74, "learning_rate": 3.223315134157253e-06, "loss": 0.2714, "step": 8690 }, { "epoch": 0.74, "learning_rate": 3.2212738388991803e-06, "loss": 0.2669, "step": 8691 }, { "epoch": 0.75, "learning_rate": 3.2192330661118243e-06, "loss": 0.2535, "step": 8692 }, { "epoch": 0.75, "learning_rate": 3.217192815952479e-06, "loss": 0.2918, "step": 8693 }, { "epoch": 0.75, "learning_rate": 3.2151530885783967e-06, "loss": 0.282, "step": 8694 }, { "epoch": 0.75, "learning_rate": 3.213113884146789e-06, "loss": 0.2822, "step": 8695 }, { "epoch": 0.75, "learning_rate": 3.211075202814832e-06, "loss": 0.2908, "step": 8696 }, { "epoch": 0.75, "learning_rate": 3.2090370447396468e-06, "loss": 0.2706, "step": 8697 }, { "epoch": 0.75, "learning_rate": 3.2069994100783376e-06, "loss": 0.256, "step": 8698 }, { "epoch": 0.75, "learning_rate": 3.2049622989879446e-06, "loss": 0.3157, "step": 8699 }, { "epoch": 0.75, "learning_rate": 3.202925711625483e-06, "loss": 0.2403, "step": 8700 }, { "epoch": 0.75, "learning_rate": 3.2008896481479223e-06, "loss": 0.2863, "step": 8701 }, { "epoch": 0.75, "learning_rate": 3.1988541087121916e-06, "loss": 0.2985, "step": 8702 }, { "epoch": 0.75, "learning_rate": 3.196819093475184e-06, "loss": 0.3075, "step": 8703 }, { "epoch": 0.75, "learning_rate": 3.194784602593739e-06, "loss": 0.2827, "step": 8704 }, { "epoch": 0.75, "learning_rate": 3.1927506362246753e-06, "loss": 0.2224, "step": 8705 }, { "epoch": 0.75, "learning_rate": 3.1907171945247595e-06, "loss": 0.269, "step": 8706 }, { "epoch": 0.75, "learning_rate": 3.1886842776507133e-06, "loss": 0.3062, "step": 8707 }, { "epoch": 0.75, "learning_rate": 3.1866518857592267e-06, "loss": 0.2865, "step": 8708 }, { "epoch": 0.75, "learning_rate": 3.1846200190069476e-06, "loss": 0.3038, "step": 8709 }, { "epoch": 0.75, "learning_rate": 3.182588677550482e-06, "loss": 0.2447, "step": 8710 }, { "epoch": 0.75, "learning_rate": 3.180557861546395e-06, "loss": 0.2328, "step": 8711 }, { "epoch": 0.75, "learning_rate": 3.1785275711512122e-06, "loss": 0.2377, "step": 8712 }, { "epoch": 0.75, "learning_rate": 3.1764978065214215e-06, "loss": 0.2819, "step": 8713 }, { "epoch": 0.75, "learning_rate": 3.174468567813461e-06, "loss": 0.31, "step": 8714 }, { "epoch": 0.75, "learning_rate": 3.1724398551837364e-06, "loss": 0.2526, "step": 8715 }, { "epoch": 0.75, "learning_rate": 3.1704116687886132e-06, "loss": 0.2666, "step": 8716 }, { "epoch": 0.75, "learning_rate": 3.168384008784412e-06, "loss": 0.3362, "step": 8717 }, { "epoch": 0.75, "learning_rate": 3.1663568753274153e-06, "loss": 0.2383, "step": 8718 }, { "epoch": 0.75, "learning_rate": 3.164330268573864e-06, "loss": 0.2584, "step": 8719 }, { "epoch": 0.75, "learning_rate": 3.1623041886799643e-06, "loss": 0.2503, "step": 8720 }, { "epoch": 0.75, "learning_rate": 3.1602786358018668e-06, "loss": 0.2581, "step": 8721 }, { "epoch": 0.75, "learning_rate": 3.1582536100956973e-06, "loss": 0.274, "step": 8722 }, { "epoch": 0.75, "learning_rate": 3.1562291117175324e-06, "loss": 0.2448, "step": 8723 }, { "epoch": 0.75, "learning_rate": 3.154205140823412e-06, "loss": 0.3199, "step": 8724 }, { "epoch": 0.75, "learning_rate": 3.152181697569334e-06, "loss": 0.2867, "step": 8725 }, { "epoch": 0.75, "learning_rate": 3.1501587821112532e-06, "loss": 0.2816, "step": 8726 }, { "epoch": 0.75, "learning_rate": 3.1481363946050925e-06, "loss": 0.2725, "step": 8727 }, { "epoch": 0.75, "learning_rate": 3.146114535206718e-06, "loss": 0.2487, "step": 8728 }, { "epoch": 0.75, "learning_rate": 3.1440932040719694e-06, "loss": 0.2401, "step": 8729 }, { "epoch": 0.75, "learning_rate": 3.1420724013566408e-06, "loss": 0.2852, "step": 8730 }, { "epoch": 0.75, "learning_rate": 3.1400521272164854e-06, "loss": 0.3552, "step": 8731 }, { "epoch": 0.75, "learning_rate": 3.1380323818072155e-06, "loss": 0.2738, "step": 8732 }, { "epoch": 0.75, "learning_rate": 3.136013165284504e-06, "loss": 0.2919, "step": 8733 }, { "epoch": 0.75, "learning_rate": 3.1339944778039844e-06, "loss": 0.5553, "step": 8734 }, { "epoch": 0.75, "learning_rate": 3.1319763195212382e-06, "loss": 0.2923, "step": 8735 }, { "epoch": 0.75, "learning_rate": 3.129958690591829e-06, "loss": 0.3061, "step": 8736 }, { "epoch": 0.75, "learning_rate": 3.127941591171254e-06, "loss": 0.283, "step": 8737 }, { "epoch": 0.75, "learning_rate": 3.125925021414985e-06, "loss": 0.2739, "step": 8738 }, { "epoch": 0.75, "learning_rate": 3.1239089814784505e-06, "loss": 0.2598, "step": 8739 }, { "epoch": 0.75, "learning_rate": 3.1218934715170355e-06, "loss": 0.2942, "step": 8740 }, { "epoch": 0.75, "learning_rate": 3.119878491686089e-06, "loss": 0.3181, "step": 8741 }, { "epoch": 0.75, "learning_rate": 3.1178640421409057e-06, "loss": 0.2711, "step": 8742 }, { "epoch": 0.75, "learning_rate": 3.115850123036761e-06, "loss": 0.2656, "step": 8743 }, { "epoch": 0.75, "learning_rate": 3.1138367345288757e-06, "loss": 0.2692, "step": 8744 }, { "epoch": 0.75, "learning_rate": 3.111823876772426e-06, "loss": 0.389, "step": 8745 }, { "epoch": 0.75, "learning_rate": 3.1098115499225567e-06, "loss": 0.2974, "step": 8746 }, { "epoch": 0.75, "learning_rate": 3.1077997541343672e-06, "loss": 0.2835, "step": 8747 }, { "epoch": 0.75, "learning_rate": 3.1057884895629174e-06, "loss": 0.2287, "step": 8748 }, { "epoch": 0.75, "learning_rate": 3.1037777563632264e-06, "loss": 0.2888, "step": 8749 }, { "epoch": 0.75, "learning_rate": 3.1017675546902704e-06, "loss": 0.2555, "step": 8750 }, { "epoch": 0.75, "learning_rate": 3.0997578846989886e-06, "loss": 0.2608, "step": 8751 }, { "epoch": 0.75, "learning_rate": 3.097748746544271e-06, "loss": 0.3033, "step": 8752 }, { "epoch": 0.75, "learning_rate": 3.095740140380975e-06, "loss": 0.3217, "step": 8753 }, { "epoch": 0.75, "learning_rate": 3.0937320663639148e-06, "loss": 0.3027, "step": 8754 }, { "epoch": 0.75, "learning_rate": 3.091724524647861e-06, "loss": 0.2849, "step": 8755 }, { "epoch": 0.75, "learning_rate": 3.0897175153875467e-06, "loss": 0.2658, "step": 8756 }, { "epoch": 0.75, "learning_rate": 3.087711038737662e-06, "loss": 0.2956, "step": 8757 }, { "epoch": 0.75, "learning_rate": 3.0857050948528576e-06, "loss": 0.2896, "step": 8758 }, { "epoch": 0.75, "learning_rate": 3.083699683887739e-06, "loss": 0.3032, "step": 8759 }, { "epoch": 0.75, "learning_rate": 3.081694805996872e-06, "loss": 0.2695, "step": 8760 }, { "epoch": 0.75, "learning_rate": 3.0796904613347855e-06, "loss": 0.2991, "step": 8761 }, { "epoch": 0.75, "learning_rate": 3.0776866500559654e-06, "loss": 0.3035, "step": 8762 }, { "epoch": 0.75, "learning_rate": 3.0756833723148526e-06, "loss": 0.3132, "step": 8763 }, { "epoch": 0.75, "learning_rate": 3.0736806282658514e-06, "loss": 0.2821, "step": 8764 }, { "epoch": 0.75, "learning_rate": 3.0716784180633276e-06, "loss": 0.2738, "step": 8765 }, { "epoch": 0.75, "learning_rate": 3.0696767418615945e-06, "loss": 0.2892, "step": 8766 }, { "epoch": 0.75, "learning_rate": 3.0676755998149333e-06, "loss": 0.2974, "step": 8767 }, { "epoch": 0.75, "learning_rate": 3.065674992077584e-06, "loss": 0.3019, "step": 8768 }, { "epoch": 0.75, "learning_rate": 3.063674918803743e-06, "loss": 0.2569, "step": 8769 }, { "epoch": 0.75, "learning_rate": 3.0616753801475653e-06, "loss": 0.2845, "step": 8770 }, { "epoch": 0.75, "learning_rate": 3.0596763762631655e-06, "loss": 0.2559, "step": 8771 }, { "epoch": 0.75, "learning_rate": 3.0576779073046214e-06, "loss": 0.2432, "step": 8772 }, { "epoch": 0.75, "learning_rate": 3.055679973425958e-06, "loss": 0.2878, "step": 8773 }, { "epoch": 0.75, "learning_rate": 3.0536825747811695e-06, "loss": 0.289, "step": 8774 }, { "epoch": 0.75, "learning_rate": 3.051685711524205e-06, "loss": 0.2903, "step": 8775 }, { "epoch": 0.75, "learning_rate": 3.0496893838089736e-06, "loss": 0.3218, "step": 8776 }, { "epoch": 0.75, "learning_rate": 3.0476935917893413e-06, "loss": 0.2651, "step": 8777 }, { "epoch": 0.75, "learning_rate": 3.045698335619135e-06, "loss": 0.2812, "step": 8778 }, { "epoch": 0.75, "learning_rate": 3.0437036154521426e-06, "loss": 0.2305, "step": 8779 }, { "epoch": 0.75, "learning_rate": 3.0417094314420958e-06, "loss": 0.3206, "step": 8780 }, { "epoch": 0.75, "learning_rate": 3.039715783742708e-06, "loss": 0.2581, "step": 8781 }, { "epoch": 0.75, "learning_rate": 3.0377226725076394e-06, "loss": 0.2673, "step": 8782 }, { "epoch": 0.75, "learning_rate": 3.0357300978905025e-06, "loss": 0.2631, "step": 8783 }, { "epoch": 0.75, "learning_rate": 3.0337380600448774e-06, "loss": 0.2949, "step": 8784 }, { "epoch": 0.75, "learning_rate": 3.031746559124301e-06, "loss": 0.2809, "step": 8785 }, { "epoch": 0.75, "learning_rate": 3.02975559528227e-06, "loss": 0.248, "step": 8786 }, { "epoch": 0.75, "learning_rate": 3.0277651686722353e-06, "loss": 0.2822, "step": 8787 }, { "epoch": 0.75, "learning_rate": 3.0257752794476113e-06, "loss": 0.2659, "step": 8788 }, { "epoch": 0.75, "learning_rate": 3.023785927761772e-06, "loss": 0.2891, "step": 8789 }, { "epoch": 0.75, "learning_rate": 3.021797113768039e-06, "loss": 0.2844, "step": 8790 }, { "epoch": 0.75, "learning_rate": 3.019808837619704e-06, "loss": 0.2602, "step": 8791 }, { "epoch": 0.75, "learning_rate": 3.017821099470014e-06, "loss": 0.2915, "step": 8792 }, { "epoch": 0.75, "learning_rate": 3.0158338994721737e-06, "loss": 0.2745, "step": 8793 }, { "epoch": 0.75, "learning_rate": 3.013847237779346e-06, "loss": 0.2757, "step": 8794 }, { "epoch": 0.75, "learning_rate": 3.011861114544654e-06, "loss": 0.3434, "step": 8795 }, { "epoch": 0.75, "learning_rate": 3.009875529921181e-06, "loss": 0.2637, "step": 8796 }, { "epoch": 0.75, "learning_rate": 3.0078904840619607e-06, "loss": 0.2512, "step": 8797 }, { "epoch": 0.75, "learning_rate": 3.005905977119992e-06, "loss": 0.264, "step": 8798 }, { "epoch": 0.75, "learning_rate": 3.0039220092482313e-06, "loss": 0.2604, "step": 8799 }, { "epoch": 0.75, "learning_rate": 3.0019385805995936e-06, "loss": 0.2977, "step": 8800 }, { "epoch": 0.75, "learning_rate": 2.999955691326952e-06, "loss": 0.2452, "step": 8801 }, { "epoch": 0.75, "learning_rate": 2.997973341583138e-06, "loss": 0.2404, "step": 8802 }, { "epoch": 0.75, "learning_rate": 2.9959915315209444e-06, "loss": 0.2798, "step": 8803 }, { "epoch": 0.75, "learning_rate": 2.994010261293111e-06, "loss": 0.2356, "step": 8804 }, { "epoch": 0.75, "learning_rate": 2.9920295310523496e-06, "loss": 0.2933, "step": 8805 }, { "epoch": 0.75, "learning_rate": 2.9900493409513256e-06, "loss": 0.291, "step": 8806 }, { "epoch": 0.75, "learning_rate": 2.98806969114266e-06, "loss": 0.2778, "step": 8807 }, { "epoch": 0.76, "learning_rate": 2.9860905817789354e-06, "loss": 0.2945, "step": 8808 }, { "epoch": 0.76, "learning_rate": 2.984112013012692e-06, "loss": 0.2766, "step": 8809 }, { "epoch": 0.76, "learning_rate": 2.9821339849964324e-06, "loss": 0.3086, "step": 8810 }, { "epoch": 0.76, "learning_rate": 2.980156497882605e-06, "loss": 0.2699, "step": 8811 }, { "epoch": 0.76, "learning_rate": 2.9781795518236288e-06, "loss": 0.2803, "step": 8812 }, { "epoch": 0.76, "learning_rate": 2.9762031469718777e-06, "loss": 0.289, "step": 8813 }, { "epoch": 0.76, "learning_rate": 2.9742272834796813e-06, "loss": 0.2631, "step": 8814 }, { "epoch": 0.76, "learning_rate": 2.9722519614993306e-06, "loss": 0.2943, "step": 8815 }, { "epoch": 0.76, "learning_rate": 2.970277181183074e-06, "loss": 0.2924, "step": 8816 }, { "epoch": 0.76, "learning_rate": 2.968302942683121e-06, "loss": 0.2261, "step": 8817 }, { "epoch": 0.76, "learning_rate": 2.966329246151626e-06, "loss": 0.2853, "step": 8818 }, { "epoch": 0.76, "learning_rate": 2.964356091740721e-06, "loss": 0.2446, "step": 8819 }, { "epoch": 0.76, "learning_rate": 2.9623834796024874e-06, "loss": 0.6219, "step": 8820 }, { "epoch": 0.76, "learning_rate": 2.9604114098889592e-06, "loss": 0.3151, "step": 8821 }, { "epoch": 0.76, "learning_rate": 2.9584398827521343e-06, "loss": 0.264, "step": 8822 }, { "epoch": 0.76, "learning_rate": 2.9564688983439716e-06, "loss": 0.2772, "step": 8823 }, { "epoch": 0.76, "learning_rate": 2.9544984568163815e-06, "loss": 0.2816, "step": 8824 }, { "epoch": 0.76, "learning_rate": 2.9525285583212382e-06, "loss": 0.2513, "step": 8825 }, { "epoch": 0.76, "learning_rate": 2.950559203010371e-06, "loss": 0.2559, "step": 8826 }, { "epoch": 0.76, "learning_rate": 2.9485903910355716e-06, "loss": 0.257, "step": 8827 }, { "epoch": 0.76, "learning_rate": 2.946622122548579e-06, "loss": 0.2834, "step": 8828 }, { "epoch": 0.76, "learning_rate": 2.9446543977011023e-06, "loss": 0.2933, "step": 8829 }, { "epoch": 0.76, "learning_rate": 2.942687216644803e-06, "loss": 0.2908, "step": 8830 }, { "epoch": 0.76, "learning_rate": 2.940720579531301e-06, "loss": 0.2822, "step": 8831 }, { "epoch": 0.76, "learning_rate": 2.9387544865121763e-06, "loss": 0.2466, "step": 8832 }, { "epoch": 0.76, "learning_rate": 2.936788937738966e-06, "loss": 0.2953, "step": 8833 }, { "epoch": 0.76, "learning_rate": 2.9348239333631655e-06, "loss": 0.303, "step": 8834 }, { "epoch": 0.76, "learning_rate": 2.9328594735362237e-06, "loss": 0.2895, "step": 8835 }, { "epoch": 0.76, "learning_rate": 2.9308955584095544e-06, "loss": 0.2406, "step": 8836 }, { "epoch": 0.76, "learning_rate": 2.9289321881345257e-06, "loss": 0.262, "step": 8837 }, { "epoch": 0.76, "learning_rate": 2.926969362862465e-06, "loss": 0.2534, "step": 8838 }, { "epoch": 0.76, "learning_rate": 2.9250070827446563e-06, "loss": 0.3336, "step": 8839 }, { "epoch": 0.76, "learning_rate": 2.923045347932344e-06, "loss": 0.2393, "step": 8840 }, { "epoch": 0.76, "learning_rate": 2.92108415857673e-06, "loss": 0.309, "step": 8841 }, { "epoch": 0.76, "learning_rate": 2.919123514828969e-06, "loss": 0.2438, "step": 8842 }, { "epoch": 0.76, "learning_rate": 2.9171634168401797e-06, "loss": 0.2555, "step": 8843 }, { "epoch": 0.76, "learning_rate": 2.9152038647614357e-06, "loss": 0.3085, "step": 8844 }, { "epoch": 0.76, "learning_rate": 2.9132448587437722e-06, "loss": 0.244, "step": 8845 }, { "epoch": 0.76, "learning_rate": 2.911286398938178e-06, "loss": 0.2952, "step": 8846 }, { "epoch": 0.76, "learning_rate": 2.9093284854956017e-06, "loss": 0.2805, "step": 8847 }, { "epoch": 0.76, "learning_rate": 2.907371118566953e-06, "loss": 0.2447, "step": 8848 }, { "epoch": 0.76, "learning_rate": 2.9054142983030884e-06, "loss": 0.2794, "step": 8849 }, { "epoch": 0.76, "learning_rate": 2.9034580248548363e-06, "loss": 0.2491, "step": 8850 }, { "epoch": 0.76, "learning_rate": 2.9015022983729733e-06, "loss": 0.2717, "step": 8851 }, { "epoch": 0.76, "learning_rate": 2.899547119008239e-06, "loss": 0.2768, "step": 8852 }, { "epoch": 0.76, "learning_rate": 2.897592486911328e-06, "loss": 0.3257, "step": 8853 }, { "epoch": 0.76, "learning_rate": 2.8956384022328943e-06, "loss": 0.3244, "step": 8854 }, { "epoch": 0.76, "learning_rate": 2.8936848651235516e-06, "loss": 0.3015, "step": 8855 }, { "epoch": 0.76, "learning_rate": 2.891731875733863e-06, "loss": 0.2301, "step": 8856 }, { "epoch": 0.76, "learning_rate": 2.889779434214356e-06, "loss": 0.275, "step": 8857 }, { "epoch": 0.76, "learning_rate": 2.8878275407155244e-06, "loss": 0.2593, "step": 8858 }, { "epoch": 0.76, "learning_rate": 2.885876195387799e-06, "loss": 0.2585, "step": 8859 }, { "epoch": 0.76, "learning_rate": 2.883925398381585e-06, "loss": 0.269, "step": 8860 }, { "epoch": 0.76, "learning_rate": 2.8819751498472405e-06, "loss": 0.2657, "step": 8861 }, { "epoch": 0.76, "learning_rate": 2.8800254499350797e-06, "loss": 0.2767, "step": 8862 }, { "epoch": 0.76, "learning_rate": 2.878076298795376e-06, "loss": 0.332, "step": 8863 }, { "epoch": 0.76, "learning_rate": 2.8761276965783613e-06, "loss": 0.2704, "step": 8864 }, { "epoch": 0.76, "learning_rate": 2.874179643434227e-06, "loss": 0.2747, "step": 8865 }, { "epoch": 0.76, "learning_rate": 2.8722321395131127e-06, "loss": 0.2766, "step": 8866 }, { "epoch": 0.76, "learning_rate": 2.8702851849651258e-06, "loss": 0.2656, "step": 8867 }, { "epoch": 0.76, "learning_rate": 2.868338779940327e-06, "loss": 0.2666, "step": 8868 }, { "epoch": 0.76, "learning_rate": 2.866392924588738e-06, "loss": 0.3022, "step": 8869 }, { "epoch": 0.76, "learning_rate": 2.864447619060333e-06, "loss": 0.2492, "step": 8870 }, { "epoch": 0.76, "learning_rate": 2.8625028635050477e-06, "loss": 0.2811, "step": 8871 }, { "epoch": 0.76, "learning_rate": 2.8605586580727783e-06, "loss": 0.2629, "step": 8872 }, { "epoch": 0.76, "learning_rate": 2.8586150029133663e-06, "loss": 0.2952, "step": 8873 }, { "epoch": 0.76, "learning_rate": 2.8566718981766238e-06, "loss": 0.2839, "step": 8874 }, { "epoch": 0.76, "learning_rate": 2.854729344012316e-06, "loss": 0.3015, "step": 8875 }, { "epoch": 0.76, "learning_rate": 2.8527873405701636e-06, "loss": 0.2704, "step": 8876 }, { "epoch": 0.76, "learning_rate": 2.850845887999848e-06, "loss": 0.2374, "step": 8877 }, { "epoch": 0.76, "learning_rate": 2.8489049864510053e-06, "loss": 0.277, "step": 8878 }, { "epoch": 0.76, "learning_rate": 2.8469646360732362e-06, "loss": 0.2593, "step": 8879 }, { "epoch": 0.76, "learning_rate": 2.845024837016085e-06, "loss": 0.3086, "step": 8880 }, { "epoch": 0.76, "learning_rate": 2.8430855894290655e-06, "loss": 0.2775, "step": 8881 }, { "epoch": 0.76, "learning_rate": 2.841146893461646e-06, "loss": 0.2693, "step": 8882 }, { "epoch": 0.76, "learning_rate": 2.8392087492632504e-06, "loss": 0.2789, "step": 8883 }, { "epoch": 0.76, "learning_rate": 2.837271156983261e-06, "loss": 0.2928, "step": 8884 }, { "epoch": 0.76, "learning_rate": 2.83533411677102e-06, "loss": 0.2761, "step": 8885 }, { "epoch": 0.76, "learning_rate": 2.833397628775827e-06, "loss": 0.264, "step": 8886 }, { "epoch": 0.76, "learning_rate": 2.8314616931469295e-06, "loss": 0.3126, "step": 8887 }, { "epoch": 0.76, "learning_rate": 2.8295263100335447e-06, "loss": 0.2698, "step": 8888 }, { "epoch": 0.76, "learning_rate": 2.8275914795848415e-06, "loss": 0.2929, "step": 8889 }, { "epoch": 0.76, "learning_rate": 2.8256572019499474e-06, "loss": 0.2853, "step": 8890 }, { "epoch": 0.76, "learning_rate": 2.8237234772779455e-06, "loss": 0.3002, "step": 8891 }, { "epoch": 0.76, "learning_rate": 2.8217903057178796e-06, "loss": 0.2512, "step": 8892 }, { "epoch": 0.76, "learning_rate": 2.8198576874187513e-06, "loss": 0.2697, "step": 8893 }, { "epoch": 0.76, "learning_rate": 2.8179256225295114e-06, "loss": 0.2708, "step": 8894 }, { "epoch": 0.76, "learning_rate": 2.815994111199074e-06, "loss": 0.2601, "step": 8895 }, { "epoch": 0.76, "learning_rate": 2.8140631535763195e-06, "loss": 0.3077, "step": 8896 }, { "epoch": 0.76, "learning_rate": 2.812132749810067e-06, "loss": 0.2704, "step": 8897 }, { "epoch": 0.76, "learning_rate": 2.810202900049106e-06, "loss": 0.2992, "step": 8898 }, { "epoch": 0.76, "learning_rate": 2.80827360444218e-06, "loss": 0.2306, "step": 8899 }, { "epoch": 0.76, "learning_rate": 2.806344863137989e-06, "loss": 0.2949, "step": 8900 }, { "epoch": 0.76, "learning_rate": 2.8044166762851898e-06, "loss": 0.2784, "step": 8901 }, { "epoch": 0.76, "learning_rate": 2.8024890440324e-06, "loss": 0.2444, "step": 8902 }, { "epoch": 0.76, "learning_rate": 2.8005619665281935e-06, "loss": 0.2661, "step": 8903 }, { "epoch": 0.76, "learning_rate": 2.798635443921094e-06, "loss": 0.5709, "step": 8904 }, { "epoch": 0.76, "learning_rate": 2.7967094763595917e-06, "loss": 0.3088, "step": 8905 }, { "epoch": 0.76, "learning_rate": 2.7947840639921308e-06, "loss": 0.2856, "step": 8906 }, { "epoch": 0.76, "learning_rate": 2.7928592069671113e-06, "loss": 0.2781, "step": 8907 }, { "epoch": 0.76, "learning_rate": 2.7909349054328937e-06, "loss": 0.2794, "step": 8908 }, { "epoch": 0.76, "learning_rate": 2.789011159537792e-06, "loss": 0.2968, "step": 8909 }, { "epoch": 0.76, "learning_rate": 2.7870879694300825e-06, "loss": 0.2686, "step": 8910 }, { "epoch": 0.76, "learning_rate": 2.7851653352579886e-06, "loss": 0.2484, "step": 8911 }, { "epoch": 0.76, "learning_rate": 2.783243257169702e-06, "loss": 0.2371, "step": 8912 }, { "epoch": 0.76, "learning_rate": 2.781321735313366e-06, "loss": 0.2642, "step": 8913 }, { "epoch": 0.76, "learning_rate": 2.779400769837083e-06, "loss": 0.2697, "step": 8914 }, { "epoch": 0.76, "learning_rate": 2.777480360888909e-06, "loss": 0.2253, "step": 8915 }, { "epoch": 0.76, "learning_rate": 2.7755605086168624e-06, "loss": 0.2537, "step": 8916 }, { "epoch": 0.76, "learning_rate": 2.7736412131689173e-06, "loss": 0.2699, "step": 8917 }, { "epoch": 0.76, "learning_rate": 2.7717224746929984e-06, "loss": 0.2928, "step": 8918 }, { "epoch": 0.76, "learning_rate": 2.769804293336994e-06, "loss": 0.2757, "step": 8919 }, { "epoch": 0.76, "learning_rate": 2.7678866692487503e-06, "loss": 0.3195, "step": 8920 }, { "epoch": 0.76, "learning_rate": 2.7659696025760674e-06, "loss": 0.2523, "step": 8921 }, { "epoch": 0.76, "learning_rate": 2.764053093466702e-06, "loss": 0.2746, "step": 8922 }, { "epoch": 0.76, "learning_rate": 2.7621371420683717e-06, "loss": 0.3416, "step": 8923 }, { "epoch": 0.76, "learning_rate": 2.7602217485287497e-06, "loss": 0.2775, "step": 8924 }, { "epoch": 0.77, "learning_rate": 2.7583069129954585e-06, "loss": 0.2582, "step": 8925 }, { "epoch": 0.77, "learning_rate": 2.75639263561609e-06, "loss": 0.3271, "step": 8926 }, { "epoch": 0.77, "learning_rate": 2.7544789165381834e-06, "loss": 0.2703, "step": 8927 }, { "epoch": 0.77, "learning_rate": 2.752565755909242e-06, "loss": 0.2889, "step": 8928 }, { "epoch": 0.77, "learning_rate": 2.750653153876721e-06, "loss": 0.2397, "step": 8929 }, { "epoch": 0.77, "learning_rate": 2.7487411105880356e-06, "loss": 0.3184, "step": 8930 }, { "epoch": 0.77, "learning_rate": 2.746829626190558e-06, "loss": 0.2506, "step": 8931 }, { "epoch": 0.77, "learning_rate": 2.7449187008316113e-06, "loss": 0.2531, "step": 8932 }, { "epoch": 0.77, "learning_rate": 2.7430083346584802e-06, "loss": 0.267, "step": 8933 }, { "epoch": 0.77, "learning_rate": 2.7410985278184144e-06, "loss": 0.2317, "step": 8934 }, { "epoch": 0.77, "learning_rate": 2.739189280458604e-06, "loss": 0.2836, "step": 8935 }, { "epoch": 0.77, "learning_rate": 2.7372805927262057e-06, "loss": 0.2641, "step": 8936 }, { "epoch": 0.77, "learning_rate": 2.7353724647683344e-06, "loss": 0.301, "step": 8937 }, { "epoch": 0.77, "learning_rate": 2.7334648967320587e-06, "loss": 0.2787, "step": 8938 }, { "epoch": 0.77, "learning_rate": 2.7315578887644057e-06, "loss": 0.2788, "step": 8939 }, { "epoch": 0.77, "learning_rate": 2.72965144101235e-06, "loss": 0.2899, "step": 8940 }, { "epoch": 0.77, "learning_rate": 2.7277455536228438e-06, "loss": 0.2459, "step": 8941 }, { "epoch": 0.77, "learning_rate": 2.725840226742774e-06, "loss": 0.2461, "step": 8942 }, { "epoch": 0.77, "learning_rate": 2.723935460518997e-06, "loss": 0.3032, "step": 8943 }, { "epoch": 0.77, "learning_rate": 2.722031255098323e-06, "loss": 0.2745, "step": 8944 }, { "epoch": 0.77, "learning_rate": 2.7201276106275176e-06, "loss": 0.2956, "step": 8945 }, { "epoch": 0.77, "learning_rate": 2.7182245272533046e-06, "loss": 0.3193, "step": 8946 }, { "epoch": 0.77, "learning_rate": 2.716322005122366e-06, "loss": 0.2828, "step": 8947 }, { "epoch": 0.77, "learning_rate": 2.7144200443813394e-06, "loss": 0.2609, "step": 8948 }, { "epoch": 0.77, "learning_rate": 2.712518645176815e-06, "loss": 0.2618, "step": 8949 }, { "epoch": 0.77, "learning_rate": 2.7106178076553446e-06, "loss": 0.3062, "step": 8950 }, { "epoch": 0.77, "learning_rate": 2.708717531963435e-06, "loss": 0.2448, "step": 8951 }, { "epoch": 0.77, "learning_rate": 2.7068178182475514e-06, "loss": 0.3056, "step": 8952 }, { "epoch": 0.77, "learning_rate": 2.7049186666541126e-06, "loss": 0.2924, "step": 8953 }, { "epoch": 0.77, "learning_rate": 2.703020077329498e-06, "loss": 0.2819, "step": 8954 }, { "epoch": 0.77, "learning_rate": 2.7011220504200432e-06, "loss": 0.3284, "step": 8955 }, { "epoch": 0.77, "learning_rate": 2.6992245860720325e-06, "loss": 0.288, "step": 8956 }, { "epoch": 0.77, "learning_rate": 2.6973276844317166e-06, "loss": 0.275, "step": 8957 }, { "epoch": 0.77, "learning_rate": 2.6954313456452995e-06, "loss": 0.2579, "step": 8958 }, { "epoch": 0.77, "learning_rate": 2.6935355698589417e-06, "loss": 0.2821, "step": 8959 }, { "epoch": 0.77, "learning_rate": 2.691640357218759e-06, "loss": 0.2684, "step": 8960 }, { "epoch": 0.77, "learning_rate": 2.6897457078708267e-06, "loss": 0.3161, "step": 8961 }, { "epoch": 0.77, "learning_rate": 2.6878516219611773e-06, "loss": 0.2882, "step": 8962 }, { "epoch": 0.77, "learning_rate": 2.6859580996357905e-06, "loss": 0.2635, "step": 8963 }, { "epoch": 0.77, "learning_rate": 2.684065141040614e-06, "loss": 0.2587, "step": 8964 }, { "epoch": 0.77, "learning_rate": 2.6821727463215476e-06, "loss": 0.2642, "step": 8965 }, { "epoch": 0.77, "learning_rate": 2.680280915624448e-06, "loss": 0.2742, "step": 8966 }, { "epoch": 0.77, "learning_rate": 2.6783896490951266e-06, "loss": 0.2397, "step": 8967 }, { "epoch": 0.77, "learning_rate": 2.6764989468793544e-06, "loss": 0.2552, "step": 8968 }, { "epoch": 0.77, "learning_rate": 2.6746088091228594e-06, "loss": 0.2483, "step": 8969 }, { "epoch": 0.77, "learning_rate": 2.6727192359713196e-06, "loss": 0.2735, "step": 8970 }, { "epoch": 0.77, "learning_rate": 2.6708302275703725e-06, "loss": 0.2709, "step": 8971 }, { "epoch": 0.77, "learning_rate": 2.668941784065623e-06, "loss": 0.3055, "step": 8972 }, { "epoch": 0.77, "learning_rate": 2.6670539056026134e-06, "loss": 0.2679, "step": 8973 }, { "epoch": 0.77, "learning_rate": 2.6651665923268555e-06, "loss": 0.2426, "step": 8974 }, { "epoch": 0.77, "learning_rate": 2.663279844383815e-06, "loss": 0.2935, "step": 8975 }, { "epoch": 0.77, "learning_rate": 2.661393661918912e-06, "loss": 0.2654, "step": 8976 }, { "epoch": 0.77, "learning_rate": 2.6595080450775268e-06, "loss": 0.2708, "step": 8977 }, { "epoch": 0.77, "learning_rate": 2.657622994004986e-06, "loss": 0.2574, "step": 8978 }, { "epoch": 0.77, "learning_rate": 2.6557385088465906e-06, "loss": 0.2859, "step": 8979 }, { "epoch": 0.77, "learning_rate": 2.65385458974758e-06, "loss": 0.2717, "step": 8980 }, { "epoch": 0.77, "learning_rate": 2.651971236853158e-06, "loss": 0.271, "step": 8981 }, { "epoch": 0.77, "learning_rate": 2.6500884503084857e-06, "loss": 0.2737, "step": 8982 }, { "epoch": 0.77, "learning_rate": 2.6482062302586774e-06, "loss": 0.278, "step": 8983 }, { "epoch": 0.77, "learning_rate": 2.646324576848809e-06, "loss": 0.2837, "step": 8984 }, { "epoch": 0.77, "learning_rate": 2.6444434902239047e-06, "loss": 0.326, "step": 8985 }, { "epoch": 0.77, "learning_rate": 2.6425629705289556e-06, "loss": 0.3069, "step": 8986 }, { "epoch": 0.77, "learning_rate": 2.640683017908895e-06, "loss": 0.2611, "step": 8987 }, { "epoch": 0.77, "learning_rate": 2.6388036325086243e-06, "loss": 0.2891, "step": 8988 }, { "epoch": 0.77, "learning_rate": 2.636924814472995e-06, "loss": 0.2443, "step": 8989 }, { "epoch": 0.77, "learning_rate": 2.6350465639468213e-06, "loss": 0.2504, "step": 8990 }, { "epoch": 0.77, "learning_rate": 2.633168881074867e-06, "loss": 0.3121, "step": 8991 }, { "epoch": 0.77, "learning_rate": 2.6312917660018534e-06, "loss": 0.2984, "step": 8992 }, { "epoch": 0.77, "learning_rate": 2.6294152188724644e-06, "loss": 0.2879, "step": 8993 }, { "epoch": 0.77, "learning_rate": 2.627539239831328e-06, "loss": 0.2751, "step": 8994 }, { "epoch": 0.77, "learning_rate": 2.6256638290230385e-06, "loss": 0.2329, "step": 8995 }, { "epoch": 0.77, "learning_rate": 2.623788986592144e-06, "loss": 0.261, "step": 8996 }, { "epoch": 0.77, "learning_rate": 2.6219147126831467e-06, "loss": 0.28, "step": 8997 }, { "epoch": 0.77, "learning_rate": 2.620041007440508e-06, "loss": 0.2652, "step": 8998 }, { "epoch": 0.77, "learning_rate": 2.618167871008642e-06, "loss": 0.2959, "step": 8999 }, { "epoch": 0.77, "learning_rate": 2.616295303531926e-06, "loss": 0.279, "step": 9000 }, { "epoch": 0.77, "learning_rate": 2.6144233051546797e-06, "loss": 0.2689, "step": 9001 }, { "epoch": 0.77, "learning_rate": 2.6125518760211933e-06, "loss": 0.2753, "step": 9002 }, { "epoch": 0.77, "learning_rate": 2.6106810162757046e-06, "loss": 0.254, "step": 9003 }, { "epoch": 0.77, "learning_rate": 2.608810726062412e-06, "loss": 0.2568, "step": 9004 }, { "epoch": 0.77, "learning_rate": 2.6069410055254674e-06, "loss": 0.2473, "step": 9005 }, { "epoch": 0.77, "learning_rate": 2.6050718548089804e-06, "loss": 0.2919, "step": 9006 }, { "epoch": 0.77, "learning_rate": 2.6032032740570177e-06, "loss": 0.2626, "step": 9007 }, { "epoch": 0.77, "learning_rate": 2.6013352634135957e-06, "loss": 0.3216, "step": 9008 }, { "epoch": 0.77, "learning_rate": 2.599467823022691e-06, "loss": 0.6083, "step": 9009 }, { "epoch": 0.77, "learning_rate": 2.5976009530282455e-06, "loss": 0.2357, "step": 9010 }, { "epoch": 0.77, "learning_rate": 2.5957346535741378e-06, "loss": 0.2939, "step": 9011 }, { "epoch": 0.77, "learning_rate": 2.593868924804218e-06, "loss": 0.3043, "step": 9012 }, { "epoch": 0.77, "learning_rate": 2.592003766862288e-06, "loss": 0.2481, "step": 9013 }, { "epoch": 0.77, "learning_rate": 2.5901391798921018e-06, "loss": 0.2776, "step": 9014 }, { "epoch": 0.77, "learning_rate": 2.5882751640373783e-06, "loss": 0.2607, "step": 9015 }, { "epoch": 0.77, "learning_rate": 2.586411719441777e-06, "loss": 0.2996, "step": 9016 }, { "epoch": 0.77, "learning_rate": 2.5845488462489344e-06, "loss": 0.3062, "step": 9017 }, { "epoch": 0.77, "learning_rate": 2.582686544602423e-06, "loss": 0.2727, "step": 9018 }, { "epoch": 0.77, "learning_rate": 2.5808248146457825e-06, "loss": 0.2786, "step": 9019 }, { "epoch": 0.77, "learning_rate": 2.5789636565225063e-06, "loss": 0.3326, "step": 9020 }, { "epoch": 0.77, "learning_rate": 2.5771030703760434e-06, "loss": 0.3009, "step": 9021 }, { "epoch": 0.77, "learning_rate": 2.575243056349801e-06, "loss": 0.2975, "step": 9022 }, { "epoch": 0.77, "learning_rate": 2.5733836145871305e-06, "loss": 0.2295, "step": 9023 }, { "epoch": 0.77, "learning_rate": 2.571524745231361e-06, "loss": 0.2505, "step": 9024 }, { "epoch": 0.77, "learning_rate": 2.569666448425756e-06, "loss": 0.2752, "step": 9025 }, { "epoch": 0.77, "learning_rate": 2.5678087243135476e-06, "loss": 0.2639, "step": 9026 }, { "epoch": 0.77, "learning_rate": 2.565951573037919e-06, "loss": 0.2641, "step": 9027 }, { "epoch": 0.77, "learning_rate": 2.56409499474201e-06, "loss": 0.2936, "step": 9028 }, { "epoch": 0.77, "learning_rate": 2.562238989568917e-06, "loss": 0.2559, "step": 9029 }, { "epoch": 0.77, "learning_rate": 2.560383557661692e-06, "loss": 0.2814, "step": 9030 }, { "epoch": 0.77, "learning_rate": 2.558528699163344e-06, "loss": 0.2956, "step": 9031 }, { "epoch": 0.77, "learning_rate": 2.556674414216833e-06, "loss": 0.2402, "step": 9032 }, { "epoch": 0.77, "learning_rate": 2.5548207029650784e-06, "loss": 0.2813, "step": 9033 }, { "epoch": 0.77, "learning_rate": 2.5529675655509567e-06, "loss": 0.2469, "step": 9034 }, { "epoch": 0.77, "learning_rate": 2.5511150021172993e-06, "loss": 0.3143, "step": 9035 }, { "epoch": 0.77, "learning_rate": 2.5492630128068895e-06, "loss": 0.2601, "step": 9036 }, { "epoch": 0.77, "learning_rate": 2.5474115977624724e-06, "loss": 0.2759, "step": 9037 }, { "epoch": 0.77, "learning_rate": 2.5455607571267484e-06, "loss": 0.3003, "step": 9038 }, { "epoch": 0.77, "learning_rate": 2.5437104910423647e-06, "loss": 0.2761, "step": 9039 }, { "epoch": 0.77, "learning_rate": 2.541860799651934e-06, "loss": 0.2429, "step": 9040 }, { "epoch": 0.77, "learning_rate": 2.5400116830980203e-06, "loss": 0.2861, "step": 9041 }, { "epoch": 0.78, "learning_rate": 2.5381631415231455e-06, "loss": 0.2895, "step": 9042 }, { "epoch": 0.78, "learning_rate": 2.5363151750697856e-06, "loss": 0.2639, "step": 9043 }, { "epoch": 0.78, "learning_rate": 2.534467783880373e-06, "loss": 0.2704, "step": 9044 }, { "epoch": 0.78, "learning_rate": 2.532620968097299e-06, "loss": 0.2745, "step": 9045 }, { "epoch": 0.78, "learning_rate": 2.530774727862899e-06, "loss": 0.2701, "step": 9046 }, { "epoch": 0.78, "learning_rate": 2.528929063319475e-06, "loss": 0.2693, "step": 9047 }, { "epoch": 0.78, "learning_rate": 2.5270839746092878e-06, "loss": 0.2646, "step": 9048 }, { "epoch": 0.78, "learning_rate": 2.5252394618745415e-06, "loss": 0.2801, "step": 9049 }, { "epoch": 0.78, "learning_rate": 2.5233955252574027e-06, "loss": 0.2787, "step": 9050 }, { "epoch": 0.78, "learning_rate": 2.521552164899994e-06, "loss": 0.2725, "step": 9051 }, { "epoch": 0.78, "learning_rate": 2.5197093809443917e-06, "loss": 0.2574, "step": 9052 }, { "epoch": 0.78, "learning_rate": 2.5178671735326333e-06, "loss": 0.2852, "step": 9053 }, { "epoch": 0.78, "learning_rate": 2.516025542806696e-06, "loss": 0.2609, "step": 9054 }, { "epoch": 0.78, "learning_rate": 2.5141844889085365e-06, "loss": 0.552, "step": 9055 }, { "epoch": 0.78, "learning_rate": 2.512344011980045e-06, "loss": 0.2188, "step": 9056 }, { "epoch": 0.78, "learning_rate": 2.51050411216308e-06, "loss": 0.2703, "step": 9057 }, { "epoch": 0.78, "learning_rate": 2.508664789599451e-06, "loss": 0.2961, "step": 9058 }, { "epoch": 0.78, "learning_rate": 2.5068260444309245e-06, "loss": 0.3074, "step": 9059 }, { "epoch": 0.78, "learning_rate": 2.504987876799224e-06, "loss": 0.2694, "step": 9060 }, { "epoch": 0.78, "learning_rate": 2.503150286846019e-06, "loss": 0.2668, "step": 9061 }, { "epoch": 0.78, "learning_rate": 2.501313274712952e-06, "loss": 0.2484, "step": 9062 }, { "epoch": 0.78, "learning_rate": 2.4994768405416024e-06, "loss": 0.2653, "step": 9063 }, { "epoch": 0.78, "learning_rate": 2.497640984473518e-06, "loss": 0.2391, "step": 9064 }, { "epoch": 0.78, "learning_rate": 2.4958057066501952e-06, "loss": 0.2624, "step": 9065 }, { "epoch": 0.78, "learning_rate": 2.4939710072130895e-06, "loss": 0.2863, "step": 9066 }, { "epoch": 0.78, "learning_rate": 2.492136886303611e-06, "loss": 0.2831, "step": 9067 }, { "epoch": 0.78, "learning_rate": 2.490303344063123e-06, "loss": 0.2546, "step": 9068 }, { "epoch": 0.78, "learning_rate": 2.48847038063295e-06, "loss": 0.2824, "step": 9069 }, { "epoch": 0.78, "learning_rate": 2.486637996154362e-06, "loss": 0.2989, "step": 9070 }, { "epoch": 0.78, "learning_rate": 2.4848061907685915e-06, "loss": 0.2344, "step": 9071 }, { "epoch": 0.78, "learning_rate": 2.4829749646168268e-06, "loss": 0.5498, "step": 9072 }, { "epoch": 0.78, "learning_rate": 2.4811443178402097e-06, "loss": 0.2698, "step": 9073 }, { "epoch": 0.78, "learning_rate": 2.4793142505798363e-06, "loss": 0.2719, "step": 9074 }, { "epoch": 0.78, "learning_rate": 2.4774847629767594e-06, "loss": 0.2679, "step": 9075 }, { "epoch": 0.78, "learning_rate": 2.47565585517199e-06, "loss": 0.2991, "step": 9076 }, { "epoch": 0.78, "learning_rate": 2.473827527306486e-06, "loss": 0.2684, "step": 9077 }, { "epoch": 0.78, "learning_rate": 2.4719997795211683e-06, "loss": 0.2932, "step": 9078 }, { "epoch": 0.78, "learning_rate": 2.470172611956909e-06, "loss": 0.2335, "step": 9079 }, { "epoch": 0.78, "learning_rate": 2.468346024754541e-06, "loss": 0.2741, "step": 9080 }, { "epoch": 0.78, "learning_rate": 2.4665200180548454e-06, "loss": 0.2979, "step": 9081 }, { "epoch": 0.78, "learning_rate": 2.464694591998563e-06, "loss": 0.275, "step": 9082 }, { "epoch": 0.78, "learning_rate": 2.4628697467263916e-06, "loss": 0.3026, "step": 9083 }, { "epoch": 0.78, "learning_rate": 2.4610454823789742e-06, "loss": 0.2519, "step": 9084 }, { "epoch": 0.78, "learning_rate": 2.459221799096918e-06, "loss": 0.2508, "step": 9085 }, { "epoch": 0.78, "learning_rate": 2.4573986970207906e-06, "loss": 0.2575, "step": 9086 }, { "epoch": 0.78, "learning_rate": 2.4555761762911e-06, "loss": 0.287, "step": 9087 }, { "epoch": 0.78, "learning_rate": 2.4537542370483203e-06, "loss": 0.3323, "step": 9088 }, { "epoch": 0.78, "learning_rate": 2.4519328794328755e-06, "loss": 0.2597, "step": 9089 }, { "epoch": 0.78, "learning_rate": 2.4501121035851494e-06, "loss": 0.2785, "step": 9090 }, { "epoch": 0.78, "learning_rate": 2.4482919096454803e-06, "loss": 0.2682, "step": 9091 }, { "epoch": 0.78, "learning_rate": 2.44647229775415e-06, "loss": 0.2406, "step": 9092 }, { "epoch": 0.78, "learning_rate": 2.444653268051418e-06, "loss": 0.5818, "step": 9093 }, { "epoch": 0.78, "learning_rate": 2.4428348206774775e-06, "loss": 0.3411, "step": 9094 }, { "epoch": 0.78, "learning_rate": 2.441016955772487e-06, "loss": 0.2684, "step": 9095 }, { "epoch": 0.78, "learning_rate": 2.439199673476561e-06, "loss": 0.2643, "step": 9096 }, { "epoch": 0.78, "learning_rate": 2.437382973929764e-06, "loss": 0.2584, "step": 9097 }, { "epoch": 0.78, "learning_rate": 2.4355668572721224e-06, "loss": 0.2581, "step": 9098 }, { "epoch": 0.78, "learning_rate": 2.433751323643606e-06, "loss": 0.2538, "step": 9099 }, { "epoch": 0.78, "learning_rate": 2.431936373184156e-06, "loss": 0.2744, "step": 9100 }, { "epoch": 0.78, "learning_rate": 2.430122006033653e-06, "loss": 0.2372, "step": 9101 }, { "epoch": 0.78, "learning_rate": 2.428308222331942e-06, "loss": 0.2873, "step": 9102 }, { "epoch": 0.78, "learning_rate": 2.42649502221882e-06, "loss": 0.2692, "step": 9103 }, { "epoch": 0.78, "learning_rate": 2.42468240583404e-06, "loss": 0.2466, "step": 9104 }, { "epoch": 0.78, "learning_rate": 2.422870373317313e-06, "loss": 0.2924, "step": 9105 }, { "epoch": 0.78, "learning_rate": 2.4210589248082914e-06, "loss": 0.2752, "step": 9106 }, { "epoch": 0.78, "learning_rate": 2.4192480604466052e-06, "loss": 0.2638, "step": 9107 }, { "epoch": 0.78, "learning_rate": 2.4174377803718184e-06, "loss": 0.2562, "step": 9108 }, { "epoch": 0.78, "learning_rate": 2.41562808472346e-06, "loss": 0.3356, "step": 9109 }, { "epoch": 0.78, "learning_rate": 2.4138189736410144e-06, "loss": 0.2842, "step": 9110 }, { "epoch": 0.78, "learning_rate": 2.412010447263917e-06, "loss": 0.2661, "step": 9111 }, { "epoch": 0.78, "learning_rate": 2.4102025057315615e-06, "loss": 0.3003, "step": 9112 }, { "epoch": 0.78, "learning_rate": 2.4083951491832947e-06, "loss": 0.3044, "step": 9113 }, { "epoch": 0.78, "learning_rate": 2.406588377758421e-06, "loss": 0.2915, "step": 9114 }, { "epoch": 0.78, "learning_rate": 2.4047821915961923e-06, "loss": 0.3181, "step": 9115 }, { "epoch": 0.78, "learning_rate": 2.402976590835824e-06, "loss": 0.2797, "step": 9116 }, { "epoch": 0.78, "learning_rate": 2.401171575616481e-06, "loss": 0.2449, "step": 9117 }, { "epoch": 0.78, "learning_rate": 2.399367146077286e-06, "loss": 0.2392, "step": 9118 }, { "epoch": 0.78, "learning_rate": 2.3975633023573164e-06, "loss": 0.2612, "step": 9119 }, { "epoch": 0.78, "learning_rate": 2.395760044595602e-06, "loss": 0.2234, "step": 9120 }, { "epoch": 0.78, "learning_rate": 2.3939573729311325e-06, "loss": 0.2739, "step": 9121 }, { "epoch": 0.78, "learning_rate": 2.3921552875028443e-06, "loss": 0.2234, "step": 9122 }, { "epoch": 0.78, "learning_rate": 2.390353788449631e-06, "loss": 0.2551, "step": 9123 }, { "epoch": 0.78, "learning_rate": 2.388552875910354e-06, "loss": 0.2613, "step": 9124 }, { "epoch": 0.78, "learning_rate": 2.3867525500238086e-06, "loss": 0.3028, "step": 9125 }, { "epoch": 0.78, "learning_rate": 2.384952810928759e-06, "loss": 0.2954, "step": 9126 }, { "epoch": 0.78, "learning_rate": 2.3831536587639193e-06, "loss": 0.278, "step": 9127 }, { "epoch": 0.78, "learning_rate": 2.38135509366796e-06, "loss": 0.2799, "step": 9128 }, { "epoch": 0.78, "learning_rate": 2.379557115779507e-06, "loss": 0.2349, "step": 9129 }, { "epoch": 0.78, "learning_rate": 2.377759725237133e-06, "loss": 0.2949, "step": 9130 }, { "epoch": 0.78, "learning_rate": 2.3759629221793823e-06, "loss": 0.2809, "step": 9131 }, { "epoch": 0.78, "learning_rate": 2.374166706744735e-06, "loss": 0.2585, "step": 9132 }, { "epoch": 0.78, "learning_rate": 2.372371079071638e-06, "loss": 0.2952, "step": 9133 }, { "epoch": 0.78, "learning_rate": 2.3705760392984887e-06, "loss": 0.2566, "step": 9134 }, { "epoch": 0.78, "learning_rate": 2.368781587563641e-06, "loss": 0.2382, "step": 9135 }, { "epoch": 0.78, "learning_rate": 2.366987724005404e-06, "loss": 0.2659, "step": 9136 }, { "epoch": 0.78, "learning_rate": 2.3651944487620315e-06, "loss": 0.272, "step": 9137 }, { "epoch": 0.78, "learning_rate": 2.363401761971752e-06, "loss": 0.2944, "step": 9138 }, { "epoch": 0.78, "learning_rate": 2.3616096637727282e-06, "loss": 0.2318, "step": 9139 }, { "epoch": 0.78, "learning_rate": 2.3598181543030885e-06, "loss": 0.3537, "step": 9140 }, { "epoch": 0.78, "learning_rate": 2.358027233700915e-06, "loss": 0.3098, "step": 9141 }, { "epoch": 0.78, "learning_rate": 2.356236902104242e-06, "loss": 0.3176, "step": 9142 }, { "epoch": 0.78, "learning_rate": 2.3544471596510633e-06, "loss": 0.3326, "step": 9143 }, { "epoch": 0.78, "learning_rate": 2.3526580064793125e-06, "loss": 0.2855, "step": 9144 }, { "epoch": 0.78, "learning_rate": 2.350869442726903e-06, "loss": 0.241, "step": 9145 }, { "epoch": 0.78, "learning_rate": 2.3490814685316777e-06, "loss": 0.2601, "step": 9146 }, { "epoch": 0.78, "learning_rate": 2.3472940840314496e-06, "loss": 0.2604, "step": 9147 }, { "epoch": 0.78, "learning_rate": 2.3455072893639784e-06, "loss": 0.259, "step": 9148 }, { "epoch": 0.78, "learning_rate": 2.3437210846669854e-06, "loss": 0.2193, "step": 9149 }, { "epoch": 0.78, "learning_rate": 2.3419354700781393e-06, "loss": 0.3079, "step": 9150 }, { "epoch": 0.78, "learning_rate": 2.3401504457350677e-06, "loss": 0.2879, "step": 9151 }, { "epoch": 0.78, "learning_rate": 2.338366011775355e-06, "loss": 0.255, "step": 9152 }, { "epoch": 0.78, "learning_rate": 2.3365821683365286e-06, "loss": 0.2527, "step": 9153 }, { "epoch": 0.78, "learning_rate": 2.3347989155560835e-06, "loss": 0.3129, "step": 9154 }, { "epoch": 0.78, "learning_rate": 2.333016253571463e-06, "loss": 0.2556, "step": 9155 }, { "epoch": 0.78, "learning_rate": 2.331234182520066e-06, "loss": 0.254, "step": 9156 }, { "epoch": 0.78, "learning_rate": 2.3294527025392454e-06, "loss": 0.2884, "step": 9157 }, { "epoch": 0.79, "learning_rate": 2.32767181376631e-06, "loss": 0.2582, "step": 9158 }, { "epoch": 0.79, "learning_rate": 2.3258915163385233e-06, "loss": 0.2702, "step": 9159 }, { "epoch": 0.79, "learning_rate": 2.324111810393097e-06, "loss": 0.2562, "step": 9160 }, { "epoch": 0.79, "learning_rate": 2.322332696067202e-06, "loss": 0.2828, "step": 9161 }, { "epoch": 0.79, "learning_rate": 2.320554173497972e-06, "loss": 0.3104, "step": 9162 }, { "epoch": 0.79, "learning_rate": 2.318776242822478e-06, "loss": 0.2874, "step": 9163 }, { "epoch": 0.79, "learning_rate": 2.3169989041777565e-06, "loss": 0.2489, "step": 9164 }, { "epoch": 0.79, "learning_rate": 2.315222157700797e-06, "loss": 0.3329, "step": 9165 }, { "epoch": 0.79, "learning_rate": 2.3134460035285433e-06, "loss": 0.2841, "step": 9166 }, { "epoch": 0.79, "learning_rate": 2.311670441797893e-06, "loss": 0.2308, "step": 9167 }, { "epoch": 0.79, "learning_rate": 2.30989547264569e-06, "loss": 0.2709, "step": 9168 }, { "epoch": 0.79, "learning_rate": 2.308121096208752e-06, "loss": 0.2532, "step": 9169 }, { "epoch": 0.79, "learning_rate": 2.30634731262383e-06, "loss": 0.2775, "step": 9170 }, { "epoch": 0.79, "learning_rate": 2.3045741220276428e-06, "loss": 0.3373, "step": 9171 }, { "epoch": 0.79, "learning_rate": 2.302801524556857e-06, "loss": 0.2536, "step": 9172 }, { "epoch": 0.79, "learning_rate": 2.301029520348097e-06, "loss": 0.2801, "step": 9173 }, { "epoch": 0.79, "learning_rate": 2.299258109537943e-06, "loss": 0.2907, "step": 9174 }, { "epoch": 0.79, "learning_rate": 2.297487292262918e-06, "loss": 0.2947, "step": 9175 }, { "epoch": 0.79, "learning_rate": 2.2957170686595176e-06, "loss": 0.2836, "step": 9176 }, { "epoch": 0.79, "learning_rate": 2.2939474388641757e-06, "loss": 0.2401, "step": 9177 }, { "epoch": 0.79, "learning_rate": 2.2921784030132886e-06, "loss": 0.2247, "step": 9178 }, { "epoch": 0.79, "learning_rate": 2.290409961243204e-06, "loss": 0.2394, "step": 9179 }, { "epoch": 0.79, "learning_rate": 2.2886421136902257e-06, "loss": 0.2419, "step": 9180 }, { "epoch": 0.79, "learning_rate": 2.2868748604906145e-06, "loss": 0.3106, "step": 9181 }, { "epoch": 0.79, "learning_rate": 2.2851082017805704e-06, "loss": 0.3028, "step": 9182 }, { "epoch": 0.79, "learning_rate": 2.2833421376962718e-06, "loss": 0.2422, "step": 9183 }, { "epoch": 0.79, "learning_rate": 2.281576668373828e-06, "loss": 0.2794, "step": 9184 }, { "epoch": 0.79, "learning_rate": 2.279811793949318e-06, "loss": 0.2437, "step": 9185 }, { "epoch": 0.79, "learning_rate": 2.278047514558769e-06, "loss": 0.2793, "step": 9186 }, { "epoch": 0.79, "learning_rate": 2.2762838303381607e-06, "loss": 0.2706, "step": 9187 }, { "epoch": 0.79, "learning_rate": 2.274520741423435e-06, "loss": 0.2377, "step": 9188 }, { "epoch": 0.79, "learning_rate": 2.2727582479504704e-06, "loss": 0.2642, "step": 9189 }, { "epoch": 0.79, "learning_rate": 2.270996350055126e-06, "loss": 0.2374, "step": 9190 }, { "epoch": 0.79, "learning_rate": 2.2692350478731893e-06, "loss": 0.2606, "step": 9191 }, { "epoch": 0.79, "learning_rate": 2.2674743415404166e-06, "loss": 0.2974, "step": 9192 }, { "epoch": 0.79, "learning_rate": 2.265714231192514e-06, "loss": 0.2502, "step": 9193 }, { "epoch": 0.79, "learning_rate": 2.2639547169651423e-06, "loss": 0.2722, "step": 9194 }, { "epoch": 0.79, "learning_rate": 2.262195798993916e-06, "loss": 0.3166, "step": 9195 }, { "epoch": 0.79, "learning_rate": 2.260437477414403e-06, "loss": 0.2901, "step": 9196 }, { "epoch": 0.79, "learning_rate": 2.2586797523621306e-06, "loss": 0.2933, "step": 9197 }, { "epoch": 0.79, "learning_rate": 2.2569226239725695e-06, "loss": 0.2832, "step": 9198 }, { "epoch": 0.79, "learning_rate": 2.255166092381149e-06, "loss": 0.2413, "step": 9199 }, { "epoch": 0.79, "learning_rate": 2.2534101577232647e-06, "loss": 0.251, "step": 9200 }, { "epoch": 0.79, "learning_rate": 2.2516548201342445e-06, "loss": 0.2373, "step": 9201 }, { "epoch": 0.79, "learning_rate": 2.249900079749385e-06, "loss": 0.2757, "step": 9202 }, { "epoch": 0.79, "learning_rate": 2.248145936703934e-06, "loss": 0.3094, "step": 9203 }, { "epoch": 0.79, "learning_rate": 2.246392391133091e-06, "loss": 0.248, "step": 9204 }, { "epoch": 0.79, "learning_rate": 2.244639443172013e-06, "loss": 0.2707, "step": 9205 }, { "epoch": 0.79, "learning_rate": 2.2428870929558012e-06, "loss": 0.2359, "step": 9206 }, { "epoch": 0.79, "learning_rate": 2.241135340619528e-06, "loss": 0.2504, "step": 9207 }, { "epoch": 0.79, "learning_rate": 2.2393841862982036e-06, "loss": 0.2808, "step": 9208 }, { "epoch": 0.79, "learning_rate": 2.2376336301267985e-06, "loss": 0.2327, "step": 9209 }, { "epoch": 0.79, "learning_rate": 2.235883672240239e-06, "loss": 0.3121, "step": 9210 }, { "epoch": 0.79, "learning_rate": 2.2341343127734028e-06, "loss": 0.2801, "step": 9211 }, { "epoch": 0.79, "learning_rate": 2.2323855518611227e-06, "loss": 0.2717, "step": 9212 }, { "epoch": 0.79, "learning_rate": 2.2306373896381795e-06, "loss": 0.2492, "step": 9213 }, { "epoch": 0.79, "learning_rate": 2.2288898262393212e-06, "loss": 0.2594, "step": 9214 }, { "epoch": 0.79, "learning_rate": 2.227142861799235e-06, "loss": 0.2529, "step": 9215 }, { "epoch": 0.79, "learning_rate": 2.22539649645257e-06, "loss": 0.2261, "step": 9216 }, { "epoch": 0.79, "learning_rate": 2.2236507303339273e-06, "loss": 0.2546, "step": 9217 }, { "epoch": 0.79, "learning_rate": 2.2219055635778618e-06, "loss": 0.2586, "step": 9218 }, { "epoch": 0.79, "learning_rate": 2.220160996318886e-06, "loss": 0.2361, "step": 9219 }, { "epoch": 0.79, "learning_rate": 2.2184170286914543e-06, "loss": 0.2383, "step": 9220 }, { "epoch": 0.79, "learning_rate": 2.216673660829992e-06, "loss": 0.2538, "step": 9221 }, { "epoch": 0.79, "learning_rate": 2.214930892868864e-06, "loss": 0.2588, "step": 9222 }, { "epoch": 0.79, "learning_rate": 2.2131887249423957e-06, "loss": 0.3054, "step": 9223 }, { "epoch": 0.79, "learning_rate": 2.211447157184864e-06, "loss": 0.2628, "step": 9224 }, { "epoch": 0.79, "learning_rate": 2.2097061897305016e-06, "loss": 0.2438, "step": 9225 }, { "epoch": 0.79, "learning_rate": 2.207965822713496e-06, "loss": 0.2842, "step": 9226 }, { "epoch": 0.79, "learning_rate": 2.2062260562679773e-06, "loss": 0.2513, "step": 9227 }, { "epoch": 0.79, "learning_rate": 2.2044868905280504e-06, "loss": 0.2938, "step": 9228 }, { "epoch": 0.79, "learning_rate": 2.2027483256277517e-06, "loss": 0.2933, "step": 9229 }, { "epoch": 0.79, "learning_rate": 2.2010103617010836e-06, "loss": 0.2712, "step": 9230 }, { "epoch": 0.79, "learning_rate": 2.1992729988820026e-06, "loss": 0.2474, "step": 9231 }, { "epoch": 0.79, "learning_rate": 2.197536237304414e-06, "loss": 0.2415, "step": 9232 }, { "epoch": 0.79, "learning_rate": 2.195800077102178e-06, "loss": 0.2503, "step": 9233 }, { "epoch": 0.79, "learning_rate": 2.1940645184091115e-06, "loss": 0.3033, "step": 9234 }, { "epoch": 0.79, "learning_rate": 2.1923295613589846e-06, "loss": 0.2966, "step": 9235 }, { "epoch": 0.79, "learning_rate": 2.190595206085513e-06, "loss": 0.2696, "step": 9236 }, { "epoch": 0.79, "learning_rate": 2.188861452722373e-06, "loss": 0.2515, "step": 9237 }, { "epoch": 0.79, "learning_rate": 2.1871283014032007e-06, "loss": 0.2637, "step": 9238 }, { "epoch": 0.79, "learning_rate": 2.1853957522615732e-06, "loss": 0.2508, "step": 9239 }, { "epoch": 0.79, "learning_rate": 2.1836638054310265e-06, "loss": 0.3, "step": 9240 }, { "epoch": 0.79, "learning_rate": 2.181932461045053e-06, "loss": 0.2824, "step": 9241 }, { "epoch": 0.79, "learning_rate": 2.1802017192370963e-06, "loss": 0.575, "step": 9242 }, { "epoch": 0.79, "learning_rate": 2.178471580140553e-06, "loss": 0.313, "step": 9243 }, { "epoch": 0.79, "learning_rate": 2.176742043888769e-06, "loss": 0.2617, "step": 9244 }, { "epoch": 0.79, "learning_rate": 2.1750131106150563e-06, "loss": 0.247, "step": 9245 }, { "epoch": 0.79, "learning_rate": 2.173284780452667e-06, "loss": 0.2808, "step": 9246 }, { "epoch": 0.79, "learning_rate": 2.171557053534814e-06, "loss": 0.6298, "step": 9247 }, { "epoch": 0.79, "learning_rate": 2.169829929994661e-06, "loss": 0.2391, "step": 9248 }, { "epoch": 0.79, "learning_rate": 2.1681034099653287e-06, "loss": 0.2638, "step": 9249 }, { "epoch": 0.79, "learning_rate": 2.1663774935798886e-06, "loss": 0.2831, "step": 9250 }, { "epoch": 0.79, "learning_rate": 2.1646521809713583e-06, "loss": 0.2561, "step": 9251 }, { "epoch": 0.79, "learning_rate": 2.162927472272728e-06, "loss": 0.272, "step": 9252 }, { "epoch": 0.79, "learning_rate": 2.161203367616922e-06, "loss": 0.2667, "step": 9253 }, { "epoch": 0.79, "learning_rate": 2.1594798671368265e-06, "loss": 0.2907, "step": 9254 }, { "epoch": 0.79, "learning_rate": 2.1577569709652833e-06, "loss": 0.2775, "step": 9255 }, { "epoch": 0.79, "learning_rate": 2.156034679235083e-06, "loss": 0.2592, "step": 9256 }, { "epoch": 0.79, "learning_rate": 2.154312992078973e-06, "loss": 0.2587, "step": 9257 }, { "epoch": 0.79, "learning_rate": 2.1525919096296455e-06, "loss": 0.2728, "step": 9258 }, { "epoch": 0.79, "learning_rate": 2.1508714320197644e-06, "loss": 0.2815, "step": 9259 }, { "epoch": 0.79, "learning_rate": 2.1491515593819266e-06, "loss": 0.2713, "step": 9260 }, { "epoch": 0.79, "learning_rate": 2.1474322918486956e-06, "loss": 0.2749, "step": 9261 }, { "epoch": 0.79, "learning_rate": 2.1457136295525817e-06, "loss": 0.3287, "step": 9262 }, { "epoch": 0.79, "learning_rate": 2.143995572626052e-06, "loss": 0.3051, "step": 9263 }, { "epoch": 0.79, "learning_rate": 2.1422781212015286e-06, "loss": 0.2537, "step": 9264 }, { "epoch": 0.79, "learning_rate": 2.140561275411377e-06, "loss": 0.2739, "step": 9265 }, { "epoch": 0.79, "learning_rate": 2.138845035387932e-06, "loss": 0.3284, "step": 9266 }, { "epoch": 0.79, "learning_rate": 2.1371294012634667e-06, "loss": 0.2881, "step": 9267 }, { "epoch": 0.79, "learning_rate": 2.135414373170215e-06, "loss": 0.2631, "step": 9268 }, { "epoch": 0.79, "learning_rate": 2.1336999512403633e-06, "loss": 0.2816, "step": 9269 }, { "epoch": 0.79, "learning_rate": 2.131986135606051e-06, "loss": 0.2986, "step": 9270 }, { "epoch": 0.79, "learning_rate": 2.130272926399374e-06, "loss": 0.2806, "step": 9271 }, { "epoch": 0.79, "learning_rate": 2.1285603237523677e-06, "loss": 0.3137, "step": 9272 }, { "epoch": 0.79, "learning_rate": 2.1268483277970442e-06, "loss": 0.2766, "step": 9273 }, { "epoch": 0.79, "learning_rate": 2.1251369386653454e-06, "loss": 0.2699, "step": 9274 }, { "epoch": 0.8, "learning_rate": 2.123426156489178e-06, "loss": 0.2676, "step": 9275 }, { "epoch": 0.8, "learning_rate": 2.1217159814004096e-06, "loss": 0.2808, "step": 9276 }, { "epoch": 0.8, "learning_rate": 2.120006413530842e-06, "loss": 0.2858, "step": 9277 }, { "epoch": 0.8, "learning_rate": 2.1182974530122435e-06, "loss": 0.2707, "step": 9278 }, { "epoch": 0.8, "learning_rate": 2.116589099976334e-06, "loss": 0.2557, "step": 9279 }, { "epoch": 0.8, "learning_rate": 2.114881354554782e-06, "loss": 0.2247, "step": 9280 }, { "epoch": 0.8, "learning_rate": 2.113174216879218e-06, "loss": 0.3052, "step": 9281 }, { "epoch": 0.8, "learning_rate": 2.111467687081209e-06, "loss": 0.2475, "step": 9282 }, { "epoch": 0.8, "learning_rate": 2.1097617652922973e-06, "loss": 0.243, "step": 9283 }, { "epoch": 0.8, "learning_rate": 2.1080564516439605e-06, "loss": 0.3114, "step": 9284 }, { "epoch": 0.8, "learning_rate": 2.1063517462676365e-06, "loss": 0.2689, "step": 9285 }, { "epoch": 0.8, "learning_rate": 2.1046476492947155e-06, "loss": 0.2492, "step": 9286 }, { "epoch": 0.8, "learning_rate": 2.1029441608565425e-06, "loss": 0.2549, "step": 9287 }, { "epoch": 0.8, "learning_rate": 2.101241281084416e-06, "loss": 0.278, "step": 9288 }, { "epoch": 0.8, "learning_rate": 2.099539010109577e-06, "loss": 0.2597, "step": 9289 }, { "epoch": 0.8, "learning_rate": 2.0978373480632386e-06, "loss": 0.2695, "step": 9290 }, { "epoch": 0.8, "learning_rate": 2.0961362950765495e-06, "loss": 0.2772, "step": 9291 }, { "epoch": 0.8, "learning_rate": 2.0944358512806207e-06, "loss": 0.259, "step": 9292 }, { "epoch": 0.8, "learning_rate": 2.0927360168065135e-06, "loss": 0.2773, "step": 9293 }, { "epoch": 0.8, "learning_rate": 2.0910367917852437e-06, "loss": 0.3198, "step": 9294 }, { "epoch": 0.8, "learning_rate": 2.0893381763477816e-06, "loss": 0.272, "step": 9295 }, { "epoch": 0.8, "learning_rate": 2.087640170625039e-06, "loss": 0.2737, "step": 9296 }, { "epoch": 0.8, "learning_rate": 2.085942774747901e-06, "loss": 0.2946, "step": 9297 }, { "epoch": 0.8, "learning_rate": 2.084245988847188e-06, "loss": 0.2377, "step": 9298 }, { "epoch": 0.8, "learning_rate": 2.0825498130536804e-06, "loss": 0.238, "step": 9299 }, { "epoch": 0.8, "learning_rate": 2.080854247498112e-06, "loss": 0.2417, "step": 9300 }, { "epoch": 0.8, "learning_rate": 2.079159292311169e-06, "loss": 0.2924, "step": 9301 }, { "epoch": 0.8, "learning_rate": 2.077464947623492e-06, "loss": 0.2801, "step": 9302 }, { "epoch": 0.8, "learning_rate": 2.075771213565665e-06, "loss": 0.2786, "step": 9303 }, { "epoch": 0.8, "learning_rate": 2.0740780902682444e-06, "loss": 0.2521, "step": 9304 }, { "epoch": 0.8, "learning_rate": 2.0723855778617175e-06, "loss": 0.2524, "step": 9305 }, { "epoch": 0.8, "learning_rate": 2.0706936764765393e-06, "loss": 0.2787, "step": 9306 }, { "epoch": 0.8, "learning_rate": 2.069002386243113e-06, "loss": 0.6023, "step": 9307 }, { "epoch": 0.8, "learning_rate": 2.067311707291794e-06, "loss": 0.2877, "step": 9308 }, { "epoch": 0.8, "learning_rate": 2.065621639752895e-06, "loss": 0.2395, "step": 9309 }, { "epoch": 0.8, "learning_rate": 2.0639321837566696e-06, "loss": 0.3077, "step": 9310 }, { "epoch": 0.8, "learning_rate": 2.0622433394333443e-06, "loss": 0.2843, "step": 9311 }, { "epoch": 0.8, "learning_rate": 2.0605551069130767e-06, "loss": 0.2504, "step": 9312 }, { "epoch": 0.8, "learning_rate": 2.0588674863259907e-06, "loss": 0.2914, "step": 9313 }, { "epoch": 0.8, "learning_rate": 2.057180477802164e-06, "loss": 0.2488, "step": 9314 }, { "epoch": 0.8, "learning_rate": 2.0554940814716174e-06, "loss": 0.2402, "step": 9315 }, { "epoch": 0.8, "learning_rate": 2.0538082974643325e-06, "loss": 0.2429, "step": 9316 }, { "epoch": 0.8, "learning_rate": 2.0521231259102404e-06, "loss": 0.2727, "step": 9317 }, { "epoch": 0.8, "learning_rate": 2.0504385669392268e-06, "loss": 0.3358, "step": 9318 }, { "epoch": 0.8, "learning_rate": 2.0487546206811304e-06, "loss": 0.2772, "step": 9319 }, { "epoch": 0.8, "learning_rate": 2.047071287265735e-06, "loss": 0.2646, "step": 9320 }, { "epoch": 0.8, "learning_rate": 2.0453885668227923e-06, "loss": 0.261, "step": 9321 }, { "epoch": 0.8, "learning_rate": 2.043706459481992e-06, "loss": 0.2285, "step": 9322 }, { "epoch": 0.8, "learning_rate": 2.042024965372985e-06, "loss": 0.2607, "step": 9323 }, { "epoch": 0.8, "learning_rate": 2.040344084625372e-06, "loss": 0.265, "step": 9324 }, { "epoch": 0.8, "learning_rate": 2.0386638173687067e-06, "loss": 0.2697, "step": 9325 }, { "epoch": 0.8, "learning_rate": 2.0369841637324992e-06, "loss": 0.2578, "step": 9326 }, { "epoch": 0.8, "learning_rate": 2.0353051238462006e-06, "loss": 0.2763, "step": 9327 }, { "epoch": 0.8, "learning_rate": 2.033626697839234e-06, "loss": 0.6049, "step": 9328 }, { "epoch": 0.8, "learning_rate": 2.0319488858409552e-06, "loss": 0.5861, "step": 9329 }, { "epoch": 0.8, "learning_rate": 2.030271687980685e-06, "loss": 0.315, "step": 9330 }, { "epoch": 0.8, "learning_rate": 2.0285951043876937e-06, "loss": 0.2921, "step": 9331 }, { "epoch": 0.8, "learning_rate": 2.0269191351912042e-06, "loss": 0.313, "step": 9332 }, { "epoch": 0.8, "learning_rate": 2.025243780520394e-06, "loss": 0.2839, "step": 9333 }, { "epoch": 0.8, "learning_rate": 2.023569040504384e-06, "loss": 0.2872, "step": 9334 }, { "epoch": 0.8, "learning_rate": 2.0218949152722643e-06, "loss": 0.2852, "step": 9335 }, { "epoch": 0.8, "learning_rate": 2.020221404953061e-06, "loss": 0.2509, "step": 9336 }, { "epoch": 0.8, "learning_rate": 2.018548509675763e-06, "loss": 0.2587, "step": 9337 }, { "epoch": 0.8, "learning_rate": 2.016876229569308e-06, "loss": 0.2901, "step": 9338 }, { "epoch": 0.8, "learning_rate": 2.0152045647625874e-06, "loss": 0.2545, "step": 9339 }, { "epoch": 0.8, "learning_rate": 2.013533515384447e-06, "loss": 0.2796, "step": 9340 }, { "epoch": 0.8, "learning_rate": 2.0118630815636763e-06, "loss": 0.2828, "step": 9341 }, { "epoch": 0.8, "learning_rate": 2.0101932634290345e-06, "loss": 0.3207, "step": 9342 }, { "epoch": 0.8, "learning_rate": 2.0085240611092137e-06, "loss": 0.2821, "step": 9343 }, { "epoch": 0.8, "learning_rate": 2.006855474732872e-06, "loss": 0.2629, "step": 9344 }, { "epoch": 0.8, "learning_rate": 2.0051875044286138e-06, "loss": 0.2532, "step": 9345 }, { "epoch": 0.8, "learning_rate": 2.003520150325e-06, "loss": 0.2896, "step": 9346 }, { "epoch": 0.8, "learning_rate": 2.001853412550544e-06, "loss": 0.2838, "step": 9347 }, { "epoch": 0.8, "learning_rate": 2.0001872912337016e-06, "loss": 0.2639, "step": 9348 }, { "epoch": 0.8, "learning_rate": 1.9985217865029005e-06, "loss": 0.2825, "step": 9349 }, { "epoch": 0.8, "learning_rate": 1.9968568984865e-06, "loss": 0.2417, "step": 9350 }, { "epoch": 0.8, "learning_rate": 1.995192627312823e-06, "loss": 0.2795, "step": 9351 }, { "epoch": 0.8, "learning_rate": 1.9935289731101503e-06, "loss": 0.3243, "step": 9352 }, { "epoch": 0.8, "learning_rate": 1.9918659360067005e-06, "loss": 0.2706, "step": 9353 }, { "epoch": 0.8, "learning_rate": 1.9902035161306574e-06, "loss": 0.2656, "step": 9354 }, { "epoch": 0.8, "learning_rate": 1.9885417136101446e-06, "loss": 0.2835, "step": 9355 }, { "epoch": 0.8, "learning_rate": 1.9868805285732538e-06, "loss": 0.2835, "step": 9356 }, { "epoch": 0.8, "learning_rate": 1.9852199611480207e-06, "loss": 0.2807, "step": 9357 }, { "epoch": 0.8, "learning_rate": 1.983560011462425e-06, "loss": 0.265, "step": 9358 }, { "epoch": 0.8, "learning_rate": 1.9819006796444185e-06, "loss": 0.2463, "step": 9359 }, { "epoch": 0.8, "learning_rate": 1.9802419658218873e-06, "loss": 0.2498, "step": 9360 }, { "epoch": 0.8, "learning_rate": 1.978583870122678e-06, "loss": 0.2531, "step": 9361 }, { "epoch": 0.8, "learning_rate": 1.9769263926745886e-06, "loss": 0.2675, "step": 9362 }, { "epoch": 0.8, "learning_rate": 1.9752695336053697e-06, "loss": 0.26, "step": 9363 }, { "epoch": 0.8, "learning_rate": 1.9736132930427263e-06, "loss": 0.2839, "step": 9364 }, { "epoch": 0.8, "learning_rate": 1.971957671114306e-06, "loss": 0.2706, "step": 9365 }, { "epoch": 0.8, "learning_rate": 1.9703026679477253e-06, "loss": 0.2814, "step": 9366 }, { "epoch": 0.8, "learning_rate": 1.968648283670538e-06, "loss": 0.2225, "step": 9367 }, { "epoch": 0.8, "learning_rate": 1.9669945184102555e-06, "loss": 0.2918, "step": 9368 }, { "epoch": 0.8, "learning_rate": 1.9653413722943437e-06, "loss": 0.2851, "step": 9369 }, { "epoch": 0.8, "learning_rate": 1.963688845450218e-06, "loss": 0.2845, "step": 9370 }, { "epoch": 0.8, "learning_rate": 1.9620369380052507e-06, "loss": 0.2684, "step": 9371 }, { "epoch": 0.8, "learning_rate": 1.9603856500867537e-06, "loss": 0.2231, "step": 9372 }, { "epoch": 0.8, "learning_rate": 1.9587349818220113e-06, "loss": 0.2791, "step": 9373 }, { "epoch": 0.8, "learning_rate": 1.957084933338241e-06, "loss": 0.2742, "step": 9374 }, { "epoch": 0.8, "learning_rate": 1.955435504762624e-06, "loss": 0.265, "step": 9375 }, { "epoch": 0.8, "learning_rate": 1.953786696222287e-06, "loss": 0.2646, "step": 9376 }, { "epoch": 0.8, "learning_rate": 1.9521385078443156e-06, "loss": 0.2541, "step": 9377 }, { "epoch": 0.8, "learning_rate": 1.9504909397557436e-06, "loss": 0.2821, "step": 9378 }, { "epoch": 0.8, "learning_rate": 1.948843992083551e-06, "loss": 0.2827, "step": 9379 }, { "epoch": 0.8, "learning_rate": 1.9471976649546876e-06, "loss": 0.2851, "step": 9380 }, { "epoch": 0.8, "learning_rate": 1.945551958496035e-06, "loss": 0.2683, "step": 9381 }, { "epoch": 0.8, "learning_rate": 1.94390687283444e-06, "loss": 0.316, "step": 9382 }, { "epoch": 0.8, "learning_rate": 1.9422624080966956e-06, "loss": 0.2656, "step": 9383 }, { "epoch": 0.8, "learning_rate": 1.940618564409551e-06, "loss": 0.3015, "step": 9384 }, { "epoch": 0.8, "learning_rate": 1.938975341899708e-06, "loss": 0.2662, "step": 9385 }, { "epoch": 0.8, "learning_rate": 1.937332740693809e-06, "loss": 0.3258, "step": 9386 }, { "epoch": 0.8, "learning_rate": 1.9356907609184695e-06, "loss": 0.3202, "step": 9387 }, { "epoch": 0.8, "learning_rate": 1.9340494027002365e-06, "loss": 0.3433, "step": 9388 }, { "epoch": 0.8, "learning_rate": 1.932408666165617e-06, "loss": 0.2412, "step": 9389 }, { "epoch": 0.8, "learning_rate": 1.9307685514410803e-06, "loss": 0.2311, "step": 9390 }, { "epoch": 0.8, "learning_rate": 1.92912905865303e-06, "loss": 0.237, "step": 9391 }, { "epoch": 0.81, "learning_rate": 1.9274901879278342e-06, "loss": 0.3076, "step": 9392 }, { "epoch": 0.81, "learning_rate": 1.925851939391803e-06, "loss": 0.2809, "step": 9393 }, { "epoch": 0.81, "learning_rate": 1.924214313171211e-06, "loss": 0.2503, "step": 9394 }, { "epoch": 0.81, "learning_rate": 1.9225773093922785e-06, "loss": 0.2345, "step": 9395 }, { "epoch": 0.81, "learning_rate": 1.920940928181171e-06, "loss": 0.2635, "step": 9396 }, { "epoch": 0.81, "learning_rate": 1.919305169664021e-06, "loss": 0.257, "step": 9397 }, { "epoch": 0.81, "learning_rate": 1.9176700339668986e-06, "loss": 0.3221, "step": 9398 }, { "epoch": 0.81, "learning_rate": 1.9160355212158345e-06, "loss": 0.2471, "step": 9399 }, { "epoch": 0.81, "learning_rate": 1.9144016315368075e-06, "loss": 0.2194, "step": 9400 }, { "epoch": 0.81, "learning_rate": 1.9127683650557505e-06, "loss": 0.2868, "step": 9401 }, { "epoch": 0.81, "learning_rate": 1.9111357218985504e-06, "loss": 0.5947, "step": 9402 }, { "epoch": 0.81, "learning_rate": 1.9095037021910366e-06, "loss": 0.2792, "step": 9403 }, { "epoch": 0.81, "learning_rate": 1.9078723060590053e-06, "loss": 0.2946, "step": 9404 }, { "epoch": 0.81, "learning_rate": 1.9062415336281904e-06, "loss": 0.2424, "step": 9405 }, { "epoch": 0.81, "learning_rate": 1.9046113850242843e-06, "loss": 0.5575, "step": 9406 }, { "epoch": 0.81, "learning_rate": 1.9029818603729332e-06, "loss": 0.2314, "step": 9407 }, { "epoch": 0.81, "learning_rate": 1.9013529597997315e-06, "loss": 0.2533, "step": 9408 }, { "epoch": 0.81, "learning_rate": 1.8997246834302297e-06, "loss": 0.2859, "step": 9409 }, { "epoch": 0.81, "learning_rate": 1.8980970313899193e-06, "loss": 0.2224, "step": 9410 }, { "epoch": 0.81, "learning_rate": 1.8964700038042628e-06, "loss": 0.2674, "step": 9411 }, { "epoch": 0.81, "learning_rate": 1.894843600798655e-06, "loss": 0.2587, "step": 9412 }, { "epoch": 0.81, "learning_rate": 1.8932178224984533e-06, "loss": 0.261, "step": 9413 }, { "epoch": 0.81, "learning_rate": 1.8915926690289643e-06, "loss": 0.2709, "step": 9414 }, { "epoch": 0.81, "learning_rate": 1.8899681405154491e-06, "loss": 0.2723, "step": 9415 }, { "epoch": 0.81, "learning_rate": 1.8883442370831183e-06, "loss": 0.2507, "step": 9416 }, { "epoch": 0.81, "learning_rate": 1.8867209588571288e-06, "loss": 0.2488, "step": 9417 }, { "epoch": 0.81, "learning_rate": 1.8850983059626026e-06, "loss": 0.2751, "step": 9418 }, { "epoch": 0.81, "learning_rate": 1.8834762785246007e-06, "loss": 0.2551, "step": 9419 }, { "epoch": 0.81, "learning_rate": 1.881854876668142e-06, "loss": 0.282, "step": 9420 }, { "epoch": 0.81, "learning_rate": 1.8802341005181957e-06, "loss": 0.2819, "step": 9421 }, { "epoch": 0.81, "learning_rate": 1.8786139501996847e-06, "loss": 0.5848, "step": 9422 }, { "epoch": 0.81, "learning_rate": 1.876994425837484e-06, "loss": 0.2703, "step": 9423 }, { "epoch": 0.81, "learning_rate": 1.8753755275564112e-06, "loss": 0.281, "step": 9424 }, { "epoch": 0.81, "learning_rate": 1.8737572554812522e-06, "loss": 0.244, "step": 9425 }, { "epoch": 0.81, "learning_rate": 1.8721396097367294e-06, "loss": 0.2943, "step": 9426 }, { "epoch": 0.81, "learning_rate": 1.870522590447521e-06, "loss": 0.2612, "step": 9427 }, { "epoch": 0.81, "learning_rate": 1.8689061977382684e-06, "loss": 0.2703, "step": 9428 }, { "epoch": 0.81, "learning_rate": 1.867290431733546e-06, "loss": 0.2958, "step": 9429 }, { "epoch": 0.81, "learning_rate": 1.8656752925578948e-06, "loss": 0.2273, "step": 9430 }, { "epoch": 0.81, "learning_rate": 1.8640607803357936e-06, "loss": 0.2496, "step": 9431 }, { "epoch": 0.81, "learning_rate": 1.8624468951916896e-06, "loss": 0.2704, "step": 9432 }, { "epoch": 0.81, "learning_rate": 1.8608336372499736e-06, "loss": 0.2464, "step": 9433 }, { "epoch": 0.81, "learning_rate": 1.8592210066349781e-06, "loss": 0.2734, "step": 9434 }, { "epoch": 0.81, "learning_rate": 1.857609003471007e-06, "loss": 0.279, "step": 9435 }, { "epoch": 0.81, "learning_rate": 1.8559976278823e-06, "loss": 0.2687, "step": 9436 }, { "epoch": 0.81, "learning_rate": 1.8543868799930542e-06, "loss": 0.2455, "step": 9437 }, { "epoch": 0.81, "learning_rate": 1.8527767599274193e-06, "loss": 0.2799, "step": 9438 }, { "epoch": 0.81, "learning_rate": 1.8511672678094949e-06, "loss": 0.2882, "step": 9439 }, { "epoch": 0.81, "learning_rate": 1.8495584037633364e-06, "loss": 0.3014, "step": 9440 }, { "epoch": 0.81, "learning_rate": 1.8479501679129375e-06, "loss": 0.2403, "step": 9441 }, { "epoch": 0.81, "learning_rate": 1.846342560382265e-06, "loss": 0.2645, "step": 9442 }, { "epoch": 0.81, "learning_rate": 1.844735581295216e-06, "loss": 0.263, "step": 9443 }, { "epoch": 0.81, "learning_rate": 1.8431292307756532e-06, "loss": 0.2413, "step": 9444 }, { "epoch": 0.81, "learning_rate": 1.8415235089473848e-06, "loss": 0.2513, "step": 9445 }, { "epoch": 0.81, "learning_rate": 1.839918415934171e-06, "loss": 0.2727, "step": 9446 }, { "epoch": 0.81, "learning_rate": 1.8383139518597293e-06, "loss": 0.347, "step": 9447 }, { "epoch": 0.81, "learning_rate": 1.8367101168477152e-06, "loss": 0.2689, "step": 9448 }, { "epoch": 0.81, "learning_rate": 1.8351069110217535e-06, "loss": 0.2823, "step": 9449 }, { "epoch": 0.81, "learning_rate": 1.8335043345054048e-06, "loss": 0.2725, "step": 9450 }, { "epoch": 0.81, "learning_rate": 1.831902387422191e-06, "loss": 0.278, "step": 9451 }, { "epoch": 0.81, "learning_rate": 1.8303010698955803e-06, "loss": 0.265, "step": 9452 }, { "epoch": 0.81, "learning_rate": 1.8287003820489956e-06, "loss": 0.2759, "step": 9453 }, { "epoch": 0.81, "learning_rate": 1.8271003240058127e-06, "loss": 0.2891, "step": 9454 }, { "epoch": 0.81, "learning_rate": 1.8255008958893483e-06, "loss": 0.2639, "step": 9455 }, { "epoch": 0.81, "learning_rate": 1.8239020978228894e-06, "loss": 0.248, "step": 9456 }, { "epoch": 0.81, "learning_rate": 1.822303929929654e-06, "loss": 0.2601, "step": 9457 }, { "epoch": 0.81, "learning_rate": 1.820706392332824e-06, "loss": 0.6052, "step": 9458 }, { "epoch": 0.81, "learning_rate": 1.8191094851555314e-06, "loss": 0.2992, "step": 9459 }, { "epoch": 0.81, "learning_rate": 1.8175132085208558e-06, "loss": 0.2775, "step": 9460 }, { "epoch": 0.81, "learning_rate": 1.8159175625518344e-06, "loss": 0.2957, "step": 9461 }, { "epoch": 0.81, "learning_rate": 1.814322547371443e-06, "loss": 0.2314, "step": 9462 }, { "epoch": 0.81, "learning_rate": 1.8127281631026284e-06, "loss": 0.2336, "step": 9463 }, { "epoch": 0.81, "learning_rate": 1.8111344098682703e-06, "loss": 0.3176, "step": 9464 }, { "epoch": 0.81, "learning_rate": 1.8095412877912056e-06, "loss": 0.265, "step": 9465 }, { "epoch": 0.81, "learning_rate": 1.8079487969942344e-06, "loss": 0.3464, "step": 9466 }, { "epoch": 0.81, "learning_rate": 1.806356937600089e-06, "loss": 0.2669, "step": 9467 }, { "epoch": 0.81, "learning_rate": 1.8047657097314675e-06, "loss": 0.2624, "step": 9468 }, { "epoch": 0.81, "learning_rate": 1.8031751135110065e-06, "loss": 0.2947, "step": 9469 }, { "epoch": 0.81, "learning_rate": 1.8015851490613079e-06, "loss": 0.275, "step": 9470 }, { "epoch": 0.81, "learning_rate": 1.799995816504919e-06, "loss": 0.2711, "step": 9471 }, { "epoch": 0.81, "learning_rate": 1.7984071159643312e-06, "loss": 0.251, "step": 9472 }, { "epoch": 0.81, "learning_rate": 1.7968190475620018e-06, "loss": 0.2641, "step": 9473 }, { "epoch": 0.81, "learning_rate": 1.795231611420325e-06, "loss": 0.313, "step": 9474 }, { "epoch": 0.81, "learning_rate": 1.7936448076616542e-06, "loss": 0.3232, "step": 9475 }, { "epoch": 0.81, "learning_rate": 1.7920586364082926e-06, "loss": 0.3124, "step": 9476 }, { "epoch": 0.81, "learning_rate": 1.7904730977824958e-06, "loss": 0.2873, "step": 9477 }, { "epoch": 0.81, "learning_rate": 1.7888881919064694e-06, "loss": 0.2629, "step": 9478 }, { "epoch": 0.81, "learning_rate": 1.7873039189023644e-06, "loss": 0.2428, "step": 9479 }, { "epoch": 0.81, "learning_rate": 1.7857202788922977e-06, "loss": 0.2529, "step": 9480 }, { "epoch": 0.81, "learning_rate": 1.784137271998323e-06, "loss": 0.2679, "step": 9481 }, { "epoch": 0.81, "learning_rate": 1.78255489834245e-06, "loss": 0.2695, "step": 9482 }, { "epoch": 0.81, "learning_rate": 1.7809731580466427e-06, "loss": 0.2533, "step": 9483 }, { "epoch": 0.81, "learning_rate": 1.7793920512328122e-06, "loss": 0.2829, "step": 9484 }, { "epoch": 0.81, "learning_rate": 1.7778115780228267e-06, "loss": 0.2685, "step": 9485 }, { "epoch": 0.81, "learning_rate": 1.776231738538492e-06, "loss": 0.3014, "step": 9486 }, { "epoch": 0.81, "learning_rate": 1.7746525329015852e-06, "loss": 0.263, "step": 9487 }, { "epoch": 0.81, "learning_rate": 1.7730739612338166e-06, "loss": 0.2858, "step": 9488 }, { "epoch": 0.81, "learning_rate": 1.7714960236568556e-06, "loss": 0.2525, "step": 9489 }, { "epoch": 0.81, "learning_rate": 1.7699187202923241e-06, "loss": 0.282, "step": 9490 }, { "epoch": 0.81, "learning_rate": 1.76834205126179e-06, "loss": 0.3347, "step": 9491 }, { "epoch": 0.81, "learning_rate": 1.7667660166867806e-06, "loss": 0.2742, "step": 9492 }, { "epoch": 0.81, "learning_rate": 1.76519061668876e-06, "loss": 0.2836, "step": 9493 }, { "epoch": 0.81, "learning_rate": 1.763615851389161e-06, "loss": 0.2911, "step": 9494 }, { "epoch": 0.81, "learning_rate": 1.7620417209093544e-06, "loss": 0.2878, "step": 9495 }, { "epoch": 0.81, "learning_rate": 1.7604682253706652e-06, "loss": 0.2726, "step": 9496 }, { "epoch": 0.81, "learning_rate": 1.7588953648943742e-06, "loss": 0.5618, "step": 9497 }, { "epoch": 0.81, "learning_rate": 1.7573231396017064e-06, "loss": 0.3093, "step": 9498 }, { "epoch": 0.81, "learning_rate": 1.7557515496138455e-06, "loss": 0.2517, "step": 9499 }, { "epoch": 0.81, "learning_rate": 1.7541805950519154e-06, "loss": 0.2678, "step": 9500 }, { "epoch": 0.81, "learning_rate": 1.7526102760370056e-06, "loss": 0.2991, "step": 9501 }, { "epoch": 0.81, "learning_rate": 1.7510405926901408e-06, "loss": 0.2891, "step": 9502 }, { "epoch": 0.81, "learning_rate": 1.7494715451323063e-06, "loss": 0.2471, "step": 9503 }, { "epoch": 0.81, "learning_rate": 1.7479031334844421e-06, "loss": 0.2979, "step": 9504 }, { "epoch": 0.81, "learning_rate": 1.746335357867428e-06, "loss": 0.2501, "step": 9505 }, { "epoch": 0.81, "learning_rate": 1.7447682184021042e-06, "loss": 0.2697, "step": 9506 }, { "epoch": 0.81, "learning_rate": 1.7432017152092507e-06, "loss": 0.2668, "step": 9507 }, { "epoch": 0.82, "learning_rate": 1.7416358484096141e-06, "loss": 0.2687, "step": 9508 }, { "epoch": 0.82, "learning_rate": 1.7400706181238824e-06, "loss": 0.2621, "step": 9509 }, { "epoch": 0.82, "learning_rate": 1.7385060244726882e-06, "loss": 0.2627, "step": 9510 }, { "epoch": 0.82, "learning_rate": 1.7369420675766347e-06, "loss": 0.2925, "step": 9511 }, { "epoch": 0.82, "learning_rate": 1.7353787475562544e-06, "loss": 0.3026, "step": 9512 }, { "epoch": 0.82, "learning_rate": 1.7338160645320435e-06, "loss": 0.2543, "step": 9513 }, { "epoch": 0.82, "learning_rate": 1.7322540186244462e-06, "loss": 0.2983, "step": 9514 }, { "epoch": 0.82, "learning_rate": 1.730692609953858e-06, "loss": 0.2759, "step": 9515 }, { "epoch": 0.82, "learning_rate": 1.7291318386406241e-06, "loss": 0.338, "step": 9516 }, { "epoch": 0.82, "learning_rate": 1.7275717048050367e-06, "loss": 0.2865, "step": 9517 }, { "epoch": 0.82, "learning_rate": 1.7260122085673525e-06, "loss": 0.3047, "step": 9518 }, { "epoch": 0.82, "learning_rate": 1.7244533500477612e-06, "loss": 0.2775, "step": 9519 }, { "epoch": 0.82, "learning_rate": 1.7228951293664142e-06, "loss": 0.285, "step": 9520 }, { "epoch": 0.82, "learning_rate": 1.7213375466434134e-06, "loss": 0.2306, "step": 9521 }, { "epoch": 0.82, "learning_rate": 1.7197806019988084e-06, "loss": 0.2451, "step": 9522 }, { "epoch": 0.82, "learning_rate": 1.7182242955526029e-06, "loss": 0.2715, "step": 9523 }, { "epoch": 0.82, "learning_rate": 1.7166686274247424e-06, "loss": 0.238, "step": 9524 }, { "epoch": 0.82, "learning_rate": 1.7151135977351397e-06, "loss": 0.2604, "step": 9525 }, { "epoch": 0.82, "learning_rate": 1.713559206603642e-06, "loss": 0.3048, "step": 9526 }, { "epoch": 0.82, "learning_rate": 1.7120054541500552e-06, "loss": 0.5493, "step": 9527 }, { "epoch": 0.82, "learning_rate": 1.7104523404941365e-06, "loss": 0.2971, "step": 9528 }, { "epoch": 0.82, "learning_rate": 1.7088998657555922e-06, "loss": 0.2821, "step": 9529 }, { "epoch": 0.82, "learning_rate": 1.7073480300540802e-06, "loss": 0.2471, "step": 9530 }, { "epoch": 0.82, "learning_rate": 1.7057968335092024e-06, "loss": 0.2377, "step": 9531 }, { "epoch": 0.82, "learning_rate": 1.7042462762405265e-06, "loss": 0.2661, "step": 9532 }, { "epoch": 0.82, "learning_rate": 1.7026963583675549e-06, "loss": 0.2929, "step": 9533 }, { "epoch": 0.82, "learning_rate": 1.7011470800097496e-06, "loss": 0.3049, "step": 9534 }, { "epoch": 0.82, "learning_rate": 1.6995984412865218e-06, "loss": 0.2688, "step": 9535 }, { "epoch": 0.82, "learning_rate": 1.6980504423172317e-06, "loss": 0.2785, "step": 9536 }, { "epoch": 0.82, "learning_rate": 1.696503083221196e-06, "loss": 0.2885, "step": 9537 }, { "epoch": 0.82, "learning_rate": 1.694956364117668e-06, "loss": 0.2571, "step": 9538 }, { "epoch": 0.82, "learning_rate": 1.6934102851258726e-06, "loss": 0.285, "step": 9539 }, { "epoch": 0.82, "learning_rate": 1.6918648463649668e-06, "loss": 0.2803, "step": 9540 }, { "epoch": 0.82, "learning_rate": 1.6903200479540627e-06, "loss": 0.2862, "step": 9541 }, { "epoch": 0.82, "learning_rate": 1.6887758900122352e-06, "loss": 0.301, "step": 9542 }, { "epoch": 0.82, "learning_rate": 1.6872323726584938e-06, "loss": 0.2347, "step": 9543 }, { "epoch": 0.82, "learning_rate": 1.6856894960118087e-06, "loss": 0.2665, "step": 9544 }, { "epoch": 0.82, "learning_rate": 1.6841472601910892e-06, "loss": 0.2335, "step": 9545 }, { "epoch": 0.82, "learning_rate": 1.6826056653152122e-06, "loss": 0.5646, "step": 9546 }, { "epoch": 0.82, "learning_rate": 1.6810647115029954e-06, "loss": 0.3072, "step": 9547 }, { "epoch": 0.82, "learning_rate": 1.6795243988732e-06, "loss": 0.3416, "step": 9548 }, { "epoch": 0.82, "learning_rate": 1.677984727544557e-06, "loss": 0.2332, "step": 9549 }, { "epoch": 0.82, "learning_rate": 1.6764456976357279e-06, "loss": 0.3188, "step": 9550 }, { "epoch": 0.82, "learning_rate": 1.674907309265338e-06, "loss": 0.287, "step": 9551 }, { "epoch": 0.82, "learning_rate": 1.6733695625519553e-06, "loss": 0.286, "step": 9552 }, { "epoch": 0.82, "learning_rate": 1.6718324576141043e-06, "loss": 0.3089, "step": 9553 }, { "epoch": 0.82, "learning_rate": 1.67029599457026e-06, "loss": 0.2725, "step": 9554 }, { "epoch": 0.82, "learning_rate": 1.6687601735388358e-06, "loss": 0.2493, "step": 9555 }, { "epoch": 0.82, "learning_rate": 1.6672249946382179e-06, "loss": 0.2916, "step": 9556 }, { "epoch": 0.82, "learning_rate": 1.6656904579867205e-06, "loss": 0.2604, "step": 9557 }, { "epoch": 0.82, "learning_rate": 1.6641565637026225e-06, "loss": 0.2554, "step": 9558 }, { "epoch": 0.82, "learning_rate": 1.6626233119041468e-06, "loss": 0.2863, "step": 9559 }, { "epoch": 0.82, "learning_rate": 1.6610907027094714e-06, "loss": 0.2711, "step": 9560 }, { "epoch": 0.82, "learning_rate": 1.6595587362367226e-06, "loss": 0.2813, "step": 9561 }, { "epoch": 0.82, "learning_rate": 1.6580274126039698e-06, "loss": 0.2722, "step": 9562 }, { "epoch": 0.82, "learning_rate": 1.6564967319292502e-06, "loss": 0.3035, "step": 9563 }, { "epoch": 0.82, "learning_rate": 1.6549666943305342e-06, "loss": 0.2395, "step": 9564 }, { "epoch": 0.82, "learning_rate": 1.653437299925751e-06, "loss": 0.2979, "step": 9565 }, { "epoch": 0.82, "learning_rate": 1.651908548832779e-06, "loss": 0.2621, "step": 9566 }, { "epoch": 0.82, "learning_rate": 1.6503804411694468e-06, "loss": 0.2891, "step": 9567 }, { "epoch": 0.82, "learning_rate": 1.6488529770535367e-06, "loss": 0.2832, "step": 9568 }, { "epoch": 0.82, "learning_rate": 1.6473261566027687e-06, "loss": 0.2579, "step": 9569 }, { "epoch": 0.82, "learning_rate": 1.6457999799348345e-06, "loss": 0.3221, "step": 9570 }, { "epoch": 0.82, "learning_rate": 1.6442744471673566e-06, "loss": 0.2507, "step": 9571 }, { "epoch": 0.82, "learning_rate": 1.6427495584179165e-06, "loss": 0.2961, "step": 9572 }, { "epoch": 0.82, "learning_rate": 1.6412253138040467e-06, "loss": 0.2866, "step": 9573 }, { "epoch": 0.82, "learning_rate": 1.6397017134432281e-06, "loss": 0.2695, "step": 9574 }, { "epoch": 0.82, "learning_rate": 1.638178757452894e-06, "loss": 0.28, "step": 9575 }, { "epoch": 0.82, "learning_rate": 1.6366564459504186e-06, "loss": 0.2577, "step": 9576 }, { "epoch": 0.82, "learning_rate": 1.6351347790531457e-06, "loss": 0.2874, "step": 9577 }, { "epoch": 0.82, "learning_rate": 1.6336137568783495e-06, "loss": 0.6031, "step": 9578 }, { "epoch": 0.82, "learning_rate": 1.6320933795432626e-06, "loss": 0.2919, "step": 9579 }, { "epoch": 0.82, "learning_rate": 1.6305736471650756e-06, "loss": 0.2827, "step": 9580 }, { "epoch": 0.82, "learning_rate": 1.6290545598609165e-06, "loss": 0.3348, "step": 9581 }, { "epoch": 0.82, "learning_rate": 1.627536117747871e-06, "loss": 0.2628, "step": 9582 }, { "epoch": 0.82, "learning_rate": 1.626018320942967e-06, "loss": 0.2819, "step": 9583 }, { "epoch": 0.82, "learning_rate": 1.6245011695631962e-06, "loss": 0.2634, "step": 9584 }, { "epoch": 0.82, "learning_rate": 1.6229846637254932e-06, "loss": 0.2584, "step": 9585 }, { "epoch": 0.82, "learning_rate": 1.6214688035467363e-06, "loss": 0.2779, "step": 9586 }, { "epoch": 0.82, "learning_rate": 1.6199535891437678e-06, "loss": 0.2803, "step": 9587 }, { "epoch": 0.82, "learning_rate": 1.6184390206333688e-06, "loss": 0.3201, "step": 9588 }, { "epoch": 0.82, "learning_rate": 1.616925098132275e-06, "loss": 0.2339, "step": 9589 }, { "epoch": 0.82, "learning_rate": 1.6154118217571723e-06, "loss": 0.2957, "step": 9590 }, { "epoch": 0.82, "learning_rate": 1.613899191624697e-06, "loss": 0.2483, "step": 9591 }, { "epoch": 0.82, "learning_rate": 1.612387207851437e-06, "loss": 0.2757, "step": 9592 }, { "epoch": 0.82, "learning_rate": 1.610875870553923e-06, "loss": 0.2422, "step": 9593 }, { "epoch": 0.82, "learning_rate": 1.6093651798486487e-06, "loss": 0.263, "step": 9594 }, { "epoch": 0.82, "learning_rate": 1.6078551358520456e-06, "loss": 0.2716, "step": 9595 }, { "epoch": 0.82, "learning_rate": 1.6063457386805004e-06, "loss": 0.2653, "step": 9596 }, { "epoch": 0.82, "learning_rate": 1.6048369884503524e-06, "loss": 0.2645, "step": 9597 }, { "epoch": 0.82, "learning_rate": 1.6033288852778882e-06, "loss": 0.2669, "step": 9598 }, { "epoch": 0.82, "learning_rate": 1.6018214292793455e-06, "loss": 0.2472, "step": 9599 }, { "epoch": 0.82, "learning_rate": 1.6003146205709064e-06, "loss": 0.2523, "step": 9600 }, { "epoch": 0.82, "learning_rate": 1.5988084592687169e-06, "loss": 0.2884, "step": 9601 }, { "epoch": 0.82, "learning_rate": 1.5973029454888578e-06, "loss": 0.2417, "step": 9602 }, { "epoch": 0.82, "learning_rate": 1.5957980793473682e-06, "loss": 0.2799, "step": 9603 }, { "epoch": 0.82, "learning_rate": 1.5942938609602365e-06, "loss": 0.28, "step": 9604 }, { "epoch": 0.82, "learning_rate": 1.5927902904434e-06, "loss": 0.2823, "step": 9605 }, { "epoch": 0.82, "learning_rate": 1.5912873679127495e-06, "loss": 0.2827, "step": 9606 }, { "epoch": 0.82, "learning_rate": 1.589785093484114e-06, "loss": 0.5929, "step": 9607 }, { "epoch": 0.82, "learning_rate": 1.5882834672732939e-06, "loss": 0.2723, "step": 9608 }, { "epoch": 0.82, "learning_rate": 1.586782489396017e-06, "loss": 0.2827, "step": 9609 }, { "epoch": 0.82, "learning_rate": 1.5852821599679747e-06, "loss": 0.3037, "step": 9610 }, { "epoch": 0.82, "learning_rate": 1.5837824791048062e-06, "loss": 0.2631, "step": 9611 }, { "epoch": 0.82, "learning_rate": 1.5822834469220982e-06, "loss": 0.2571, "step": 9612 }, { "epoch": 0.82, "learning_rate": 1.5807850635353906e-06, "loss": 0.2388, "step": 9613 }, { "epoch": 0.82, "learning_rate": 1.5792873290601662e-06, "loss": 0.2974, "step": 9614 }, { "epoch": 0.82, "learning_rate": 1.5777902436118708e-06, "loss": 0.3185, "step": 9615 }, { "epoch": 0.82, "learning_rate": 1.5762938073058853e-06, "loss": 0.2598, "step": 9616 }, { "epoch": 0.82, "learning_rate": 1.5747980202575475e-06, "loss": 0.295, "step": 9617 }, { "epoch": 0.82, "learning_rate": 1.573302882582154e-06, "loss": 0.2463, "step": 9618 }, { "epoch": 0.82, "learning_rate": 1.5718083943949337e-06, "loss": 0.2769, "step": 9619 }, { "epoch": 0.82, "learning_rate": 1.57031455581108e-06, "loss": 0.2896, "step": 9620 }, { "epoch": 0.82, "learning_rate": 1.5688213669457243e-06, "loss": 0.3014, "step": 9621 }, { "epoch": 0.82, "learning_rate": 1.5673288279139586e-06, "loss": 0.2529, "step": 9622 }, { "epoch": 0.82, "learning_rate": 1.5658369388308238e-06, "loss": 0.2572, "step": 9623 }, { "epoch": 0.82, "learning_rate": 1.5643456998112971e-06, "loss": 0.2864, "step": 9624 }, { "epoch": 0.83, "learning_rate": 1.5628551109703282e-06, "loss": 0.283, "step": 9625 }, { "epoch": 0.83, "learning_rate": 1.561365172422795e-06, "loss": 0.2617, "step": 9626 }, { "epoch": 0.83, "learning_rate": 1.5598758842835382e-06, "loss": 0.2762, "step": 9627 }, { "epoch": 0.83, "learning_rate": 1.5583872466673433e-06, "loss": 0.2802, "step": 9628 }, { "epoch": 0.83, "learning_rate": 1.5568992596889487e-06, "loss": 0.2702, "step": 9629 }, { "epoch": 0.83, "learning_rate": 1.5554119234630438e-06, "loss": 0.2714, "step": 9630 }, { "epoch": 0.83, "learning_rate": 1.553925238104257e-06, "loss": 0.3066, "step": 9631 }, { "epoch": 0.83, "learning_rate": 1.5524392037271828e-06, "loss": 0.2478, "step": 9632 }, { "epoch": 0.83, "learning_rate": 1.5509538204463536e-06, "loss": 0.2829, "step": 9633 }, { "epoch": 0.83, "learning_rate": 1.5494690883762553e-06, "loss": 0.2643, "step": 9634 }, { "epoch": 0.83, "learning_rate": 1.5479850076313241e-06, "loss": 0.2995, "step": 9635 }, { "epoch": 0.83, "learning_rate": 1.5465015783259463e-06, "loss": 0.2699, "step": 9636 }, { "epoch": 0.83, "learning_rate": 1.5450188005744593e-06, "loss": 0.2903, "step": 9637 }, { "epoch": 0.83, "learning_rate": 1.5435366744911406e-06, "loss": 0.2469, "step": 9638 }, { "epoch": 0.83, "learning_rate": 1.5420552001902355e-06, "loss": 0.2745, "step": 9639 }, { "epoch": 0.83, "learning_rate": 1.5405743777859206e-06, "loss": 0.2694, "step": 9640 }, { "epoch": 0.83, "learning_rate": 1.5390942073923343e-06, "loss": 0.2482, "step": 9641 }, { "epoch": 0.83, "learning_rate": 1.53761468912356e-06, "loss": 0.2903, "step": 9642 }, { "epoch": 0.83, "learning_rate": 1.5361358230936308e-06, "loss": 0.2616, "step": 9643 }, { "epoch": 0.83, "learning_rate": 1.5346576094165343e-06, "loss": 0.2466, "step": 9644 }, { "epoch": 0.83, "learning_rate": 1.5331800482061954e-06, "loss": 0.2435, "step": 9645 }, { "epoch": 0.83, "learning_rate": 1.5317031395765081e-06, "loss": 0.2365, "step": 9646 }, { "epoch": 0.83, "learning_rate": 1.530226883641297e-06, "loss": 0.2845, "step": 9647 }, { "epoch": 0.83, "learning_rate": 1.5287512805143467e-06, "loss": 0.2715, "step": 9648 }, { "epoch": 0.83, "learning_rate": 1.5272763303093907e-06, "loss": 0.2733, "step": 9649 }, { "epoch": 0.83, "learning_rate": 1.5258020331401102e-06, "loss": 0.2758, "step": 9650 }, { "epoch": 0.83, "learning_rate": 1.5243283891201388e-06, "loss": 0.2772, "step": 9651 }, { "epoch": 0.83, "learning_rate": 1.522855398363051e-06, "loss": 0.2529, "step": 9652 }, { "epoch": 0.83, "learning_rate": 1.5213830609823877e-06, "loss": 0.2728, "step": 9653 }, { "epoch": 0.83, "learning_rate": 1.5199113770916207e-06, "loss": 0.3022, "step": 9654 }, { "epoch": 0.83, "learning_rate": 1.518440346804182e-06, "loss": 0.2502, "step": 9655 }, { "epoch": 0.83, "learning_rate": 1.5169699702334562e-06, "loss": 0.2434, "step": 9656 }, { "epoch": 0.83, "learning_rate": 1.5155002474927683e-06, "loss": 0.2771, "step": 9657 }, { "epoch": 0.83, "learning_rate": 1.5140311786953986e-06, "loss": 0.2565, "step": 9658 }, { "epoch": 0.83, "learning_rate": 1.5125627639545725e-06, "loss": 0.2664, "step": 9659 }, { "epoch": 0.83, "learning_rate": 1.5110950033834726e-06, "loss": 0.2642, "step": 9660 }, { "epoch": 0.83, "learning_rate": 1.5096278970952272e-06, "loss": 0.2405, "step": 9661 }, { "epoch": 0.83, "learning_rate": 1.508161445202906e-06, "loss": 0.2526, "step": 9662 }, { "epoch": 0.83, "learning_rate": 1.506695647819546e-06, "loss": 0.2799, "step": 9663 }, { "epoch": 0.83, "learning_rate": 1.5052305050581173e-06, "loss": 0.2782, "step": 9664 }, { "epoch": 0.83, "learning_rate": 1.503766017031547e-06, "loss": 0.2469, "step": 9665 }, { "epoch": 0.83, "learning_rate": 1.5023021838527108e-06, "loss": 0.2711, "step": 9666 }, { "epoch": 0.83, "learning_rate": 1.5008390056344347e-06, "loss": 0.2717, "step": 9667 }, { "epoch": 0.83, "learning_rate": 1.499376482489494e-06, "loss": 0.2559, "step": 9668 }, { "epoch": 0.83, "learning_rate": 1.4979146145306068e-06, "loss": 0.2724, "step": 9669 }, { "epoch": 0.83, "learning_rate": 1.4964534018704558e-06, "loss": 0.2732, "step": 9670 }, { "epoch": 0.83, "learning_rate": 1.4949928446216567e-06, "loss": 0.29, "step": 9671 }, { "epoch": 0.83, "learning_rate": 1.493532942896785e-06, "loss": 0.5996, "step": 9672 }, { "epoch": 0.83, "learning_rate": 1.4920736968083616e-06, "loss": 0.2831, "step": 9673 }, { "epoch": 0.83, "learning_rate": 1.4906151064688602e-06, "loss": 0.2847, "step": 9674 }, { "epoch": 0.83, "learning_rate": 1.4891571719907016e-06, "loss": 0.2447, "step": 9675 }, { "epoch": 0.83, "learning_rate": 1.4876998934862497e-06, "loss": 0.2653, "step": 9676 }, { "epoch": 0.83, "learning_rate": 1.4862432710678355e-06, "loss": 0.2581, "step": 9677 }, { "epoch": 0.83, "learning_rate": 1.4847873048477191e-06, "loss": 0.275, "step": 9678 }, { "epoch": 0.83, "learning_rate": 1.4833319949381232e-06, "loss": 0.2882, "step": 9679 }, { "epoch": 0.83, "learning_rate": 1.4818773414512134e-06, "loss": 0.2963, "step": 9680 }, { "epoch": 0.83, "learning_rate": 1.4804233444991102e-06, "loss": 0.309, "step": 9681 }, { "epoch": 0.83, "learning_rate": 1.4789700041938816e-06, "loss": 0.2344, "step": 9682 }, { "epoch": 0.83, "learning_rate": 1.4775173206475357e-06, "loss": 0.269, "step": 9683 }, { "epoch": 0.83, "learning_rate": 1.4760652939720488e-06, "loss": 0.3092, "step": 9684 }, { "epoch": 0.83, "learning_rate": 1.474613924279329e-06, "loss": 0.2766, "step": 9685 }, { "epoch": 0.83, "learning_rate": 1.4731632116812434e-06, "loss": 0.2602, "step": 9686 }, { "epoch": 0.83, "learning_rate": 1.4717131562896047e-06, "loss": 0.2472, "step": 9687 }, { "epoch": 0.83, "learning_rate": 1.4702637582161761e-06, "loss": 0.2795, "step": 9688 }, { "epoch": 0.83, "learning_rate": 1.4688150175726724e-06, "loss": 0.2886, "step": 9689 }, { "epoch": 0.83, "learning_rate": 1.4673669344707498e-06, "loss": 0.2604, "step": 9690 }, { "epoch": 0.83, "learning_rate": 1.4659195090220258e-06, "loss": 0.2692, "step": 9691 }, { "epoch": 0.83, "learning_rate": 1.4644727413380566e-06, "loss": 0.2769, "step": 9692 }, { "epoch": 0.83, "learning_rate": 1.463026631530351e-06, "loss": 0.2602, "step": 9693 }, { "epoch": 0.83, "learning_rate": 1.4615811797103751e-06, "loss": 0.5746, "step": 9694 }, { "epoch": 0.83, "learning_rate": 1.4601363859895301e-06, "loss": 0.2729, "step": 9695 }, { "epoch": 0.83, "learning_rate": 1.4586922504791767e-06, "loss": 0.2681, "step": 9696 }, { "epoch": 0.83, "learning_rate": 1.457248773290617e-06, "loss": 0.2784, "step": 9697 }, { "epoch": 0.83, "learning_rate": 1.4558059545351144e-06, "loss": 0.5823, "step": 9698 }, { "epoch": 0.83, "learning_rate": 1.454363794323872e-06, "loss": 0.253, "step": 9699 }, { "epoch": 0.83, "learning_rate": 1.4529222927680375e-06, "loss": 0.2785, "step": 9700 }, { "epoch": 0.83, "learning_rate": 1.4514814499787266e-06, "loss": 0.3043, "step": 9701 }, { "epoch": 0.83, "learning_rate": 1.4500412660669828e-06, "loss": 0.3049, "step": 9702 }, { "epoch": 0.83, "learning_rate": 1.4486017411438114e-06, "loss": 0.2595, "step": 9703 }, { "epoch": 0.83, "learning_rate": 1.447162875320165e-06, "loss": 0.2455, "step": 9704 }, { "epoch": 0.83, "learning_rate": 1.4457246687069427e-06, "loss": 0.3316, "step": 9705 }, { "epoch": 0.83, "learning_rate": 1.444287121414998e-06, "loss": 0.2393, "step": 9706 }, { "epoch": 0.83, "learning_rate": 1.442850233555122e-06, "loss": 0.2883, "step": 9707 }, { "epoch": 0.83, "learning_rate": 1.4414140052380721e-06, "loss": 0.3138, "step": 9708 }, { "epoch": 0.83, "learning_rate": 1.4399784365745396e-06, "loss": 0.3044, "step": 9709 }, { "epoch": 0.83, "learning_rate": 1.4385435276751724e-06, "loss": 0.3032, "step": 9710 }, { "epoch": 0.83, "learning_rate": 1.437109278650567e-06, "loss": 0.3209, "step": 9711 }, { "epoch": 0.83, "learning_rate": 1.4356756896112678e-06, "loss": 0.2537, "step": 9712 }, { "epoch": 0.83, "learning_rate": 1.4342427606677712e-06, "loss": 0.2808, "step": 9713 }, { "epoch": 0.83, "learning_rate": 1.432810491930514e-06, "loss": 0.272, "step": 9714 }, { "epoch": 0.83, "learning_rate": 1.4313788835098964e-06, "loss": 0.2477, "step": 9715 }, { "epoch": 0.83, "learning_rate": 1.4299479355162526e-06, "loss": 0.2949, "step": 9716 }, { "epoch": 0.83, "learning_rate": 1.4285176480598772e-06, "loss": 0.2582, "step": 9717 }, { "epoch": 0.83, "learning_rate": 1.4270880212510086e-06, "loss": 0.3104, "step": 9718 }, { "epoch": 0.83, "learning_rate": 1.425659055199835e-06, "loss": 0.2592, "step": 9719 }, { "epoch": 0.83, "learning_rate": 1.4242307500164964e-06, "loss": 0.2753, "step": 9720 }, { "epoch": 0.83, "learning_rate": 1.4228031058110725e-06, "loss": 0.2289, "step": 9721 }, { "epoch": 0.83, "learning_rate": 1.4213761226936095e-06, "loss": 0.2394, "step": 9722 }, { "epoch": 0.83, "learning_rate": 1.4199498007740841e-06, "loss": 0.2959, "step": 9723 }, { "epoch": 0.83, "learning_rate": 1.4185241401624327e-06, "loss": 0.2681, "step": 9724 }, { "epoch": 0.83, "learning_rate": 1.4170991409685386e-06, "loss": 0.2711, "step": 9725 }, { "epoch": 0.83, "learning_rate": 1.4156748033022328e-06, "loss": 0.2444, "step": 9726 }, { "epoch": 0.83, "learning_rate": 1.4142511272732994e-06, "loss": 0.2516, "step": 9727 }, { "epoch": 0.83, "learning_rate": 1.4128281129914611e-06, "loss": 0.3023, "step": 9728 }, { "epoch": 0.83, "learning_rate": 1.4114057605664066e-06, "loss": 0.2648, "step": 9729 }, { "epoch": 0.83, "learning_rate": 1.409984070107755e-06, "loss": 0.2441, "step": 9730 }, { "epoch": 0.83, "learning_rate": 1.4085630417250873e-06, "loss": 0.2982, "step": 9731 }, { "epoch": 0.83, "learning_rate": 1.4071426755279293e-06, "loss": 0.29, "step": 9732 }, { "epoch": 0.83, "learning_rate": 1.4057229716257548e-06, "loss": 0.2644, "step": 9733 }, { "epoch": 0.83, "learning_rate": 1.4043039301279904e-06, "loss": 0.2658, "step": 9734 }, { "epoch": 0.83, "learning_rate": 1.402885551144002e-06, "loss": 0.2925, "step": 9735 }, { "epoch": 0.83, "learning_rate": 1.4014678347831178e-06, "loss": 0.3234, "step": 9736 }, { "epoch": 0.83, "learning_rate": 1.4000507811546094e-06, "loss": 0.2839, "step": 9737 }, { "epoch": 0.83, "learning_rate": 1.398634390367688e-06, "loss": 0.2759, "step": 9738 }, { "epoch": 0.83, "learning_rate": 1.397218662531532e-06, "loss": 0.2896, "step": 9739 }, { "epoch": 0.83, "learning_rate": 1.3958035977552509e-06, "loss": 0.2549, "step": 9740 }, { "epoch": 0.83, "learning_rate": 1.394389196147915e-06, "loss": 0.2703, "step": 9741 }, { "epoch": 0.84, "learning_rate": 1.3929754578185373e-06, "loss": 0.29, "step": 9742 }, { "epoch": 0.84, "learning_rate": 1.3915623828760837e-06, "loss": 0.2773, "step": 9743 }, { "epoch": 0.84, "learning_rate": 1.3901499714294675e-06, "loss": 0.2894, "step": 9744 }, { "epoch": 0.84, "learning_rate": 1.3887382235875446e-06, "loss": 0.2537, "step": 9745 }, { "epoch": 0.84, "learning_rate": 1.3873271394591348e-06, "loss": 0.2374, "step": 9746 }, { "epoch": 0.84, "learning_rate": 1.38591671915299e-06, "loss": 0.2666, "step": 9747 }, { "epoch": 0.84, "learning_rate": 1.3845069627778218e-06, "loss": 0.3292, "step": 9748 }, { "epoch": 0.84, "learning_rate": 1.383097870442286e-06, "loss": 0.2768, "step": 9749 }, { "epoch": 0.84, "learning_rate": 1.3816894422549888e-06, "loss": 0.2569, "step": 9750 }, { "epoch": 0.84, "learning_rate": 1.3802816783244877e-06, "loss": 0.246, "step": 9751 }, { "epoch": 0.84, "learning_rate": 1.3788745787592784e-06, "loss": 0.2312, "step": 9752 }, { "epoch": 0.84, "learning_rate": 1.377468143667824e-06, "loss": 0.2919, "step": 9753 }, { "epoch": 0.84, "learning_rate": 1.3760623731585165e-06, "loss": 0.2836, "step": 9754 }, { "epoch": 0.84, "learning_rate": 1.3746572673397096e-06, "loss": 0.2731, "step": 9755 }, { "epoch": 0.84, "learning_rate": 1.373252826319701e-06, "loss": 0.2968, "step": 9756 }, { "epoch": 0.84, "learning_rate": 1.3718490502067393e-06, "loss": 0.2946, "step": 9757 }, { "epoch": 0.84, "learning_rate": 1.370445939109022e-06, "loss": 0.3139, "step": 9758 }, { "epoch": 0.84, "learning_rate": 1.3690434931346874e-06, "loss": 0.2404, "step": 9759 }, { "epoch": 0.84, "learning_rate": 1.3676417123918374e-06, "loss": 0.3181, "step": 9760 }, { "epoch": 0.84, "learning_rate": 1.3662405969885084e-06, "loss": 0.2812, "step": 9761 }, { "epoch": 0.84, "learning_rate": 1.3648401470326932e-06, "loss": 0.2633, "step": 9762 }, { "epoch": 0.84, "learning_rate": 1.3634403626323334e-06, "loss": 0.2422, "step": 9763 }, { "epoch": 0.84, "learning_rate": 1.3620412438953145e-06, "loss": 0.2888, "step": 9764 }, { "epoch": 0.84, "learning_rate": 1.3606427909294784e-06, "loss": 0.2874, "step": 9765 }, { "epoch": 0.84, "learning_rate": 1.359245003842602e-06, "loss": 0.3331, "step": 9766 }, { "epoch": 0.84, "learning_rate": 1.35784788274243e-06, "loss": 0.3184, "step": 9767 }, { "epoch": 0.84, "learning_rate": 1.3564514277366403e-06, "loss": 0.2883, "step": 9768 }, { "epoch": 0.84, "learning_rate": 1.355055638932864e-06, "loss": 0.2527, "step": 9769 }, { "epoch": 0.84, "learning_rate": 1.353660516438684e-06, "loss": 0.2435, "step": 9770 }, { "epoch": 0.84, "learning_rate": 1.352266060361629e-06, "loss": 0.2354, "step": 9771 }, { "epoch": 0.84, "learning_rate": 1.350872270809177e-06, "loss": 0.2676, "step": 9772 }, { "epoch": 0.84, "learning_rate": 1.3494791478887504e-06, "loss": 0.2786, "step": 9773 }, { "epoch": 0.84, "learning_rate": 1.3480866917077294e-06, "loss": 0.2835, "step": 9774 }, { "epoch": 0.84, "learning_rate": 1.3466949023734387e-06, "loss": 0.298, "step": 9775 }, { "epoch": 0.84, "learning_rate": 1.3453037799931435e-06, "loss": 0.2686, "step": 9776 }, { "epoch": 0.84, "learning_rate": 1.343913324674072e-06, "loss": 0.2469, "step": 9777 }, { "epoch": 0.84, "learning_rate": 1.3425235365233892e-06, "loss": 0.2936, "step": 9778 }, { "epoch": 0.84, "learning_rate": 1.3411344156482142e-06, "loss": 0.3016, "step": 9779 }, { "epoch": 0.84, "learning_rate": 1.339745962155613e-06, "loss": 0.2621, "step": 9780 }, { "epoch": 0.84, "learning_rate": 1.3383581761526022e-06, "loss": 0.2589, "step": 9781 }, { "epoch": 0.84, "learning_rate": 1.336971057746147e-06, "loss": 0.311, "step": 9782 }, { "epoch": 0.84, "learning_rate": 1.3355846070431533e-06, "loss": 0.2666, "step": 9783 }, { "epoch": 0.84, "learning_rate": 1.33419882415049e-06, "loss": 0.3063, "step": 9784 }, { "epoch": 0.84, "learning_rate": 1.3328137091749594e-06, "loss": 0.2723, "step": 9785 }, { "epoch": 0.84, "learning_rate": 1.3314292622233227e-06, "loss": 0.2581, "step": 9786 }, { "epoch": 0.84, "learning_rate": 1.3300454834022857e-06, "loss": 0.3113, "step": 9787 }, { "epoch": 0.84, "learning_rate": 1.3286623728185044e-06, "loss": 0.3043, "step": 9788 }, { "epoch": 0.84, "learning_rate": 1.3272799305785822e-06, "loss": 0.2677, "step": 9789 }, { "epoch": 0.84, "learning_rate": 1.325898156789066e-06, "loss": 0.2315, "step": 9790 }, { "epoch": 0.84, "learning_rate": 1.324517051556463e-06, "loss": 0.601, "step": 9791 }, { "epoch": 0.84, "learning_rate": 1.3231366149872183e-06, "loss": 0.3023, "step": 9792 }, { "epoch": 0.84, "learning_rate": 1.3217568471877284e-06, "loss": 0.2868, "step": 9793 }, { "epoch": 0.84, "learning_rate": 1.320377748264341e-06, "loss": 0.2831, "step": 9794 }, { "epoch": 0.84, "learning_rate": 1.3189993183233496e-06, "loss": 0.2608, "step": 9795 }, { "epoch": 0.84, "learning_rate": 1.3176215574709982e-06, "loss": 0.2668, "step": 9796 }, { "epoch": 0.84, "learning_rate": 1.3162444658134731e-06, "loss": 0.2589, "step": 9797 }, { "epoch": 0.84, "learning_rate": 1.3148680434569206e-06, "loss": 0.2736, "step": 9798 }, { "epoch": 0.84, "learning_rate": 1.313492290507422e-06, "loss": 0.291, "step": 9799 }, { "epoch": 0.84, "learning_rate": 1.3121172070710165e-06, "loss": 0.2738, "step": 9800 }, { "epoch": 0.84, "learning_rate": 1.3107427932536886e-06, "loss": 0.2787, "step": 9801 }, { "epoch": 0.84, "learning_rate": 1.309369049161372e-06, "loss": 0.2728, "step": 9802 }, { "epoch": 0.84, "learning_rate": 1.3079959748999494e-06, "loss": 0.251, "step": 9803 }, { "epoch": 0.84, "learning_rate": 1.3066235705752439e-06, "loss": 0.2325, "step": 9804 }, { "epoch": 0.84, "learning_rate": 1.3052518362930433e-06, "loss": 0.2521, "step": 9805 }, { "epoch": 0.84, "learning_rate": 1.3038807721590663e-06, "loss": 0.2484, "step": 9806 }, { "epoch": 0.84, "learning_rate": 1.3025103782789906e-06, "loss": 0.2568, "step": 9807 }, { "epoch": 0.84, "learning_rate": 1.3011406547584392e-06, "loss": 0.2615, "step": 9808 }, { "epoch": 0.84, "learning_rate": 1.2997716017029849e-06, "loss": 0.2695, "step": 9809 }, { "epoch": 0.84, "learning_rate": 1.2984032192181473e-06, "loss": 0.2481, "step": 9810 }, { "epoch": 0.84, "learning_rate": 1.2970355074093898e-06, "loss": 0.2468, "step": 9811 }, { "epoch": 0.84, "learning_rate": 1.2956684663821363e-06, "loss": 0.2677, "step": 9812 }, { "epoch": 0.84, "learning_rate": 1.2943020962417485e-06, "loss": 0.322, "step": 9813 }, { "epoch": 0.84, "learning_rate": 1.2929363970935371e-06, "loss": 0.2497, "step": 9814 }, { "epoch": 0.84, "learning_rate": 1.2915713690427655e-06, "loss": 0.3096, "step": 9815 }, { "epoch": 0.84, "learning_rate": 1.2902070121946441e-06, "loss": 0.2708, "step": 9816 }, { "epoch": 0.84, "learning_rate": 1.2888433266543288e-06, "loss": 0.3307, "step": 9817 }, { "epoch": 0.84, "learning_rate": 1.2874803125269274e-06, "loss": 0.2576, "step": 9818 }, { "epoch": 0.84, "learning_rate": 1.286117969917493e-06, "loss": 0.2819, "step": 9819 }, { "epoch": 0.84, "learning_rate": 1.2847562989310313e-06, "loss": 0.2889, "step": 9820 }, { "epoch": 0.84, "learning_rate": 1.2833952996724864e-06, "loss": 0.2562, "step": 9821 }, { "epoch": 0.84, "learning_rate": 1.2820349722467663e-06, "loss": 0.294, "step": 9822 }, { "epoch": 0.84, "learning_rate": 1.2806753167587117e-06, "loss": 0.3049, "step": 9823 }, { "epoch": 0.84, "learning_rate": 1.2793163333131208e-06, "loss": 0.289, "step": 9824 }, { "epoch": 0.84, "learning_rate": 1.277958022014736e-06, "loss": 0.2167, "step": 9825 }, { "epoch": 0.84, "learning_rate": 1.2766003829682504e-06, "loss": 0.2513, "step": 9826 }, { "epoch": 0.84, "learning_rate": 1.2752434162783056e-06, "loss": 0.2759, "step": 9827 }, { "epoch": 0.84, "learning_rate": 1.273887122049483e-06, "loss": 0.5486, "step": 9828 }, { "epoch": 0.84, "learning_rate": 1.2725315003863292e-06, "loss": 0.2589, "step": 9829 }, { "epoch": 0.84, "learning_rate": 1.2711765513933216e-06, "loss": 0.2759, "step": 9830 }, { "epoch": 0.84, "learning_rate": 1.2698222751748946e-06, "loss": 0.2731, "step": 9831 }, { "epoch": 0.84, "learning_rate": 1.26846867183543e-06, "loss": 0.2802, "step": 9832 }, { "epoch": 0.84, "learning_rate": 1.2671157414792567e-06, "loss": 0.3047, "step": 9833 }, { "epoch": 0.84, "learning_rate": 1.2657634842106526e-06, "loss": 0.2815, "step": 9834 }, { "epoch": 0.84, "learning_rate": 1.2644119001338385e-06, "loss": 0.2588, "step": 9835 }, { "epoch": 0.84, "learning_rate": 1.2630609893529956e-06, "loss": 0.306, "step": 9836 }, { "epoch": 0.84, "learning_rate": 1.2617107519722393e-06, "loss": 0.3174, "step": 9837 }, { "epoch": 0.84, "learning_rate": 1.26036118809564e-06, "loss": 0.3262, "step": 9838 }, { "epoch": 0.84, "learning_rate": 1.2590122978272178e-06, "loss": 0.2402, "step": 9839 }, { "epoch": 0.84, "learning_rate": 1.2576640812709363e-06, "loss": 0.6742, "step": 9840 }, { "epoch": 0.84, "learning_rate": 1.256316538530713e-06, "loss": 0.3113, "step": 9841 }, { "epoch": 0.84, "learning_rate": 1.254969669710402e-06, "loss": 0.3033, "step": 9842 }, { "epoch": 0.84, "learning_rate": 1.2536234749138232e-06, "loss": 0.248, "step": 9843 }, { "epoch": 0.84, "learning_rate": 1.2522779542447272e-06, "loss": 0.2856, "step": 9844 }, { "epoch": 0.84, "learning_rate": 1.2509331078068231e-06, "loss": 0.2316, "step": 9845 }, { "epoch": 0.84, "learning_rate": 1.249588935703765e-06, "loss": 0.2607, "step": 9846 }, { "epoch": 0.84, "learning_rate": 1.2482454380391552e-06, "loss": 0.2594, "step": 9847 }, { "epoch": 0.84, "learning_rate": 1.246902614916544e-06, "loss": 0.2625, "step": 9848 }, { "epoch": 0.84, "learning_rate": 1.245560466439425e-06, "loss": 0.2535, "step": 9849 }, { "epoch": 0.84, "learning_rate": 1.2442189927112514e-06, "loss": 0.588, "step": 9850 }, { "epoch": 0.84, "learning_rate": 1.242878193835415e-06, "loss": 0.2385, "step": 9851 }, { "epoch": 0.84, "learning_rate": 1.2415380699152568e-06, "loss": 0.2433, "step": 9852 }, { "epoch": 0.84, "learning_rate": 1.240198621054066e-06, "loss": 0.2903, "step": 9853 }, { "epoch": 0.84, "learning_rate": 1.2388598473550828e-06, "loss": 0.3208, "step": 9854 }, { "epoch": 0.84, "learning_rate": 1.237521748921492e-06, "loss": 0.2714, "step": 9855 }, { "epoch": 0.84, "learning_rate": 1.2361843258564277e-06, "loss": 0.2559, "step": 9856 }, { "epoch": 0.84, "learning_rate": 1.2348475782629733e-06, "loss": 0.2958, "step": 9857 }, { "epoch": 0.85, "learning_rate": 1.2335115062441593e-06, "loss": 0.2708, "step": 9858 }, { "epoch": 0.85, "learning_rate": 1.2321761099029571e-06, "loss": 0.2709, "step": 9859 }, { "epoch": 0.85, "learning_rate": 1.2308413893423021e-06, "loss": 0.2973, "step": 9860 }, { "epoch": 0.85, "learning_rate": 1.229507344665062e-06, "loss": 0.603, "step": 9861 }, { "epoch": 0.85, "learning_rate": 1.2281739759740575e-06, "loss": 0.239, "step": 9862 }, { "epoch": 0.85, "learning_rate": 1.2268412833720611e-06, "loss": 0.2968, "step": 9863 }, { "epoch": 0.85, "learning_rate": 1.2255092669617897e-06, "loss": 0.2596, "step": 9864 }, { "epoch": 0.85, "learning_rate": 1.2241779268459098e-06, "loss": 0.2218, "step": 9865 }, { "epoch": 0.85, "learning_rate": 1.2228472631270272e-06, "loss": 0.2832, "step": 9866 }, { "epoch": 0.85, "learning_rate": 1.2215172759077143e-06, "loss": 0.2881, "step": 9867 }, { "epoch": 0.85, "learning_rate": 1.2201879652904714e-06, "loss": 0.2374, "step": 9868 }, { "epoch": 0.85, "learning_rate": 1.2188593313777575e-06, "loss": 0.2803, "step": 9869 }, { "epoch": 0.85, "learning_rate": 1.2175313742719775e-06, "loss": 0.2687, "step": 9870 }, { "epoch": 0.85, "learning_rate": 1.2162040940754826e-06, "loss": 0.2388, "step": 9871 }, { "epoch": 0.85, "learning_rate": 1.2148774908905782e-06, "loss": 0.2557, "step": 9872 }, { "epoch": 0.85, "learning_rate": 1.2135515648195029e-06, "loss": 0.2516, "step": 9873 }, { "epoch": 0.85, "learning_rate": 1.212226315964462e-06, "loss": 0.2314, "step": 9874 }, { "epoch": 0.85, "learning_rate": 1.210901744427594e-06, "loss": 0.2977, "step": 9875 }, { "epoch": 0.85, "learning_rate": 1.20957785031099e-06, "loss": 0.3063, "step": 9876 }, { "epoch": 0.85, "learning_rate": 1.208254633716691e-06, "loss": 0.2479, "step": 9877 }, { "epoch": 0.85, "learning_rate": 1.2069320947466845e-06, "loss": 0.2745, "step": 9878 }, { "epoch": 0.85, "learning_rate": 1.2056102335029052e-06, "loss": 0.2602, "step": 9879 }, { "epoch": 0.85, "learning_rate": 1.2042890500872306e-06, "loss": 0.2932, "step": 9880 }, { "epoch": 0.85, "learning_rate": 1.2029685446015005e-06, "loss": 0.3196, "step": 9881 }, { "epoch": 0.85, "learning_rate": 1.2016487171474844e-06, "loss": 0.2668, "step": 9882 }, { "epoch": 0.85, "learning_rate": 1.2003295678269112e-06, "loss": 0.2781, "step": 9883 }, { "epoch": 0.85, "learning_rate": 1.1990110967414548e-06, "loss": 0.3168, "step": 9884 }, { "epoch": 0.85, "learning_rate": 1.1976933039927363e-06, "loss": 0.5594, "step": 9885 }, { "epoch": 0.85, "learning_rate": 1.1963761896823255e-06, "loss": 0.2618, "step": 9886 }, { "epoch": 0.85, "learning_rate": 1.1950597539117348e-06, "loss": 0.2421, "step": 9887 }, { "epoch": 0.85, "learning_rate": 1.1937439967824338e-06, "loss": 0.2948, "step": 9888 }, { "epoch": 0.85, "learning_rate": 1.1924289183958349e-06, "loss": 0.2701, "step": 9889 }, { "epoch": 0.85, "learning_rate": 1.1911145188532936e-06, "loss": 0.2894, "step": 9890 }, { "epoch": 0.85, "learning_rate": 1.1898007982561177e-06, "loss": 0.2553, "step": 9891 }, { "epoch": 0.85, "learning_rate": 1.1884877567055653e-06, "loss": 0.2543, "step": 9892 }, { "epoch": 0.85, "learning_rate": 1.1871753943028375e-06, "loss": 0.2953, "step": 9893 }, { "epoch": 0.85, "learning_rate": 1.1858637111490845e-06, "loss": 0.3027, "step": 9894 }, { "epoch": 0.85, "learning_rate": 1.1845527073454045e-06, "loss": 0.2673, "step": 9895 }, { "epoch": 0.85, "learning_rate": 1.1832423829928452e-06, "loss": 0.2372, "step": 9896 }, { "epoch": 0.85, "learning_rate": 1.1819327381923972e-06, "loss": 0.2414, "step": 9897 }, { "epoch": 0.85, "learning_rate": 1.1806237730450009e-06, "loss": 0.2679, "step": 9898 }, { "epoch": 0.85, "learning_rate": 1.1793154876515477e-06, "loss": 0.2697, "step": 9899 }, { "epoch": 0.85, "learning_rate": 1.1780078821128716e-06, "loss": 0.2605, "step": 9900 }, { "epoch": 0.85, "learning_rate": 1.1767009565297583e-06, "loss": 0.3094, "step": 9901 }, { "epoch": 0.85, "learning_rate": 1.1753947110029373e-06, "loss": 0.2665, "step": 9902 }, { "epoch": 0.85, "learning_rate": 1.1740891456330894e-06, "loss": 0.2715, "step": 9903 }, { "epoch": 0.85, "learning_rate": 1.1727842605208373e-06, "loss": 0.2929, "step": 9904 }, { "epoch": 0.85, "learning_rate": 1.1714800557667616e-06, "loss": 0.2645, "step": 9905 }, { "epoch": 0.85, "learning_rate": 1.1701765314713786e-06, "loss": 0.2672, "step": 9906 }, { "epoch": 0.85, "learning_rate": 1.168873687735158e-06, "loss": 0.2319, "step": 9907 }, { "epoch": 0.85, "learning_rate": 1.1675715246585184e-06, "loss": 0.249, "step": 9908 }, { "epoch": 0.85, "learning_rate": 1.1662700423418239e-06, "loss": 0.2432, "step": 9909 }, { "epoch": 0.85, "learning_rate": 1.1649692408853875e-06, "loss": 0.2649, "step": 9910 }, { "epoch": 0.85, "learning_rate": 1.1636691203894623e-06, "loss": 0.2471, "step": 9911 }, { "epoch": 0.85, "learning_rate": 1.162369680954264e-06, "loss": 0.3283, "step": 9912 }, { "epoch": 0.85, "learning_rate": 1.16107092267994e-06, "loss": 0.3055, "step": 9913 }, { "epoch": 0.85, "learning_rate": 1.1597728456665958e-06, "loss": 0.2349, "step": 9914 }, { "epoch": 0.85, "learning_rate": 1.1584754500142792e-06, "loss": 0.2845, "step": 9915 }, { "epoch": 0.85, "learning_rate": 1.157178735822988e-06, "loss": 0.2772, "step": 9916 }, { "epoch": 0.85, "learning_rate": 1.1558827031926679e-06, "loss": 0.5519, "step": 9917 }, { "epoch": 0.85, "learning_rate": 1.1545873522232055e-06, "loss": 0.3152, "step": 9918 }, { "epoch": 0.85, "learning_rate": 1.1532926830144475e-06, "loss": 0.2501, "step": 9919 }, { "epoch": 0.85, "learning_rate": 1.1519986956661744e-06, "loss": 0.5618, "step": 9920 }, { "epoch": 0.85, "learning_rate": 1.1507053902781217e-06, "loss": 0.2558, "step": 9921 }, { "epoch": 0.85, "learning_rate": 1.1494127669499732e-06, "loss": 0.2357, "step": 9922 }, { "epoch": 0.85, "learning_rate": 1.1481208257813558e-06, "loss": 0.2667, "step": 9923 }, { "epoch": 0.85, "learning_rate": 1.1468295668718498e-06, "loss": 0.231, "step": 9924 }, { "epoch": 0.85, "learning_rate": 1.1455389903209713e-06, "loss": 0.2867, "step": 9925 }, { "epoch": 0.85, "learning_rate": 1.1442490962281983e-06, "loss": 0.3174, "step": 9926 }, { "epoch": 0.85, "learning_rate": 1.142959884692949e-06, "loss": 0.2521, "step": 9927 }, { "epoch": 0.85, "learning_rate": 1.1416713558145854e-06, "loss": 0.283, "step": 9928 }, { "epoch": 0.85, "learning_rate": 1.1403835096924244e-06, "loss": 0.2653, "step": 9929 }, { "epoch": 0.85, "learning_rate": 1.1390963464257254e-06, "loss": 0.2654, "step": 9930 }, { "epoch": 0.85, "learning_rate": 1.1378098661136993e-06, "loss": 0.2322, "step": 9931 }, { "epoch": 0.85, "learning_rate": 1.1365240688554958e-06, "loss": 0.3051, "step": 9932 }, { "epoch": 0.85, "learning_rate": 1.135238954750223e-06, "loss": 0.2597, "step": 9933 }, { "epoch": 0.85, "learning_rate": 1.1339545238969308e-06, "loss": 0.2883, "step": 9934 }, { "epoch": 0.85, "learning_rate": 1.1326707763946143e-06, "loss": 0.2836, "step": 9935 }, { "epoch": 0.85, "learning_rate": 1.131387712342219e-06, "loss": 0.3225, "step": 9936 }, { "epoch": 0.85, "learning_rate": 1.130105331838638e-06, "loss": 0.3378, "step": 9937 }, { "epoch": 0.85, "learning_rate": 1.1288236349827108e-06, "loss": 0.6455, "step": 9938 }, { "epoch": 0.85, "learning_rate": 1.1275426218732233e-06, "loss": 0.2538, "step": 9939 }, { "epoch": 0.85, "learning_rate": 1.1262622926089118e-06, "loss": 0.2444, "step": 9940 }, { "epoch": 0.85, "learning_rate": 1.1249826472884574e-06, "loss": 0.2537, "step": 9941 }, { "epoch": 0.85, "learning_rate": 1.1237036860104833e-06, "loss": 0.259, "step": 9942 }, { "epoch": 0.85, "learning_rate": 1.1224254088735752e-06, "loss": 0.2936, "step": 9943 }, { "epoch": 0.85, "learning_rate": 1.121147815976248e-06, "loss": 0.2749, "step": 9944 }, { "epoch": 0.85, "learning_rate": 1.1198709074169766e-06, "loss": 0.2701, "step": 9945 }, { "epoch": 0.85, "learning_rate": 1.1185946832941774e-06, "loss": 0.3221, "step": 9946 }, { "epoch": 0.85, "learning_rate": 1.1173191437062147e-06, "loss": 0.2545, "step": 9947 }, { "epoch": 0.85, "learning_rate": 1.1160442887514045e-06, "loss": 0.2734, "step": 9948 }, { "epoch": 0.85, "learning_rate": 1.1147701185280002e-06, "loss": 0.3171, "step": 9949 }, { "epoch": 0.85, "learning_rate": 1.1134966331342157e-06, "loss": 0.2708, "step": 9950 }, { "epoch": 0.85, "learning_rate": 1.1122238326681978e-06, "loss": 0.2604, "step": 9951 }, { "epoch": 0.85, "learning_rate": 1.1109517172280525e-06, "loss": 0.2535, "step": 9952 }, { "epoch": 0.85, "learning_rate": 1.109680286911826e-06, "loss": 0.2397, "step": 9953 }, { "epoch": 0.85, "learning_rate": 1.1084095418175156e-06, "loss": 0.2753, "step": 9954 }, { "epoch": 0.85, "learning_rate": 1.1071394820430647e-06, "loss": 0.2865, "step": 9955 }, { "epoch": 0.85, "learning_rate": 1.1058701076863575e-06, "loss": 0.2385, "step": 9956 }, { "epoch": 0.85, "learning_rate": 1.1046014188452392e-06, "loss": 0.2606, "step": 9957 }, { "epoch": 0.85, "learning_rate": 1.103333415617488e-06, "loss": 0.2532, "step": 9958 }, { "epoch": 0.85, "learning_rate": 1.102066098100838e-06, "loss": 0.2868, "step": 9959 }, { "epoch": 0.85, "learning_rate": 1.1007994663929656e-06, "loss": 0.3349, "step": 9960 }, { "epoch": 0.85, "learning_rate": 1.0995335205914993e-06, "loss": 0.2753, "step": 9961 }, { "epoch": 0.85, "learning_rate": 1.0982682607940131e-06, "loss": 0.3004, "step": 9962 }, { "epoch": 0.85, "learning_rate": 1.0970036870980195e-06, "loss": 0.2977, "step": 9963 }, { "epoch": 0.85, "learning_rate": 1.0957397996009934e-06, "loss": 0.3046, "step": 9964 }, { "epoch": 0.85, "learning_rate": 1.094476598400348e-06, "loss": 0.2841, "step": 9965 }, { "epoch": 0.85, "learning_rate": 1.0932140835934414e-06, "loss": 0.2584, "step": 9966 }, { "epoch": 0.85, "learning_rate": 1.0919522552775829e-06, "loss": 0.3019, "step": 9967 }, { "epoch": 0.85, "learning_rate": 1.0906911135500298e-06, "loss": 0.2882, "step": 9968 }, { "epoch": 0.85, "learning_rate": 1.0894306585079838e-06, "loss": 0.2058, "step": 9969 }, { "epoch": 0.85, "learning_rate": 1.088170890248591e-06, "loss": 0.2933, "step": 9970 }, { "epoch": 0.85, "learning_rate": 1.0869118088689535e-06, "loss": 0.3075, "step": 9971 }, { "epoch": 0.85, "learning_rate": 1.0856534144661146e-06, "loss": 0.2606, "step": 9972 }, { "epoch": 0.85, "learning_rate": 1.0843957071370626e-06, "loss": 0.2866, "step": 9973 }, { "epoch": 0.85, "learning_rate": 1.0831386869787353e-06, "loss": 0.2972, "step": 9974 }, { "epoch": 0.86, "learning_rate": 1.0818823540880174e-06, "loss": 0.277, "step": 9975 }, { "epoch": 0.86, "learning_rate": 1.080626708561744e-06, "loss": 0.2919, "step": 9976 }, { "epoch": 0.86, "learning_rate": 1.0793717504966906e-06, "loss": 0.2162, "step": 9977 }, { "epoch": 0.86, "learning_rate": 1.0781174799895844e-06, "loss": 0.3007, "step": 9978 }, { "epoch": 0.86, "learning_rate": 1.0768638971371014e-06, "loss": 0.2684, "step": 9979 }, { "epoch": 0.86, "learning_rate": 1.0756110020358568e-06, "loss": 0.2422, "step": 9980 }, { "epoch": 0.86, "learning_rate": 1.0743587947824186e-06, "loss": 0.3024, "step": 9981 }, { "epoch": 0.86, "learning_rate": 1.0731072754733019e-06, "loss": 0.2855, "step": 9982 }, { "epoch": 0.86, "learning_rate": 1.0718564442049672e-06, "loss": 0.2949, "step": 9983 }, { "epoch": 0.86, "learning_rate": 1.0706063010738232e-06, "loss": 0.2958, "step": 9984 }, { "epoch": 0.86, "learning_rate": 1.0693568461762238e-06, "loss": 0.2711, "step": 9985 }, { "epoch": 0.86, "learning_rate": 1.068108079608473e-06, "loss": 0.2485, "step": 9986 }, { "epoch": 0.86, "learning_rate": 1.066860001466813e-06, "loss": 0.239, "step": 9987 }, { "epoch": 0.86, "learning_rate": 1.0656126118474485e-06, "loss": 0.2626, "step": 9988 }, { "epoch": 0.86, "learning_rate": 1.0643659108465166e-06, "loss": 0.2715, "step": 9989 }, { "epoch": 0.86, "learning_rate": 1.0631198985601077e-06, "loss": 0.2724, "step": 9990 }, { "epoch": 0.86, "learning_rate": 1.0618745750842585e-06, "loss": 0.2841, "step": 9991 }, { "epoch": 0.86, "learning_rate": 1.0606299405149522e-06, "loss": 0.5852, "step": 9992 }, { "epoch": 0.86, "learning_rate": 1.059385994948121e-06, "loss": 0.301, "step": 9993 }, { "epoch": 0.86, "learning_rate": 1.0581427384796372e-06, "loss": 0.2637, "step": 9994 }, { "epoch": 0.86, "learning_rate": 1.0569001712053317e-06, "loss": 0.2565, "step": 9995 }, { "epoch": 0.86, "learning_rate": 1.0556582932209703e-06, "loss": 0.2871, "step": 9996 }, { "epoch": 0.86, "learning_rate": 1.0544171046222717e-06, "loss": 0.2781, "step": 9997 }, { "epoch": 0.86, "learning_rate": 1.053176605504902e-06, "loss": 0.6038, "step": 9998 }, { "epoch": 0.86, "learning_rate": 1.051936795964471e-06, "loss": 0.2552, "step": 9999 }, { "epoch": 0.86, "learning_rate": 1.0506976760965414e-06, "loss": 0.2501, "step": 10000 }, { "epoch": 0.86, "learning_rate": 1.0494592459966102e-06, "loss": 0.2786, "step": 10001 }, { "epoch": 0.86, "learning_rate": 1.0482215057601364e-06, "loss": 0.2624, "step": 10002 }, { "epoch": 0.86, "learning_rate": 1.0469844554825192e-06, "loss": 0.2753, "step": 10003 }, { "epoch": 0.86, "learning_rate": 1.0457480952591005e-06, "loss": 0.2574, "step": 10004 }, { "epoch": 0.86, "learning_rate": 1.0445124251851735e-06, "loss": 0.2845, "step": 10005 }, { "epoch": 0.86, "learning_rate": 1.043277445355978e-06, "loss": 0.2184, "step": 10006 }, { "epoch": 0.86, "learning_rate": 1.0420431558667033e-06, "loss": 0.2621, "step": 10007 }, { "epoch": 0.86, "learning_rate": 1.0408095568124765e-06, "loss": 0.2249, "step": 10008 }, { "epoch": 0.86, "learning_rate": 1.0395766482883806e-06, "loss": 0.2835, "step": 10009 }, { "epoch": 0.86, "learning_rate": 1.0383444303894453e-06, "loss": 0.2427, "step": 10010 }, { "epoch": 0.86, "learning_rate": 1.0371129032106375e-06, "loss": 0.3246, "step": 10011 }, { "epoch": 0.86, "learning_rate": 1.0358820668468805e-06, "loss": 0.279, "step": 10012 }, { "epoch": 0.86, "learning_rate": 1.0346519213930417e-06, "loss": 0.2651, "step": 10013 }, { "epoch": 0.86, "learning_rate": 1.033422466943933e-06, "loss": 0.3018, "step": 10014 }, { "epoch": 0.86, "learning_rate": 1.0321937035943153e-06, "loss": 0.2598, "step": 10015 }, { "epoch": 0.86, "learning_rate": 1.0309656314388949e-06, "loss": 0.2408, "step": 10016 }, { "epoch": 0.86, "learning_rate": 1.0297382505723297e-06, "loss": 0.5775, "step": 10017 }, { "epoch": 0.86, "learning_rate": 1.0285115610892138e-06, "loss": 0.2997, "step": 10018 }, { "epoch": 0.86, "learning_rate": 1.0272855630840982e-06, "loss": 0.2479, "step": 10019 }, { "epoch": 0.86, "learning_rate": 1.0260602566514755e-06, "loss": 0.2786, "step": 10020 }, { "epoch": 0.86, "learning_rate": 1.0248356418857863e-06, "loss": 0.2689, "step": 10021 }, { "epoch": 0.86, "learning_rate": 1.0236117188814187e-06, "loss": 0.2477, "step": 10022 }, { "epoch": 0.86, "learning_rate": 1.0223884877327062e-06, "loss": 0.2961, "step": 10023 }, { "epoch": 0.86, "learning_rate": 1.0211659485339308e-06, "loss": 0.2662, "step": 10024 }, { "epoch": 0.86, "learning_rate": 1.0199441013793155e-06, "loss": 0.2689, "step": 10025 }, { "epoch": 0.86, "learning_rate": 1.01872294636304e-06, "loss": 0.2758, "step": 10026 }, { "epoch": 0.86, "learning_rate": 1.0175024835792202e-06, "loss": 0.2993, "step": 10027 }, { "epoch": 0.86, "learning_rate": 1.0162827131219255e-06, "loss": 0.2344, "step": 10028 }, { "epoch": 0.86, "learning_rate": 1.0150636350851695e-06, "loss": 0.2725, "step": 10029 }, { "epoch": 0.86, "learning_rate": 1.0138452495629125e-06, "loss": 0.262, "step": 10030 }, { "epoch": 0.86, "learning_rate": 1.0126275566490628e-06, "loss": 0.2758, "step": 10031 }, { "epoch": 0.86, "learning_rate": 1.011410556437471e-06, "loss": 0.2301, "step": 10032 }, { "epoch": 0.86, "learning_rate": 1.0101942490219418e-06, "loss": 0.2752, "step": 10033 }, { "epoch": 0.86, "learning_rate": 1.0089786344962194e-06, "loss": 0.2966, "step": 10034 }, { "epoch": 0.86, "learning_rate": 1.0077637129539963e-06, "loss": 0.2946, "step": 10035 }, { "epoch": 0.86, "learning_rate": 1.0065494844889156e-06, "loss": 0.2461, "step": 10036 }, { "epoch": 0.86, "learning_rate": 1.0053359491945624e-06, "loss": 0.2614, "step": 10037 }, { "epoch": 0.86, "learning_rate": 1.004123107164472e-06, "loss": 0.2887, "step": 10038 }, { "epoch": 0.86, "learning_rate": 1.0029109584921193e-06, "loss": 0.2658, "step": 10039 }, { "epoch": 0.86, "learning_rate": 1.0016995032709354e-06, "loss": 0.2348, "step": 10040 }, { "epoch": 0.86, "learning_rate": 1.0004887415942943e-06, "loss": 0.2816, "step": 10041 }, { "epoch": 0.86, "learning_rate": 9.992786735555104e-07, "loss": 0.3228, "step": 10042 }, { "epoch": 0.86, "learning_rate": 9.980692992478524e-07, "loss": 0.287, "step": 10043 }, { "epoch": 0.86, "learning_rate": 9.968606187645336e-07, "loss": 0.2502, "step": 10044 }, { "epoch": 0.86, "learning_rate": 9.956526321987147e-07, "loss": 0.2922, "step": 10045 }, { "epoch": 0.86, "learning_rate": 9.94445339643495e-07, "loss": 0.2318, "step": 10046 }, { "epoch": 0.86, "learning_rate": 9.93238741191932e-07, "loss": 0.3, "step": 10047 }, { "epoch": 0.86, "learning_rate": 9.920328369370258e-07, "loss": 0.2662, "step": 10048 }, { "epoch": 0.86, "learning_rate": 9.908276269717166e-07, "loss": 0.2729, "step": 10049 }, { "epoch": 0.86, "learning_rate": 9.896231113888988e-07, "loss": 0.2654, "step": 10050 }, { "epoch": 0.86, "learning_rate": 9.884192902814094e-07, "loss": 0.5657, "step": 10051 }, { "epoch": 0.86, "learning_rate": 9.87216163742033e-07, "loss": 0.2981, "step": 10052 }, { "epoch": 0.86, "learning_rate": 9.860137318635021e-07, "loss": 0.3122, "step": 10053 }, { "epoch": 0.86, "learning_rate": 9.848119947384937e-07, "loss": 0.2991, "step": 10054 }, { "epoch": 0.86, "learning_rate": 9.836109524596326e-07, "loss": 0.2426, "step": 10055 }, { "epoch": 0.86, "learning_rate": 9.82410605119486e-07, "loss": 0.314, "step": 10056 }, { "epoch": 0.86, "learning_rate": 9.812109528105728e-07, "loss": 0.265, "step": 10057 }, { "epoch": 0.86, "learning_rate": 9.800119956253574e-07, "loss": 0.5778, "step": 10058 }, { "epoch": 0.86, "learning_rate": 9.788137336562464e-07, "loss": 0.2259, "step": 10059 }, { "epoch": 0.86, "learning_rate": 9.776161669955997e-07, "loss": 0.3255, "step": 10060 }, { "epoch": 0.86, "learning_rate": 9.764192957357166e-07, "loss": 0.2555, "step": 10061 }, { "epoch": 0.86, "learning_rate": 9.7522311996885e-07, "loss": 0.2515, "step": 10062 }, { "epoch": 0.86, "learning_rate": 9.740276397871906e-07, "loss": 0.2311, "step": 10063 }, { "epoch": 0.86, "learning_rate": 9.728328552828814e-07, "loss": 0.2904, "step": 10064 }, { "epoch": 0.86, "learning_rate": 9.71638766548012e-07, "loss": 0.2338, "step": 10065 }, { "epoch": 0.86, "learning_rate": 9.704453736746156e-07, "loss": 0.2412, "step": 10066 }, { "epoch": 0.86, "learning_rate": 9.692526767546727e-07, "loss": 0.305, "step": 10067 }, { "epoch": 0.86, "learning_rate": 9.680606758801126e-07, "loss": 0.2756, "step": 10068 }, { "epoch": 0.86, "learning_rate": 9.66869371142809e-07, "loss": 0.2956, "step": 10069 }, { "epoch": 0.86, "learning_rate": 9.656787626345765e-07, "loss": 0.2383, "step": 10070 }, { "epoch": 0.86, "learning_rate": 9.644888504471883e-07, "loss": 0.2682, "step": 10071 }, { "epoch": 0.86, "learning_rate": 9.632996346723522e-07, "loss": 0.2812, "step": 10072 }, { "epoch": 0.86, "learning_rate": 9.62111115401728e-07, "loss": 0.2468, "step": 10073 }, { "epoch": 0.86, "learning_rate": 9.60923292726923e-07, "loss": 0.2803, "step": 10074 }, { "epoch": 0.86, "learning_rate": 9.597361667394867e-07, "loss": 0.2731, "step": 10075 }, { "epoch": 0.86, "learning_rate": 9.585497375309183e-07, "loss": 0.2899, "step": 10076 }, { "epoch": 0.86, "learning_rate": 9.57364005192658e-07, "loss": 0.2965, "step": 10077 }, { "epoch": 0.86, "learning_rate": 9.561789698161007e-07, "loss": 0.2524, "step": 10078 }, { "epoch": 0.86, "learning_rate": 9.54994631492584e-07, "loss": 0.2448, "step": 10079 }, { "epoch": 0.86, "learning_rate": 9.538109903133862e-07, "loss": 0.2676, "step": 10080 }, { "epoch": 0.86, "learning_rate": 9.526280463697391e-07, "loss": 0.2374, "step": 10081 }, { "epoch": 0.86, "learning_rate": 9.514457997528171e-07, "loss": 0.2627, "step": 10082 }, { "epoch": 0.86, "learning_rate": 9.502642505537452e-07, "loss": 0.2521, "step": 10083 }, { "epoch": 0.86, "learning_rate": 9.49083398863585e-07, "loss": 0.2778, "step": 10084 }, { "epoch": 0.86, "learning_rate": 9.479032447733561e-07, "loss": 0.257, "step": 10085 }, { "epoch": 0.86, "learning_rate": 9.467237883740199e-07, "loss": 0.2679, "step": 10086 }, { "epoch": 0.86, "learning_rate": 9.455450297564773e-07, "loss": 0.2492, "step": 10087 }, { "epoch": 0.86, "learning_rate": 9.443669690115853e-07, "loss": 0.2881, "step": 10088 }, { "epoch": 0.86, "learning_rate": 9.431896062301427e-07, "loss": 0.2505, "step": 10089 }, { "epoch": 0.86, "learning_rate": 9.420129415028934e-07, "loss": 0.2717, "step": 10090 }, { "epoch": 0.86, "learning_rate": 9.408369749205303e-07, "loss": 0.2823, "step": 10091 }, { "epoch": 0.87, "learning_rate": 9.396617065736902e-07, "loss": 0.2504, "step": 10092 }, { "epoch": 0.87, "learning_rate": 9.384871365529591e-07, "loss": 0.2822, "step": 10093 }, { "epoch": 0.87, "learning_rate": 9.373132649488636e-07, "loss": 0.2186, "step": 10094 }, { "epoch": 0.87, "learning_rate": 9.361400918518814e-07, "loss": 0.3019, "step": 10095 }, { "epoch": 0.87, "learning_rate": 9.349676173524347e-07, "loss": 0.2479, "step": 10096 }, { "epoch": 0.87, "learning_rate": 9.337958415408932e-07, "loss": 0.2894, "step": 10097 }, { "epoch": 0.87, "learning_rate": 9.326247645075703e-07, "loss": 0.2569, "step": 10098 }, { "epoch": 0.87, "learning_rate": 9.314543863427272e-07, "loss": 0.2856, "step": 10099 }, { "epoch": 0.87, "learning_rate": 9.302847071365739e-07, "loss": 0.2401, "step": 10100 }, { "epoch": 0.87, "learning_rate": 9.291157269792572e-07, "loss": 0.2794, "step": 10101 }, { "epoch": 0.87, "learning_rate": 9.279474459608806e-07, "loss": 0.2654, "step": 10102 }, { "epoch": 0.87, "learning_rate": 9.267798641714887e-07, "loss": 0.6188, "step": 10103 }, { "epoch": 0.87, "learning_rate": 9.256129817010728e-07, "loss": 0.2567, "step": 10104 }, { "epoch": 0.87, "learning_rate": 9.244467986395699e-07, "loss": 0.2742, "step": 10105 }, { "epoch": 0.87, "learning_rate": 9.23281315076865e-07, "loss": 0.2898, "step": 10106 }, { "epoch": 0.87, "learning_rate": 9.221165311027879e-07, "loss": 0.2709, "step": 10107 }, { "epoch": 0.87, "learning_rate": 9.209524468071096e-07, "loss": 0.3021, "step": 10108 }, { "epoch": 0.87, "learning_rate": 9.197890622795603e-07, "loss": 0.278, "step": 10109 }, { "epoch": 0.87, "learning_rate": 9.186263776098014e-07, "loss": 0.2543, "step": 10110 }, { "epoch": 0.87, "learning_rate": 9.174643928874483e-07, "loss": 0.312, "step": 10111 }, { "epoch": 0.87, "learning_rate": 9.163031082020623e-07, "loss": 0.2413, "step": 10112 }, { "epoch": 0.87, "learning_rate": 9.151425236431489e-07, "loss": 0.2113, "step": 10113 }, { "epoch": 0.87, "learning_rate": 9.139826393001617e-07, "loss": 0.2538, "step": 10114 }, { "epoch": 0.87, "learning_rate": 9.128234552624937e-07, "loss": 0.3054, "step": 10115 }, { "epoch": 0.87, "learning_rate": 9.116649716194948e-07, "loss": 0.241, "step": 10116 }, { "epoch": 0.87, "learning_rate": 9.105071884604543e-07, "loss": 0.2518, "step": 10117 }, { "epoch": 0.87, "learning_rate": 9.093501058746057e-07, "loss": 0.2545, "step": 10118 }, { "epoch": 0.87, "learning_rate": 9.081937239511329e-07, "loss": 0.2581, "step": 10119 }, { "epoch": 0.87, "learning_rate": 9.070380427791636e-07, "loss": 0.3092, "step": 10120 }, { "epoch": 0.87, "learning_rate": 9.058830624477754e-07, "loss": 0.2772, "step": 10121 }, { "epoch": 0.87, "learning_rate": 9.047287830459806e-07, "loss": 0.3005, "step": 10122 }, { "epoch": 0.87, "learning_rate": 9.035752046627522e-07, "loss": 0.2443, "step": 10123 }, { "epoch": 0.87, "learning_rate": 9.024223273870014e-07, "loss": 0.3227, "step": 10124 }, { "epoch": 0.87, "learning_rate": 9.012701513075839e-07, "loss": 0.2647, "step": 10125 }, { "epoch": 0.87, "learning_rate": 9.001186765133052e-07, "loss": 0.2624, "step": 10126 }, { "epoch": 0.87, "learning_rate": 8.989679030929155e-07, "loss": 0.2267, "step": 10127 }, { "epoch": 0.87, "learning_rate": 8.978178311351094e-07, "loss": 0.3022, "step": 10128 }, { "epoch": 0.87, "learning_rate": 8.966684607285304e-07, "loss": 0.2643, "step": 10129 }, { "epoch": 0.87, "learning_rate": 8.955197919617653e-07, "loss": 0.2537, "step": 10130 }, { "epoch": 0.87, "learning_rate": 8.943718249233491e-07, "loss": 0.2786, "step": 10131 }, { "epoch": 0.87, "learning_rate": 8.932245597017597e-07, "loss": 0.2454, "step": 10132 }, { "epoch": 0.87, "learning_rate": 8.920779963854231e-07, "loss": 0.2853, "step": 10133 }, { "epoch": 0.87, "learning_rate": 8.909321350627109e-07, "loss": 0.2805, "step": 10134 }, { "epoch": 0.87, "learning_rate": 8.897869758219401e-07, "loss": 0.286, "step": 10135 }, { "epoch": 0.87, "learning_rate": 8.886425187513748e-07, "loss": 0.2698, "step": 10136 }, { "epoch": 0.87, "learning_rate": 8.87498763939223e-07, "loss": 0.2537, "step": 10137 }, { "epoch": 0.87, "learning_rate": 8.863557114736432e-07, "loss": 0.2933, "step": 10138 }, { "epoch": 0.87, "learning_rate": 8.852133614427306e-07, "loss": 0.2758, "step": 10139 }, { "epoch": 0.87, "learning_rate": 8.840717139345345e-07, "loss": 0.2505, "step": 10140 }, { "epoch": 0.87, "learning_rate": 8.829307690370481e-07, "loss": 0.2435, "step": 10141 }, { "epoch": 0.87, "learning_rate": 8.817905268382088e-07, "loss": 0.2628, "step": 10142 }, { "epoch": 0.87, "learning_rate": 8.806509874259017e-07, "loss": 0.2493, "step": 10143 }, { "epoch": 0.87, "learning_rate": 8.795121508879567e-07, "loss": 0.2852, "step": 10144 }, { "epoch": 0.87, "learning_rate": 8.783740173121502e-07, "loss": 0.2282, "step": 10145 }, { "epoch": 0.87, "learning_rate": 8.772365867862021e-07, "loss": 0.262, "step": 10146 }, { "epoch": 0.87, "learning_rate": 8.76099859397781e-07, "loss": 0.2187, "step": 10147 }, { "epoch": 0.87, "learning_rate": 8.749638352345002e-07, "loss": 0.2646, "step": 10148 }, { "epoch": 0.87, "learning_rate": 8.738285143839198e-07, "loss": 0.256, "step": 10149 }, { "epoch": 0.87, "learning_rate": 8.726938969335419e-07, "loss": 0.287, "step": 10150 }, { "epoch": 0.87, "learning_rate": 8.715599829708199e-07, "loss": 0.2524, "step": 10151 }, { "epoch": 0.87, "learning_rate": 8.704267725831517e-07, "loss": 0.3007, "step": 10152 }, { "epoch": 0.87, "learning_rate": 8.69294265857874e-07, "loss": 0.253, "step": 10153 }, { "epoch": 0.87, "learning_rate": 8.681624628822794e-07, "loss": 0.2729, "step": 10154 }, { "epoch": 0.87, "learning_rate": 8.670313637436012e-07, "loss": 0.2668, "step": 10155 }, { "epoch": 0.87, "learning_rate": 8.659009685290177e-07, "loss": 0.3208, "step": 10156 }, { "epoch": 0.87, "learning_rate": 8.647712773256545e-07, "loss": 0.2519, "step": 10157 }, { "epoch": 0.87, "learning_rate": 8.636422902205821e-07, "loss": 0.2219, "step": 10158 }, { "epoch": 0.87, "learning_rate": 8.625140073008209e-07, "loss": 0.2719, "step": 10159 }, { "epoch": 0.87, "learning_rate": 8.613864286533257e-07, "loss": 0.2439, "step": 10160 }, { "epoch": 0.87, "learning_rate": 8.602595543650117e-07, "loss": 0.28, "step": 10161 }, { "epoch": 0.87, "learning_rate": 8.591333845227312e-07, "loss": 0.2411, "step": 10162 }, { "epoch": 0.87, "learning_rate": 8.580079192132829e-07, "loss": 0.2894, "step": 10163 }, { "epoch": 0.87, "learning_rate": 8.568831585234116e-07, "loss": 0.2717, "step": 10164 }, { "epoch": 0.87, "learning_rate": 8.557591025398082e-07, "loss": 0.2485, "step": 10165 }, { "epoch": 0.87, "learning_rate": 8.54635751349111e-07, "loss": 0.2521, "step": 10166 }, { "epoch": 0.87, "learning_rate": 8.535131050379019e-07, "loss": 0.2629, "step": 10167 }, { "epoch": 0.87, "learning_rate": 8.523911636927074e-07, "loss": 0.2896, "step": 10168 }, { "epoch": 0.87, "learning_rate": 8.512699274000047e-07, "loss": 0.2623, "step": 10169 }, { "epoch": 0.87, "learning_rate": 8.501493962462092e-07, "loss": 0.3315, "step": 10170 }, { "epoch": 0.87, "learning_rate": 8.490295703176876e-07, "loss": 0.2733, "step": 10171 }, { "epoch": 0.87, "learning_rate": 8.479104497007496e-07, "loss": 0.2975, "step": 10172 }, { "epoch": 0.87, "learning_rate": 8.467920344816538e-07, "loss": 0.2535, "step": 10173 }, { "epoch": 0.87, "learning_rate": 8.456743247465992e-07, "loss": 0.2791, "step": 10174 }, { "epoch": 0.87, "learning_rate": 8.445573205817359e-07, "loss": 0.2787, "step": 10175 }, { "epoch": 0.87, "learning_rate": 8.434410220731571e-07, "loss": 0.2979, "step": 10176 }, { "epoch": 0.87, "learning_rate": 8.423254293068983e-07, "loss": 0.2758, "step": 10177 }, { "epoch": 0.87, "learning_rate": 8.412105423689465e-07, "loss": 0.2809, "step": 10178 }, { "epoch": 0.87, "learning_rate": 8.400963613452307e-07, "loss": 0.2643, "step": 10179 }, { "epoch": 0.87, "learning_rate": 8.389828863216276e-07, "loss": 0.2905, "step": 10180 }, { "epoch": 0.87, "learning_rate": 8.378701173839577e-07, "loss": 0.2662, "step": 10181 }, { "epoch": 0.87, "learning_rate": 8.367580546179877e-07, "loss": 0.2819, "step": 10182 }, { "epoch": 0.87, "learning_rate": 8.356466981094313e-07, "loss": 0.2352, "step": 10183 }, { "epoch": 0.87, "learning_rate": 8.345360479439435e-07, "loss": 0.2591, "step": 10184 }, { "epoch": 0.87, "learning_rate": 8.33426104207129e-07, "loss": 0.2668, "step": 10185 }, { "epoch": 0.87, "learning_rate": 8.323168669845383e-07, "loss": 0.2708, "step": 10186 }, { "epoch": 0.87, "learning_rate": 8.31208336361663e-07, "loss": 0.2935, "step": 10187 }, { "epoch": 0.87, "learning_rate": 8.301005124239459e-07, "loss": 0.2203, "step": 10188 }, { "epoch": 0.87, "learning_rate": 8.289933952567708e-07, "loss": 0.2464, "step": 10189 }, { "epoch": 0.87, "learning_rate": 8.278869849454718e-07, "loss": 0.2709, "step": 10190 }, { "epoch": 0.87, "learning_rate": 8.267812815753195e-07, "loss": 0.236, "step": 10191 }, { "epoch": 0.87, "learning_rate": 8.256762852315414e-07, "loss": 0.2982, "step": 10192 }, { "epoch": 0.87, "learning_rate": 8.245719959993049e-07, "loss": 0.3003, "step": 10193 }, { "epoch": 0.87, "learning_rate": 8.234684139637205e-07, "loss": 0.2906, "step": 10194 }, { "epoch": 0.87, "learning_rate": 8.223655392098484e-07, "loss": 0.2953, "step": 10195 }, { "epoch": 0.87, "learning_rate": 8.212633718226926e-07, "loss": 0.2431, "step": 10196 }, { "epoch": 0.87, "learning_rate": 8.201619118872039e-07, "loss": 0.2515, "step": 10197 }, { "epoch": 0.87, "learning_rate": 8.190611594882736e-07, "loss": 0.2704, "step": 10198 }, { "epoch": 0.87, "learning_rate": 8.179611147107458e-07, "loss": 0.2585, "step": 10199 }, { "epoch": 0.87, "learning_rate": 8.168617776394072e-07, "loss": 0.2499, "step": 10200 }, { "epoch": 0.87, "learning_rate": 8.157631483589856e-07, "loss": 0.2742, "step": 10201 }, { "epoch": 0.87, "learning_rate": 8.146652269541599e-07, "loss": 0.283, "step": 10202 }, { "epoch": 0.87, "learning_rate": 8.135680135095525e-07, "loss": 0.2634, "step": 10203 }, { "epoch": 0.87, "learning_rate": 8.124715081097323e-07, "loss": 0.2778, "step": 10204 }, { "epoch": 0.87, "learning_rate": 8.113757108392095e-07, "loss": 0.3146, "step": 10205 }, { "epoch": 0.87, "learning_rate": 8.102806217824455e-07, "loss": 0.2761, "step": 10206 }, { "epoch": 0.87, "learning_rate": 8.09186241023846e-07, "loss": 0.3047, "step": 10207 }, { "epoch": 0.88, "learning_rate": 8.080925686477548e-07, "loss": 0.3096, "step": 10208 }, { "epoch": 0.88, "learning_rate": 8.06999604738471e-07, "loss": 0.597, "step": 10209 }, { "epoch": 0.88, "learning_rate": 8.059073493802327e-07, "loss": 0.2451, "step": 10210 }, { "epoch": 0.88, "learning_rate": 8.048158026572272e-07, "loss": 0.2445, "step": 10211 }, { "epoch": 0.88, "learning_rate": 8.037249646535838e-07, "loss": 0.2629, "step": 10212 }, { "epoch": 0.88, "learning_rate": 8.026348354533808e-07, "loss": 0.3062, "step": 10213 }, { "epoch": 0.88, "learning_rate": 8.01545415140641e-07, "loss": 0.3006, "step": 10214 }, { "epoch": 0.88, "learning_rate": 8.004567037993282e-07, "loss": 0.2589, "step": 10215 }, { "epoch": 0.88, "learning_rate": 7.993687015133567e-07, "loss": 0.2894, "step": 10216 }, { "epoch": 0.88, "learning_rate": 7.982814083665825e-07, "loss": 0.2858, "step": 10217 }, { "epoch": 0.88, "learning_rate": 7.971948244428118e-07, "loss": 0.3172, "step": 10218 }, { "epoch": 0.88, "learning_rate": 7.961089498257912e-07, "loss": 0.2963, "step": 10219 }, { "epoch": 0.88, "learning_rate": 7.950237845992148e-07, "loss": 0.2882, "step": 10220 }, { "epoch": 0.88, "learning_rate": 7.939393288467234e-07, "loss": 0.3141, "step": 10221 }, { "epoch": 0.88, "learning_rate": 7.928555826518991e-07, "loss": 0.2718, "step": 10222 }, { "epoch": 0.88, "learning_rate": 7.917725460982717e-07, "loss": 0.2129, "step": 10223 }, { "epoch": 0.88, "learning_rate": 7.906902192693177e-07, "loss": 0.273, "step": 10224 }, { "epoch": 0.88, "learning_rate": 7.896086022484572e-07, "loss": 0.2366, "step": 10225 }, { "epoch": 0.88, "learning_rate": 7.885276951190568e-07, "loss": 0.2632, "step": 10226 }, { "epoch": 0.88, "learning_rate": 7.874474979644254e-07, "loss": 0.2374, "step": 10227 }, { "epoch": 0.88, "learning_rate": 7.863680108678218e-07, "loss": 0.3135, "step": 10228 }, { "epoch": 0.88, "learning_rate": 7.85289233912444e-07, "loss": 0.2545, "step": 10229 }, { "epoch": 0.88, "learning_rate": 7.842111671814401e-07, "loss": 0.2639, "step": 10230 }, { "epoch": 0.88, "learning_rate": 7.831338107579056e-07, "loss": 0.267, "step": 10231 }, { "epoch": 0.88, "learning_rate": 7.820571647248732e-07, "loss": 0.2482, "step": 10232 }, { "epoch": 0.88, "learning_rate": 7.809812291653285e-07, "loss": 0.262, "step": 10233 }, { "epoch": 0.88, "learning_rate": 7.799060041621975e-07, "loss": 0.2926, "step": 10234 }, { "epoch": 0.88, "learning_rate": 7.788314897983551e-07, "loss": 0.2347, "step": 10235 }, { "epoch": 0.88, "learning_rate": 7.777576861566149e-07, "loss": 0.2864, "step": 10236 }, { "epoch": 0.88, "learning_rate": 7.766845933197464e-07, "loss": 0.2698, "step": 10237 }, { "epoch": 0.88, "learning_rate": 7.756122113704567e-07, "loss": 0.2802, "step": 10238 }, { "epoch": 0.88, "learning_rate": 7.745405403913975e-07, "loss": 0.3302, "step": 10239 }, { "epoch": 0.88, "learning_rate": 7.734695804651693e-07, "loss": 0.2688, "step": 10240 }, { "epoch": 0.88, "learning_rate": 7.72399331674315e-07, "loss": 0.2608, "step": 10241 }, { "epoch": 0.88, "learning_rate": 7.713297941013264e-07, "loss": 0.2874, "step": 10242 }, { "epoch": 0.88, "learning_rate": 7.702609678286366e-07, "loss": 0.2923, "step": 10243 }, { "epoch": 0.88, "learning_rate": 7.691928529386261e-07, "loss": 0.2902, "step": 10244 }, { "epoch": 0.88, "learning_rate": 7.681254495136214e-07, "loss": 0.2681, "step": 10245 }, { "epoch": 0.88, "learning_rate": 7.670587576358889e-07, "loss": 0.3006, "step": 10246 }, { "epoch": 0.88, "learning_rate": 7.659927773876474e-07, "loss": 0.2382, "step": 10247 }, { "epoch": 0.88, "learning_rate": 7.649275088510544e-07, "loss": 0.295, "step": 10248 }, { "epoch": 0.88, "learning_rate": 7.638629521082186e-07, "loss": 0.2173, "step": 10249 }, { "epoch": 0.88, "learning_rate": 7.627991072411889e-07, "loss": 0.3004, "step": 10250 }, { "epoch": 0.88, "learning_rate": 7.617359743319608e-07, "loss": 0.2554, "step": 10251 }, { "epoch": 0.88, "learning_rate": 7.606735534624787e-07, "loss": 0.2717, "step": 10252 }, { "epoch": 0.88, "learning_rate": 7.59611844714625e-07, "loss": 0.3234, "step": 10253 }, { "epoch": 0.88, "learning_rate": 7.585508481702308e-07, "loss": 0.2795, "step": 10254 }, { "epoch": 0.88, "learning_rate": 7.574905639110752e-07, "loss": 0.3218, "step": 10255 }, { "epoch": 0.88, "learning_rate": 7.564309920188784e-07, "loss": 0.2847, "step": 10256 }, { "epoch": 0.88, "learning_rate": 7.553721325753061e-07, "loss": 0.2843, "step": 10257 }, { "epoch": 0.88, "learning_rate": 7.543139856619708e-07, "loss": 0.3107, "step": 10258 }, { "epoch": 0.88, "learning_rate": 7.532565513604306e-07, "loss": 0.27, "step": 10259 }, { "epoch": 0.88, "learning_rate": 7.521998297521848e-07, "loss": 0.2729, "step": 10260 }, { "epoch": 0.88, "learning_rate": 7.511438209186817e-07, "loss": 0.2521, "step": 10261 }, { "epoch": 0.88, "learning_rate": 7.500885249413126e-07, "loss": 0.267, "step": 10262 }, { "epoch": 0.88, "learning_rate": 7.490339419014147e-07, "loss": 0.2605, "step": 10263 }, { "epoch": 0.88, "learning_rate": 7.47980071880271e-07, "loss": 0.2557, "step": 10264 }, { "epoch": 0.88, "learning_rate": 7.469269149591074e-07, "loss": 0.2662, "step": 10265 }, { "epoch": 0.88, "learning_rate": 7.45874471219098e-07, "loss": 0.2609, "step": 10266 }, { "epoch": 0.88, "learning_rate": 7.448227407413566e-07, "loss": 0.2703, "step": 10267 }, { "epoch": 0.88, "learning_rate": 7.437717236069453e-07, "loss": 0.2714, "step": 10268 }, { "epoch": 0.88, "learning_rate": 7.427214198968769e-07, "loss": 0.2731, "step": 10269 }, { "epoch": 0.88, "learning_rate": 7.416718296920977e-07, "loss": 0.233, "step": 10270 }, { "epoch": 0.88, "learning_rate": 7.406229530735076e-07, "loss": 0.3277, "step": 10271 }, { "epoch": 0.88, "learning_rate": 7.395747901219474e-07, "loss": 0.3146, "step": 10272 }, { "epoch": 0.88, "learning_rate": 7.385273409182081e-07, "loss": 0.2561, "step": 10273 }, { "epoch": 0.88, "learning_rate": 7.37480605543015e-07, "loss": 0.3088, "step": 10274 }, { "epoch": 0.88, "learning_rate": 7.364345840770515e-07, "loss": 0.2448, "step": 10275 }, { "epoch": 0.88, "learning_rate": 7.353892766009396e-07, "loss": 0.2888, "step": 10276 }, { "epoch": 0.88, "learning_rate": 7.343446831952428e-07, "loss": 0.6007, "step": 10277 }, { "epoch": 0.88, "learning_rate": 7.333008039404743e-07, "loss": 0.2688, "step": 10278 }, { "epoch": 0.88, "learning_rate": 7.322576389170921e-07, "loss": 0.2797, "step": 10279 }, { "epoch": 0.88, "learning_rate": 7.312151882054974e-07, "loss": 0.3083, "step": 10280 }, { "epoch": 0.88, "learning_rate": 7.301734518860381e-07, "loss": 0.3824, "step": 10281 }, { "epoch": 0.88, "learning_rate": 7.291324300390057e-07, "loss": 0.2395, "step": 10282 }, { "epoch": 0.88, "learning_rate": 7.280921227446381e-07, "loss": 0.3444, "step": 10283 }, { "epoch": 0.88, "learning_rate": 7.270525300831133e-07, "loss": 0.3143, "step": 10284 }, { "epoch": 0.88, "learning_rate": 7.260136521345618e-07, "loss": 0.272, "step": 10285 }, { "epoch": 0.88, "learning_rate": 7.249754889790539e-07, "loss": 0.2831, "step": 10286 }, { "epoch": 0.88, "learning_rate": 7.239380406966057e-07, "loss": 0.5676, "step": 10287 }, { "epoch": 0.88, "learning_rate": 7.229013073671787e-07, "loss": 0.2584, "step": 10288 }, { "epoch": 0.88, "learning_rate": 7.218652890706801e-07, "loss": 0.2538, "step": 10289 }, { "epoch": 0.88, "learning_rate": 7.208299858869616e-07, "loss": 0.2994, "step": 10290 }, { "epoch": 0.88, "learning_rate": 7.197953978958161e-07, "loss": 0.2445, "step": 10291 }, { "epoch": 0.88, "learning_rate": 7.187615251769875e-07, "loss": 0.2707, "step": 10292 }, { "epoch": 0.88, "learning_rate": 7.177283678101598e-07, "loss": 0.3119, "step": 10293 }, { "epoch": 0.88, "learning_rate": 7.16695925874964e-07, "loss": 0.3141, "step": 10294 }, { "epoch": 0.88, "learning_rate": 7.156641994509772e-07, "loss": 0.2675, "step": 10295 }, { "epoch": 0.88, "learning_rate": 7.146331886177183e-07, "loss": 0.291, "step": 10296 }, { "epoch": 0.88, "learning_rate": 7.136028934546546e-07, "loss": 0.2991, "step": 10297 }, { "epoch": 0.88, "learning_rate": 7.125733140411928e-07, "loss": 0.545, "step": 10298 }, { "epoch": 0.88, "learning_rate": 7.115444504566882e-07, "loss": 0.2623, "step": 10299 }, { "epoch": 0.88, "learning_rate": 7.105163027804429e-07, "loss": 0.2701, "step": 10300 }, { "epoch": 0.88, "learning_rate": 7.094888710917003e-07, "loss": 0.3021, "step": 10301 }, { "epoch": 0.88, "learning_rate": 7.084621554696502e-07, "loss": 0.2534, "step": 10302 }, { "epoch": 0.88, "learning_rate": 7.074361559934251e-07, "loss": 0.2833, "step": 10303 }, { "epoch": 0.88, "learning_rate": 7.064108727421082e-07, "loss": 0.2531, "step": 10304 }, { "epoch": 0.88, "learning_rate": 7.053863057947175e-07, "loss": 0.2766, "step": 10305 }, { "epoch": 0.88, "learning_rate": 7.043624552302231e-07, "loss": 0.2597, "step": 10306 }, { "epoch": 0.88, "learning_rate": 7.03339321127543e-07, "loss": 0.6469, "step": 10307 }, { "epoch": 0.88, "learning_rate": 7.023169035655297e-07, "loss": 0.2501, "step": 10308 }, { "epoch": 0.88, "learning_rate": 7.012952026229892e-07, "loss": 0.267, "step": 10309 }, { "epoch": 0.88, "learning_rate": 7.002742183786671e-07, "loss": 0.275, "step": 10310 }, { "epoch": 0.88, "learning_rate": 6.992539509112595e-07, "loss": 0.3052, "step": 10311 }, { "epoch": 0.88, "learning_rate": 6.98234400299399e-07, "loss": 0.269, "step": 10312 }, { "epoch": 0.88, "learning_rate": 6.972155666216684e-07, "loss": 0.2802, "step": 10313 }, { "epoch": 0.88, "learning_rate": 6.961974499565982e-07, "loss": 0.2859, "step": 10314 }, { "epoch": 0.88, "learning_rate": 6.951800503826556e-07, "loss": 0.2609, "step": 10315 }, { "epoch": 0.88, "learning_rate": 6.941633679782578e-07, "loss": 0.2773, "step": 10316 }, { "epoch": 0.88, "learning_rate": 6.931474028217666e-07, "loss": 0.2495, "step": 10317 }, { "epoch": 0.88, "learning_rate": 6.921321549914872e-07, "loss": 0.2581, "step": 10318 }, { "epoch": 0.88, "learning_rate": 6.911176245656693e-07, "loss": 0.3005, "step": 10319 }, { "epoch": 0.88, "learning_rate": 6.90103811622509e-07, "loss": 0.2834, "step": 10320 }, { "epoch": 0.88, "learning_rate": 6.890907162401473e-07, "loss": 0.2974, "step": 10321 }, { "epoch": 0.88, "learning_rate": 6.880783384966638e-07, "loss": 0.2846, "step": 10322 }, { "epoch": 0.88, "learning_rate": 6.870666784700908e-07, "loss": 0.2733, "step": 10323 }, { "epoch": 0.88, "learning_rate": 6.860557362384024e-07, "loss": 0.3167, "step": 10324 }, { "epoch": 0.89, "learning_rate": 6.850455118795152e-07, "loss": 0.2252, "step": 10325 }, { "epoch": 0.89, "learning_rate": 6.840360054712946e-07, "loss": 0.2889, "step": 10326 }, { "epoch": 0.89, "learning_rate": 6.830272170915464e-07, "loss": 0.283, "step": 10327 }, { "epoch": 0.89, "learning_rate": 6.82019146818026e-07, "loss": 0.2692, "step": 10328 }, { "epoch": 0.89, "learning_rate": 6.810117947284256e-07, "loss": 0.306, "step": 10329 }, { "epoch": 0.89, "learning_rate": 6.800051609003911e-07, "loss": 0.2507, "step": 10330 }, { "epoch": 0.89, "learning_rate": 6.789992454115058e-07, "loss": 0.2523, "step": 10331 }, { "epoch": 0.89, "learning_rate": 6.779940483393033e-07, "loss": 0.2793, "step": 10332 }, { "epoch": 0.89, "learning_rate": 6.769895697612572e-07, "loss": 0.2893, "step": 10333 }, { "epoch": 0.89, "learning_rate": 6.7598580975479e-07, "loss": 0.2636, "step": 10334 }, { "epoch": 0.89, "learning_rate": 6.749827683972654e-07, "loss": 0.2209, "step": 10335 }, { "epoch": 0.89, "learning_rate": 6.739804457659927e-07, "loss": 0.2809, "step": 10336 }, { "epoch": 0.89, "learning_rate": 6.729788419382255e-07, "loss": 0.254, "step": 10337 }, { "epoch": 0.89, "learning_rate": 6.719779569911622e-07, "loss": 0.2605, "step": 10338 }, { "epoch": 0.89, "learning_rate": 6.709777910019477e-07, "loss": 0.2521, "step": 10339 }, { "epoch": 0.89, "learning_rate": 6.699783440476681e-07, "loss": 0.2465, "step": 10340 }, { "epoch": 0.89, "learning_rate": 6.689796162053575e-07, "loss": 0.2467, "step": 10341 }, { "epoch": 0.89, "learning_rate": 6.67981607551994e-07, "loss": 0.6031, "step": 10342 }, { "epoch": 0.89, "learning_rate": 6.669843181644953e-07, "loss": 0.265, "step": 10343 }, { "epoch": 0.89, "learning_rate": 6.659877481197285e-07, "loss": 0.2589, "step": 10344 }, { "epoch": 0.89, "learning_rate": 6.64991897494508e-07, "loss": 0.2261, "step": 10345 }, { "epoch": 0.89, "learning_rate": 6.639967663655844e-07, "loss": 0.2673, "step": 10346 }, { "epoch": 0.89, "learning_rate": 6.630023548096609e-07, "loss": 0.2745, "step": 10347 }, { "epoch": 0.89, "learning_rate": 6.620086629033795e-07, "loss": 0.296, "step": 10348 }, { "epoch": 0.89, "learning_rate": 6.610156907233312e-07, "loss": 0.2382, "step": 10349 }, { "epoch": 0.89, "learning_rate": 6.600234383460469e-07, "loss": 0.2868, "step": 10350 }, { "epoch": 0.89, "learning_rate": 6.590319058480044e-07, "loss": 0.3128, "step": 10351 }, { "epoch": 0.89, "learning_rate": 6.580410933056314e-07, "loss": 0.2762, "step": 10352 }, { "epoch": 0.89, "learning_rate": 6.57051000795288e-07, "loss": 0.3168, "step": 10353 }, { "epoch": 0.89, "learning_rate": 6.560616283932897e-07, "loss": 0.2469, "step": 10354 }, { "epoch": 0.89, "learning_rate": 6.550729761758901e-07, "loss": 0.2615, "step": 10355 }, { "epoch": 0.89, "learning_rate": 6.540850442192926e-07, "loss": 0.2931, "step": 10356 }, { "epoch": 0.89, "learning_rate": 6.530978325996385e-07, "loss": 0.2956, "step": 10357 }, { "epoch": 0.89, "learning_rate": 6.521113413930202e-07, "loss": 0.2905, "step": 10358 }, { "epoch": 0.89, "learning_rate": 6.511255706754715e-07, "loss": 0.2384, "step": 10359 }, { "epoch": 0.89, "learning_rate": 6.501405205229683e-07, "loss": 0.2676, "step": 10360 }, { "epoch": 0.89, "learning_rate": 6.491561910114352e-07, "loss": 0.3158, "step": 10361 }, { "epoch": 0.89, "learning_rate": 6.481725822167384e-07, "loss": 0.2765, "step": 10362 }, { "epoch": 0.89, "learning_rate": 6.471896942146905e-07, "loss": 0.2473, "step": 10363 }, { "epoch": 0.89, "learning_rate": 6.462075270810475e-07, "loss": 0.263, "step": 10364 }, { "epoch": 0.89, "learning_rate": 6.4522608089151e-07, "loss": 0.2588, "step": 10365 }, { "epoch": 0.89, "learning_rate": 6.442453557217243e-07, "loss": 0.2292, "step": 10366 }, { "epoch": 0.89, "learning_rate": 6.432653516472765e-07, "loss": 0.3415, "step": 10367 }, { "epoch": 0.89, "learning_rate": 6.422860687437027e-07, "loss": 0.2875, "step": 10368 }, { "epoch": 0.89, "learning_rate": 6.413075070864805e-07, "loss": 0.2634, "step": 10369 }, { "epoch": 0.89, "learning_rate": 6.403296667510339e-07, "loss": 0.3031, "step": 10370 }, { "epoch": 0.89, "learning_rate": 6.393525478127282e-07, "loss": 0.2927, "step": 10371 }, { "epoch": 0.89, "learning_rate": 6.383761503468766e-07, "loss": 0.2823, "step": 10372 }, { "epoch": 0.89, "learning_rate": 6.374004744287354e-07, "loss": 0.2841, "step": 10373 }, { "epoch": 0.89, "learning_rate": 6.364255201335013e-07, "loss": 0.2617, "step": 10374 }, { "epoch": 0.89, "learning_rate": 6.354512875363228e-07, "loss": 0.3224, "step": 10375 }, { "epoch": 0.89, "learning_rate": 6.344777767122867e-07, "loss": 0.2996, "step": 10376 }, { "epoch": 0.89, "learning_rate": 6.335049877364264e-07, "loss": 0.2654, "step": 10377 }, { "epoch": 0.89, "learning_rate": 6.325329206837217e-07, "loss": 0.2483, "step": 10378 }, { "epoch": 0.89, "learning_rate": 6.315615756290927e-07, "loss": 0.2231, "step": 10379 }, { "epoch": 0.89, "learning_rate": 6.305909526474085e-07, "loss": 0.265, "step": 10380 }, { "epoch": 0.89, "learning_rate": 6.296210518134771e-07, "loss": 0.2485, "step": 10381 }, { "epoch": 0.89, "learning_rate": 6.286518732020519e-07, "loss": 0.238, "step": 10382 }, { "epoch": 0.89, "learning_rate": 6.276834168878388e-07, "loss": 0.2448, "step": 10383 }, { "epoch": 0.89, "learning_rate": 6.267156829454768e-07, "loss": 0.3331, "step": 10384 }, { "epoch": 0.89, "learning_rate": 6.257486714495542e-07, "loss": 0.2851, "step": 10385 }, { "epoch": 0.89, "learning_rate": 6.247823824746058e-07, "loss": 0.2515, "step": 10386 }, { "epoch": 0.89, "learning_rate": 6.238168160951075e-07, "loss": 0.3083, "step": 10387 }, { "epoch": 0.89, "learning_rate": 6.228519723854787e-07, "loss": 0.2543, "step": 10388 }, { "epoch": 0.89, "learning_rate": 6.218878514200843e-07, "loss": 0.2869, "step": 10389 }, { "epoch": 0.89, "learning_rate": 6.209244532732394e-07, "loss": 0.259, "step": 10390 }, { "epoch": 0.89, "learning_rate": 6.199617780191924e-07, "loss": 0.2486, "step": 10391 }, { "epoch": 0.89, "learning_rate": 6.189998257321428e-07, "loss": 0.2713, "step": 10392 }, { "epoch": 0.89, "learning_rate": 6.180385964862346e-07, "loss": 0.3004, "step": 10393 }, { "epoch": 0.89, "learning_rate": 6.170780903555529e-07, "loss": 0.237, "step": 10394 }, { "epoch": 0.89, "learning_rate": 6.161183074141319e-07, "loss": 0.2733, "step": 10395 }, { "epoch": 0.89, "learning_rate": 6.151592477359413e-07, "loss": 0.5713, "step": 10396 }, { "epoch": 0.89, "learning_rate": 6.142009113949076e-07, "loss": 0.2364, "step": 10397 }, { "epoch": 0.89, "learning_rate": 6.132432984648895e-07, "loss": 0.2567, "step": 10398 }, { "epoch": 0.89, "learning_rate": 6.122864090196956e-07, "loss": 0.6224, "step": 10399 }, { "epoch": 0.89, "learning_rate": 6.113302431330803e-07, "loss": 0.2811, "step": 10400 }, { "epoch": 0.89, "learning_rate": 6.10374800878738e-07, "loss": 0.3021, "step": 10401 }, { "epoch": 0.89, "learning_rate": 6.09420082330312e-07, "loss": 0.3235, "step": 10402 }, { "epoch": 0.89, "learning_rate": 6.084660875613846e-07, "loss": 0.2883, "step": 10403 }, { "epoch": 0.89, "learning_rate": 6.07512816645488e-07, "loss": 0.2834, "step": 10404 }, { "epoch": 0.89, "learning_rate": 6.065602696560924e-07, "loss": 0.2847, "step": 10405 }, { "epoch": 0.89, "learning_rate": 6.056084466666167e-07, "loss": 0.2787, "step": 10406 }, { "epoch": 0.89, "learning_rate": 6.046573477504236e-07, "loss": 0.2586, "step": 10407 }, { "epoch": 0.89, "learning_rate": 6.037069729808188e-07, "loss": 0.2845, "step": 10408 }, { "epoch": 0.89, "learning_rate": 6.027573224310501e-07, "loss": 0.2635, "step": 10409 }, { "epoch": 0.89, "learning_rate": 6.01808396174316e-07, "loss": 0.2975, "step": 10410 }, { "epoch": 0.89, "learning_rate": 6.008601942837533e-07, "loss": 0.2719, "step": 10411 }, { "epoch": 0.89, "learning_rate": 5.999127168324425e-07, "loss": 0.3004, "step": 10412 }, { "epoch": 0.89, "learning_rate": 5.989659638934131e-07, "loss": 0.2484, "step": 10413 }, { "epoch": 0.89, "learning_rate": 5.980199355396343e-07, "loss": 0.2873, "step": 10414 }, { "epoch": 0.89, "learning_rate": 5.970746318440235e-07, "loss": 0.2653, "step": 10415 }, { "epoch": 0.89, "learning_rate": 5.961300528794378e-07, "loss": 0.267, "step": 10416 }, { "epoch": 0.89, "learning_rate": 5.951861987186824e-07, "loss": 0.2244, "step": 10417 }, { "epoch": 0.89, "learning_rate": 5.942430694345058e-07, "loss": 0.267, "step": 10418 }, { "epoch": 0.89, "learning_rate": 5.933006650995965e-07, "loss": 0.2414, "step": 10419 }, { "epoch": 0.89, "learning_rate": 5.923589857865908e-07, "loss": 0.258, "step": 10420 }, { "epoch": 0.89, "learning_rate": 5.914180315680729e-07, "loss": 0.2464, "step": 10421 }, { "epoch": 0.89, "learning_rate": 5.904778025165614e-07, "loss": 0.2617, "step": 10422 }, { "epoch": 0.89, "learning_rate": 5.895382987045284e-07, "loss": 0.2484, "step": 10423 }, { "epoch": 0.89, "learning_rate": 5.885995202043848e-07, "loss": 0.2515, "step": 10424 }, { "epoch": 0.89, "learning_rate": 5.876614670884873e-07, "loss": 0.2789, "step": 10425 }, { "epoch": 0.89, "learning_rate": 5.867241394291356e-07, "loss": 0.2703, "step": 10426 }, { "epoch": 0.89, "learning_rate": 5.857875372985733e-07, "loss": 0.2812, "step": 10427 }, { "epoch": 0.89, "learning_rate": 5.848516607689947e-07, "loss": 0.2932, "step": 10428 }, { "epoch": 0.89, "learning_rate": 5.839165099125254e-07, "loss": 0.3059, "step": 10429 }, { "epoch": 0.89, "learning_rate": 5.829820848012457e-07, "loss": 0.3168, "step": 10430 }, { "epoch": 0.89, "learning_rate": 5.820483855071768e-07, "loss": 0.2957, "step": 10431 }, { "epoch": 0.89, "learning_rate": 5.811154121022821e-07, "loss": 0.2262, "step": 10432 }, { "epoch": 0.89, "learning_rate": 5.801831646584732e-07, "loss": 0.2581, "step": 10433 }, { "epoch": 0.89, "learning_rate": 5.79251643247598e-07, "loss": 0.2501, "step": 10434 }, { "epoch": 0.89, "learning_rate": 5.783208479414603e-07, "loss": 0.2808, "step": 10435 }, { "epoch": 0.89, "learning_rate": 5.77390778811796e-07, "loss": 0.6025, "step": 10436 }, { "epoch": 0.89, "learning_rate": 5.764614359302912e-07, "loss": 0.2513, "step": 10437 }, { "epoch": 0.89, "learning_rate": 5.755328193685772e-07, "loss": 0.2607, "step": 10438 }, { "epoch": 0.89, "learning_rate": 5.746049291982247e-07, "loss": 0.2401, "step": 10439 }, { "epoch": 0.89, "learning_rate": 5.736777654907532e-07, "loss": 0.278, "step": 10440 }, { "epoch": 0.89, "learning_rate": 5.727513283176222e-07, "loss": 0.2461, "step": 10441 }, { "epoch": 0.9, "learning_rate": 5.718256177502379e-07, "loss": 0.2544, "step": 10442 }, { "epoch": 0.9, "learning_rate": 5.709006338599488e-07, "loss": 0.2806, "step": 10443 }, { "epoch": 0.9, "learning_rate": 5.699763767180477e-07, "loss": 0.2455, "step": 10444 }, { "epoch": 0.9, "learning_rate": 5.690528463957734e-07, "loss": 0.2767, "step": 10445 }, { "epoch": 0.9, "learning_rate": 5.681300429643044e-07, "loss": 0.2718, "step": 10446 }, { "epoch": 0.9, "learning_rate": 5.672079664947683e-07, "loss": 0.6139, "step": 10447 }, { "epoch": 0.9, "learning_rate": 5.662866170582338e-07, "loss": 0.2974, "step": 10448 }, { "epoch": 0.9, "learning_rate": 5.653659947257139e-07, "loss": 0.2699, "step": 10449 }, { "epoch": 0.9, "learning_rate": 5.644460995681644e-07, "loss": 0.2922, "step": 10450 }, { "epoch": 0.9, "learning_rate": 5.635269316564873e-07, "loss": 0.2414, "step": 10451 }, { "epoch": 0.9, "learning_rate": 5.62608491061527e-07, "loss": 0.2637, "step": 10452 }, { "epoch": 0.9, "learning_rate": 5.616907778540726e-07, "loss": 0.2783, "step": 10453 }, { "epoch": 0.9, "learning_rate": 5.607737921048573e-07, "loss": 0.2968, "step": 10454 }, { "epoch": 0.9, "learning_rate": 5.598575338845569e-07, "loss": 0.2773, "step": 10455 }, { "epoch": 0.9, "learning_rate": 5.589420032637949e-07, "loss": 0.2729, "step": 10456 }, { "epoch": 0.9, "learning_rate": 5.580272003131315e-07, "loss": 0.2513, "step": 10457 }, { "epoch": 0.9, "learning_rate": 5.57113125103077e-07, "loss": 0.2886, "step": 10458 }, { "epoch": 0.9, "learning_rate": 5.561997777040862e-07, "loss": 0.2676, "step": 10459 }, { "epoch": 0.9, "learning_rate": 5.552871581865515e-07, "loss": 0.2918, "step": 10460 }, { "epoch": 0.9, "learning_rate": 5.543752666208158e-07, "loss": 0.2942, "step": 10461 }, { "epoch": 0.9, "learning_rate": 5.534641030771615e-07, "loss": 0.2472, "step": 10462 }, { "epoch": 0.9, "learning_rate": 5.525536676258203e-07, "loss": 0.31, "step": 10463 }, { "epoch": 0.9, "learning_rate": 5.516439603369583e-07, "loss": 0.2626, "step": 10464 }, { "epoch": 0.9, "learning_rate": 5.507349812806939e-07, "loss": 0.2842, "step": 10465 }, { "epoch": 0.9, "learning_rate": 5.498267305270888e-07, "loss": 0.302, "step": 10466 }, { "epoch": 0.9, "learning_rate": 5.489192081461436e-07, "loss": 0.3217, "step": 10467 }, { "epoch": 0.9, "learning_rate": 5.48012414207807e-07, "loss": 0.2433, "step": 10468 }, { "epoch": 0.9, "learning_rate": 5.471063487819695e-07, "loss": 0.2498, "step": 10469 }, { "epoch": 0.9, "learning_rate": 5.462010119384665e-07, "loss": 0.2684, "step": 10470 }, { "epoch": 0.9, "learning_rate": 5.452964037470776e-07, "loss": 0.2736, "step": 10471 }, { "epoch": 0.9, "learning_rate": 5.443925242775227e-07, "loss": 0.2201, "step": 10472 }, { "epoch": 0.9, "learning_rate": 5.43489373599474e-07, "loss": 0.2712, "step": 10473 }, { "epoch": 0.9, "learning_rate": 5.425869517825366e-07, "loss": 0.2578, "step": 10474 }, { "epoch": 0.9, "learning_rate": 5.416852588962662e-07, "loss": 0.2773, "step": 10475 }, { "epoch": 0.9, "learning_rate": 5.407842950101605e-07, "loss": 0.3067, "step": 10476 }, { "epoch": 0.9, "learning_rate": 5.398840601936628e-07, "loss": 0.27, "step": 10477 }, { "epoch": 0.9, "learning_rate": 5.389845545161598e-07, "loss": 0.2852, "step": 10478 }, { "epoch": 0.9, "learning_rate": 5.380857780469762e-07, "loss": 0.2975, "step": 10479 }, { "epoch": 0.9, "learning_rate": 5.371877308553896e-07, "loss": 0.2757, "step": 10480 }, { "epoch": 0.9, "learning_rate": 5.36290413010615e-07, "loss": 0.2927, "step": 10481 }, { "epoch": 0.9, "learning_rate": 5.353938245818147e-07, "loss": 0.2905, "step": 10482 }, { "epoch": 0.9, "learning_rate": 5.34497965638091e-07, "loss": 0.3113, "step": 10483 }, { "epoch": 0.9, "learning_rate": 5.336028362484947e-07, "loss": 0.2873, "step": 10484 }, { "epoch": 0.9, "learning_rate": 5.327084364820168e-07, "loss": 0.2693, "step": 10485 }, { "epoch": 0.9, "learning_rate": 5.318147664075923e-07, "loss": 0.2186, "step": 10486 }, { "epoch": 0.9, "learning_rate": 5.30921826094104e-07, "loss": 0.3048, "step": 10487 }, { "epoch": 0.9, "learning_rate": 5.300296156103723e-07, "loss": 0.6042, "step": 10488 }, { "epoch": 0.9, "learning_rate": 5.291381350251645e-07, "loss": 0.2502, "step": 10489 }, { "epoch": 0.9, "learning_rate": 5.282473844071933e-07, "loss": 0.2612, "step": 10490 }, { "epoch": 0.9, "learning_rate": 5.273573638251117e-07, "loss": 0.2348, "step": 10491 }, { "epoch": 0.9, "learning_rate": 5.264680733475192e-07, "loss": 0.2571, "step": 10492 }, { "epoch": 0.9, "learning_rate": 5.255795130429575e-07, "loss": 0.286, "step": 10493 }, { "epoch": 0.9, "learning_rate": 5.246916829799132e-07, "loss": 0.2637, "step": 10494 }, { "epoch": 0.9, "learning_rate": 5.238045832268146e-07, "loss": 0.271, "step": 10495 }, { "epoch": 0.9, "learning_rate": 5.229182138520339e-07, "loss": 0.2603, "step": 10496 }, { "epoch": 0.9, "learning_rate": 5.220325749238919e-07, "loss": 0.3203, "step": 10497 }, { "epoch": 0.9, "learning_rate": 5.211476665106463e-07, "loss": 0.2745, "step": 10498 }, { "epoch": 0.9, "learning_rate": 5.202634886805013e-07, "loss": 0.2832, "step": 10499 }, { "epoch": 0.9, "learning_rate": 5.193800415016071e-07, "loss": 0.3075, "step": 10500 }, { "epoch": 0.9, "learning_rate": 5.184973250420544e-07, "loss": 0.3088, "step": 10501 }, { "epoch": 0.9, "learning_rate": 5.17615339369878e-07, "loss": 0.2941, "step": 10502 }, { "epoch": 0.9, "learning_rate": 5.167340845530544e-07, "loss": 0.2606, "step": 10503 }, { "epoch": 0.9, "learning_rate": 5.158535606595138e-07, "loss": 0.2651, "step": 10504 }, { "epoch": 0.9, "learning_rate": 5.149737677571164e-07, "loss": 0.2852, "step": 10505 }, { "epoch": 0.9, "learning_rate": 5.140947059136736e-07, "loss": 0.2871, "step": 10506 }, { "epoch": 0.9, "learning_rate": 5.132163751969399e-07, "loss": 0.5829, "step": 10507 }, { "epoch": 0.9, "learning_rate": 5.123387756746123e-07, "loss": 0.2967, "step": 10508 }, { "epoch": 0.9, "learning_rate": 5.114619074143335e-07, "loss": 0.2571, "step": 10509 }, { "epoch": 0.9, "learning_rate": 5.105857704836836e-07, "loss": 0.5992, "step": 10510 }, { "epoch": 0.9, "learning_rate": 5.097103649501977e-07, "loss": 0.3119, "step": 10511 }, { "epoch": 0.9, "learning_rate": 5.088356908813419e-07, "loss": 0.2571, "step": 10512 }, { "epoch": 0.9, "learning_rate": 5.079617483445332e-07, "loss": 0.2983, "step": 10513 }, { "epoch": 0.9, "learning_rate": 5.070885374071321e-07, "loss": 0.2593, "step": 10514 }, { "epoch": 0.9, "learning_rate": 5.062160581364406e-07, "loss": 0.2964, "step": 10515 }, { "epoch": 0.9, "learning_rate": 5.053443105997069e-07, "loss": 0.2898, "step": 10516 }, { "epoch": 0.9, "learning_rate": 5.044732948641162e-07, "loss": 0.2579, "step": 10517 }, { "epoch": 0.9, "learning_rate": 5.036030109968082e-07, "loss": 0.2892, "step": 10518 }, { "epoch": 0.9, "learning_rate": 5.027334590648547e-07, "loss": 0.2431, "step": 10519 }, { "epoch": 0.9, "learning_rate": 5.018646391352799e-07, "loss": 0.2501, "step": 10520 }, { "epoch": 0.9, "learning_rate": 5.00996551275047e-07, "loss": 0.2723, "step": 10521 }, { "epoch": 0.9, "learning_rate": 5.001291955510634e-07, "loss": 0.2573, "step": 10522 }, { "epoch": 0.9, "learning_rate": 4.992625720301814e-07, "loss": 0.3287, "step": 10523 }, { "epoch": 0.9, "learning_rate": 4.983966807791951e-07, "loss": 0.2615, "step": 10524 }, { "epoch": 0.9, "learning_rate": 4.975315218648457e-07, "loss": 0.2998, "step": 10525 }, { "epoch": 0.9, "learning_rate": 4.966670953538133e-07, "loss": 0.2562, "step": 10526 }, { "epoch": 0.9, "learning_rate": 4.958034013127222e-07, "loss": 0.3052, "step": 10527 }, { "epoch": 0.9, "learning_rate": 4.949404398081448e-07, "loss": 0.2487, "step": 10528 }, { "epoch": 0.9, "learning_rate": 4.940782109065911e-07, "loss": 0.2537, "step": 10529 }, { "epoch": 0.9, "learning_rate": 4.932167146745193e-07, "loss": 0.2241, "step": 10530 }, { "epoch": 0.9, "learning_rate": 4.923559511783282e-07, "loss": 0.256, "step": 10531 }, { "epoch": 0.9, "learning_rate": 4.914959204843639e-07, "loss": 0.2524, "step": 10532 }, { "epoch": 0.9, "learning_rate": 4.906366226589099e-07, "loss": 0.2492, "step": 10533 }, { "epoch": 0.9, "learning_rate": 4.897780577681954e-07, "loss": 0.2391, "step": 10534 }, { "epoch": 0.9, "learning_rate": 4.889202258784009e-07, "loss": 0.2836, "step": 10535 }, { "epoch": 0.9, "learning_rate": 4.88063127055638e-07, "loss": 0.2707, "step": 10536 }, { "epoch": 0.9, "learning_rate": 4.872067613659692e-07, "loss": 0.5853, "step": 10537 }, { "epoch": 0.9, "learning_rate": 4.863511288753986e-07, "loss": 0.2502, "step": 10538 }, { "epoch": 0.9, "learning_rate": 4.854962296498766e-07, "loss": 0.3179, "step": 10539 }, { "epoch": 0.9, "learning_rate": 4.846420637552918e-07, "loss": 0.2991, "step": 10540 }, { "epoch": 0.9, "learning_rate": 4.83788631257478e-07, "loss": 0.3018, "step": 10541 }, { "epoch": 0.9, "learning_rate": 4.829359322222182e-07, "loss": 0.2626, "step": 10542 }, { "epoch": 0.9, "learning_rate": 4.820839667152299e-07, "loss": 0.3276, "step": 10543 }, { "epoch": 0.9, "learning_rate": 4.812327348021805e-07, "loss": 0.2945, "step": 10544 }, { "epoch": 0.9, "learning_rate": 4.803822365486787e-07, "loss": 0.3055, "step": 10545 }, { "epoch": 0.9, "learning_rate": 4.795324720202754e-07, "loss": 0.2872, "step": 10546 }, { "epoch": 0.9, "learning_rate": 4.786834412824692e-07, "loss": 0.2974, "step": 10547 }, { "epoch": 0.9, "learning_rate": 4.778351444006946e-07, "loss": 0.2582, "step": 10548 }, { "epoch": 0.9, "learning_rate": 4.76987581440338e-07, "loss": 0.2956, "step": 10549 }, { "epoch": 0.9, "learning_rate": 4.761407524667239e-07, "loss": 0.3148, "step": 10550 }, { "epoch": 0.9, "learning_rate": 4.7529465754512226e-07, "loss": 0.3064, "step": 10551 }, { "epoch": 0.9, "learning_rate": 4.7444929674074435e-07, "loss": 0.2509, "step": 10552 }, { "epoch": 0.9, "learning_rate": 4.73604670118748e-07, "loss": 0.2363, "step": 10553 }, { "epoch": 0.9, "learning_rate": 4.7276077774423334e-07, "loss": 0.2545, "step": 10554 }, { "epoch": 0.9, "learning_rate": 4.7191761968224057e-07, "loss": 0.2847, "step": 10555 }, { "epoch": 0.9, "learning_rate": 4.7107519599775997e-07, "loss": 0.2504, "step": 10556 }, { "epoch": 0.9, "learning_rate": 4.7023350675571846e-07, "loss": 0.6125, "step": 10557 }, { "epoch": 0.91, "learning_rate": 4.693925520209908e-07, "loss": 0.2773, "step": 10558 }, { "epoch": 0.91, "learning_rate": 4.6855233185839175e-07, "loss": 0.2301, "step": 10559 }, { "epoch": 0.91, "learning_rate": 4.677128463326841e-07, "loss": 0.3303, "step": 10560 }, { "epoch": 0.91, "learning_rate": 4.668740955085704e-07, "loss": 0.2538, "step": 10561 }, { "epoch": 0.91, "learning_rate": 4.660360794506946e-07, "loss": 0.261, "step": 10562 }, { "epoch": 0.91, "learning_rate": 4.651987982236517e-07, "loss": 0.2473, "step": 10563 }, { "epoch": 0.91, "learning_rate": 4.6436225189197103e-07, "loss": 0.2572, "step": 10564 }, { "epoch": 0.91, "learning_rate": 4.635264405201312e-07, "loss": 0.3335, "step": 10565 }, { "epoch": 0.91, "learning_rate": 4.6269136417255167e-07, "loss": 0.2767, "step": 10566 }, { "epoch": 0.91, "learning_rate": 4.618570229135966e-07, "loss": 0.2702, "step": 10567 }, { "epoch": 0.91, "learning_rate": 4.610234168075733e-07, "loss": 0.3082, "step": 10568 }, { "epoch": 0.91, "learning_rate": 4.601905459187317e-07, "loss": 0.2458, "step": 10569 }, { "epoch": 0.91, "learning_rate": 4.5935841031126693e-07, "loss": 0.2302, "step": 10570 }, { "epoch": 0.91, "learning_rate": 4.585270100493122e-07, "loss": 0.2791, "step": 10571 }, { "epoch": 0.91, "learning_rate": 4.576963451969485e-07, "loss": 0.2523, "step": 10572 }, { "epoch": 0.91, "learning_rate": 4.5686641581820345e-07, "loss": 0.2899, "step": 10573 }, { "epoch": 0.91, "learning_rate": 4.5603722197703925e-07, "loss": 0.2668, "step": 10574 }, { "epoch": 0.91, "learning_rate": 4.552087637373681e-07, "loss": 0.2677, "step": 10575 }, { "epoch": 0.91, "learning_rate": 4.543810411630434e-07, "loss": 0.2477, "step": 10576 }, { "epoch": 0.91, "learning_rate": 4.535540543178629e-07, "loss": 0.2773, "step": 10577 }, { "epoch": 0.91, "learning_rate": 4.5272780326556466e-07, "loss": 0.2397, "step": 10578 }, { "epoch": 0.91, "learning_rate": 4.5190228806983205e-07, "loss": 0.2968, "step": 10579 }, { "epoch": 0.91, "learning_rate": 4.5107750879429424e-07, "loss": 0.2383, "step": 10580 }, { "epoch": 0.91, "learning_rate": 4.5025346550251815e-07, "loss": 0.2869, "step": 10581 }, { "epoch": 0.91, "learning_rate": 4.494301582580185e-07, "loss": 0.2935, "step": 10582 }, { "epoch": 0.91, "learning_rate": 4.4860758712425124e-07, "loss": 0.2528, "step": 10583 }, { "epoch": 0.91, "learning_rate": 4.4778575216461673e-07, "loss": 0.2806, "step": 10584 }, { "epoch": 0.91, "learning_rate": 4.4696465344245874e-07, "loss": 0.2936, "step": 10585 }, { "epoch": 0.91, "learning_rate": 4.4614429102105893e-07, "loss": 0.2203, "step": 10586 }, { "epoch": 0.91, "learning_rate": 4.4532466496365334e-07, "loss": 0.2712, "step": 10587 }, { "epoch": 0.91, "learning_rate": 4.4450577533341035e-07, "loss": 0.2574, "step": 10588 }, { "epoch": 0.91, "learning_rate": 4.436876221934472e-07, "loss": 0.295, "step": 10589 }, { "epoch": 0.91, "learning_rate": 4.4287020560682345e-07, "loss": 0.263, "step": 10590 }, { "epoch": 0.91, "learning_rate": 4.42053525636541e-07, "loss": 0.2476, "step": 10591 }, { "epoch": 0.91, "learning_rate": 4.4123758234554615e-07, "loss": 0.2755, "step": 10592 }, { "epoch": 0.91, "learning_rate": 4.404223757967252e-07, "loss": 0.5533, "step": 10593 }, { "epoch": 0.91, "learning_rate": 4.396079060529146e-07, "loss": 0.2665, "step": 10594 }, { "epoch": 0.91, "learning_rate": 4.387941731768852e-07, "loss": 0.267, "step": 10595 }, { "epoch": 0.91, "learning_rate": 4.3798117723135915e-07, "loss": 0.2541, "step": 10596 }, { "epoch": 0.91, "learning_rate": 4.371689182789962e-07, "loss": 0.2609, "step": 10597 }, { "epoch": 0.91, "learning_rate": 4.363573963824008e-07, "loss": 0.2586, "step": 10598 }, { "epoch": 0.91, "learning_rate": 4.3554661160412406e-07, "loss": 0.2878, "step": 10599 }, { "epoch": 0.91, "learning_rate": 4.3473656400665256e-07, "loss": 0.2692, "step": 10600 }, { "epoch": 0.91, "learning_rate": 4.339272536524253e-07, "loss": 0.2986, "step": 10601 }, { "epoch": 0.91, "learning_rate": 4.331186806038179e-07, "loss": 0.2589, "step": 10602 }, { "epoch": 0.91, "learning_rate": 4.323108449231506e-07, "loss": 0.2644, "step": 10603 }, { "epoch": 0.91, "learning_rate": 4.315037466726879e-07, "loss": 0.269, "step": 10604 }, { "epoch": 0.91, "learning_rate": 4.306973859146368e-07, "loss": 0.2852, "step": 10605 }, { "epoch": 0.91, "learning_rate": 4.298917627111476e-07, "loss": 0.2578, "step": 10606 }, { "epoch": 0.91, "learning_rate": 4.290868771243151e-07, "loss": 0.2289, "step": 10607 }, { "epoch": 0.91, "learning_rate": 4.282827292161762e-07, "loss": 0.241, "step": 10608 }, { "epoch": 0.91, "learning_rate": 4.2747931904870697e-07, "loss": 0.2812, "step": 10609 }, { "epoch": 0.91, "learning_rate": 4.266766466838335e-07, "loss": 0.3018, "step": 10610 }, { "epoch": 0.91, "learning_rate": 4.2587471218342057e-07, "loss": 0.6152, "step": 10611 }, { "epoch": 0.91, "learning_rate": 4.250735156092778e-07, "loss": 0.2538, "step": 10612 }, { "epoch": 0.91, "learning_rate": 4.242730570231568e-07, "loss": 0.2151, "step": 10613 }, { "epoch": 0.91, "learning_rate": 4.2347333648675383e-07, "loss": 0.2544, "step": 10614 }, { "epoch": 0.91, "learning_rate": 4.2267435406170845e-07, "loss": 0.3024, "step": 10615 }, { "epoch": 0.91, "learning_rate": 4.2187610980959916e-07, "loss": 0.3073, "step": 10616 }, { "epoch": 0.91, "learning_rate": 4.210786037919523e-07, "loss": 0.2786, "step": 10617 }, { "epoch": 0.91, "learning_rate": 4.2028183607023766e-07, "loss": 0.3167, "step": 10618 }, { "epoch": 0.91, "learning_rate": 4.194858067058627e-07, "loss": 0.2816, "step": 10619 }, { "epoch": 0.91, "learning_rate": 4.186905157601839e-07, "loss": 0.2636, "step": 10620 }, { "epoch": 0.91, "learning_rate": 4.1789596329449676e-07, "loss": 0.6168, "step": 10621 }, { "epoch": 0.91, "learning_rate": 4.1710214937004223e-07, "loss": 0.2958, "step": 10622 }, { "epoch": 0.91, "learning_rate": 4.1630907404800467e-07, "loss": 0.2322, "step": 10623 }, { "epoch": 0.91, "learning_rate": 4.155167373895075e-07, "loss": 0.3427, "step": 10624 }, { "epoch": 0.91, "learning_rate": 4.14725139455624e-07, "loss": 0.2393, "step": 10625 }, { "epoch": 0.91, "learning_rate": 4.139342803073632e-07, "loss": 0.2492, "step": 10626 }, { "epoch": 0.91, "learning_rate": 4.131441600056818e-07, "loss": 0.295, "step": 10627 }, { "epoch": 0.91, "learning_rate": 4.12354778611479e-07, "loss": 0.2583, "step": 10628 }, { "epoch": 0.91, "learning_rate": 4.11566136185596e-07, "loss": 0.2529, "step": 10629 }, { "epoch": 0.91, "learning_rate": 4.1077823278881767e-07, "loss": 0.2549, "step": 10630 }, { "epoch": 0.91, "learning_rate": 4.099910684818698e-07, "loss": 0.2022, "step": 10631 }, { "epoch": 0.91, "learning_rate": 4.092046433254271e-07, "loss": 0.3221, "step": 10632 }, { "epoch": 0.91, "learning_rate": 4.084189573801001e-07, "loss": 0.2886, "step": 10633 }, { "epoch": 0.91, "learning_rate": 4.076340107064458e-07, "loss": 0.274, "step": 10634 }, { "epoch": 0.91, "learning_rate": 4.0684980336496484e-07, "loss": 0.282, "step": 10635 }, { "epoch": 0.91, "learning_rate": 4.0606633541609983e-07, "loss": 0.2231, "step": 10636 }, { "epoch": 0.91, "learning_rate": 4.052836069202382e-07, "loss": 0.2781, "step": 10637 }, { "epoch": 0.91, "learning_rate": 4.045016179377048e-07, "loss": 0.2819, "step": 10638 }, { "epoch": 0.91, "learning_rate": 4.0372036852877607e-07, "loss": 0.2317, "step": 10639 }, { "epoch": 0.91, "learning_rate": 4.029398587536637e-07, "loss": 0.2639, "step": 10640 }, { "epoch": 0.91, "learning_rate": 4.021600886725263e-07, "loss": 0.2709, "step": 10641 }, { "epoch": 0.91, "learning_rate": 4.013810583454647e-07, "loss": 0.3183, "step": 10642 }, { "epoch": 0.91, "learning_rate": 4.006027678325242e-07, "loss": 0.2614, "step": 10643 }, { "epoch": 0.91, "learning_rate": 3.9982521719369003e-07, "loss": 0.2722, "step": 10644 }, { "epoch": 0.91, "learning_rate": 3.990484064888911e-07, "loss": 0.2424, "step": 10645 }, { "epoch": 0.91, "learning_rate": 3.982723357780027e-07, "loss": 0.2834, "step": 10646 }, { "epoch": 0.91, "learning_rate": 3.9749700512083824e-07, "loss": 0.24, "step": 10647 }, { "epoch": 0.91, "learning_rate": 3.9672241457715755e-07, "loss": 0.2673, "step": 10648 }, { "epoch": 0.91, "learning_rate": 3.959485642066618e-07, "loss": 0.2567, "step": 10649 }, { "epoch": 0.91, "learning_rate": 3.951754540689956e-07, "loss": 0.2541, "step": 10650 }, { "epoch": 0.91, "learning_rate": 3.944030842237467e-07, "loss": 0.2701, "step": 10651 }, { "epoch": 0.91, "learning_rate": 3.936314547304454e-07, "loss": 0.2679, "step": 10652 }, { "epoch": 0.91, "learning_rate": 3.928605656485662e-07, "loss": 0.2813, "step": 10653 }, { "epoch": 0.91, "learning_rate": 3.920904170375239e-07, "loss": 0.2947, "step": 10654 }, { "epoch": 0.91, "learning_rate": 3.913210089566766e-07, "loss": 0.2719, "step": 10655 }, { "epoch": 0.91, "learning_rate": 3.905523414653301e-07, "loss": 0.2612, "step": 10656 }, { "epoch": 0.91, "learning_rate": 3.897844146227259e-07, "loss": 0.2624, "step": 10657 }, { "epoch": 0.91, "learning_rate": 3.8901722848805443e-07, "loss": 0.2831, "step": 10658 }, { "epoch": 0.91, "learning_rate": 3.8825078312044515e-07, "loss": 0.2256, "step": 10659 }, { "epoch": 0.91, "learning_rate": 3.8748507857897187e-07, "loss": 0.2902, "step": 10660 }, { "epoch": 0.91, "learning_rate": 3.8672011492265404e-07, "loss": 0.2784, "step": 10661 }, { "epoch": 0.91, "learning_rate": 3.8595589221044674e-07, "loss": 0.277, "step": 10662 }, { "epoch": 0.91, "learning_rate": 3.8519241050125724e-07, "loss": 0.2922, "step": 10663 }, { "epoch": 0.91, "learning_rate": 3.844296698539274e-07, "loss": 0.2989, "step": 10664 }, { "epoch": 0.91, "learning_rate": 3.8366767032724685e-07, "loss": 0.2772, "step": 10665 }, { "epoch": 0.91, "learning_rate": 3.8290641197994526e-07, "loss": 0.2678, "step": 10666 }, { "epoch": 0.91, "learning_rate": 3.821458948706991e-07, "loss": 0.323, "step": 10667 }, { "epoch": 0.91, "learning_rate": 3.8138611905812584e-07, "loss": 0.253, "step": 10668 }, { "epoch": 0.91, "learning_rate": 3.806270846007798e-07, "loss": 0.308, "step": 10669 }, { "epoch": 0.91, "learning_rate": 3.7986879155717084e-07, "loss": 0.2569, "step": 10670 }, { "epoch": 0.91, "learning_rate": 3.7911123998573995e-07, "loss": 0.3113, "step": 10671 }, { "epoch": 0.91, "learning_rate": 3.78354429944876e-07, "loss": 0.2806, "step": 10672 }, { "epoch": 0.91, "learning_rate": 3.775983614929102e-07, "loss": 0.2773, "step": 10673 }, { "epoch": 0.91, "learning_rate": 3.768430346881169e-07, "loss": 0.2744, "step": 10674 }, { "epoch": 0.92, "learning_rate": 3.760884495887152e-07, "loss": 0.2924, "step": 10675 }, { "epoch": 0.92, "learning_rate": 3.7533460625285955e-07, "loss": 0.2761, "step": 10676 }, { "epoch": 0.92, "learning_rate": 3.7458150473865806e-07, "loss": 0.2402, "step": 10677 }, { "epoch": 0.92, "learning_rate": 3.7382914510415316e-07, "loss": 0.3066, "step": 10678 }, { "epoch": 0.92, "learning_rate": 3.7307752740733283e-07, "loss": 0.2853, "step": 10679 }, { "epoch": 0.92, "learning_rate": 3.7232665170612857e-07, "loss": 0.249, "step": 10680 }, { "epoch": 0.92, "learning_rate": 3.715765180584141e-07, "loss": 0.287, "step": 10681 }, { "epoch": 0.92, "learning_rate": 3.708271265220087e-07, "loss": 0.2442, "step": 10682 }, { "epoch": 0.92, "learning_rate": 3.7007847715466506e-07, "loss": 0.2762, "step": 10683 }, { "epoch": 0.92, "learning_rate": 3.6933057001409366e-07, "loss": 0.2545, "step": 10684 }, { "epoch": 0.92, "learning_rate": 3.685834051579329e-07, "loss": 0.3158, "step": 10685 }, { "epoch": 0.92, "learning_rate": 3.678369826437733e-07, "loss": 0.2856, "step": 10686 }, { "epoch": 0.92, "learning_rate": 3.670913025291456e-07, "loss": 0.2534, "step": 10687 }, { "epoch": 0.92, "learning_rate": 3.663463648715226e-07, "loss": 0.2485, "step": 10688 }, { "epoch": 0.92, "learning_rate": 3.656021697283196e-07, "loss": 0.283, "step": 10689 }, { "epoch": 0.92, "learning_rate": 3.6485871715689735e-07, "loss": 0.2891, "step": 10690 }, { "epoch": 0.92, "learning_rate": 3.6411600721455776e-07, "loss": 0.2845, "step": 10691 }, { "epoch": 0.92, "learning_rate": 3.633740399585428e-07, "loss": 0.2684, "step": 10692 }, { "epoch": 0.92, "learning_rate": 3.6263281544603903e-07, "loss": 0.2914, "step": 10693 }, { "epoch": 0.92, "learning_rate": 3.6189233373418064e-07, "loss": 0.2717, "step": 10694 }, { "epoch": 0.92, "learning_rate": 3.611525948800376e-07, "loss": 0.2533, "step": 10695 }, { "epoch": 0.92, "learning_rate": 3.6041359894062544e-07, "loss": 0.2678, "step": 10696 }, { "epoch": 0.92, "learning_rate": 3.596753459729019e-07, "loss": 0.2552, "step": 10697 }, { "epoch": 0.92, "learning_rate": 3.589378360337692e-07, "loss": 0.2858, "step": 10698 }, { "epoch": 0.92, "learning_rate": 3.582010691800708e-07, "loss": 0.2289, "step": 10699 }, { "epoch": 0.92, "learning_rate": 3.574650454685902e-07, "loss": 0.2962, "step": 10700 }, { "epoch": 0.92, "learning_rate": 3.5672976495606084e-07, "loss": 0.2483, "step": 10701 }, { "epoch": 0.92, "learning_rate": 3.5599522769915074e-07, "loss": 0.24, "step": 10702 }, { "epoch": 0.92, "learning_rate": 3.5526143375447684e-07, "loss": 0.2428, "step": 10703 }, { "epoch": 0.92, "learning_rate": 3.5452838317859615e-07, "loss": 0.3191, "step": 10704 }, { "epoch": 0.92, "learning_rate": 3.537960760280068e-07, "loss": 0.2846, "step": 10705 }, { "epoch": 0.92, "learning_rate": 3.5306451235915475e-07, "loss": 0.2684, "step": 10706 }, { "epoch": 0.92, "learning_rate": 3.523336922284204e-07, "loss": 0.2854, "step": 10707 }, { "epoch": 0.92, "learning_rate": 3.5160361569213766e-07, "loss": 0.3141, "step": 10708 }, { "epoch": 0.92, "learning_rate": 3.5087428280657144e-07, "loss": 0.2583, "step": 10709 }, { "epoch": 0.92, "learning_rate": 3.50145693627939e-07, "loss": 0.2971, "step": 10710 }, { "epoch": 0.92, "learning_rate": 3.4941784821239445e-07, "loss": 0.2782, "step": 10711 }, { "epoch": 0.92, "learning_rate": 3.486907466160372e-07, "loss": 0.2786, "step": 10712 }, { "epoch": 0.92, "learning_rate": 3.4796438889491025e-07, "loss": 0.2624, "step": 10713 }, { "epoch": 0.92, "learning_rate": 3.472387751049944e-07, "loss": 0.2983, "step": 10714 }, { "epoch": 0.92, "learning_rate": 3.4651390530221927e-07, "loss": 0.2867, "step": 10715 }, { "epoch": 0.92, "learning_rate": 3.457897795424525e-07, "loss": 0.303, "step": 10716 }, { "epoch": 0.92, "learning_rate": 3.450663978815061e-07, "loss": 0.3206, "step": 10717 }, { "epoch": 0.92, "learning_rate": 3.443437603751354e-07, "loss": 0.2963, "step": 10718 }, { "epoch": 0.92, "learning_rate": 3.436218670790381e-07, "loss": 0.2662, "step": 10719 }, { "epoch": 0.92, "learning_rate": 3.4290071804885526e-07, "loss": 0.2805, "step": 10720 }, { "epoch": 0.92, "learning_rate": 3.4218031334016465e-07, "loss": 0.2844, "step": 10721 }, { "epoch": 0.92, "learning_rate": 3.414606530084974e-07, "loss": 0.2551, "step": 10722 }, { "epoch": 0.92, "learning_rate": 3.4074173710931804e-07, "loss": 0.2351, "step": 10723 }, { "epoch": 0.92, "learning_rate": 3.4002356569803775e-07, "loss": 0.2902, "step": 10724 }, { "epoch": 0.92, "learning_rate": 3.3930613883000897e-07, "loss": 0.2559, "step": 10725 }, { "epoch": 0.92, "learning_rate": 3.3858945656052855e-07, "loss": 0.238, "step": 10726 }, { "epoch": 0.92, "learning_rate": 3.3787351894483566e-07, "loss": 0.2661, "step": 10727 }, { "epoch": 0.92, "learning_rate": 3.3715832603810727e-07, "loss": 0.2454, "step": 10728 }, { "epoch": 0.92, "learning_rate": 3.3644387789547264e-07, "loss": 0.2573, "step": 10729 }, { "epoch": 0.92, "learning_rate": 3.357301745719932e-07, "loss": 0.2639, "step": 10730 }, { "epoch": 0.92, "learning_rate": 3.3501721612267833e-07, "loss": 0.2828, "step": 10731 }, { "epoch": 0.92, "learning_rate": 3.34305002602483e-07, "loss": 0.3252, "step": 10732 }, { "epoch": 0.92, "learning_rate": 3.335935340662966e-07, "loss": 0.2589, "step": 10733 }, { "epoch": 0.92, "learning_rate": 3.3288281056895746e-07, "loss": 0.2573, "step": 10734 }, { "epoch": 0.92, "learning_rate": 3.321728321652451e-07, "loss": 0.345, "step": 10735 }, { "epoch": 0.92, "learning_rate": 3.314635989098802e-07, "loss": 0.3053, "step": 10736 }, { "epoch": 0.92, "learning_rate": 3.307551108575291e-07, "loss": 0.2269, "step": 10737 }, { "epoch": 0.92, "learning_rate": 3.300473680627947e-07, "loss": 0.2351, "step": 10738 }, { "epoch": 0.92, "learning_rate": 3.2934037058023115e-07, "loss": 0.2972, "step": 10739 }, { "epoch": 0.92, "learning_rate": 3.28634118464326e-07, "loss": 0.2173, "step": 10740 }, { "epoch": 0.92, "learning_rate": 3.2792861176951465e-07, "loss": 0.2764, "step": 10741 }, { "epoch": 0.92, "learning_rate": 3.2722385055017567e-07, "loss": 0.262, "step": 10742 }, { "epoch": 0.92, "learning_rate": 3.26519834860628e-07, "loss": 0.3237, "step": 10743 }, { "epoch": 0.92, "learning_rate": 3.258165647551337e-07, "loss": 0.2888, "step": 10744 }, { "epoch": 0.92, "learning_rate": 3.2511404028789604e-07, "loss": 0.2547, "step": 10745 }, { "epoch": 0.92, "learning_rate": 3.2441226151306403e-07, "loss": 0.2758, "step": 10746 }, { "epoch": 0.92, "learning_rate": 3.2371122848472655e-07, "loss": 0.222, "step": 10747 }, { "epoch": 0.92, "learning_rate": 3.230109412569149e-07, "loss": 0.267, "step": 10748 }, { "epoch": 0.92, "learning_rate": 3.223113998836036e-07, "loss": 0.265, "step": 10749 }, { "epoch": 0.92, "learning_rate": 3.216126044187118e-07, "loss": 0.2225, "step": 10750 }, { "epoch": 0.92, "learning_rate": 3.209145549160997e-07, "loss": 0.2956, "step": 10751 }, { "epoch": 0.92, "learning_rate": 3.2021725142956537e-07, "loss": 0.2444, "step": 10752 }, { "epoch": 0.92, "learning_rate": 3.1952069401285814e-07, "loss": 0.2349, "step": 10753 }, { "epoch": 0.92, "learning_rate": 3.188248827196616e-07, "loss": 0.2723, "step": 10754 }, { "epoch": 0.92, "learning_rate": 3.181298176036074e-07, "loss": 0.5592, "step": 10755 }, { "epoch": 0.92, "learning_rate": 3.1743549871826704e-07, "loss": 0.2473, "step": 10756 }, { "epoch": 0.92, "learning_rate": 3.167419261171556e-07, "loss": 0.2812, "step": 10757 }, { "epoch": 0.92, "learning_rate": 3.160490998537313e-07, "loss": 0.288, "step": 10758 }, { "epoch": 0.92, "learning_rate": 3.1535701998139045e-07, "loss": 0.2789, "step": 10759 }, { "epoch": 0.92, "learning_rate": 3.146656865534803e-07, "loss": 0.2982, "step": 10760 }, { "epoch": 0.92, "learning_rate": 3.139750996232804e-07, "loss": 0.2981, "step": 10761 }, { "epoch": 0.92, "learning_rate": 3.132852592440194e-07, "loss": 0.2327, "step": 10762 }, { "epoch": 0.92, "learning_rate": 3.1259616546886804e-07, "loss": 0.2523, "step": 10763 }, { "epoch": 0.92, "learning_rate": 3.119078183509372e-07, "loss": 0.3148, "step": 10764 }, { "epoch": 0.92, "learning_rate": 3.1122021794328214e-07, "loss": 0.2629, "step": 10765 }, { "epoch": 0.92, "learning_rate": 3.1053336429889616e-07, "loss": 0.5923, "step": 10766 }, { "epoch": 0.92, "learning_rate": 3.098472574707245e-07, "loss": 0.2656, "step": 10767 }, { "epoch": 0.92, "learning_rate": 3.0916189751164506e-07, "loss": 0.3156, "step": 10768 }, { "epoch": 0.92, "learning_rate": 3.08477284474481e-07, "loss": 0.2507, "step": 10769 }, { "epoch": 0.92, "learning_rate": 3.077934184120035e-07, "loss": 0.2425, "step": 10770 }, { "epoch": 0.92, "learning_rate": 3.0711029937691704e-07, "loss": 0.2481, "step": 10771 }, { "epoch": 0.92, "learning_rate": 3.06427927421874e-07, "loss": 0.2932, "step": 10772 }, { "epoch": 0.92, "learning_rate": 3.0574630259947e-07, "loss": 0.2834, "step": 10773 }, { "epoch": 0.92, "learning_rate": 3.050654249622398e-07, "loss": 0.2822, "step": 10774 }, { "epoch": 0.92, "learning_rate": 3.0438529456266463e-07, "loss": 0.2178, "step": 10775 }, { "epoch": 0.92, "learning_rate": 3.037059114531604e-07, "loss": 0.2725, "step": 10776 }, { "epoch": 0.92, "learning_rate": 3.0302727568609637e-07, "loss": 0.3517, "step": 10777 }, { "epoch": 0.92, "learning_rate": 3.0234938731377394e-07, "loss": 0.2649, "step": 10778 }, { "epoch": 0.92, "learning_rate": 3.016722463884436e-07, "loss": 0.2627, "step": 10779 }, { "epoch": 0.92, "learning_rate": 3.009958529622958e-07, "loss": 0.3549, "step": 10780 }, { "epoch": 0.92, "learning_rate": 3.0032020708746334e-07, "loss": 0.308, "step": 10781 }, { "epoch": 0.92, "learning_rate": 2.996453088160234e-07, "loss": 0.2462, "step": 10782 }, { "epoch": 0.92, "learning_rate": 2.989711581999899e-07, "loss": 0.2692, "step": 10783 }, { "epoch": 0.92, "learning_rate": 2.982977552913269e-07, "loss": 0.2842, "step": 10784 }, { "epoch": 0.92, "learning_rate": 2.97625100141935e-07, "loss": 0.2784, "step": 10785 }, { "epoch": 0.92, "learning_rate": 2.969531928036595e-07, "loss": 0.2428, "step": 10786 }, { "epoch": 0.92, "learning_rate": 2.9628203332828675e-07, "loss": 0.33, "step": 10787 }, { "epoch": 0.92, "learning_rate": 2.9561162176754863e-07, "loss": 0.2586, "step": 10788 }, { "epoch": 0.92, "learning_rate": 2.949419581731161e-07, "loss": 0.2617, "step": 10789 }, { "epoch": 0.92, "learning_rate": 2.9427304259660117e-07, "loss": 0.3091, "step": 10790 }, { "epoch": 0.92, "learning_rate": 2.9360487508956594e-07, "loss": 0.275, "step": 10791 }, { "epoch": 0.93, "learning_rate": 2.9293745570350365e-07, "loss": 0.2443, "step": 10792 }, { "epoch": 0.93, "learning_rate": 2.922707844898598e-07, "loss": 0.2662, "step": 10793 }, { "epoch": 0.93, "learning_rate": 2.9160486150001556e-07, "loss": 0.2491, "step": 10794 }, { "epoch": 0.93, "learning_rate": 2.909396867852987e-07, "loss": 0.2719, "step": 10795 }, { "epoch": 0.93, "learning_rate": 2.9027526039697717e-07, "loss": 0.2585, "step": 10796 }, { "epoch": 0.93, "learning_rate": 2.8961158238625997e-07, "loss": 0.2435, "step": 10797 }, { "epoch": 0.93, "learning_rate": 2.889486528043028e-07, "loss": 0.3191, "step": 10798 }, { "epoch": 0.93, "learning_rate": 2.8828647170219937e-07, "loss": 0.2316, "step": 10799 }, { "epoch": 0.93, "learning_rate": 2.876250391309876e-07, "loss": 0.2142, "step": 10800 }, { "epoch": 0.93, "learning_rate": 2.869643551416479e-07, "loss": 0.2639, "step": 10801 }, { "epoch": 0.93, "learning_rate": 2.863044197851017e-07, "loss": 0.2616, "step": 10802 }, { "epoch": 0.93, "learning_rate": 2.85645233112215e-07, "loss": 0.2632, "step": 10803 }, { "epoch": 0.93, "learning_rate": 2.8498679517379277e-07, "loss": 0.2493, "step": 10804 }, { "epoch": 0.93, "learning_rate": 2.843291060205855e-07, "loss": 0.2602, "step": 10805 }, { "epoch": 0.93, "learning_rate": 2.836721657032848e-07, "loss": 0.291, "step": 10806 }, { "epoch": 0.93, "learning_rate": 2.8301597427252137e-07, "loss": 0.2439, "step": 10807 }, { "epoch": 0.93, "learning_rate": 2.823605317788769e-07, "loss": 0.2838, "step": 10808 }, { "epoch": 0.93, "learning_rate": 2.8170583827286435e-07, "loss": 0.2596, "step": 10809 }, { "epoch": 0.93, "learning_rate": 2.810518938049478e-07, "loss": 0.2753, "step": 10810 }, { "epoch": 0.93, "learning_rate": 2.8039869842552583e-07, "loss": 0.2918, "step": 10811 }, { "epoch": 0.93, "learning_rate": 2.797462521849481e-07, "loss": 0.3076, "step": 10812 }, { "epoch": 0.93, "learning_rate": 2.790945551335e-07, "loss": 0.2723, "step": 10813 }, { "epoch": 0.93, "learning_rate": 2.784436073214103e-07, "loss": 0.2469, "step": 10814 }, { "epoch": 0.93, "learning_rate": 2.777934087988532e-07, "loss": 0.3251, "step": 10815 }, { "epoch": 0.93, "learning_rate": 2.771439596159409e-07, "loss": 0.2961, "step": 10816 }, { "epoch": 0.93, "learning_rate": 2.7649525982272996e-07, "loss": 0.5551, "step": 10817 }, { "epoch": 0.93, "learning_rate": 2.7584730946921825e-07, "loss": 0.2514, "step": 10818 }, { "epoch": 0.93, "learning_rate": 2.75200108605348e-07, "loss": 0.3074, "step": 10819 }, { "epoch": 0.93, "learning_rate": 2.745536572810026e-07, "loss": 0.3062, "step": 10820 }, { "epoch": 0.93, "learning_rate": 2.739079555460056e-07, "loss": 0.2678, "step": 10821 }, { "epoch": 0.93, "learning_rate": 2.73263003450126e-07, "loss": 0.286, "step": 10822 }, { "epoch": 0.93, "learning_rate": 2.726188010430719e-07, "loss": 0.3054, "step": 10823 }, { "epoch": 0.93, "learning_rate": 2.719753483744969e-07, "loss": 0.294, "step": 10824 }, { "epoch": 0.93, "learning_rate": 2.7133264549399464e-07, "loss": 0.2742, "step": 10825 }, { "epoch": 0.93, "learning_rate": 2.70690692451101e-07, "loss": 0.2859, "step": 10826 }, { "epoch": 0.93, "learning_rate": 2.700494892952954e-07, "loss": 0.2513, "step": 10827 }, { "epoch": 0.93, "learning_rate": 2.69409036075996e-07, "loss": 0.2698, "step": 10828 }, { "epoch": 0.93, "learning_rate": 2.687693328425711e-07, "loss": 0.2748, "step": 10829 }, { "epoch": 0.93, "learning_rate": 2.681303796443202e-07, "loss": 0.3055, "step": 10830 }, { "epoch": 0.93, "learning_rate": 2.6749217653049385e-07, "loss": 0.2891, "step": 10831 }, { "epoch": 0.93, "learning_rate": 2.6685472355028053e-07, "loss": 0.2393, "step": 10832 }, { "epoch": 0.93, "learning_rate": 2.66218020752812e-07, "loss": 0.249, "step": 10833 }, { "epoch": 0.93, "learning_rate": 2.655820681871635e-07, "loss": 0.2439, "step": 10834 }, { "epoch": 0.93, "learning_rate": 2.6494686590234797e-07, "loss": 0.285, "step": 10835 }, { "epoch": 0.93, "learning_rate": 2.6431241394732856e-07, "loss": 0.2506, "step": 10836 }, { "epoch": 0.93, "learning_rate": 2.636787123710016e-07, "loss": 0.2686, "step": 10837 }, { "epoch": 0.93, "learning_rate": 2.6304576122221035e-07, "loss": 0.2657, "step": 10838 }, { "epoch": 0.93, "learning_rate": 2.624135605497402e-07, "loss": 0.5769, "step": 10839 }, { "epoch": 0.93, "learning_rate": 2.617821104023177e-07, "loss": 0.2706, "step": 10840 }, { "epoch": 0.93, "learning_rate": 2.6115141082861396e-07, "loss": 0.2601, "step": 10841 }, { "epoch": 0.93, "learning_rate": 2.605214618772356e-07, "loss": 0.2571, "step": 10842 }, { "epoch": 0.93, "learning_rate": 2.598922635967416e-07, "loss": 0.2645, "step": 10843 }, { "epoch": 0.93, "learning_rate": 2.592638160356231e-07, "loss": 0.2885, "step": 10844 }, { "epoch": 0.93, "learning_rate": 2.586361192423181e-07, "loss": 0.2585, "step": 10845 }, { "epoch": 0.93, "learning_rate": 2.5800917326521013e-07, "loss": 0.5839, "step": 10846 }, { "epoch": 0.93, "learning_rate": 2.573829781526171e-07, "loss": 0.2693, "step": 10847 }, { "epoch": 0.93, "learning_rate": 2.56757533952805e-07, "loss": 0.2687, "step": 10848 }, { "epoch": 0.93, "learning_rate": 2.561328407139785e-07, "loss": 0.286, "step": 10849 }, { "epoch": 0.93, "learning_rate": 2.555088984842868e-07, "loss": 0.2788, "step": 10850 }, { "epoch": 0.93, "learning_rate": 2.548857073118216e-07, "loss": 0.2451, "step": 10851 }, { "epoch": 0.93, "learning_rate": 2.5426326724461215e-07, "loss": 0.2814, "step": 10852 }, { "epoch": 0.93, "learning_rate": 2.5364157833063676e-07, "loss": 0.279, "step": 10853 }, { "epoch": 0.93, "learning_rate": 2.530206406178104e-07, "loss": 0.2783, "step": 10854 }, { "epoch": 0.93, "learning_rate": 2.524004541539904e-07, "loss": 0.272, "step": 10855 }, { "epoch": 0.93, "learning_rate": 2.517810189869796e-07, "loss": 0.2605, "step": 10856 }, { "epoch": 0.93, "learning_rate": 2.5116233516452094e-07, "loss": 0.2546, "step": 10857 }, { "epoch": 0.93, "learning_rate": 2.505444027342996e-07, "loss": 0.2748, "step": 10858 }, { "epoch": 0.93, "learning_rate": 2.4992722174393966e-07, "loss": 0.2784, "step": 10859 }, { "epoch": 0.93, "learning_rate": 2.493107922410165e-07, "loss": 0.2602, "step": 10860 }, { "epoch": 0.93, "learning_rate": 2.486951142730354e-07, "loss": 0.3102, "step": 10861 }, { "epoch": 0.93, "learning_rate": 2.480801878874528e-07, "loss": 0.2849, "step": 10862 }, { "epoch": 0.93, "learning_rate": 2.474660131316642e-07, "loss": 0.2665, "step": 10863 }, { "epoch": 0.93, "learning_rate": 2.468525900530061e-07, "loss": 0.2656, "step": 10864 }, { "epoch": 0.93, "learning_rate": 2.4623991869875965e-07, "loss": 0.27, "step": 10865 }, { "epoch": 0.93, "learning_rate": 2.456279991161437e-07, "loss": 0.2473, "step": 10866 }, { "epoch": 0.93, "learning_rate": 2.450168313523249e-07, "loss": 0.277, "step": 10867 }, { "epoch": 0.93, "learning_rate": 2.444064154544079e-07, "loss": 0.2339, "step": 10868 }, { "epoch": 0.93, "learning_rate": 2.437967514694406e-07, "loss": 0.2688, "step": 10869 }, { "epoch": 0.93, "learning_rate": 2.4318783944441314e-07, "loss": 0.3087, "step": 10870 }, { "epoch": 0.93, "learning_rate": 2.4257967942625694e-07, "loss": 0.3168, "step": 10871 }, { "epoch": 0.93, "learning_rate": 2.4197227146184664e-07, "loss": 0.2666, "step": 10872 }, { "epoch": 0.93, "learning_rate": 2.4136561559799597e-07, "loss": 0.2789, "step": 10873 }, { "epoch": 0.93, "learning_rate": 2.4075971188146754e-07, "loss": 0.2799, "step": 10874 }, { "epoch": 0.93, "learning_rate": 2.401545603589572e-07, "loss": 0.2864, "step": 10875 }, { "epoch": 0.93, "learning_rate": 2.3955016107710896e-07, "loss": 0.2412, "step": 10876 }, { "epoch": 0.93, "learning_rate": 2.3894651408250536e-07, "loss": 0.2651, "step": 10877 }, { "epoch": 0.93, "learning_rate": 2.3834361942167484e-07, "loss": 0.2861, "step": 10878 }, { "epoch": 0.93, "learning_rate": 2.3774147714108463e-07, "loss": 0.2487, "step": 10879 }, { "epoch": 0.93, "learning_rate": 2.371400872871432e-07, "loss": 0.2917, "step": 10880 }, { "epoch": 0.93, "learning_rate": 2.365394499062057e-07, "loss": 0.2616, "step": 10881 }, { "epoch": 0.93, "learning_rate": 2.3593956504456396e-07, "loss": 0.2626, "step": 10882 }, { "epoch": 0.93, "learning_rate": 2.353404327484543e-07, "loss": 0.2569, "step": 10883 }, { "epoch": 0.93, "learning_rate": 2.347420530640565e-07, "loss": 0.2606, "step": 10884 }, { "epoch": 0.93, "learning_rate": 2.3414442603748922e-07, "loss": 0.3099, "step": 10885 }, { "epoch": 0.93, "learning_rate": 2.335475517148167e-07, "loss": 0.3089, "step": 10886 }, { "epoch": 0.93, "learning_rate": 2.329514301420388e-07, "loss": 0.3102, "step": 10887 }, { "epoch": 0.93, "learning_rate": 2.3235606136510545e-07, "loss": 0.2842, "step": 10888 }, { "epoch": 0.93, "learning_rate": 2.3176144542990443e-07, "loss": 0.2803, "step": 10889 }, { "epoch": 0.93, "learning_rate": 2.3116758238226233e-07, "loss": 0.2562, "step": 10890 }, { "epoch": 0.93, "learning_rate": 2.3057447226795705e-07, "loss": 0.2645, "step": 10891 }, { "epoch": 0.93, "learning_rate": 2.2998211513269753e-07, "loss": 0.2889, "step": 10892 }, { "epoch": 0.93, "learning_rate": 2.293905110221406e-07, "loss": 0.2865, "step": 10893 }, { "epoch": 0.93, "learning_rate": 2.2879965998188646e-07, "loss": 0.259, "step": 10894 }, { "epoch": 0.93, "learning_rate": 2.2820956205747312e-07, "loss": 0.2379, "step": 10895 }, { "epoch": 0.93, "learning_rate": 2.2762021729438423e-07, "loss": 0.2462, "step": 10896 }, { "epoch": 0.93, "learning_rate": 2.2703162573804006e-07, "loss": 0.2617, "step": 10897 }, { "epoch": 0.93, "learning_rate": 2.264437874338099e-07, "loss": 0.2662, "step": 10898 }, { "epoch": 0.93, "learning_rate": 2.2585670242699975e-07, "loss": 0.2913, "step": 10899 }, { "epoch": 0.93, "learning_rate": 2.2527037076286008e-07, "loss": 0.2559, "step": 10900 }, { "epoch": 0.93, "learning_rate": 2.2468479248658026e-07, "loss": 0.3108, "step": 10901 }, { "epoch": 0.93, "learning_rate": 2.2409996764329644e-07, "loss": 0.3046, "step": 10902 }, { "epoch": 0.93, "learning_rate": 2.235158962780837e-07, "loss": 0.2772, "step": 10903 }, { "epoch": 0.93, "learning_rate": 2.2293257843595706e-07, "loss": 0.2725, "step": 10904 }, { "epoch": 0.93, "learning_rate": 2.223500141618795e-07, "loss": 0.2646, "step": 10905 }, { "epoch": 0.93, "learning_rate": 2.2176820350074846e-07, "loss": 0.2773, "step": 10906 }, { "epoch": 0.93, "learning_rate": 2.2118714649740912e-07, "loss": 0.2639, "step": 10907 }, { "epoch": 0.94, "learning_rate": 2.206068431966446e-07, "loss": 0.2736, "step": 10908 }, { "epoch": 0.94, "learning_rate": 2.2002729364318464e-07, "loss": 0.226, "step": 10909 }, { "epoch": 0.94, "learning_rate": 2.1944849788169798e-07, "loss": 0.2361, "step": 10910 }, { "epoch": 0.94, "learning_rate": 2.1887045595679112e-07, "loss": 0.2853, "step": 10911 }, { "epoch": 0.94, "learning_rate": 2.182931679130218e-07, "loss": 0.2546, "step": 10912 }, { "epoch": 0.94, "learning_rate": 2.1771663379488106e-07, "loss": 0.2852, "step": 10913 }, { "epoch": 0.94, "learning_rate": 2.1714085364680671e-07, "loss": 0.2612, "step": 10914 }, { "epoch": 0.94, "learning_rate": 2.1656582751317657e-07, "loss": 0.2672, "step": 10915 }, { "epoch": 0.94, "learning_rate": 2.1599155543831074e-07, "loss": 0.261, "step": 10916 }, { "epoch": 0.94, "learning_rate": 2.1541803746647272e-07, "loss": 0.3041, "step": 10917 }, { "epoch": 0.94, "learning_rate": 2.1484527364186492e-07, "loss": 0.2885, "step": 10918 }, { "epoch": 0.94, "learning_rate": 2.1427326400863424e-07, "loss": 0.2695, "step": 10919 }, { "epoch": 0.94, "learning_rate": 2.1370200861086655e-07, "loss": 0.2617, "step": 10920 }, { "epoch": 0.94, "learning_rate": 2.1313150749259216e-07, "loss": 0.263, "step": 10921 }, { "epoch": 0.94, "learning_rate": 2.1256176069778367e-07, "loss": 0.2766, "step": 10922 }, { "epoch": 0.94, "learning_rate": 2.1199276827035374e-07, "loss": 0.2861, "step": 10923 }, { "epoch": 0.94, "learning_rate": 2.1142453025415734e-07, "loss": 0.2974, "step": 10924 }, { "epoch": 0.94, "learning_rate": 2.1085704669299045e-07, "loss": 0.2552, "step": 10925 }, { "epoch": 0.94, "learning_rate": 2.102903176305926e-07, "loss": 0.2514, "step": 10926 }, { "epoch": 0.94, "learning_rate": 2.097243431106466e-07, "loss": 0.3233, "step": 10927 }, { "epoch": 0.94, "learning_rate": 2.091591231767709e-07, "loss": 0.2919, "step": 10928 }, { "epoch": 0.94, "learning_rate": 2.0859465787253396e-07, "loss": 0.263, "step": 10929 }, { "epoch": 0.94, "learning_rate": 2.0803094724143879e-07, "loss": 0.2747, "step": 10930 }, { "epoch": 0.94, "learning_rate": 2.0746799132693506e-07, "loss": 0.2407, "step": 10931 }, { "epoch": 0.94, "learning_rate": 2.069057901724114e-07, "loss": 0.2614, "step": 10932 }, { "epoch": 0.94, "learning_rate": 2.0634434382120205e-07, "loss": 0.264, "step": 10933 }, { "epoch": 0.94, "learning_rate": 2.0578365231657792e-07, "loss": 0.2674, "step": 10934 }, { "epoch": 0.94, "learning_rate": 2.0522371570175447e-07, "loss": 0.2305, "step": 10935 }, { "epoch": 0.94, "learning_rate": 2.046645340198905e-07, "loss": 0.2805, "step": 10936 }, { "epoch": 0.94, "learning_rate": 2.0410610731408377e-07, "loss": 0.2699, "step": 10937 }, { "epoch": 0.94, "learning_rate": 2.0354843562737537e-07, "loss": 0.217, "step": 10938 }, { "epoch": 0.94, "learning_rate": 2.0299151900274873e-07, "loss": 0.3044, "step": 10939 }, { "epoch": 0.94, "learning_rate": 2.0243535748312615e-07, "loss": 0.2817, "step": 10940 }, { "epoch": 0.94, "learning_rate": 2.018799511113767e-07, "loss": 0.3231, "step": 10941 }, { "epoch": 0.94, "learning_rate": 2.0132529993030392e-07, "loss": 0.2599, "step": 10942 }, { "epoch": 0.94, "learning_rate": 2.0077140398266248e-07, "loss": 0.2949, "step": 10943 }, { "epoch": 0.94, "learning_rate": 2.002182633111416e-07, "loss": 0.6014, "step": 10944 }, { "epoch": 0.94, "learning_rate": 1.9966587795837377e-07, "loss": 0.2759, "step": 10945 }, { "epoch": 0.94, "learning_rate": 1.9911424796693611e-07, "loss": 0.3049, "step": 10946 }, { "epoch": 0.94, "learning_rate": 1.985633733793446e-07, "loss": 0.2601, "step": 10947 }, { "epoch": 0.94, "learning_rate": 1.9801325423805862e-07, "loss": 0.3292, "step": 10948 }, { "epoch": 0.94, "learning_rate": 1.9746389058547534e-07, "loss": 0.2265, "step": 10949 }, { "epoch": 0.94, "learning_rate": 1.9691528246394197e-07, "loss": 0.5916, "step": 10950 }, { "epoch": 0.94, "learning_rate": 1.963674299157403e-07, "loss": 0.2798, "step": 10951 }, { "epoch": 0.94, "learning_rate": 1.9582033298309434e-07, "loss": 0.2923, "step": 10952 }, { "epoch": 0.94, "learning_rate": 1.9527399170817473e-07, "loss": 0.2465, "step": 10953 }, { "epoch": 0.94, "learning_rate": 1.9472840613308787e-07, "loss": 0.2719, "step": 10954 }, { "epoch": 0.94, "learning_rate": 1.9418357629988782e-07, "loss": 0.3484, "step": 10955 }, { "epoch": 0.94, "learning_rate": 1.936395022505644e-07, "loss": 0.2772, "step": 10956 }, { "epoch": 0.94, "learning_rate": 1.930961840270551e-07, "loss": 0.2726, "step": 10957 }, { "epoch": 0.94, "learning_rate": 1.9255362167123316e-07, "loss": 0.2411, "step": 10958 }, { "epoch": 0.94, "learning_rate": 1.920118152249173e-07, "loss": 0.264, "step": 10959 }, { "epoch": 0.94, "learning_rate": 1.914707647298697e-07, "loss": 0.3056, "step": 10960 }, { "epoch": 0.94, "learning_rate": 1.909304702277903e-07, "loss": 0.2653, "step": 10961 }, { "epoch": 0.94, "learning_rate": 1.903909317603214e-07, "loss": 0.2652, "step": 10962 }, { "epoch": 0.94, "learning_rate": 1.898521493690486e-07, "loss": 0.2892, "step": 10963 }, { "epoch": 0.94, "learning_rate": 1.8931412309549867e-07, "loss": 0.2734, "step": 10964 }, { "epoch": 0.94, "learning_rate": 1.8877685298114178e-07, "loss": 0.2504, "step": 10965 }, { "epoch": 0.94, "learning_rate": 1.882403390673837e-07, "loss": 0.262, "step": 10966 }, { "epoch": 0.94, "learning_rate": 1.8770458139558134e-07, "loss": 0.2695, "step": 10967 }, { "epoch": 0.94, "learning_rate": 1.87169580007025e-07, "loss": 0.2392, "step": 10968 }, { "epoch": 0.94, "learning_rate": 1.866353349429506e-07, "loss": 0.2555, "step": 10969 }, { "epoch": 0.94, "learning_rate": 1.861018462445352e-07, "loss": 0.2729, "step": 10970 }, { "epoch": 0.94, "learning_rate": 1.8556911395289811e-07, "loss": 0.2485, "step": 10971 }, { "epoch": 0.94, "learning_rate": 1.8503713810909984e-07, "loss": 0.2606, "step": 10972 }, { "epoch": 0.94, "learning_rate": 1.8450591875413981e-07, "loss": 0.3301, "step": 10973 }, { "epoch": 0.94, "learning_rate": 1.8397545592896527e-07, "loss": 0.2673, "step": 10974 }, { "epoch": 0.94, "learning_rate": 1.8344574967446015e-07, "loss": 0.2759, "step": 10975 }, { "epoch": 0.94, "learning_rate": 1.8291680003145074e-07, "loss": 0.2781, "step": 10976 }, { "epoch": 0.94, "learning_rate": 1.823886070407077e-07, "loss": 0.2952, "step": 10977 }, { "epoch": 0.94, "learning_rate": 1.8186117074293964e-07, "loss": 0.3367, "step": 10978 }, { "epoch": 0.94, "learning_rate": 1.8133449117880064e-07, "loss": 0.2573, "step": 10979 }, { "epoch": 0.94, "learning_rate": 1.808085683888827e-07, "loss": 0.2839, "step": 10980 }, { "epoch": 0.94, "learning_rate": 1.8028340241372345e-07, "loss": 0.5588, "step": 10981 }, { "epoch": 0.94, "learning_rate": 1.797589932937982e-07, "loss": 0.2917, "step": 10982 }, { "epoch": 0.94, "learning_rate": 1.792353410695269e-07, "loss": 0.2467, "step": 10983 }, { "epoch": 0.94, "learning_rate": 1.7871244578126835e-07, "loss": 0.2821, "step": 10984 }, { "epoch": 0.94, "learning_rate": 1.7819030746932696e-07, "loss": 0.3131, "step": 10985 }, { "epoch": 0.94, "learning_rate": 1.7766892617394727e-07, "loss": 0.2578, "step": 10986 }, { "epoch": 0.94, "learning_rate": 1.771483019353104e-07, "loss": 0.2864, "step": 10987 }, { "epoch": 0.94, "learning_rate": 1.7662843479354874e-07, "loss": 0.2723, "step": 10988 }, { "epoch": 0.94, "learning_rate": 1.761093247887269e-07, "loss": 0.2858, "step": 10989 }, { "epoch": 0.94, "learning_rate": 1.755909719608573e-07, "loss": 0.3066, "step": 10990 }, { "epoch": 0.94, "learning_rate": 1.750733763498924e-07, "loss": 0.2475, "step": 10991 }, { "epoch": 0.94, "learning_rate": 1.745565379957248e-07, "loss": 0.2643, "step": 10992 }, { "epoch": 0.94, "learning_rate": 1.7404045693819037e-07, "loss": 0.2493, "step": 10993 }, { "epoch": 0.94, "learning_rate": 1.7352513321706621e-07, "loss": 0.303, "step": 10994 }, { "epoch": 0.94, "learning_rate": 1.7301056687207053e-07, "loss": 0.2623, "step": 10995 }, { "epoch": 0.94, "learning_rate": 1.724967579428638e-07, "loss": 0.2954, "step": 10996 }, { "epoch": 0.94, "learning_rate": 1.7198370646904773e-07, "loss": 0.3277, "step": 10997 }, { "epoch": 0.94, "learning_rate": 1.714714124901662e-07, "loss": 0.2625, "step": 10998 }, { "epoch": 0.94, "learning_rate": 1.709598760457043e-07, "loss": 0.2752, "step": 10999 }, { "epoch": 0.94, "learning_rate": 1.7044909717508828e-07, "loss": 0.3015, "step": 11000 }, { "epoch": 0.94, "learning_rate": 1.6993907591768556e-07, "loss": 0.2481, "step": 11001 }, { "epoch": 0.94, "learning_rate": 1.6942981231280798e-07, "loss": 0.3054, "step": 11002 }, { "epoch": 0.94, "learning_rate": 1.6892130639970638e-07, "loss": 0.2666, "step": 11003 }, { "epoch": 0.94, "learning_rate": 1.6841355821757277e-07, "loss": 0.2927, "step": 11004 }, { "epoch": 0.94, "learning_rate": 1.679065678055447e-07, "loss": 0.2709, "step": 11005 }, { "epoch": 0.94, "learning_rate": 1.6740033520269538e-07, "loss": 0.2939, "step": 11006 }, { "epoch": 0.94, "learning_rate": 1.668948604480436e-07, "loss": 0.2606, "step": 11007 }, { "epoch": 0.94, "learning_rate": 1.6639014358054927e-07, "loss": 0.2576, "step": 11008 }, { "epoch": 0.94, "learning_rate": 1.6588618463911356e-07, "loss": 0.2776, "step": 11009 }, { "epoch": 0.94, "learning_rate": 1.6538298366257975e-07, "loss": 0.2117, "step": 11010 }, { "epoch": 0.94, "learning_rate": 1.6488054068972914e-07, "loss": 0.5712, "step": 11011 }, { "epoch": 0.94, "learning_rate": 1.643788557592918e-07, "loss": 0.3251, "step": 11012 }, { "epoch": 0.94, "learning_rate": 1.6387792890993238e-07, "loss": 0.2996, "step": 11013 }, { "epoch": 0.94, "learning_rate": 1.6337776018026108e-07, "loss": 0.2424, "step": 11014 }, { "epoch": 0.94, "learning_rate": 1.628783496088271e-07, "loss": 0.272, "step": 11015 }, { "epoch": 0.94, "learning_rate": 1.6237969723412294e-07, "loss": 0.265, "step": 11016 }, { "epoch": 0.94, "learning_rate": 1.6188180309458345e-07, "loss": 0.2539, "step": 11017 }, { "epoch": 0.94, "learning_rate": 1.6138466722858237e-07, "loss": 0.2629, "step": 11018 }, { "epoch": 0.94, "learning_rate": 1.6088828967443793e-07, "loss": 0.2788, "step": 11019 }, { "epoch": 0.94, "learning_rate": 1.6039267047040728e-07, "loss": 0.292, "step": 11020 }, { "epoch": 0.94, "learning_rate": 1.5989780965468994e-07, "loss": 0.2114, "step": 11021 }, { "epoch": 0.94, "learning_rate": 1.5940370726542864e-07, "loss": 0.2867, "step": 11022 }, { "epoch": 0.94, "learning_rate": 1.589103633407052e-07, "loss": 0.2458, "step": 11023 }, { "epoch": 0.94, "learning_rate": 1.5841777791854584e-07, "loss": 0.3018, "step": 11024 }, { "epoch": 0.95, "learning_rate": 1.5792595103691466e-07, "loss": 0.307, "step": 11025 }, { "epoch": 0.95, "learning_rate": 1.5743488273372133e-07, "loss": 0.2418, "step": 11026 }, { "epoch": 0.95, "learning_rate": 1.5694457304681222e-07, "loss": 0.3038, "step": 11027 }, { "epoch": 0.95, "learning_rate": 1.5645502201397933e-07, "loss": 0.259, "step": 11028 }, { "epoch": 0.95, "learning_rate": 1.5596622967295584e-07, "loss": 0.2399, "step": 11029 }, { "epoch": 0.95, "learning_rate": 1.554781960614138e-07, "loss": 0.3043, "step": 11030 }, { "epoch": 0.95, "learning_rate": 1.549909212169709e-07, "loss": 0.2683, "step": 11031 }, { "epoch": 0.95, "learning_rate": 1.5450440517717934e-07, "loss": 0.2667, "step": 11032 }, { "epoch": 0.95, "learning_rate": 1.5401864797954248e-07, "loss": 0.2121, "step": 11033 }, { "epoch": 0.95, "learning_rate": 1.5353364966149697e-07, "loss": 0.2802, "step": 11034 }, { "epoch": 0.95, "learning_rate": 1.5304941026042408e-07, "loss": 0.2393, "step": 11035 }, { "epoch": 0.95, "learning_rate": 1.5256592981364947e-07, "loss": 0.265, "step": 11036 }, { "epoch": 0.95, "learning_rate": 1.520832083584345e-07, "loss": 0.2571, "step": 11037 }, { "epoch": 0.95, "learning_rate": 1.51601245931986e-07, "loss": 0.2621, "step": 11038 }, { "epoch": 0.95, "learning_rate": 1.5112004257144986e-07, "loss": 0.3093, "step": 11039 }, { "epoch": 0.95, "learning_rate": 1.506395983139175e-07, "loss": 0.3196, "step": 11040 }, { "epoch": 0.95, "learning_rate": 1.501599131964182e-07, "loss": 0.2657, "step": 11041 }, { "epoch": 0.95, "learning_rate": 1.4968098725592127e-07, "loss": 0.3131, "step": 11042 }, { "epoch": 0.95, "learning_rate": 1.4920282052934387e-07, "loss": 0.2822, "step": 11043 }, { "epoch": 0.95, "learning_rate": 1.487254130535376e-07, "loss": 0.2692, "step": 11044 }, { "epoch": 0.95, "learning_rate": 1.482487648653008e-07, "loss": 0.2626, "step": 11045 }, { "epoch": 0.95, "learning_rate": 1.477728760013697e-07, "loss": 0.2756, "step": 11046 }, { "epoch": 0.95, "learning_rate": 1.4729774649842376e-07, "loss": 0.2456, "step": 11047 }, { "epoch": 0.95, "learning_rate": 1.4682337639308486e-07, "loss": 0.2608, "step": 11048 }, { "epoch": 0.95, "learning_rate": 1.4634976572191372e-07, "loss": 0.2312, "step": 11049 }, { "epoch": 0.95, "learning_rate": 1.458769145214145e-07, "loss": 0.2748, "step": 11050 }, { "epoch": 0.95, "learning_rate": 1.4540482282803136e-07, "loss": 0.2961, "step": 11051 }, { "epoch": 0.95, "learning_rate": 1.4493349067815188e-07, "loss": 0.2699, "step": 11052 }, { "epoch": 0.95, "learning_rate": 1.4446291810810365e-07, "loss": 0.2286, "step": 11053 }, { "epoch": 0.95, "learning_rate": 1.4399310515415655e-07, "loss": 0.2567, "step": 11054 }, { "epoch": 0.95, "learning_rate": 1.4352405185252048e-07, "loss": 0.3259, "step": 11055 }, { "epoch": 0.95, "learning_rate": 1.430557582393477e-07, "loss": 0.2772, "step": 11056 }, { "epoch": 0.95, "learning_rate": 1.425882243507337e-07, "loss": 0.2618, "step": 11057 }, { "epoch": 0.95, "learning_rate": 1.4212145022271196e-07, "loss": 0.2341, "step": 11058 }, { "epoch": 0.95, "learning_rate": 1.4165543589125918e-07, "loss": 0.2669, "step": 11059 }, { "epoch": 0.95, "learning_rate": 1.4119018139229333e-07, "loss": 0.3201, "step": 11060 }, { "epoch": 0.95, "learning_rate": 1.4072568676167575e-07, "loss": 0.2262, "step": 11061 }, { "epoch": 0.95, "learning_rate": 1.4026195203520666e-07, "loss": 0.2884, "step": 11062 }, { "epoch": 0.95, "learning_rate": 1.3979897724862523e-07, "loss": 0.2607, "step": 11063 }, { "epoch": 0.95, "learning_rate": 1.3933676243762072e-07, "loss": 0.2772, "step": 11064 }, { "epoch": 0.95, "learning_rate": 1.3887530763781465e-07, "loss": 0.2661, "step": 11065 }, { "epoch": 0.95, "learning_rate": 1.384146128847741e-07, "loss": 0.3054, "step": 11066 }, { "epoch": 0.95, "learning_rate": 1.3795467821400842e-07, "loss": 0.2769, "step": 11067 }, { "epoch": 0.95, "learning_rate": 1.37495503660966e-07, "loss": 0.2525, "step": 11068 }, { "epoch": 0.95, "learning_rate": 1.3703708926103842e-07, "loss": 0.2617, "step": 11069 }, { "epoch": 0.95, "learning_rate": 1.365794350495564e-07, "loss": 0.309, "step": 11070 }, { "epoch": 0.95, "learning_rate": 1.3612254106179723e-07, "loss": 0.251, "step": 11071 }, { "epoch": 0.95, "learning_rate": 1.3566640733297166e-07, "loss": 0.3071, "step": 11072 }, { "epoch": 0.95, "learning_rate": 1.352110338982382e-07, "loss": 0.2515, "step": 11073 }, { "epoch": 0.95, "learning_rate": 1.3475642079269659e-07, "loss": 0.2426, "step": 11074 }, { "epoch": 0.95, "learning_rate": 1.343025680513832e-07, "loss": 0.3289, "step": 11075 }, { "epoch": 0.95, "learning_rate": 1.338494757092812e-07, "loss": 0.2855, "step": 11076 }, { "epoch": 0.95, "learning_rate": 1.333971438013104e-07, "loss": 0.2477, "step": 11077 }, { "epoch": 0.95, "learning_rate": 1.329455723623352e-07, "loss": 0.2708, "step": 11078 }, { "epoch": 0.95, "learning_rate": 1.324947614271621e-07, "loss": 0.2846, "step": 11079 }, { "epoch": 0.95, "learning_rate": 1.320447110305345e-07, "loss": 0.2825, "step": 11080 }, { "epoch": 0.95, "learning_rate": 1.3159542120714352e-07, "loss": 0.2745, "step": 11081 }, { "epoch": 0.95, "learning_rate": 1.3114689199161478e-07, "loss": 0.2184, "step": 11082 }, { "epoch": 0.95, "learning_rate": 1.3069912341852064e-07, "loss": 0.2872, "step": 11083 }, { "epoch": 0.95, "learning_rate": 1.3025211552237127e-07, "loss": 0.2186, "step": 11084 }, { "epoch": 0.95, "learning_rate": 1.2980586833762242e-07, "loss": 0.3228, "step": 11085 }, { "epoch": 0.95, "learning_rate": 1.2936038189866773e-07, "loss": 0.2629, "step": 11086 }, { "epoch": 0.95, "learning_rate": 1.289156562398408e-07, "loss": 0.2468, "step": 11087 }, { "epoch": 0.95, "learning_rate": 1.2847169139542204e-07, "loss": 0.2712, "step": 11088 }, { "epoch": 0.95, "learning_rate": 1.2802848739962737e-07, "loss": 0.2634, "step": 11089 }, { "epoch": 0.95, "learning_rate": 1.2758604428661836e-07, "loss": 0.2529, "step": 11090 }, { "epoch": 0.95, "learning_rate": 1.2714436209049664e-07, "loss": 0.2556, "step": 11091 }, { "epoch": 0.95, "learning_rate": 1.2670344084530384e-07, "loss": 0.2754, "step": 11092 }, { "epoch": 0.95, "learning_rate": 1.2626328058502502e-07, "loss": 0.271, "step": 11093 }, { "epoch": 0.95, "learning_rate": 1.2582388134358414e-07, "loss": 0.2536, "step": 11094 }, { "epoch": 0.95, "learning_rate": 1.2538524315484968e-07, "loss": 0.2448, "step": 11095 }, { "epoch": 0.95, "learning_rate": 1.2494736605262792e-07, "loss": 0.2592, "step": 11096 }, { "epoch": 0.95, "learning_rate": 1.2451025007066963e-07, "loss": 0.2485, "step": 11097 }, { "epoch": 0.95, "learning_rate": 1.2407389524266456e-07, "loss": 0.2639, "step": 11098 }, { "epoch": 0.95, "learning_rate": 1.2363830160224465e-07, "loss": 0.3061, "step": 11099 }, { "epoch": 0.95, "learning_rate": 1.2320346918298644e-07, "loss": 0.2926, "step": 11100 }, { "epoch": 0.95, "learning_rate": 1.2276939801839972e-07, "loss": 0.3254, "step": 11101 }, { "epoch": 0.95, "learning_rate": 1.223360881419433e-07, "loss": 0.27, "step": 11102 }, { "epoch": 0.95, "learning_rate": 1.2190353958701495e-07, "loss": 0.2877, "step": 11103 }, { "epoch": 0.95, "learning_rate": 1.214717523869524e-07, "loss": 0.2866, "step": 11104 }, { "epoch": 0.95, "learning_rate": 1.2104072657503573e-07, "loss": 0.2492, "step": 11105 }, { "epoch": 0.95, "learning_rate": 1.2061046218448724e-07, "loss": 0.221, "step": 11106 }, { "epoch": 0.95, "learning_rate": 1.201809592484682e-07, "loss": 0.258, "step": 11107 }, { "epoch": 0.95, "learning_rate": 1.1975221780008317e-07, "loss": 0.2326, "step": 11108 }, { "epoch": 0.95, "learning_rate": 1.19324237872378e-07, "loss": 0.2755, "step": 11109 }, { "epoch": 0.95, "learning_rate": 1.1889701949833743e-07, "loss": 0.3179, "step": 11110 }, { "epoch": 0.95, "learning_rate": 1.1847056271089174e-07, "loss": 0.2632, "step": 11111 }, { "epoch": 0.95, "learning_rate": 1.1804486754290912e-07, "loss": 0.2695, "step": 11112 }, { "epoch": 0.95, "learning_rate": 1.1761993402719884e-07, "loss": 0.2848, "step": 11113 }, { "epoch": 0.95, "learning_rate": 1.1719576219651585e-07, "loss": 0.2698, "step": 11114 }, { "epoch": 0.95, "learning_rate": 1.1677235208354842e-07, "loss": 0.2657, "step": 11115 }, { "epoch": 0.95, "learning_rate": 1.163497037209349e-07, "loss": 0.2778, "step": 11116 }, { "epoch": 0.95, "learning_rate": 1.1592781714125034e-07, "loss": 0.2684, "step": 11117 }, { "epoch": 0.95, "learning_rate": 1.1550669237700985e-07, "loss": 0.256, "step": 11118 }, { "epoch": 0.95, "learning_rate": 1.150863294606741e-07, "loss": 0.3174, "step": 11119 }, { "epoch": 0.95, "learning_rate": 1.1466672842464055e-07, "loss": 0.3174, "step": 11120 }, { "epoch": 0.95, "learning_rate": 1.1424788930125108e-07, "loss": 0.598, "step": 11121 }, { "epoch": 0.95, "learning_rate": 1.1382981212278655e-07, "loss": 0.2747, "step": 11122 }, { "epoch": 0.95, "learning_rate": 1.1341249692147116e-07, "loss": 0.3002, "step": 11123 }, { "epoch": 0.95, "learning_rate": 1.1299594372947031e-07, "loss": 0.286, "step": 11124 }, { "epoch": 0.95, "learning_rate": 1.1258015257888832e-07, "loss": 0.2589, "step": 11125 }, { "epoch": 0.95, "learning_rate": 1.12165123501774e-07, "loss": 0.2772, "step": 11126 }, { "epoch": 0.95, "learning_rate": 1.1175085653011397e-07, "loss": 0.2831, "step": 11127 }, { "epoch": 0.95, "learning_rate": 1.1133735169583826e-07, "loss": 0.2903, "step": 11128 }, { "epoch": 0.95, "learning_rate": 1.1092460903081803e-07, "loss": 0.2384, "step": 11129 }, { "epoch": 0.95, "learning_rate": 1.1051262856686673e-07, "loss": 0.3017, "step": 11130 }, { "epoch": 0.95, "learning_rate": 1.1010141033573562e-07, "loss": 0.2823, "step": 11131 }, { "epoch": 0.95, "learning_rate": 1.0969095436912047e-07, "loss": 0.264, "step": 11132 }, { "epoch": 0.95, "learning_rate": 1.0928126069865819e-07, "loss": 0.2909, "step": 11133 }, { "epoch": 0.95, "learning_rate": 1.0887232935592351e-07, "loss": 0.2753, "step": 11134 }, { "epoch": 0.95, "learning_rate": 1.0846416037243678e-07, "loss": 0.2318, "step": 11135 }, { "epoch": 0.95, "learning_rate": 1.080567537796573e-07, "loss": 0.324, "step": 11136 }, { "epoch": 0.95, "learning_rate": 1.0765010960898548e-07, "loss": 0.2839, "step": 11137 }, { "epoch": 0.95, "learning_rate": 1.0724422789176404e-07, "loss": 0.3337, "step": 11138 }, { "epoch": 0.95, "learning_rate": 1.068391086592746e-07, "loss": 0.2611, "step": 11139 }, { "epoch": 0.95, "learning_rate": 1.0643475194274444e-07, "loss": 0.2267, "step": 11140 }, { "epoch": 0.95, "learning_rate": 1.060311577733375e-07, "loss": 0.2205, "step": 11141 }, { "epoch": 0.96, "learning_rate": 1.0562832618216223e-07, "loss": 0.2562, "step": 11142 }, { "epoch": 0.96, "learning_rate": 1.0522625720026491e-07, "loss": 0.2975, "step": 11143 }, { "epoch": 0.96, "learning_rate": 1.0482495085863631e-07, "loss": 0.2576, "step": 11144 }, { "epoch": 0.96, "learning_rate": 1.0442440718820834e-07, "loss": 0.308, "step": 11145 }, { "epoch": 0.96, "learning_rate": 1.0402462621984965e-07, "loss": 0.2617, "step": 11146 }, { "epoch": 0.96, "learning_rate": 1.0362560798437671e-07, "loss": 0.2844, "step": 11147 }, { "epoch": 0.96, "learning_rate": 1.0322735251254156e-07, "loss": 0.5356, "step": 11148 }, { "epoch": 0.96, "learning_rate": 1.028298598350408e-07, "loss": 0.2559, "step": 11149 }, { "epoch": 0.96, "learning_rate": 1.0243312998251209e-07, "loss": 0.2758, "step": 11150 }, { "epoch": 0.96, "learning_rate": 1.0203716298553212e-07, "loss": 0.2247, "step": 11151 }, { "epoch": 0.96, "learning_rate": 1.016419588746198e-07, "loss": 0.2969, "step": 11152 }, { "epoch": 0.96, "learning_rate": 1.0124751768023633e-07, "loss": 0.2437, "step": 11153 }, { "epoch": 0.96, "learning_rate": 1.0085383943278293e-07, "loss": 0.3262, "step": 11154 }, { "epoch": 0.96, "learning_rate": 1.0046092416260312e-07, "loss": 0.2685, "step": 11155 }, { "epoch": 0.96, "learning_rate": 1.0006877189997821e-07, "loss": 0.2482, "step": 11156 }, { "epoch": 0.96, "learning_rate": 9.967738267513737e-08, "loss": 0.2437, "step": 11157 }, { "epoch": 0.96, "learning_rate": 9.928675651824427e-08, "loss": 0.284, "step": 11158 }, { "epoch": 0.96, "learning_rate": 9.889689345940812e-08, "loss": 0.2286, "step": 11159 }, { "epoch": 0.96, "learning_rate": 9.85077935286749e-08, "loss": 0.2334, "step": 11160 }, { "epoch": 0.96, "learning_rate": 9.811945675603729e-08, "loss": 0.3089, "step": 11161 }, { "epoch": 0.96, "learning_rate": 9.773188317142579e-08, "loss": 0.2933, "step": 11162 }, { "epoch": 0.96, "learning_rate": 9.734507280471094e-08, "loss": 0.2717, "step": 11163 }, { "epoch": 0.96, "learning_rate": 9.69590256857078e-08, "loss": 0.2385, "step": 11164 }, { "epoch": 0.96, "learning_rate": 9.657374184417146e-08, "loss": 0.22, "step": 11165 }, { "epoch": 0.96, "learning_rate": 9.61892213097959e-08, "loss": 0.2663, "step": 11166 }, { "epoch": 0.96, "learning_rate": 9.580546411221858e-08, "loss": 0.2419, "step": 11167 }, { "epoch": 0.96, "learning_rate": 9.542247028101914e-08, "loss": 0.2545, "step": 11168 }, { "epoch": 0.96, "learning_rate": 9.50402398457162e-08, "loss": 0.2546, "step": 11169 }, { "epoch": 0.96, "learning_rate": 9.46587728357673e-08, "loss": 0.2878, "step": 11170 }, { "epoch": 0.96, "learning_rate": 9.427806928057893e-08, "loss": 0.3085, "step": 11171 }, { "epoch": 0.96, "learning_rate": 9.389812920949093e-08, "loss": 0.2669, "step": 11172 }, { "epoch": 0.96, "learning_rate": 9.351895265178656e-08, "loss": 0.5581, "step": 11173 }, { "epoch": 0.96, "learning_rate": 9.314053963669245e-08, "loss": 0.2675, "step": 11174 }, { "epoch": 0.96, "learning_rate": 9.276289019337415e-08, "loss": 0.2682, "step": 11175 }, { "epoch": 0.96, "learning_rate": 9.238600435094058e-08, "loss": 0.2626, "step": 11176 }, { "epoch": 0.96, "learning_rate": 9.200988213843631e-08, "loss": 0.2632, "step": 11177 }, { "epoch": 0.96, "learning_rate": 9.163452358485591e-08, "loss": 0.2552, "step": 11178 }, { "epoch": 0.96, "learning_rate": 9.125992871912626e-08, "loss": 0.2536, "step": 11179 }, { "epoch": 0.96, "learning_rate": 9.088609757012201e-08, "loss": 0.2545, "step": 11180 }, { "epoch": 0.96, "learning_rate": 9.051303016665347e-08, "loss": 0.2369, "step": 11181 }, { "epoch": 0.96, "learning_rate": 9.014072653747763e-08, "loss": 0.2917, "step": 11182 }, { "epoch": 0.96, "learning_rate": 8.97691867112882e-08, "loss": 0.2599, "step": 11183 }, { "epoch": 0.96, "learning_rate": 8.939841071672117e-08, "loss": 0.249, "step": 11184 }, { "epoch": 0.96, "learning_rate": 8.90283985823559e-08, "loss": 0.3525, "step": 11185 }, { "epoch": 0.96, "learning_rate": 8.865915033671069e-08, "loss": 0.2525, "step": 11186 }, { "epoch": 0.96, "learning_rate": 8.829066600824277e-08, "loss": 0.3111, "step": 11187 }, { "epoch": 0.96, "learning_rate": 8.79229456253572e-08, "loss": 0.266, "step": 11188 }, { "epoch": 0.96, "learning_rate": 8.755598921639241e-08, "loss": 0.2385, "step": 11189 }, { "epoch": 0.96, "learning_rate": 8.718979680963469e-08, "loss": 0.3097, "step": 11190 }, { "epoch": 0.96, "learning_rate": 8.682436843330477e-08, "loss": 0.3012, "step": 11191 }, { "epoch": 0.96, "learning_rate": 8.645970411557125e-08, "loss": 0.2763, "step": 11192 }, { "epoch": 0.96, "learning_rate": 8.609580388454052e-08, "loss": 0.2753, "step": 11193 }, { "epoch": 0.96, "learning_rate": 8.573266776825683e-08, "loss": 0.2708, "step": 11194 }, { "epoch": 0.96, "learning_rate": 8.537029579471334e-08, "loss": 0.2969, "step": 11195 }, { "epoch": 0.96, "learning_rate": 8.50086879918366e-08, "loss": 0.2747, "step": 11196 }, { "epoch": 0.96, "learning_rate": 8.464784438749985e-08, "loss": 0.3044, "step": 11197 }, { "epoch": 0.96, "learning_rate": 8.428776500951308e-08, "loss": 0.2467, "step": 11198 }, { "epoch": 0.96, "learning_rate": 8.392844988563075e-08, "loss": 0.284, "step": 11199 }, { "epoch": 0.96, "learning_rate": 8.35698990435474e-08, "loss": 0.3075, "step": 11200 }, { "epoch": 0.96, "learning_rate": 8.321211251089645e-08, "loss": 0.2837, "step": 11201 }, { "epoch": 0.96, "learning_rate": 8.285509031525696e-08, "loss": 0.2766, "step": 11202 }, { "epoch": 0.96, "learning_rate": 8.249883248414359e-08, "loss": 0.2557, "step": 11203 }, { "epoch": 0.96, "learning_rate": 8.21433390450177e-08, "loss": 0.2319, "step": 11204 }, { "epoch": 0.96, "learning_rate": 8.178861002527628e-08, "loss": 0.5624, "step": 11205 }, { "epoch": 0.96, "learning_rate": 8.143464545226298e-08, "loss": 0.2836, "step": 11206 }, { "epoch": 0.96, "learning_rate": 8.108144535325713e-08, "loss": 0.2432, "step": 11207 }, { "epoch": 0.96, "learning_rate": 8.072900975548248e-08, "loss": 0.2726, "step": 11208 }, { "epoch": 0.96, "learning_rate": 8.037733868610509e-08, "loss": 0.3044, "step": 11209 }, { "epoch": 0.96, "learning_rate": 8.002643217222661e-08, "loss": 0.2598, "step": 11210 }, { "epoch": 0.96, "learning_rate": 7.967629024089429e-08, "loss": 0.3063, "step": 11211 }, { "epoch": 0.96, "learning_rate": 7.932691291909656e-08, "loss": 0.2604, "step": 11212 }, { "epoch": 0.96, "learning_rate": 7.897830023376074e-08, "loss": 0.2823, "step": 11213 }, { "epoch": 0.96, "learning_rate": 7.863045221175647e-08, "loss": 0.2502, "step": 11214 }, { "epoch": 0.96, "learning_rate": 7.82833688798934e-08, "loss": 0.304, "step": 11215 }, { "epoch": 0.96, "learning_rate": 7.793705026492459e-08, "loss": 0.2799, "step": 11216 }, { "epoch": 0.96, "learning_rate": 7.759149639354091e-08, "loss": 0.3023, "step": 11217 }, { "epoch": 0.96, "learning_rate": 7.72467072923766e-08, "loss": 0.2812, "step": 11218 }, { "epoch": 0.96, "learning_rate": 7.690268298800596e-08, "loss": 0.2649, "step": 11219 }, { "epoch": 0.96, "learning_rate": 7.655942350694556e-08, "loss": 0.2226, "step": 11220 }, { "epoch": 0.96, "learning_rate": 7.621692887565202e-08, "loss": 0.2224, "step": 11221 }, { "epoch": 0.96, "learning_rate": 7.587519912052199e-08, "loss": 0.2657, "step": 11222 }, { "epoch": 0.96, "learning_rate": 7.553423426789664e-08, "loss": 0.2788, "step": 11223 }, { "epoch": 0.96, "learning_rate": 7.51940343440527e-08, "loss": 0.2589, "step": 11224 }, { "epoch": 0.96, "learning_rate": 7.485459937521256e-08, "loss": 0.2335, "step": 11225 }, { "epoch": 0.96, "learning_rate": 7.451592938753971e-08, "loss": 0.2879, "step": 11226 }, { "epoch": 0.96, "learning_rate": 7.417802440713439e-08, "loss": 0.2693, "step": 11227 }, { "epoch": 0.96, "learning_rate": 7.384088446004356e-08, "loss": 0.2811, "step": 11228 }, { "epoch": 0.96, "learning_rate": 7.350450957224864e-08, "loss": 0.2823, "step": 11229 }, { "epoch": 0.96, "learning_rate": 7.31688997696789e-08, "loss": 0.2487, "step": 11230 }, { "epoch": 0.96, "learning_rate": 7.283405507820141e-08, "loss": 0.2932, "step": 11231 }, { "epoch": 0.96, "learning_rate": 7.249997552362109e-08, "loss": 0.2537, "step": 11232 }, { "epoch": 0.96, "learning_rate": 7.21666611316918e-08, "loss": 0.2381, "step": 11233 }, { "epoch": 0.96, "learning_rate": 7.183411192810075e-08, "loss": 0.24, "step": 11234 }, { "epoch": 0.96, "learning_rate": 7.150232793847967e-08, "loss": 0.2476, "step": 11235 }, { "epoch": 0.96, "learning_rate": 7.117130918840032e-08, "loss": 0.303, "step": 11236 }, { "epoch": 0.96, "learning_rate": 7.084105570337785e-08, "loss": 0.2562, "step": 11237 }, { "epoch": 0.96, "learning_rate": 7.051156750886523e-08, "loss": 0.5919, "step": 11238 }, { "epoch": 0.96, "learning_rate": 7.01828446302566e-08, "loss": 0.366, "step": 11239 }, { "epoch": 0.96, "learning_rate": 6.985488709289057e-08, "loss": 0.2456, "step": 11240 }, { "epoch": 0.96, "learning_rate": 6.952769492204359e-08, "loss": 0.2977, "step": 11241 }, { "epoch": 0.96, "learning_rate": 6.920126814293438e-08, "loss": 0.274, "step": 11242 }, { "epoch": 0.96, "learning_rate": 6.887560678072169e-08, "loss": 0.265, "step": 11243 }, { "epoch": 0.96, "learning_rate": 6.855071086050547e-08, "loss": 0.2159, "step": 11244 }, { "epoch": 0.96, "learning_rate": 6.822658040732899e-08, "loss": 0.2888, "step": 11245 }, { "epoch": 0.96, "learning_rate": 6.790321544617117e-08, "loss": 0.2625, "step": 11246 }, { "epoch": 0.96, "learning_rate": 6.758061600195986e-08, "loss": 0.2703, "step": 11247 }, { "epoch": 0.96, "learning_rate": 6.725878209955628e-08, "loss": 0.3077, "step": 11248 }, { "epoch": 0.96, "learning_rate": 6.693771376376612e-08, "loss": 0.3311, "step": 11249 }, { "epoch": 0.96, "learning_rate": 6.661741101933628e-08, "loss": 0.2686, "step": 11250 }, { "epoch": 0.96, "learning_rate": 6.629787389095476e-08, "loss": 0.2845, "step": 11251 }, { "epoch": 0.96, "learning_rate": 6.597910240324967e-08, "loss": 0.3286, "step": 11252 }, { "epoch": 0.96, "learning_rate": 6.566109658078912e-08, "loss": 0.2987, "step": 11253 }, { "epoch": 0.96, "learning_rate": 6.534385644808461e-08, "loss": 0.2812, "step": 11254 }, { "epoch": 0.96, "learning_rate": 6.502738202958658e-08, "loss": 0.3301, "step": 11255 }, { "epoch": 0.96, "learning_rate": 6.471167334968887e-08, "loss": 0.2943, "step": 11256 }, { "epoch": 0.96, "learning_rate": 6.439673043272199e-08, "loss": 0.2758, "step": 11257 }, { "epoch": 0.97, "learning_rate": 6.40825533029632e-08, "loss": 0.2596, "step": 11258 }, { "epoch": 0.97, "learning_rate": 6.376914198462648e-08, "loss": 0.2995, "step": 11259 }, { "epoch": 0.97, "learning_rate": 6.345649650186691e-08, "loss": 0.2594, "step": 11260 }, { "epoch": 0.97, "learning_rate": 6.314461687878415e-08, "loss": 0.2642, "step": 11261 }, { "epoch": 0.97, "learning_rate": 6.28335031394134e-08, "loss": 0.2514, "step": 11262 }, { "epoch": 0.97, "learning_rate": 6.252315530773545e-08, "loss": 0.2921, "step": 11263 }, { "epoch": 0.97, "learning_rate": 6.22135734076712e-08, "loss": 0.2561, "step": 11264 }, { "epoch": 0.97, "learning_rate": 6.190475746307933e-08, "loss": 0.2963, "step": 11265 }, { "epoch": 0.97, "learning_rate": 6.159670749776414e-08, "loss": 0.2861, "step": 11266 }, { "epoch": 0.97, "learning_rate": 6.128942353546775e-08, "loss": 0.263, "step": 11267 }, { "epoch": 0.97, "learning_rate": 6.098290559987342e-08, "loss": 0.282, "step": 11268 }, { "epoch": 0.97, "learning_rate": 6.06771537146067e-08, "loss": 0.2546, "step": 11269 }, { "epoch": 0.97, "learning_rate": 6.037216790323319e-08, "loss": 0.3136, "step": 11270 }, { "epoch": 0.97, "learning_rate": 6.006794818926077e-08, "loss": 0.2807, "step": 11271 }, { "epoch": 0.97, "learning_rate": 5.976449459613509e-08, "loss": 0.2397, "step": 11272 }, { "epoch": 0.97, "learning_rate": 5.946180714724636e-08, "loss": 0.2944, "step": 11273 }, { "epoch": 0.97, "learning_rate": 5.915988586592481e-08, "loss": 0.2356, "step": 11274 }, { "epoch": 0.97, "learning_rate": 5.8858730775438465e-08, "loss": 0.2962, "step": 11275 }, { "epoch": 0.97, "learning_rate": 5.855834189900211e-08, "loss": 0.3015, "step": 11276 }, { "epoch": 0.97, "learning_rate": 5.8258719259765006e-08, "loss": 0.2362, "step": 11277 }, { "epoch": 0.97, "learning_rate": 5.795986288082422e-08, "loss": 0.2847, "step": 11278 }, { "epoch": 0.97, "learning_rate": 5.7661772785211345e-08, "loss": 0.2512, "step": 11279 }, { "epoch": 0.97, "learning_rate": 5.7364448995901324e-08, "loss": 0.2693, "step": 11280 }, { "epoch": 0.97, "learning_rate": 5.706789153581249e-08, "loss": 0.2752, "step": 11281 }, { "epoch": 0.97, "learning_rate": 5.677210042780212e-08, "loss": 0.2554, "step": 11282 }, { "epoch": 0.97, "learning_rate": 5.647707569466643e-08, "loss": 0.2544, "step": 11283 }, { "epoch": 0.97, "learning_rate": 5.618281735914499e-08, "loss": 0.3107, "step": 11284 }, { "epoch": 0.97, "learning_rate": 5.5889325443918565e-08, "loss": 0.2534, "step": 11285 }, { "epoch": 0.97, "learning_rate": 5.5596599971606823e-08, "loss": 0.2894, "step": 11286 }, { "epoch": 0.97, "learning_rate": 5.530464096477395e-08, "loss": 0.2811, "step": 11287 }, { "epoch": 0.97, "learning_rate": 5.5013448445919716e-08, "loss": 0.2862, "step": 11288 }, { "epoch": 0.97, "learning_rate": 5.4723022437489506e-08, "loss": 0.2396, "step": 11289 }, { "epoch": 0.97, "learning_rate": 5.443336296186874e-08, "loss": 0.2404, "step": 11290 }, { "epoch": 0.97, "learning_rate": 5.414447004138068e-08, "loss": 0.2172, "step": 11291 }, { "epoch": 0.97, "learning_rate": 5.3856343698294176e-08, "loss": 0.3086, "step": 11292 }, { "epoch": 0.97, "learning_rate": 5.3568983954813694e-08, "loss": 0.2487, "step": 11293 }, { "epoch": 0.97, "learning_rate": 5.3282390833090393e-08, "loss": 0.2922, "step": 11294 }, { "epoch": 0.97, "learning_rate": 5.299656435521217e-08, "loss": 0.2282, "step": 11295 }, { "epoch": 0.97, "learning_rate": 5.271150454320917e-08, "loss": 0.2324, "step": 11296 }, { "epoch": 0.97, "learning_rate": 5.2427211419051605e-08, "loss": 0.265, "step": 11297 }, { "epoch": 0.97, "learning_rate": 5.214368500465305e-08, "loss": 0.278, "step": 11298 }, { "epoch": 0.97, "learning_rate": 5.186092532186493e-08, "loss": 0.2397, "step": 11299 }, { "epoch": 0.97, "learning_rate": 5.157893239248202e-08, "loss": 0.2766, "step": 11300 }, { "epoch": 0.97, "learning_rate": 5.129770623823804e-08, "loss": 0.2867, "step": 11301 }, { "epoch": 0.97, "learning_rate": 5.1017246880809e-08, "loss": 0.3179, "step": 11302 }, { "epoch": 0.97, "learning_rate": 5.073755434181093e-08, "loss": 0.2878, "step": 11303 }, { "epoch": 0.97, "learning_rate": 5.0458628642802156e-08, "loss": 0.2625, "step": 11304 }, { "epoch": 0.97, "learning_rate": 5.01804698052788e-08, "loss": 0.3158, "step": 11305 }, { "epoch": 0.97, "learning_rate": 4.99030778506826e-08, "loss": 0.2543, "step": 11306 }, { "epoch": 0.97, "learning_rate": 4.962645280039202e-08, "loss": 0.2661, "step": 11307 }, { "epoch": 0.97, "learning_rate": 4.935059467572778e-08, "loss": 0.2993, "step": 11308 }, { "epoch": 0.97, "learning_rate": 4.907550349795287e-08, "loss": 0.2518, "step": 11309 }, { "epoch": 0.97, "learning_rate": 4.8801179288268105e-08, "loss": 0.3093, "step": 11310 }, { "epoch": 0.97, "learning_rate": 4.85276220678188e-08, "loss": 0.2347, "step": 11311 }, { "epoch": 0.97, "learning_rate": 4.82548318576892e-08, "loss": 0.2411, "step": 11312 }, { "epoch": 0.97, "learning_rate": 4.798280867890359e-08, "loss": 0.319, "step": 11313 }, { "epoch": 0.97, "learning_rate": 4.771155255242854e-08, "loss": 0.2524, "step": 11314 }, { "epoch": 0.97, "learning_rate": 4.744106349917066e-08, "loss": 0.2753, "step": 11315 }, { "epoch": 0.97, "learning_rate": 4.717134153997993e-08, "loss": 0.573, "step": 11316 }, { "epoch": 0.97, "learning_rate": 4.6902386695644174e-08, "loss": 0.2864, "step": 11317 }, { "epoch": 0.97, "learning_rate": 4.663419898689125e-08, "loss": 0.3143, "step": 11318 }, { "epoch": 0.97, "learning_rate": 4.6366778434393524e-08, "loss": 0.2847, "step": 11319 }, { "epoch": 0.97, "learning_rate": 4.610012505876338e-08, "loss": 0.2456, "step": 11320 }, { "epoch": 0.97, "learning_rate": 4.583423888055105e-08, "loss": 0.2608, "step": 11321 }, { "epoch": 0.97, "learning_rate": 4.556911992025015e-08, "loss": 0.2645, "step": 11322 }, { "epoch": 0.97, "learning_rate": 4.530476819829655e-08, "loss": 0.2343, "step": 11323 }, { "epoch": 0.97, "learning_rate": 4.504118373506283e-08, "loss": 0.2856, "step": 11324 }, { "epoch": 0.97, "learning_rate": 4.477836655086498e-08, "loss": 0.2693, "step": 11325 }, { "epoch": 0.97, "learning_rate": 4.451631666596123e-08, "loss": 0.2758, "step": 11326 }, { "epoch": 0.97, "learning_rate": 4.4255034100548766e-08, "loss": 0.3412, "step": 11327 }, { "epoch": 0.97, "learning_rate": 4.3994518874765914e-08, "loss": 0.2557, "step": 11328 }, { "epoch": 0.97, "learning_rate": 4.3734771008689947e-08, "loss": 0.3047, "step": 11329 }, { "epoch": 0.97, "learning_rate": 4.347579052234374e-08, "loss": 0.2413, "step": 11330 }, { "epoch": 0.97, "learning_rate": 4.3217577435686886e-08, "loss": 0.2524, "step": 11331 }, { "epoch": 0.97, "learning_rate": 4.296013176862013e-08, "loss": 0.3312, "step": 11332 }, { "epoch": 0.97, "learning_rate": 4.2703453540988704e-08, "loss": 0.3398, "step": 11333 }, { "epoch": 0.97, "learning_rate": 4.244754277257346e-08, "loss": 0.2469, "step": 11334 }, { "epoch": 0.97, "learning_rate": 4.219239948310083e-08, "loss": 0.2914, "step": 11335 }, { "epoch": 0.97, "learning_rate": 4.193802369223399e-08, "loss": 0.2906, "step": 11336 }, { "epoch": 0.97, "learning_rate": 4.168441541958168e-08, "loss": 0.2784, "step": 11337 }, { "epoch": 0.97, "learning_rate": 4.143157468468717e-08, "loss": 0.2576, "step": 11338 }, { "epoch": 0.97, "learning_rate": 4.117950150704153e-08, "loss": 0.2973, "step": 11339 }, { "epoch": 0.97, "learning_rate": 4.092819590607144e-08, "loss": 0.2581, "step": 11340 }, { "epoch": 0.97, "learning_rate": 4.067765790114697e-08, "loss": 0.2953, "step": 11341 }, { "epoch": 0.97, "learning_rate": 4.0427887511578224e-08, "loss": 0.2675, "step": 11342 }, { "epoch": 0.97, "learning_rate": 4.017888475661536e-08, "loss": 0.2936, "step": 11343 }, { "epoch": 0.97, "learning_rate": 3.99306496554519e-08, "loss": 0.2623, "step": 11344 }, { "epoch": 0.97, "learning_rate": 3.968318222722034e-08, "loss": 0.3088, "step": 11345 }, { "epoch": 0.97, "learning_rate": 3.943648249099319e-08, "loss": 0.2405, "step": 11346 }, { "epoch": 0.97, "learning_rate": 3.9190550465785236e-08, "loss": 0.2439, "step": 11347 }, { "epoch": 0.97, "learning_rate": 3.894538617055243e-08, "loss": 0.2798, "step": 11348 }, { "epoch": 0.97, "learning_rate": 3.8700989624189666e-08, "loss": 0.2879, "step": 11349 }, { "epoch": 0.97, "learning_rate": 3.845736084553408e-08, "loss": 0.265, "step": 11350 }, { "epoch": 0.97, "learning_rate": 3.8214499853364007e-08, "loss": 0.2798, "step": 11351 }, { "epoch": 0.97, "learning_rate": 3.79724066663989e-08, "loss": 0.2814, "step": 11352 }, { "epoch": 0.97, "learning_rate": 3.773108130329495e-08, "loss": 0.2737, "step": 11353 }, { "epoch": 0.97, "learning_rate": 3.749052378265505e-08, "loss": 0.2594, "step": 11354 }, { "epoch": 0.97, "learning_rate": 3.725073412301994e-08, "loss": 0.2728, "step": 11355 }, { "epoch": 0.97, "learning_rate": 3.7011712342870376e-08, "loss": 0.2447, "step": 11356 }, { "epoch": 0.97, "learning_rate": 3.677345846062941e-08, "loss": 0.2601, "step": 11357 }, { "epoch": 0.97, "learning_rate": 3.653597249466012e-08, "loss": 0.2836, "step": 11358 }, { "epoch": 0.97, "learning_rate": 3.6299254463267877e-08, "loss": 0.2546, "step": 11359 }, { "epoch": 0.97, "learning_rate": 3.606330438469585e-08, "loss": 0.2912, "step": 11360 }, { "epoch": 0.97, "learning_rate": 3.5828122277132836e-08, "loss": 0.2618, "step": 11361 }, { "epoch": 0.97, "learning_rate": 3.559370815870211e-08, "loss": 0.2906, "step": 11362 }, { "epoch": 0.97, "learning_rate": 3.536006204747366e-08, "loss": 0.2775, "step": 11363 }, { "epoch": 0.97, "learning_rate": 3.5127183961454204e-08, "loss": 0.2874, "step": 11364 }, { "epoch": 0.97, "learning_rate": 3.4895073918593814e-08, "loss": 0.2669, "step": 11365 }, { "epoch": 0.97, "learning_rate": 3.466373193678263e-08, "loss": 0.2559, "step": 11366 }, { "epoch": 0.97, "learning_rate": 3.443315803385083e-08, "loss": 0.2654, "step": 11367 }, { "epoch": 0.97, "learning_rate": 3.4203352227569766e-08, "loss": 0.2661, "step": 11368 }, { "epoch": 0.97, "learning_rate": 3.397431453565192e-08, "loss": 0.2279, "step": 11369 }, { "epoch": 0.97, "learning_rate": 3.3746044975749845e-08, "loss": 0.2541, "step": 11370 }, { "epoch": 0.97, "learning_rate": 3.351854356545725e-08, "loss": 0.2838, "step": 11371 }, { "epoch": 0.97, "learning_rate": 3.3291810322311214e-08, "loss": 0.2454, "step": 11372 }, { "epoch": 0.97, "learning_rate": 3.306584526378442e-08, "loss": 0.2866, "step": 11373 }, { "epoch": 0.97, "learning_rate": 3.284064840729406e-08, "loss": 0.3113, "step": 11374 }, { "epoch": 0.98, "learning_rate": 3.261621977019846e-08, "loss": 0.2747, "step": 11375 }, { "epoch": 0.98, "learning_rate": 3.239255936979269e-08, "loss": 0.278, "step": 11376 }, { "epoch": 0.98, "learning_rate": 3.216966722331849e-08, "loss": 0.2897, "step": 11377 }, { "epoch": 0.98, "learning_rate": 3.1947543347953246e-08, "loss": 0.3158, "step": 11378 }, { "epoch": 0.98, "learning_rate": 3.1726187760817704e-08, "loss": 0.2634, "step": 11379 }, { "epoch": 0.98, "learning_rate": 3.1505600478973775e-08, "loss": 0.3062, "step": 11380 }, { "epoch": 0.98, "learning_rate": 3.128578151942119e-08, "loss": 0.2733, "step": 11381 }, { "epoch": 0.98, "learning_rate": 3.106673089910417e-08, "loss": 0.2527, "step": 11382 }, { "epoch": 0.98, "learning_rate": 3.0848448634905884e-08, "loss": 0.3233, "step": 11383 }, { "epoch": 0.98, "learning_rate": 3.063093474364953e-08, "loss": 0.319, "step": 11384 }, { "epoch": 0.98, "learning_rate": 3.04141892421006e-08, "loss": 0.265, "step": 11385 }, { "epoch": 0.98, "learning_rate": 3.019821214696572e-08, "loss": 0.281, "step": 11386 }, { "epoch": 0.98, "learning_rate": 2.998300347488936e-08, "loss": 0.2572, "step": 11387 }, { "epoch": 0.98, "learning_rate": 2.9768563242460468e-08, "loss": 0.6128, "step": 11388 }, { "epoch": 0.98, "learning_rate": 2.9554891466205826e-08, "loss": 0.2444, "step": 11389 }, { "epoch": 0.98, "learning_rate": 2.9341988162595593e-08, "loss": 0.286, "step": 11390 }, { "epoch": 0.98, "learning_rate": 2.912985334803775e-08, "loss": 0.2689, "step": 11391 }, { "epoch": 0.98, "learning_rate": 2.891848703888367e-08, "loss": 0.2728, "step": 11392 }, { "epoch": 0.98, "learning_rate": 2.8707889251423647e-08, "loss": 0.2765, "step": 11393 }, { "epoch": 0.98, "learning_rate": 2.849806000189026e-08, "loss": 0.2752, "step": 11394 }, { "epoch": 0.98, "learning_rate": 2.8288999306456122e-08, "loss": 0.2593, "step": 11395 }, { "epoch": 0.98, "learning_rate": 2.8080707181232792e-08, "loss": 0.2748, "step": 11396 }, { "epoch": 0.98, "learning_rate": 2.7873183642277423e-08, "loss": 0.2015, "step": 11397 }, { "epoch": 0.98, "learning_rate": 2.766642870558278e-08, "loss": 0.2672, "step": 11398 }, { "epoch": 0.98, "learning_rate": 2.7460442387085005e-08, "loss": 0.2762, "step": 11399 }, { "epoch": 0.98, "learning_rate": 2.7255224702660288e-08, "loss": 0.3265, "step": 11400 }, { "epoch": 0.98, "learning_rate": 2.705077566812708e-08, "loss": 0.3335, "step": 11401 }, { "epoch": 0.98, "learning_rate": 2.6847095299241678e-08, "loss": 0.3121, "step": 11402 }, { "epoch": 0.98, "learning_rate": 2.6644183611702623e-08, "loss": 0.2772, "step": 11403 }, { "epoch": 0.98, "learning_rate": 2.6442040621150743e-08, "loss": 0.2513, "step": 11404 }, { "epoch": 0.98, "learning_rate": 2.62406663431658e-08, "loss": 0.2474, "step": 11405 }, { "epoch": 0.98, "learning_rate": 2.6040060793268705e-08, "loss": 0.2712, "step": 11406 }, { "epoch": 0.98, "learning_rate": 2.5840223986920432e-08, "loss": 0.3283, "step": 11407 }, { "epoch": 0.98, "learning_rate": 2.5641155939524208e-08, "loss": 0.2649, "step": 11408 }, { "epoch": 0.98, "learning_rate": 2.544285666642221e-08, "loss": 0.2557, "step": 11409 }, { "epoch": 0.98, "learning_rate": 2.5245326182899987e-08, "loss": 0.2036, "step": 11410 }, { "epoch": 0.98, "learning_rate": 2.5048564504180918e-08, "loss": 0.2394, "step": 11411 }, { "epoch": 0.98, "learning_rate": 2.4852571645430645e-08, "loss": 0.3246, "step": 11412 }, { "epoch": 0.98, "learning_rate": 2.4657347621755977e-08, "loss": 0.2924, "step": 11413 }, { "epoch": 0.98, "learning_rate": 2.4462892448202657e-08, "loss": 0.275, "step": 11414 }, { "epoch": 0.98, "learning_rate": 2.4269206139759804e-08, "loss": 0.2529, "step": 11415 }, { "epoch": 0.98, "learning_rate": 2.407628871135437e-08, "loss": 0.2819, "step": 11416 }, { "epoch": 0.98, "learning_rate": 2.3884140177856675e-08, "loss": 0.2571, "step": 11417 }, { "epoch": 0.98, "learning_rate": 2.369276055407599e-08, "loss": 0.2749, "step": 11418 }, { "epoch": 0.98, "learning_rate": 2.3502149854762734e-08, "loss": 0.2868, "step": 11419 }, { "epoch": 0.98, "learning_rate": 2.3312308094607382e-08, "loss": 0.2536, "step": 11420 }, { "epoch": 0.98, "learning_rate": 2.3123235288244895e-08, "loss": 0.5137, "step": 11421 }, { "epoch": 0.98, "learning_rate": 2.2934931450245833e-08, "loss": 0.2585, "step": 11422 }, { "epoch": 0.98, "learning_rate": 2.2747396595123038e-08, "loss": 0.2614, "step": 11423 }, { "epoch": 0.98, "learning_rate": 2.256063073733272e-08, "loss": 0.3104, "step": 11424 }, { "epoch": 0.98, "learning_rate": 2.2374633891268928e-08, "loss": 0.2795, "step": 11425 }, { "epoch": 0.98, "learning_rate": 2.218940607126685e-08, "loss": 0.2569, "step": 11426 }, { "epoch": 0.98, "learning_rate": 2.2004947291603964e-08, "loss": 0.2558, "step": 11427 }, { "epoch": 0.98, "learning_rate": 2.182125756649778e-08, "loss": 0.2653, "step": 11428 }, { "epoch": 0.98, "learning_rate": 2.163833691010475e-08, "loss": 0.2581, "step": 11429 }, { "epoch": 0.98, "learning_rate": 2.1456185336524714e-08, "loss": 0.3249, "step": 11430 }, { "epoch": 0.98, "learning_rate": 2.1274802859795328e-08, "loss": 0.2426, "step": 11431 }, { "epoch": 0.98, "learning_rate": 2.109418949389874e-08, "loss": 0.2612, "step": 11432 }, { "epoch": 0.98, "learning_rate": 2.091434525275382e-08, "loss": 0.2803, "step": 11433 }, { "epoch": 0.98, "learning_rate": 2.0735270150223917e-08, "loss": 0.274, "step": 11434 }, { "epoch": 0.98, "learning_rate": 2.055696420011022e-08, "loss": 0.2836, "step": 11435 }, { "epoch": 0.98, "learning_rate": 2.037942741615617e-08, "loss": 0.2904, "step": 11436 }, { "epoch": 0.98, "learning_rate": 2.0202659812045278e-08, "loss": 0.2227, "step": 11437 }, { "epoch": 0.98, "learning_rate": 2.002666140140108e-08, "loss": 0.251, "step": 11438 }, { "epoch": 0.98, "learning_rate": 1.98514321977894e-08, "loss": 0.3287, "step": 11439 }, { "epoch": 0.98, "learning_rate": 1.9676972214716095e-08, "loss": 0.2678, "step": 11440 }, { "epoch": 0.98, "learning_rate": 1.9503281465627077e-08, "loss": 0.2937, "step": 11441 }, { "epoch": 0.98, "learning_rate": 1.9330359963910527e-08, "loss": 0.3317, "step": 11442 }, { "epoch": 0.98, "learning_rate": 1.9158207722893564e-08, "loss": 0.2131, "step": 11443 }, { "epoch": 0.98, "learning_rate": 1.8986824755846677e-08, "loss": 0.6558, "step": 11444 }, { "epoch": 0.98, "learning_rate": 1.8816211075975976e-08, "loss": 0.2593, "step": 11445 }, { "epoch": 0.98, "learning_rate": 1.864636669643427e-08, "loss": 0.2697, "step": 11446 }, { "epoch": 0.98, "learning_rate": 1.847729163030998e-08, "loss": 0.2943, "step": 11447 }, { "epoch": 0.98, "learning_rate": 1.8308985890637122e-08, "loss": 0.2432, "step": 11448 }, { "epoch": 0.98, "learning_rate": 1.814144949038643e-08, "loss": 0.3052, "step": 11449 }, { "epoch": 0.98, "learning_rate": 1.7974682442470915e-08, "loss": 0.2794, "step": 11450 }, { "epoch": 0.98, "learning_rate": 1.780868475974362e-08, "loss": 0.2299, "step": 11451 }, { "epoch": 0.98, "learning_rate": 1.7643456454999875e-08, "loss": 0.2826, "step": 11452 }, { "epoch": 0.98, "learning_rate": 1.747899754097504e-08, "loss": 0.2925, "step": 11453 }, { "epoch": 0.98, "learning_rate": 1.7315308030342314e-08, "loss": 0.5677, "step": 11454 }, { "epoch": 0.98, "learning_rate": 1.7152387935721603e-08, "loss": 0.2896, "step": 11455 }, { "epoch": 0.98, "learning_rate": 1.699023726966731e-08, "loss": 0.3215, "step": 11456 }, { "epoch": 0.98, "learning_rate": 1.682885604467721e-08, "loss": 0.2579, "step": 11457 }, { "epoch": 0.98, "learning_rate": 1.666824427319136e-08, "loss": 0.2515, "step": 11458 }, { "epoch": 0.98, "learning_rate": 1.6508401967588738e-08, "loss": 0.2817, "step": 11459 }, { "epoch": 0.98, "learning_rate": 1.6349329140188385e-08, "loss": 0.2773, "step": 11460 }, { "epoch": 0.98, "learning_rate": 1.6191025803250493e-08, "loss": 0.28, "step": 11461 }, { "epoch": 0.98, "learning_rate": 1.6033491968976412e-08, "loss": 0.2805, "step": 11462 }, { "epoch": 0.98, "learning_rate": 1.587672764950976e-08, "loss": 0.285, "step": 11463 }, { "epoch": 0.98, "learning_rate": 1.5720732856930878e-08, "loss": 0.2961, "step": 11464 }, { "epoch": 0.98, "learning_rate": 1.5565507603264585e-08, "loss": 0.2628, "step": 11465 }, { "epoch": 0.98, "learning_rate": 1.541105190047465e-08, "loss": 0.2618, "step": 11466 }, { "epoch": 0.98, "learning_rate": 1.525736576046599e-08, "loss": 0.2624, "step": 11467 }, { "epoch": 0.98, "learning_rate": 1.5104449195082473e-08, "loss": 0.3469, "step": 11468 }, { "epoch": 0.98, "learning_rate": 1.4952302216112437e-08, "loss": 0.3184, "step": 11469 }, { "epoch": 0.98, "learning_rate": 1.480092483527984e-08, "loss": 0.2355, "step": 11470 }, { "epoch": 0.98, "learning_rate": 1.4650317064254238e-08, "loss": 0.3027, "step": 11471 }, { "epoch": 0.98, "learning_rate": 1.4500478914644122e-08, "loss": 0.2668, "step": 11472 }, { "epoch": 0.98, "learning_rate": 1.4351410397995813e-08, "loss": 0.2556, "step": 11473 }, { "epoch": 0.98, "learning_rate": 1.4203111525801228e-08, "loss": 0.2491, "step": 11474 }, { "epoch": 0.98, "learning_rate": 1.4055582309489002e-08, "loss": 0.2595, "step": 11475 }, { "epoch": 0.98, "learning_rate": 1.390882276043115e-08, "loss": 0.2816, "step": 11476 }, { "epoch": 0.98, "learning_rate": 1.3762832889937517e-08, "loss": 0.3431, "step": 11477 }, { "epoch": 0.98, "learning_rate": 1.3617612709262428e-08, "loss": 0.2835, "step": 11478 }, { "epoch": 0.98, "learning_rate": 1.3473162229596937e-08, "loss": 0.2776, "step": 11479 }, { "epoch": 0.98, "learning_rate": 1.3329481462075466e-08, "loss": 0.2713, "step": 11480 }, { "epoch": 0.98, "learning_rate": 1.3186570417771383e-08, "loss": 0.2608, "step": 11481 }, { "epoch": 0.98, "learning_rate": 1.3044429107700319e-08, "loss": 0.2422, "step": 11482 }, { "epoch": 0.98, "learning_rate": 1.2903057542817954e-08, "loss": 0.2609, "step": 11483 }, { "epoch": 0.98, "learning_rate": 1.2762455734020018e-08, "loss": 0.2527, "step": 11484 }, { "epoch": 0.98, "learning_rate": 1.262262369214451e-08, "loss": 0.2543, "step": 11485 }, { "epoch": 0.98, "learning_rate": 1.248356142796725e-08, "loss": 0.5543, "step": 11486 }, { "epoch": 0.98, "learning_rate": 1.2345268952207445e-08, "loss": 0.3311, "step": 11487 }, { "epoch": 0.98, "learning_rate": 1.2207746275523235e-08, "loss": 0.3029, "step": 11488 }, { "epoch": 0.98, "learning_rate": 1.2070993408516141e-08, "loss": 0.2455, "step": 11489 }, { "epoch": 0.98, "learning_rate": 1.1935010361724397e-08, "loss": 0.2894, "step": 11490 }, { "epoch": 0.98, "learning_rate": 1.1799797145628511e-08, "loss": 0.307, "step": 11491 }, { "epoch": 0.99, "learning_rate": 1.1665353770652366e-08, "loss": 0.2742, "step": 11492 }, { "epoch": 0.99, "learning_rate": 1.1531680247156562e-08, "loss": 0.2823, "step": 11493 }, { "epoch": 0.99, "learning_rate": 1.1398776585445082e-08, "loss": 0.2738, "step": 11494 }, { "epoch": 0.99, "learning_rate": 1.126664279575973e-08, "loss": 0.2755, "step": 11495 }, { "epoch": 0.99, "learning_rate": 1.1135278888286805e-08, "loss": 0.2807, "step": 11496 }, { "epoch": 0.99, "learning_rate": 1.1004684873149319e-08, "loss": 0.2877, "step": 11497 }, { "epoch": 0.99, "learning_rate": 1.0874860760413664e-08, "loss": 0.2685, "step": 11498 }, { "epoch": 0.99, "learning_rate": 1.0745806560086281e-08, "loss": 0.2725, "step": 11499 }, { "epoch": 0.99, "learning_rate": 1.0617522282113656e-08, "loss": 0.3019, "step": 11500 }, { "epoch": 0.99, "learning_rate": 1.0490007936383439e-08, "loss": 0.2753, "step": 11501 }, { "epoch": 0.99, "learning_rate": 1.0363263532724433e-08, "loss": 0.2572, "step": 11502 }, { "epoch": 0.99, "learning_rate": 1.0237289080904377e-08, "loss": 0.2715, "step": 11503 }, { "epoch": 0.99, "learning_rate": 1.0112084590633287e-08, "loss": 0.2794, "step": 11504 }, { "epoch": 0.99, "learning_rate": 9.987650071561217e-09, "loss": 0.2914, "step": 11505 }, { "epoch": 0.99, "learning_rate": 9.863985533278275e-09, "loss": 0.2257, "step": 11506 }, { "epoch": 0.99, "learning_rate": 9.741090985316836e-09, "loss": 0.2817, "step": 11507 }, { "epoch": 0.99, "learning_rate": 9.618966437149324e-09, "loss": 0.2543, "step": 11508 }, { "epoch": 0.99, "learning_rate": 9.497611898188209e-09, "loss": 0.2716, "step": 11509 }, { "epoch": 0.99, "learning_rate": 9.377027377786007e-09, "loss": 0.2936, "step": 11510 }, { "epoch": 0.99, "learning_rate": 9.25721288523751e-09, "loss": 0.276, "step": 11511 }, { "epoch": 0.99, "learning_rate": 9.13816842977755e-09, "loss": 0.25, "step": 11512 }, { "epoch": 0.99, "learning_rate": 9.01989402058101e-09, "loss": 0.2637, "step": 11513 }, { "epoch": 0.99, "learning_rate": 8.902389666765044e-09, "loss": 0.2573, "step": 11514 }, { "epoch": 0.99, "learning_rate": 8.785655377384628e-09, "loss": 0.3189, "step": 11515 }, { "epoch": 0.99, "learning_rate": 8.66969116143701e-09, "loss": 0.2859, "step": 11516 }, { "epoch": 0.99, "learning_rate": 8.554497027862818e-09, "loss": 0.3009, "step": 11517 }, { "epoch": 0.99, "learning_rate": 8.440072985537174e-09, "loss": 0.2725, "step": 11518 }, { "epoch": 0.99, "learning_rate": 8.326419043281909e-09, "loss": 0.3018, "step": 11519 }, { "epoch": 0.99, "learning_rate": 8.213535209855571e-09, "loss": 0.259, "step": 11520 }, { "epoch": 0.99, "learning_rate": 8.101421493958984e-09, "loss": 0.2982, "step": 11521 }, { "epoch": 0.99, "learning_rate": 7.990077904234117e-09, "loss": 0.2548, "step": 11522 }, { "epoch": 0.99, "learning_rate": 7.879504449261887e-09, "loss": 0.2206, "step": 11523 }, { "epoch": 0.99, "learning_rate": 7.769701137564368e-09, "loss": 0.5447, "step": 11524 }, { "epoch": 0.99, "learning_rate": 7.660667977605896e-09, "loss": 0.3079, "step": 11525 }, { "epoch": 0.99, "learning_rate": 7.552404977788641e-09, "loss": 0.2728, "step": 11526 }, { "epoch": 0.99, "learning_rate": 7.44491214645815e-09, "loss": 0.3044, "step": 11527 }, { "epoch": 0.99, "learning_rate": 7.338189491900016e-09, "loss": 0.3257, "step": 11528 }, { "epoch": 0.99, "learning_rate": 7.232237022338773e-09, "loss": 0.2479, "step": 11529 }, { "epoch": 0.99, "learning_rate": 7.12705474594011e-09, "loss": 0.2343, "step": 11530 }, { "epoch": 0.99, "learning_rate": 7.0226426708119855e-09, "loss": 0.2923, "step": 11531 }, { "epoch": 0.99, "learning_rate": 6.919000805002407e-09, "loss": 0.2963, "step": 11532 }, { "epoch": 0.99, "learning_rate": 6.816129156499429e-09, "loss": 0.2526, "step": 11533 }, { "epoch": 0.99, "learning_rate": 6.714027733230044e-09, "loss": 0.2718, "step": 11534 }, { "epoch": 0.99, "learning_rate": 6.612696543066843e-09, "loss": 0.2905, "step": 11535 }, { "epoch": 0.99, "learning_rate": 6.512135593816915e-09, "loss": 0.2753, "step": 11536 }, { "epoch": 0.99, "learning_rate": 6.412344893232947e-09, "loss": 0.2878, "step": 11537 }, { "epoch": 0.99, "learning_rate": 6.3133244490043434e-09, "loss": 0.2676, "step": 11538 }, { "epoch": 0.99, "learning_rate": 6.215074268766108e-09, "loss": 0.2621, "step": 11539 }, { "epoch": 0.99, "learning_rate": 6.117594360088852e-09, "loss": 0.2385, "step": 11540 }, { "epoch": 0.99, "learning_rate": 6.020884730485455e-09, "loss": 0.306, "step": 11541 }, { "epoch": 0.99, "learning_rate": 5.924945387411063e-09, "loss": 0.3141, "step": 11542 }, { "epoch": 0.99, "learning_rate": 5.8297763382597625e-09, "loss": 0.2732, "step": 11543 }, { "epoch": 0.99, "learning_rate": 5.735377590366797e-09, "loss": 0.2704, "step": 11544 }, { "epoch": 0.99, "learning_rate": 5.6417491510074584e-09, "loss": 0.3116, "step": 11545 }, { "epoch": 0.99, "learning_rate": 5.548891027398195e-09, "loss": 0.2813, "step": 11546 }, { "epoch": 0.99, "learning_rate": 5.456803226696616e-09, "loss": 0.3231, "step": 11547 }, { "epoch": 0.99, "learning_rate": 5.365485756000377e-09, "loss": 0.2432, "step": 11548 }, { "epoch": 0.99, "learning_rate": 5.274938622348291e-09, "loss": 0.2744, "step": 11549 }, { "epoch": 0.99, "learning_rate": 5.185161832718111e-09, "loss": 0.2561, "step": 11550 }, { "epoch": 0.99, "learning_rate": 5.096155394028746e-09, "loss": 0.2561, "step": 11551 }, { "epoch": 0.99, "learning_rate": 5.007919313142484e-09, "loss": 0.2926, "step": 11552 }, { "epoch": 0.99, "learning_rate": 4.920453596859443e-09, "loss": 0.2908, "step": 11553 }, { "epoch": 0.99, "learning_rate": 4.833758251919785e-09, "loss": 0.2725, "step": 11554 }, { "epoch": 0.99, "learning_rate": 4.747833285007053e-09, "loss": 0.2717, "step": 11555 }, { "epoch": 0.99, "learning_rate": 4.662678702742618e-09, "loss": 0.2628, "step": 11556 }, { "epoch": 0.99, "learning_rate": 4.578294511691228e-09, "loss": 0.29, "step": 11557 }, { "epoch": 0.99, "learning_rate": 4.494680718355459e-09, "loss": 0.2753, "step": 11558 }, { "epoch": 0.99, "learning_rate": 4.411837329181268e-09, "loss": 0.2796, "step": 11559 }, { "epoch": 0.99, "learning_rate": 4.329764350552434e-09, "loss": 0.3345, "step": 11560 }, { "epoch": 0.99, "learning_rate": 4.248461788795011e-09, "loss": 0.2124, "step": 11561 }, { "epoch": 0.99, "learning_rate": 4.167929650176206e-09, "loss": 0.265, "step": 11562 }, { "epoch": 0.99, "learning_rate": 4.088167940902166e-09, "loss": 0.2834, "step": 11563 }, { "epoch": 0.99, "learning_rate": 4.009176667121306e-09, "loss": 0.5439, "step": 11564 }, { "epoch": 0.99, "learning_rate": 3.93095583492209e-09, "loss": 0.2127, "step": 11565 }, { "epoch": 0.99, "learning_rate": 3.853505450331918e-09, "loss": 0.2473, "step": 11566 }, { "epoch": 0.99, "learning_rate": 3.7768255193204595e-09, "loss": 0.2492, "step": 11567 }, { "epoch": 0.99, "learning_rate": 3.700916047799652e-09, "loss": 0.2236, "step": 11568 }, { "epoch": 0.99, "learning_rate": 3.6257770416192606e-09, "loss": 0.2706, "step": 11569 }, { "epoch": 0.99, "learning_rate": 3.5514085065690984e-09, "loss": 0.2792, "step": 11570 }, { "epoch": 0.99, "learning_rate": 3.4778104483834676e-09, "loss": 0.2814, "step": 11571 }, { "epoch": 0.99, "learning_rate": 3.4049828727333866e-09, "loss": 0.306, "step": 11572 }, { "epoch": 0.99, "learning_rate": 3.3329257852332543e-09, "loss": 0.2882, "step": 11573 }, { "epoch": 0.99, "learning_rate": 3.2616391914364056e-09, "loss": 0.2967, "step": 11574 }, { "epoch": 0.99, "learning_rate": 3.1911230968362238e-09, "loss": 0.2654, "step": 11575 }, { "epoch": 0.99, "learning_rate": 3.1213775068683617e-09, "loss": 0.2779, "step": 11576 }, { "epoch": 0.99, "learning_rate": 3.0524024269096286e-09, "loss": 0.2856, "step": 11577 }, { "epoch": 0.99, "learning_rate": 2.9841978622746624e-09, "loss": 0.3293, "step": 11578 }, { "epoch": 0.99, "learning_rate": 2.9167638182214796e-09, "loss": 0.2673, "step": 11579 }, { "epoch": 0.99, "learning_rate": 2.850100299947034e-09, "loss": 0.2715, "step": 11580 }, { "epoch": 0.99, "learning_rate": 2.7842073125894377e-09, "loss": 0.2838, "step": 11581 }, { "epoch": 0.99, "learning_rate": 2.7190848612279606e-09, "loss": 0.2588, "step": 11582 }, { "epoch": 0.99, "learning_rate": 2.654732950880812e-09, "loss": 0.3109, "step": 11583 }, { "epoch": 0.99, "learning_rate": 2.591151586508467e-09, "loss": 0.3022, "step": 11584 }, { "epoch": 0.99, "learning_rate": 2.5283407730114506e-09, "loss": 0.3181, "step": 11585 }, { "epoch": 0.99, "learning_rate": 2.4663005152314455e-09, "loss": 0.2777, "step": 11586 }, { "epoch": 0.99, "learning_rate": 2.405030817949072e-09, "loss": 0.2429, "step": 11587 }, { "epoch": 0.99, "learning_rate": 2.3445316858883295e-09, "loss": 0.2407, "step": 11588 }, { "epoch": 0.99, "learning_rate": 2.2848031237099332e-09, "loss": 0.2783, "step": 11589 }, { "epoch": 0.99, "learning_rate": 2.225845136019089e-09, "loss": 0.2467, "step": 11590 }, { "epoch": 0.99, "learning_rate": 2.1676577273610498e-09, "loss": 0.2602, "step": 11591 }, { "epoch": 0.99, "learning_rate": 2.110240902217786e-09, "loss": 0.2925, "step": 11592 }, { "epoch": 0.99, "learning_rate": 2.053594665016867e-09, "loss": 0.2975, "step": 11593 }, { "epoch": 0.99, "learning_rate": 1.9977190201225793e-09, "loss": 0.2503, "step": 11594 }, { "epoch": 0.99, "learning_rate": 1.9426139718436986e-09, "loss": 0.2598, "step": 11595 }, { "epoch": 0.99, "learning_rate": 1.8882795244257177e-09, "loss": 0.2744, "step": 11596 }, { "epoch": 0.99, "learning_rate": 1.8347156820563983e-09, "loss": 0.2524, "step": 11597 }, { "epoch": 0.99, "learning_rate": 1.7819224488657695e-09, "loss": 0.31, "step": 11598 }, { "epoch": 0.99, "learning_rate": 1.7298998289216884e-09, "loss": 0.2989, "step": 11599 }, { "epoch": 0.99, "learning_rate": 1.678647826234281e-09, "loss": 0.3439, "step": 11600 }, { "epoch": 0.99, "learning_rate": 1.6281664447526103e-09, "loss": 0.28, "step": 11601 }, { "epoch": 0.99, "learning_rate": 1.5784556883691183e-09, "loss": 0.5886, "step": 11602 }, { "epoch": 0.99, "learning_rate": 1.5295155609151845e-09, "loss": 0.2851, "step": 11603 }, { "epoch": 0.99, "learning_rate": 1.4813460661611267e-09, "loss": 0.2918, "step": 11604 }, { "epoch": 0.99, "learning_rate": 1.433947207821751e-09, "loss": 0.2743, "step": 11605 }, { "epoch": 0.99, "learning_rate": 1.3873189895485806e-09, "loss": 0.2662, "step": 11606 }, { "epoch": 0.99, "learning_rate": 1.3414614149365179e-09, "loss": 0.2792, "step": 11607 }, { "epoch": 1.0, "learning_rate": 1.2963744875205132e-09, "loss": 0.2714, "step": 11608 }, { "epoch": 1.0, "learning_rate": 1.2520582107733436e-09, "loss": 0.2455, "step": 11609 }, { "epoch": 1.0, "learning_rate": 1.2085125881133863e-09, "loss": 0.2542, "step": 11610 }, { "epoch": 1.0, "learning_rate": 1.165737622895735e-09, "loss": 0.2796, "step": 11611 }, { "epoch": 1.0, "learning_rate": 1.1237333184177523e-09, "loss": 0.2532, "step": 11612 }, { "epoch": 1.0, "learning_rate": 1.082499677915738e-09, "loss": 0.2457, "step": 11613 }, { "epoch": 1.0, "learning_rate": 1.042036704568261e-09, "loss": 0.2759, "step": 11614 }, { "epoch": 1.0, "learning_rate": 1.002344401495048e-09, "loss": 0.2508, "step": 11615 }, { "epoch": 1.0, "learning_rate": 9.634227717547629e-10, "loss": 0.5614, "step": 11616 }, { "epoch": 1.0, "learning_rate": 9.252718183472287e-10, "loss": 0.2728, "step": 11617 }, { "epoch": 1.0, "learning_rate": 8.878915442123159e-10, "loss": 0.2224, "step": 11618 }, { "epoch": 1.0, "learning_rate": 8.512819522310533e-10, "loss": 0.5873, "step": 11619 }, { "epoch": 1.0, "learning_rate": 8.154430452267381e-10, "loss": 0.2794, "step": 11620 }, { "epoch": 1.0, "learning_rate": 7.803748259604949e-10, "loss": 0.2524, "step": 11621 }, { "epoch": 1.0, "learning_rate": 7.460772971357167e-10, "loss": 0.2491, "step": 11622 }, { "epoch": 1.0, "learning_rate": 7.125504613947343e-10, "loss": 0.2253, "step": 11623 }, { "epoch": 1.0, "learning_rate": 6.797943213232572e-10, "loss": 0.2627, "step": 11624 }, { "epoch": 1.0, "learning_rate": 6.478088794448223e-10, "loss": 0.3286, "step": 11625 }, { "epoch": 1.0, "learning_rate": 6.165941382241248e-10, "loss": 0.2869, "step": 11626 }, { "epoch": 1.0, "learning_rate": 5.861501000692382e-10, "loss": 0.2546, "step": 11627 }, { "epoch": 1.0, "learning_rate": 5.564767673249538e-10, "loss": 0.3231, "step": 11628 }, { "epoch": 1.0, "learning_rate": 5.27574142278331e-10, "loss": 0.2661, "step": 11629 }, { "epoch": 1.0, "learning_rate": 4.994422271575872e-10, "loss": 0.2284, "step": 11630 }, { "epoch": 1.0, "learning_rate": 4.720810241309881e-10, "loss": 0.2263, "step": 11631 }, { "epoch": 1.0, "learning_rate": 4.454905353068473e-10, "loss": 0.23, "step": 11632 }, { "epoch": 1.0, "learning_rate": 4.1967076273574657e-10, "loss": 0.254, "step": 11633 }, { "epoch": 1.0, "learning_rate": 3.946217084072057e-10, "loss": 0.2627, "step": 11634 }, { "epoch": 1.0, "learning_rate": 3.7034337425079225e-10, "loss": 0.232, "step": 11635 }, { "epoch": 1.0, "learning_rate": 3.4683576213945245e-10, "loss": 0.3064, "step": 11636 }, { "epoch": 1.0, "learning_rate": 3.240988738839601e-10, "loss": 0.24, "step": 11637 }, { "epoch": 1.0, "learning_rate": 3.0213271123735735e-10, "loss": 0.2811, "step": 11638 }, { "epoch": 1.0, "learning_rate": 2.80937275891624e-10, "loss": 0.5745, "step": 11639 }, { "epoch": 1.0, "learning_rate": 2.605125694821187e-10, "loss": 0.2322, "step": 11640 }, { "epoch": 1.0, "learning_rate": 2.4085859358202733e-10, "loss": 0.5648, "step": 11641 }, { "epoch": 1.0, "learning_rate": 2.2197534970569424e-10, "loss": 0.2676, "step": 11642 }, { "epoch": 1.0, "learning_rate": 2.0386283930973194e-10, "loss": 0.2805, "step": 11643 }, { "epoch": 1.0, "learning_rate": 1.8652106378969082e-10, "loss": 0.2921, "step": 11644 }, { "epoch": 1.0, "learning_rate": 1.6995002448116916e-10, "loss": 0.2927, "step": 11645 }, { "epoch": 1.0, "learning_rate": 1.5414972266314389e-10, "loss": 0.2812, "step": 11646 }, { "epoch": 1.0, "learning_rate": 1.3912015955241942e-10, "loss": 0.2792, "step": 11647 }, { "epoch": 1.0, "learning_rate": 1.248613363069584e-10, "loss": 0.2933, "step": 11648 }, { "epoch": 1.0, "learning_rate": 1.1137325402699184e-10, "loss": 0.3054, "step": 11649 }, { "epoch": 1.0, "learning_rate": 9.865591375168848e-11, "loss": 0.2521, "step": 11650 }, { "epoch": 1.0, "learning_rate": 8.670931646026504e-11, "loss": 0.2402, "step": 11651 }, { "epoch": 1.0, "learning_rate": 7.553346307420662e-11, "loss": 0.2681, "step": 11652 }, { "epoch": 1.0, "learning_rate": 6.512835445615651e-11, "loss": 0.2566, "step": 11653 }, { "epoch": 1.0, "learning_rate": 5.549399140547529e-11, "loss": 0.2853, "step": 11654 }, { "epoch": 1.0, "learning_rate": 4.663037466712261e-11, "loss": 0.2484, "step": 11655 }, { "epoch": 1.0, "learning_rate": 3.853750492277542e-11, "loss": 0.2497, "step": 11656 }, { "epoch": 1.0, "learning_rate": 3.1215382796379036e-11, "loss": 0.3162, "step": 11657 }, { "epoch": 1.0, "learning_rate": 2.466400885303699e-11, "loss": 0.2766, "step": 11658 }, { "epoch": 1.0, "learning_rate": 1.8883383597900763e-11, "loss": 0.2817, "step": 11659 }, { "epoch": 1.0, "learning_rate": 1.3873507475059556e-11, "loss": 0.2537, "step": 11660 }, { "epoch": 1.0, "learning_rate": 9.634380871981209e-12, "loss": 0.262, "step": 11661 }, { "epoch": 1.0, "learning_rate": 6.166004113961066e-12, "loss": 0.239, "step": 11662 }, { "epoch": 1.0, "learning_rate": 3.468377470783324e-12, "loss": 0.2936, "step": 11663 }, { "epoch": 1.0, "learning_rate": 1.5415011478392416e-12, "loss": 0.2358, "step": 11664 }, { "epoch": 1.0, "learning_rate": 3.853752938987043e-13, "loss": 0.6515, "step": 11665 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.3406, "step": 11666 }, { "epoch": 1.0, "step": 11666, "total_flos": 1.6909495139958784e+16, "train_loss": 0.30232172586201356, "train_runtime": 93155.4817, "train_samples_per_second": 16.029, "train_steps_per_second": 0.125 } ], "max_steps": 11666, "num_train_epochs": 1, "total_flos": 1.6909495139958784e+16, "trial_name": null, "trial_params": null }