diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5266 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 873, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.7037037037037036e-07, + "loss": 1.6596, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 7.407407407407407e-07, + "loss": 1.7984, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.111111111111111e-06, + "loss": 1.7185, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.4814814814814815e-06, + "loss": 1.7882, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.8518518518518519e-06, + "loss": 1.7184, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.222222222222222e-06, + "loss": 1.7167, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.5925925925925925e-06, + "loss": 1.6061, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 2.962962962962963e-06, + "loss": 1.7068, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.4754, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 3.7037037037037037e-06, + "loss": 1.6545, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.074074074074074e-06, + "loss": 1.6856, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 4.444444444444444e-06, + "loss": 1.6343, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 4.814814814814815e-06, + "loss": 1.5836, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 5.185185185185185e-06, + "loss": 1.6395, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 5.555555555555557e-06, + "loss": 1.7376, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 5.925925925925926e-06, + "loss": 1.4839, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 6.296296296296297e-06, + "loss": 1.7036, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 6.666666666666667e-06, + "loss": 1.7867, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 7.0370370370370375e-06, + "loss": 1.6244, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 7.4074074074074075e-06, + "loss": 1.6133, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 7.77777777777778e-06, + "loss": 1.5338, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 8.148148148148148e-06, + "loss": 1.4873, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 8.518518518518519e-06, + "loss": 1.566, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 8.888888888888888e-06, + "loss": 1.5234, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 9.25925925925926e-06, + "loss": 1.5517, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 9.62962962962963e-06, + "loss": 1.6403, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.5553, + "step": 27 + }, + { + "epoch": 0.03, + "learning_rate": 9.99996552545612e-06, + "loss": 1.4303, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 9.999862102299874e-06, + "loss": 1.5491, + "step": 29 + }, + { + "epoch": 0.03, + "learning_rate": 9.99968973195745e-06, + "loss": 1.5947, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 9.999448416805802e-06, + "loss": 1.6149, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 9.999138160172624e-06, + "loss": 1.6031, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 9.998758966336296e-06, + "loss": 1.4953, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 9.998310840525835e-06, + "loss": 1.3904, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 9.99779378892081e-06, + "loss": 1.5396, + "step": 35 + }, + { + "epoch": 0.04, + "learning_rate": 9.997207818651273e-06, + "loss": 1.5207, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 9.996552937797646e-06, + "loss": 1.4792, + "step": 37 + }, + { + "epoch": 0.04, + "learning_rate": 9.995829155390613e-06, + "loss": 1.549, + "step": 38 + }, + { + "epoch": 0.04, + "learning_rate": 9.995036481411005e-06, + "loss": 1.5933, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 9.994174926789648e-06, + "loss": 1.5199, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 9.993244503407227e-06, + "loss": 1.4438, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 9.99224522409411e-06, + "loss": 1.5248, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 9.991177102630173e-06, + "loss": 1.4997, + "step": 43 + }, + { + "epoch": 0.05, + "learning_rate": 9.99004015374462e-06, + "loss": 1.4953, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 9.988834393115768e-06, + "loss": 1.5326, + "step": 45 + }, + { + "epoch": 0.05, + "learning_rate": 9.987559837370832e-06, + "loss": 1.4969, + "step": 46 + }, + { + "epoch": 0.05, + "learning_rate": 9.986216504085709e-06, + "loss": 1.4415, + "step": 47 + }, + { + "epoch": 0.05, + "learning_rate": 9.984804411784717e-06, + "loss": 1.5773, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 9.983323579940351e-06, + "loss": 1.4887, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 9.981774028973013e-06, + "loss": 1.4998, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 9.980155780250728e-06, + "loss": 1.5108, + "step": 51 + }, + { + "epoch": 0.06, + "learning_rate": 9.97846885608885e-06, + "loss": 1.5011, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 9.976713279749754e-06, + "loss": 1.4729, + "step": 53 + }, + { + "epoch": 0.06, + "learning_rate": 9.97488907544252e-06, + "loss": 1.4965, + "step": 54 + }, + { + "epoch": 0.06, + "learning_rate": 9.972996268322594e-06, + "loss": 1.5399, + "step": 55 + }, + { + "epoch": 0.06, + "learning_rate": 9.971034884491436e-06, + "loss": 1.4602, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 9.969004950996175e-06, + "loss": 1.4677, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 9.96690649582922e-06, + "loss": 1.4103, + "step": 58 + }, + { + "epoch": 0.07, + "learning_rate": 9.964739547927892e-06, + "loss": 1.4356, + "step": 59 + }, + { + "epoch": 0.07, + "learning_rate": 9.962504137173997e-06, + "loss": 1.502, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 9.96020029439345e-06, + "loss": 1.5595, + "step": 61 + }, + { + "epoch": 0.07, + "learning_rate": 9.957828051355817e-06, + "loss": 1.4218, + "step": 62 + }, + { + "epoch": 0.07, + "learning_rate": 9.955387440773902e-06, + "loss": 1.4533, + "step": 63 + }, + { + "epoch": 0.07, + "learning_rate": 9.952878496303274e-06, + "loss": 1.4632, + "step": 64 + }, + { + "epoch": 0.07, + "learning_rate": 9.950301252541824e-06, + "loss": 1.3781, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 9.94765574502927e-06, + "loss": 1.4568, + "step": 66 + }, + { + "epoch": 0.08, + "learning_rate": 9.944942010246681e-06, + "loss": 1.4398, + "step": 67 + }, + { + "epoch": 0.08, + "learning_rate": 9.942160085615963e-06, + "loss": 1.4723, + "step": 68 + }, + { + "epoch": 0.08, + "learning_rate": 9.939310009499348e-06, + "loss": 1.4004, + "step": 69 + }, + { + "epoch": 0.08, + "learning_rate": 9.936391821198868e-06, + "loss": 1.3648, + "step": 70 + }, + { + "epoch": 0.08, + "learning_rate": 9.933405560955805e-06, + "loss": 1.4415, + "step": 71 + }, + { + "epoch": 0.08, + "learning_rate": 9.930351269950144e-06, + "loss": 1.4612, + "step": 72 + }, + { + "epoch": 0.08, + "learning_rate": 9.9272289903e-06, + "loss": 1.4325, + "step": 73 + }, + { + "epoch": 0.08, + "learning_rate": 9.924038765061042e-06, + "loss": 1.5181, + "step": 74 + }, + { + "epoch": 0.09, + "learning_rate": 9.92078063822589e-06, + "loss": 1.3426, + "step": 75 + }, + { + "epoch": 0.09, + "learning_rate": 9.917454654723522e-06, + "loss": 1.3409, + "step": 76 + }, + { + "epoch": 0.09, + "learning_rate": 9.914060860418644e-06, + "loss": 1.3939, + "step": 77 + }, + { + "epoch": 0.09, + "learning_rate": 9.910599302111057e-06, + "loss": 1.4446, + "step": 78 + }, + { + "epoch": 0.09, + "learning_rate": 9.907070027535022e-06, + "loss": 1.4905, + "step": 79 + }, + { + "epoch": 0.09, + "learning_rate": 9.903473085358589e-06, + "loss": 1.3647, + "step": 80 + }, + { + "epoch": 0.09, + "learning_rate": 9.899808525182935e-06, + "loss": 1.4727, + "step": 81 + }, + { + "epoch": 0.09, + "learning_rate": 9.896076397541676e-06, + "loss": 1.4646, + "step": 82 + }, + { + "epoch": 0.1, + "learning_rate": 9.892276753900173e-06, + "loss": 1.3373, + "step": 83 + }, + { + "epoch": 0.1, + "learning_rate": 9.888409646654818e-06, + "loss": 1.5058, + "step": 84 + }, + { + "epoch": 0.1, + "learning_rate": 9.884475129132312e-06, + "loss": 1.4937, + "step": 85 + }, + { + "epoch": 0.1, + "learning_rate": 9.880473255588937e-06, + "loss": 1.4754, + "step": 86 + }, + { + "epoch": 0.1, + "learning_rate": 9.876404081209796e-06, + "loss": 1.4366, + "step": 87 + }, + { + "epoch": 0.1, + "learning_rate": 9.872267662108064e-06, + "loss": 1.4724, + "step": 88 + }, + { + "epoch": 0.1, + "learning_rate": 9.868064055324204e-06, + "loss": 1.4958, + "step": 89 + }, + { + "epoch": 0.1, + "learning_rate": 9.863793318825186e-06, + "loss": 1.4369, + "step": 90 + }, + { + "epoch": 0.1, + "learning_rate": 9.859455511503691e-06, + "loss": 1.4184, + "step": 91 + }, + { + "epoch": 0.11, + "learning_rate": 9.855050693177286e-06, + "loss": 1.3802, + "step": 92 + }, + { + "epoch": 0.11, + "learning_rate": 9.850578924587614e-06, + "loss": 1.4848, + "step": 93 + }, + { + "epoch": 0.11, + "learning_rate": 9.846040267399548e-06, + "loss": 1.5789, + "step": 94 + }, + { + "epoch": 0.11, + "learning_rate": 9.841434784200341e-06, + "loss": 1.3857, + "step": 95 + }, + { + "epoch": 0.11, + "learning_rate": 9.83676253849877e-06, + "loss": 1.418, + "step": 96 + }, + { + "epoch": 0.11, + "learning_rate": 9.832023594724248e-06, + "loss": 1.4117, + "step": 97 + }, + { + "epoch": 0.11, + "learning_rate": 9.827218018225944e-06, + "loss": 1.5229, + "step": 98 + }, + { + "epoch": 0.11, + "learning_rate": 9.822345875271884e-06, + "loss": 1.4827, + "step": 99 + }, + { + "epoch": 0.11, + "learning_rate": 9.817407233048028e-06, + "loss": 1.3705, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 9.812402159657352e-06, + "loss": 1.4836, + "step": 101 + }, + { + "epoch": 0.12, + "learning_rate": 9.807330724118906e-06, + "loss": 1.4261, + "step": 102 + }, + { + "epoch": 0.12, + "learning_rate": 9.802192996366859e-06, + "loss": 1.3848, + "step": 103 + }, + { + "epoch": 0.12, + "learning_rate": 9.796989047249539e-06, + "loss": 1.3516, + "step": 104 + }, + { + "epoch": 0.12, + "learning_rate": 9.791718948528457e-06, + "loss": 1.4449, + "step": 105 + }, + { + "epoch": 0.12, + "learning_rate": 9.786382772877312e-06, + "loss": 1.4151, + "step": 106 + }, + { + "epoch": 0.12, + "learning_rate": 9.780980593880993e-06, + "loss": 1.4627, + "step": 107 + }, + { + "epoch": 0.12, + "learning_rate": 9.775512486034564e-06, + "loss": 1.3104, + "step": 108 + }, + { + "epoch": 0.12, + "learning_rate": 9.76997852474223e-06, + "loss": 1.3921, + "step": 109 + }, + { + "epoch": 0.13, + "learning_rate": 9.76437878631631e-06, + "loss": 1.4407, + "step": 110 + }, + { + "epoch": 0.13, + "learning_rate": 9.758713347976179e-06, + "loss": 1.5017, + "step": 111 + }, + { + "epoch": 0.13, + "learning_rate": 9.752982287847193e-06, + "loss": 1.4565, + "step": 112 + }, + { + "epoch": 0.13, + "learning_rate": 9.747185684959626e-06, + "loss": 1.4413, + "step": 113 + }, + { + "epoch": 0.13, + "learning_rate": 9.741323619247575e-06, + "loss": 1.3756, + "step": 114 + }, + { + "epoch": 0.13, + "learning_rate": 9.735396171547859e-06, + "loss": 1.3519, + "step": 115 + }, + { + "epoch": 0.13, + "learning_rate": 9.7294034235989e-06, + "loss": 1.4479, + "step": 116 + }, + { + "epoch": 0.13, + "learning_rate": 9.723345458039595e-06, + "loss": 1.4406, + "step": 117 + }, + { + "epoch": 0.14, + "learning_rate": 9.717222358408188e-06, + "loss": 1.4624, + "step": 118 + }, + { + "epoch": 0.14, + "learning_rate": 9.711034209141102e-06, + "loss": 1.4615, + "step": 119 + }, + { + "epoch": 0.14, + "learning_rate": 9.704781095571788e-06, + "loss": 1.4321, + "step": 120 + }, + { + "epoch": 0.14, + "learning_rate": 9.698463103929542e-06, + "loss": 1.4256, + "step": 121 + }, + { + "epoch": 0.14, + "learning_rate": 9.692080321338317e-06, + "loss": 1.4684, + "step": 122 + }, + { + "epoch": 0.14, + "learning_rate": 9.685632835815519e-06, + "loss": 1.3634, + "step": 123 + }, + { + "epoch": 0.14, + "learning_rate": 9.679120736270796e-06, + "loss": 1.5006, + "step": 124 + }, + { + "epoch": 0.14, + "learning_rate": 9.672544112504813e-06, + "loss": 1.3559, + "step": 125 + }, + { + "epoch": 0.14, + "learning_rate": 9.665903055208013e-06, + "loss": 1.3583, + "step": 126 + }, + { + "epoch": 0.15, + "learning_rate": 9.659197655959364e-06, + "loss": 1.3658, + "step": 127 + }, + { + "epoch": 0.15, + "learning_rate": 9.6524280072251e-06, + "loss": 1.3334, + "step": 128 + }, + { + "epoch": 0.15, + "learning_rate": 9.645594202357438e-06, + "loss": 1.408, + "step": 129 + }, + { + "epoch": 0.15, + "learning_rate": 9.638696335593304e-06, + "loss": 1.4738, + "step": 130 + }, + { + "epoch": 0.15, + "learning_rate": 9.63173450205302e-06, + "loss": 1.4019, + "step": 131 + }, + { + "epoch": 0.15, + "learning_rate": 9.624708797739002e-06, + "loss": 1.3439, + "step": 132 + }, + { + "epoch": 0.15, + "learning_rate": 9.617619319534427e-06, + "loss": 1.4233, + "step": 133 + }, + { + "epoch": 0.15, + "learning_rate": 9.610466165201912e-06, + "loss": 1.329, + "step": 134 + }, + { + "epoch": 0.15, + "learning_rate": 9.603249433382145e-06, + "loss": 1.3863, + "step": 135 + }, + { + "epoch": 0.16, + "learning_rate": 9.595969223592544e-06, + "loss": 1.3281, + "step": 136 + }, + { + "epoch": 0.16, + "learning_rate": 9.588625636225871e-06, + "loss": 1.443, + "step": 137 + }, + { + "epoch": 0.16, + "learning_rate": 9.58121877254886e-06, + "loss": 1.3968, + "step": 138 + }, + { + "epoch": 0.16, + "learning_rate": 9.573748734700806e-06, + "loss": 1.3448, + "step": 139 + }, + { + "epoch": 0.16, + "learning_rate": 9.566215625692168e-06, + "loss": 1.4587, + "step": 140 + }, + { + "epoch": 0.16, + "learning_rate": 9.558619549403148e-06, + "loss": 1.4847, + "step": 141 + }, + { + "epoch": 0.16, + "learning_rate": 9.550960610582251e-06, + "loss": 1.3366, + "step": 142 + }, + { + "epoch": 0.16, + "learning_rate": 9.543238914844844e-06, + "loss": 1.4567, + "step": 143 + }, + { + "epoch": 0.16, + "learning_rate": 9.535454568671705e-06, + "loss": 1.3963, + "step": 144 + }, + { + "epoch": 0.17, + "learning_rate": 9.527607679407545e-06, + "loss": 1.3651, + "step": 145 + }, + { + "epoch": 0.17, + "learning_rate": 9.519698355259537e-06, + "loss": 1.3715, + "step": 146 + }, + { + "epoch": 0.17, + "learning_rate": 9.51172670529582e-06, + "loss": 1.4307, + "step": 147 + }, + { + "epoch": 0.17, + "learning_rate": 9.503692839443988e-06, + "loss": 1.4342, + "step": 148 + }, + { + "epoch": 0.17, + "learning_rate": 9.495596868489588e-06, + "loss": 1.3459, + "step": 149 + }, + { + "epoch": 0.17, + "learning_rate": 9.487438904074581e-06, + "loss": 1.3055, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 9.47921905869581e-06, + "loss": 1.3297, + "step": 151 + }, + { + "epoch": 0.17, + "learning_rate": 9.47093744570344e-06, + "loss": 1.3391, + "step": 152 + }, + { + "epoch": 0.18, + "learning_rate": 9.462594179299408e-06, + "loss": 1.3502, + "step": 153 + }, + { + "epoch": 0.18, + "learning_rate": 9.45418937453583e-06, + "loss": 1.4598, + "step": 154 + }, + { + "epoch": 0.18, + "learning_rate": 9.445723147313434e-06, + "loss": 1.3428, + "step": 155 + }, + { + "epoch": 0.18, + "learning_rate": 9.437195614379947e-06, + "loss": 1.3898, + "step": 156 + }, + { + "epoch": 0.18, + "learning_rate": 9.428606893328493e-06, + "loss": 1.352, + "step": 157 + }, + { + "epoch": 0.18, + "learning_rate": 9.41995710259597e-06, + "loss": 1.4388, + "step": 158 + }, + { + "epoch": 0.18, + "learning_rate": 9.41124636146141e-06, + "loss": 1.3507, + "step": 159 + }, + { + "epoch": 0.18, + "learning_rate": 9.402474790044348e-06, + "loss": 1.3915, + "step": 160 + }, + { + "epoch": 0.18, + "learning_rate": 9.39364250930315e-06, + "loss": 1.4424, + "step": 161 + }, + { + "epoch": 0.19, + "learning_rate": 9.384749641033358e-06, + "loss": 1.3962, + "step": 162 + }, + { + "epoch": 0.19, + "learning_rate": 9.375796307866003e-06, + "loss": 1.2982, + "step": 163 + }, + { + "epoch": 0.19, + "learning_rate": 9.366782633265917e-06, + "loss": 1.3802, + "step": 164 + }, + { + "epoch": 0.19, + "learning_rate": 9.357708741530025e-06, + "loss": 1.4136, + "step": 165 + }, + { + "epoch": 0.19, + "learning_rate": 9.348574757785642e-06, + "loss": 1.3946, + "step": 166 + }, + { + "epoch": 0.19, + "learning_rate": 9.339380807988734e-06, + "loss": 1.2952, + "step": 167 + }, + { + "epoch": 0.19, + "learning_rate": 9.330127018922195e-06, + "loss": 1.3998, + "step": 168 + }, + { + "epoch": 0.19, + "learning_rate": 9.320813518194084e-06, + "loss": 1.3156, + "step": 169 + }, + { + "epoch": 0.19, + "learning_rate": 9.311440434235879e-06, + "loss": 1.4532, + "step": 170 + }, + { + "epoch": 0.2, + "learning_rate": 9.302007896300697e-06, + "loss": 1.3771, + "step": 171 + }, + { + "epoch": 0.2, + "learning_rate": 9.292516034461517e-06, + "loss": 1.4247, + "step": 172 + }, + { + "epoch": 0.2, + "learning_rate": 9.28296497960938e-06, + "loss": 1.3896, + "step": 173 + }, + { + "epoch": 0.2, + "learning_rate": 9.273354863451589e-06, + "loss": 1.4133, + "step": 174 + }, + { + "epoch": 0.2, + "learning_rate": 9.263685818509895e-06, + "loss": 1.3593, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 9.253957978118664e-06, + "loss": 1.3283, + "step": 176 + }, + { + "epoch": 0.2, + "learning_rate": 9.244171476423037e-06, + "loss": 1.3792, + "step": 177 + }, + { + "epoch": 0.2, + "learning_rate": 9.234326448377089e-06, + "loss": 1.3244, + "step": 178 + }, + { + "epoch": 0.21, + "learning_rate": 9.22442302974196e-06, + "loss": 1.3083, + "step": 179 + }, + { + "epoch": 0.21, + "learning_rate": 9.214461357083986e-06, + "loss": 1.3197, + "step": 180 + }, + { + "epoch": 0.21, + "learning_rate": 9.204441567772817e-06, + "loss": 1.4067, + "step": 181 + }, + { + "epoch": 0.21, + "learning_rate": 9.194363799979517e-06, + "loss": 1.3608, + "step": 182 + }, + { + "epoch": 0.21, + "learning_rate": 9.184228192674667e-06, + "loss": 1.3958, + "step": 183 + }, + { + "epoch": 0.21, + "learning_rate": 9.17403488562644e-06, + "loss": 1.3592, + "step": 184 + }, + { + "epoch": 0.21, + "learning_rate": 9.163784019398686e-06, + "loss": 1.362, + "step": 185 + }, + { + "epoch": 0.21, + "learning_rate": 9.153475735348973e-06, + "loss": 1.303, + "step": 186 + }, + { + "epoch": 0.21, + "learning_rate": 9.143110175626662e-06, + "loss": 1.3781, + "step": 187 + }, + { + "epoch": 0.22, + "learning_rate": 9.13268748317093e-06, + "loss": 1.4138, + "step": 188 + }, + { + "epoch": 0.22, + "learning_rate": 9.122207801708802e-06, + "loss": 1.438, + "step": 189 + }, + { + "epoch": 0.22, + "learning_rate": 9.111671275753175e-06, + "loss": 1.4004, + "step": 190 + }, + { + "epoch": 0.22, + "learning_rate": 9.101078050600823e-06, + "loss": 1.3989, + "step": 191 + }, + { + "epoch": 0.22, + "learning_rate": 9.090428272330381e-06, + "loss": 1.4085, + "step": 192 + }, + { + "epoch": 0.22, + "learning_rate": 9.079722087800353e-06, + "loss": 1.3661, + "step": 193 + }, + { + "epoch": 0.22, + "learning_rate": 9.06895964464707e-06, + "loss": 1.3699, + "step": 194 + }, + { + "epoch": 0.22, + "learning_rate": 9.058141091282656e-06, + "loss": 1.3042, + "step": 195 + }, + { + "epoch": 0.22, + "learning_rate": 9.047266576892993e-06, + "loss": 1.2713, + "step": 196 + }, + { + "epoch": 0.23, + "learning_rate": 9.036336251435647e-06, + "loss": 1.3376, + "step": 197 + }, + { + "epoch": 0.23, + "learning_rate": 9.025350265637816e-06, + "loss": 1.3499, + "step": 198 + }, + { + "epoch": 0.23, + "learning_rate": 9.014308770994235e-06, + "loss": 1.3658, + "step": 199 + }, + { + "epoch": 0.23, + "learning_rate": 9.003211919765102e-06, + "loss": 1.331, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 8.992059864973972e-06, + "loss": 1.2886, + "step": 201 + }, + { + "epoch": 0.23, + "learning_rate": 8.980852760405645e-06, + "loss": 1.3809, + "step": 202 + }, + { + "epoch": 0.23, + "learning_rate": 8.96959076060405e-06, + "loss": 1.3197, + "step": 203 + }, + { + "epoch": 0.23, + "learning_rate": 8.958274020870107e-06, + "loss": 1.4306, + "step": 204 + }, + { + "epoch": 0.23, + "learning_rate": 8.946902697259593e-06, + "loss": 1.3622, + "step": 205 + }, + { + "epoch": 0.24, + "learning_rate": 8.935476946580988e-06, + "loss": 1.3956, + "step": 206 + }, + { + "epoch": 0.24, + "learning_rate": 8.923996926393306e-06, + "loss": 1.3504, + "step": 207 + }, + { + "epoch": 0.24, + "learning_rate": 8.912462795003932e-06, + "loss": 1.3969, + "step": 208 + }, + { + "epoch": 0.24, + "learning_rate": 8.900874711466436e-06, + "loss": 1.4044, + "step": 209 + }, + { + "epoch": 0.24, + "learning_rate": 8.889232835578372e-06, + "loss": 1.3154, + "step": 210 + }, + { + "epoch": 0.24, + "learning_rate": 8.877537327879087e-06, + "loss": 1.3014, + "step": 211 + }, + { + "epoch": 0.24, + "learning_rate": 8.865788349647496e-06, + "loss": 1.3628, + "step": 212 + }, + { + "epoch": 0.24, + "learning_rate": 8.853986062899869e-06, + "loss": 1.3848, + "step": 213 + }, + { + "epoch": 0.25, + "learning_rate": 8.842130630387583e-06, + "loss": 1.382, + "step": 214 + }, + { + "epoch": 0.25, + "learning_rate": 8.83022221559489e-06, + "loss": 1.3952, + "step": 215 + }, + { + "epoch": 0.25, + "learning_rate": 8.818260982736662e-06, + "loss": 1.3529, + "step": 216 + }, + { + "epoch": 0.25, + "learning_rate": 8.80624709675611e-06, + "loss": 1.2393, + "step": 217 + }, + { + "epoch": 0.25, + "learning_rate": 8.794180723322537e-06, + "loss": 1.4167, + "step": 218 + }, + { + "epoch": 0.25, + "learning_rate": 8.782062028829028e-06, + "loss": 1.3627, + "step": 219 + }, + { + "epoch": 0.25, + "learning_rate": 8.769891180390168e-06, + "loss": 1.374, + "step": 220 + }, + { + "epoch": 0.25, + "learning_rate": 8.757668345839739e-06, + "loss": 1.385, + "step": 221 + }, + { + "epoch": 0.25, + "learning_rate": 8.745393693728395e-06, + "loss": 1.3289, + "step": 222 + }, + { + "epoch": 0.26, + "learning_rate": 8.733067393321354e-06, + "loss": 1.3307, + "step": 223 + }, + { + "epoch": 0.26, + "learning_rate": 8.72068961459605e-06, + "loss": 1.3902, + "step": 224 + }, + { + "epoch": 0.26, + "learning_rate": 8.708260528239788e-06, + "loss": 1.4119, + "step": 225 + }, + { + "epoch": 0.26, + "learning_rate": 8.695780305647405e-06, + "loss": 1.3628, + "step": 226 + }, + { + "epoch": 0.26, + "learning_rate": 8.683249118918895e-06, + "loss": 1.2731, + "step": 227 + }, + { + "epoch": 0.26, + "learning_rate": 8.670667140857034e-06, + "loss": 1.3873, + "step": 228 + }, + { + "epoch": 0.26, + "learning_rate": 8.658034544965003e-06, + "loss": 1.3426, + "step": 229 + }, + { + "epoch": 0.26, + "learning_rate": 8.645351505443997e-06, + "loss": 1.3858, + "step": 230 + }, + { + "epoch": 0.26, + "learning_rate": 8.632618197190817e-06, + "loss": 1.4124, + "step": 231 + }, + { + "epoch": 0.27, + "learning_rate": 8.619834795795458e-06, + "loss": 1.2791, + "step": 232 + }, + { + "epoch": 0.27, + "learning_rate": 8.607001477538697e-06, + "loss": 1.3503, + "step": 233 + }, + { + "epoch": 0.27, + "learning_rate": 8.594118419389648e-06, + "loss": 1.4009, + "step": 234 + }, + { + "epoch": 0.27, + "learning_rate": 8.581185799003334e-06, + "loss": 1.3875, + "step": 235 + }, + { + "epoch": 0.27, + "learning_rate": 8.568203794718228e-06, + "loss": 1.212, + "step": 236 + }, + { + "epoch": 0.27, + "learning_rate": 8.555172585553804e-06, + "loss": 1.4082, + "step": 237 + }, + { + "epoch": 0.27, + "learning_rate": 8.542092351208058e-06, + "loss": 1.3676, + "step": 238 + }, + { + "epoch": 0.27, + "learning_rate": 8.528963272055036e-06, + "loss": 1.3153, + "step": 239 + }, + { + "epoch": 0.27, + "learning_rate": 8.515785529142339e-06, + "loss": 1.3265, + "step": 240 + }, + { + "epoch": 0.28, + "learning_rate": 8.502559304188644e-06, + "loss": 1.2758, + "step": 241 + }, + { + "epoch": 0.28, + "learning_rate": 8.489284779581179e-06, + "loss": 1.365, + "step": 242 + }, + { + "epoch": 0.28, + "learning_rate": 8.475962138373212e-06, + "loss": 1.3663, + "step": 243 + }, + { + "epoch": 0.28, + "learning_rate": 8.46259156428154e-06, + "loss": 1.3429, + "step": 244 + }, + { + "epoch": 0.28, + "learning_rate": 8.449173241683934e-06, + "loss": 1.3457, + "step": 245 + }, + { + "epoch": 0.28, + "learning_rate": 8.43570735561662e-06, + "loss": 1.3705, + "step": 246 + }, + { + "epoch": 0.28, + "learning_rate": 8.422194091771709e-06, + "loss": 1.2759, + "step": 247 + }, + { + "epoch": 0.28, + "learning_rate": 8.408633636494643e-06, + "loss": 1.2924, + "step": 248 + }, + { + "epoch": 0.29, + "learning_rate": 8.395026176781627e-06, + "loss": 1.3508, + "step": 249 + }, + { + "epoch": 0.29, + "learning_rate": 8.381371900277045e-06, + "loss": 1.3227, + "step": 250 + }, + { + "epoch": 0.29, + "learning_rate": 8.367670995270883e-06, + "loss": 1.3489, + "step": 251 + }, + { + "epoch": 0.29, + "learning_rate": 8.353923650696119e-06, + "loss": 1.4203, + "step": 252 + }, + { + "epoch": 0.29, + "learning_rate": 8.340130056126126e-06, + "loss": 1.3484, + "step": 253 + }, + { + "epoch": 0.29, + "learning_rate": 8.326290401772057e-06, + "loss": 1.2958, + "step": 254 + }, + { + "epoch": 0.29, + "learning_rate": 8.312404878480222e-06, + "loss": 1.3201, + "step": 255 + }, + { + "epoch": 0.29, + "learning_rate": 8.298473677729453e-06, + "loss": 1.2972, + "step": 256 + }, + { + "epoch": 0.29, + "learning_rate": 8.284496991628465e-06, + "loss": 1.3441, + "step": 257 + }, + { + "epoch": 0.3, + "learning_rate": 8.270475012913212e-06, + "loss": 1.2578, + "step": 258 + }, + { + "epoch": 0.3, + "learning_rate": 8.25640793494422e-06, + "loss": 1.2723, + "step": 259 + }, + { + "epoch": 0.3, + "learning_rate": 8.24229595170393e-06, + "loss": 1.3463, + "step": 260 + }, + { + "epoch": 0.3, + "learning_rate": 8.228139257794012e-06, + "loss": 1.2866, + "step": 261 + }, + { + "epoch": 0.3, + "learning_rate": 8.213938048432697e-06, + "loss": 1.2462, + "step": 262 + }, + { + "epoch": 0.3, + "learning_rate": 8.19969251945207e-06, + "loss": 1.2395, + "step": 263 + }, + { + "epoch": 0.3, + "learning_rate": 8.185402867295373e-06, + "loss": 1.3298, + "step": 264 + }, + { + "epoch": 0.3, + "learning_rate": 8.171069289014307e-06, + "loss": 1.3464, + "step": 265 + }, + { + "epoch": 0.3, + "learning_rate": 8.156691982266299e-06, + "loss": 1.3382, + "step": 266 + }, + { + "epoch": 0.31, + "learning_rate": 8.142271145311784e-06, + "loss": 1.3392, + "step": 267 + }, + { + "epoch": 0.31, + "learning_rate": 8.127806977011476e-06, + "loss": 1.3726, + "step": 268 + }, + { + "epoch": 0.31, + "learning_rate": 8.113299676823614e-06, + "loss": 1.3787, + "step": 269 + }, + { + "epoch": 0.31, + "learning_rate": 8.098749444801226e-06, + "loss": 1.3239, + "step": 270 + }, + { + "epoch": 0.31, + "learning_rate": 8.08415648158935e-06, + "loss": 1.2717, + "step": 271 + }, + { + "epoch": 0.31, + "learning_rate": 8.069520988422292e-06, + "loss": 1.2725, + "step": 272 + }, + { + "epoch": 0.31, + "learning_rate": 8.054843167120827e-06, + "loss": 1.2724, + "step": 273 + }, + { + "epoch": 0.31, + "learning_rate": 8.040123220089437e-06, + "loss": 1.2731, + "step": 274 + }, + { + "epoch": 0.32, + "learning_rate": 8.025361350313506e-06, + "loss": 1.3407, + "step": 275 + }, + { + "epoch": 0.32, + "learning_rate": 8.010557761356523e-06, + "loss": 1.3206, + "step": 276 + }, + { + "epoch": 0.32, + "learning_rate": 7.99571265735728e-06, + "loss": 1.3384, + "step": 277 + }, + { + "epoch": 0.32, + "learning_rate": 7.980826243027052e-06, + "loss": 1.3022, + "step": 278 + }, + { + "epoch": 0.32, + "learning_rate": 7.965898723646777e-06, + "loss": 1.4177, + "step": 279 + }, + { + "epoch": 0.32, + "learning_rate": 7.950930305064224e-06, + "loss": 1.2906, + "step": 280 + }, + { + "epoch": 0.32, + "learning_rate": 7.935921193691153e-06, + "loss": 1.3392, + "step": 281 + }, + { + "epoch": 0.32, + "learning_rate": 7.920871596500473e-06, + "loss": 1.3754, + "step": 282 + }, + { + "epoch": 0.32, + "learning_rate": 7.905781721023384e-06, + "loss": 1.3237, + "step": 283 + }, + { + "epoch": 0.33, + "learning_rate": 7.890651775346512e-06, + "loss": 1.3333, + "step": 284 + }, + { + "epoch": 0.33, + "learning_rate": 7.875481968109052e-06, + "loss": 1.329, + "step": 285 + }, + { + "epoch": 0.33, + "learning_rate": 7.860272508499877e-06, + "loss": 1.3569, + "step": 286 + }, + { + "epoch": 0.33, + "learning_rate": 7.845023606254658e-06, + "loss": 1.3997, + "step": 287 + }, + { + "epoch": 0.33, + "learning_rate": 7.829735471652978e-06, + "loss": 1.2569, + "step": 288 + }, + { + "epoch": 0.33, + "learning_rate": 7.814408315515419e-06, + "loss": 1.3157, + "step": 289 + }, + { + "epoch": 0.33, + "learning_rate": 7.799042349200672e-06, + "loss": 1.3385, + "step": 290 + }, + { + "epoch": 0.33, + "learning_rate": 7.783637784602608e-06, + "loss": 1.3519, + "step": 291 + }, + { + "epoch": 0.33, + "learning_rate": 7.768194834147362e-06, + "loss": 1.3092, + "step": 292 + }, + { + "epoch": 0.34, + "learning_rate": 7.752713710790405e-06, + "loss": 1.4118, + "step": 293 + }, + { + "epoch": 0.34, + "learning_rate": 7.7371946280136e-06, + "loss": 1.4384, + "step": 294 + }, + { + "epoch": 0.34, + "learning_rate": 7.721637799822269e-06, + "loss": 1.319, + "step": 295 + }, + { + "epoch": 0.34, + "learning_rate": 7.706043440742235e-06, + "loss": 1.4091, + "step": 296 + }, + { + "epoch": 0.34, + "learning_rate": 7.690411765816864e-06, + "loss": 1.3586, + "step": 297 + }, + { + "epoch": 0.34, + "learning_rate": 7.674742990604101e-06, + "loss": 1.3887, + "step": 298 + }, + { + "epoch": 0.34, + "learning_rate": 7.659037331173498e-06, + "loss": 1.4584, + "step": 299 + }, + { + "epoch": 0.34, + "learning_rate": 7.643295004103232e-06, + "loss": 1.3274, + "step": 300 + }, + { + "epoch": 0.34, + "learning_rate": 7.627516226477123e-06, + "loss": 1.3145, + "step": 301 + }, + { + "epoch": 0.35, + "learning_rate": 7.611701215881635e-06, + "loss": 1.3378, + "step": 302 + }, + { + "epoch": 0.35, + "learning_rate": 7.595850190402877e-06, + "loss": 1.3808, + "step": 303 + }, + { + "epoch": 0.35, + "learning_rate": 7.579963368623602e-06, + "loss": 1.2816, + "step": 304 + }, + { + "epoch": 0.35, + "learning_rate": 7.564040969620179e-06, + "loss": 1.3451, + "step": 305 + }, + { + "epoch": 0.35, + "learning_rate": 7.548083212959588e-06, + "loss": 1.3767, + "step": 306 + }, + { + "epoch": 0.35, + "learning_rate": 7.532090318696382e-06, + "loss": 1.2972, + "step": 307 + }, + { + "epoch": 0.35, + "learning_rate": 7.516062507369655e-06, + "loss": 1.3871, + "step": 308 + }, + { + "epoch": 0.35, + "learning_rate": 7.500000000000001e-06, + "loss": 1.2995, + "step": 309 + }, + { + "epoch": 0.36, + "learning_rate": 7.483903018086466e-06, + "loss": 1.3784, + "step": 310 + }, + { + "epoch": 0.36, + "learning_rate": 7.467771783603492e-06, + "loss": 1.2722, + "step": 311 + }, + { + "epoch": 0.36, + "learning_rate": 7.4516065189978625e-06, + "loss": 1.2584, + "step": 312 + }, + { + "epoch": 0.36, + "learning_rate": 7.435407447185623e-06, + "loss": 1.3558, + "step": 313 + }, + { + "epoch": 0.36, + "learning_rate": 7.419174791549023e-06, + "loss": 1.3191, + "step": 314 + }, + { + "epoch": 0.36, + "learning_rate": 7.402908775933419e-06, + "loss": 1.3213, + "step": 315 + }, + { + "epoch": 0.36, + "learning_rate": 7.386609624644201e-06, + "loss": 1.2918, + "step": 316 + }, + { + "epoch": 0.36, + "learning_rate": 7.370277562443689e-06, + "loss": 1.2747, + "step": 317 + }, + { + "epoch": 0.36, + "learning_rate": 7.353912814548042e-06, + "loss": 1.3291, + "step": 318 + }, + { + "epoch": 0.37, + "learning_rate": 7.337515606624148e-06, + "loss": 1.3367, + "step": 319 + }, + { + "epoch": 0.37, + "learning_rate": 7.321086164786513e-06, + "loss": 1.3425, + "step": 320 + }, + { + "epoch": 0.37, + "learning_rate": 7.30462471559414e-06, + "loss": 1.2055, + "step": 321 + }, + { + "epoch": 0.37, + "learning_rate": 7.288131486047414e-06, + "loss": 1.2522, + "step": 322 + }, + { + "epoch": 0.37, + "learning_rate": 7.2716067035849595e-06, + "loss": 1.3425, + "step": 323 + }, + { + "epoch": 0.37, + "learning_rate": 7.25505059608051e-06, + "loss": 1.3494, + "step": 324 + }, + { + "epoch": 0.37, + "learning_rate": 7.23846339183977e-06, + "loss": 1.3205, + "step": 325 + }, + { + "epoch": 0.37, + "learning_rate": 7.221845319597258e-06, + "loss": 1.3643, + "step": 326 + }, + { + "epoch": 0.37, + "learning_rate": 7.2051966085131584e-06, + "loss": 1.2947, + "step": 327 + }, + { + "epoch": 0.38, + "learning_rate": 7.18851748817016e-06, + "loss": 1.2807, + "step": 328 + }, + { + "epoch": 0.38, + "learning_rate": 7.1718081885702905e-06, + "loss": 1.2995, + "step": 329 + }, + { + "epoch": 0.38, + "learning_rate": 7.155068940131741e-06, + "loss": 1.2976, + "step": 330 + }, + { + "epoch": 0.38, + "learning_rate": 7.138299973685694e-06, + "loss": 1.3224, + "step": 331 + }, + { + "epoch": 0.38, + "learning_rate": 7.121501520473137e-06, + "loss": 1.3198, + "step": 332 + }, + { + "epoch": 0.38, + "learning_rate": 7.104673812141676e-06, + "loss": 1.2902, + "step": 333 + }, + { + "epoch": 0.38, + "learning_rate": 7.087817080742337e-06, + "loss": 1.2266, + "step": 334 + }, + { + "epoch": 0.38, + "learning_rate": 7.070931558726373e-06, + "loss": 1.1909, + "step": 335 + }, + { + "epoch": 0.38, + "learning_rate": 7.054017478942048e-06, + "loss": 1.2823, + "step": 336 + }, + { + "epoch": 0.39, + "learning_rate": 7.037075074631441e-06, + "loss": 1.3414, + "step": 337 + }, + { + "epoch": 0.39, + "learning_rate": 7.0201045794272135e-06, + "loss": 1.2356, + "step": 338 + }, + { + "epoch": 0.39, + "learning_rate": 7.003106227349399e-06, + "loss": 1.3991, + "step": 339 + }, + { + "epoch": 0.39, + "learning_rate": 6.9860802528021705e-06, + "loss": 1.3584, + "step": 340 + }, + { + "epoch": 0.39, + "learning_rate": 6.969026890570612e-06, + "loss": 1.391, + "step": 341 + }, + { + "epoch": 0.39, + "learning_rate": 6.9519463758174745e-06, + "loss": 1.2975, + "step": 342 + }, + { + "epoch": 0.39, + "learning_rate": 6.934838944079944e-06, + "loss": 1.2221, + "step": 343 + }, + { + "epoch": 0.39, + "learning_rate": 6.917704831266381e-06, + "loss": 1.266, + "step": 344 + }, + { + "epoch": 0.4, + "learning_rate": 6.9005442736530745e-06, + "loss": 1.2379, + "step": 345 + }, + { + "epoch": 0.4, + "learning_rate": 6.883357507880985e-06, + "loss": 1.2859, + "step": 346 + }, + { + "epoch": 0.4, + "learning_rate": 6.866144770952474e-06, + "loss": 1.3748, + "step": 347 + }, + { + "epoch": 0.4, + "learning_rate": 6.848906300228047e-06, + "loss": 1.2665, + "step": 348 + }, + { + "epoch": 0.4, + "learning_rate": 6.831642333423068e-06, + "loss": 1.3045, + "step": 349 + }, + { + "epoch": 0.4, + "learning_rate": 6.814353108604488e-06, + "loss": 1.3242, + "step": 350 + }, + { + "epoch": 0.4, + "learning_rate": 6.797038864187564e-06, + "loss": 1.3638, + "step": 351 + }, + { + "epoch": 0.4, + "learning_rate": 6.77969983893257e-06, + "loss": 1.3131, + "step": 352 + }, + { + "epoch": 0.4, + "learning_rate": 6.762336271941499e-06, + "loss": 1.265, + "step": 353 + }, + { + "epoch": 0.41, + "learning_rate": 6.7449484026547705e-06, + "loss": 1.3479, + "step": 354 + }, + { + "epoch": 0.41, + "learning_rate": 6.7275364708479316e-06, + "loss": 1.3901, + "step": 355 + }, + { + "epoch": 0.41, + "learning_rate": 6.710100716628345e-06, + "loss": 1.3811, + "step": 356 + }, + { + "epoch": 0.41, + "learning_rate": 6.692641380431879e-06, + "loss": 1.303, + "step": 357 + }, + { + "epoch": 0.41, + "learning_rate": 6.675158703019594e-06, + "loss": 1.2914, + "step": 358 + }, + { + "epoch": 0.41, + "learning_rate": 6.657652925474424e-06, + "loss": 1.2605, + "step": 359 + }, + { + "epoch": 0.41, + "learning_rate": 6.640124289197845e-06, + "loss": 1.2949, + "step": 360 + }, + { + "epoch": 0.41, + "learning_rate": 6.622573035906557e-06, + "loss": 1.2792, + "step": 361 + }, + { + "epoch": 0.41, + "learning_rate": 6.604999407629137e-06, + "loss": 1.2529, + "step": 362 + }, + { + "epoch": 0.42, + "learning_rate": 6.5874036467027135e-06, + "loss": 1.2953, + "step": 363 + }, + { + "epoch": 0.42, + "learning_rate": 6.5697859957696195e-06, + "loss": 1.3389, + "step": 364 + }, + { + "epoch": 0.42, + "learning_rate": 6.552146697774049e-06, + "loss": 1.3, + "step": 365 + }, + { + "epoch": 0.42, + "learning_rate": 6.534485995958699e-06, + "loss": 1.3147, + "step": 366 + }, + { + "epoch": 0.42, + "learning_rate": 6.51680413386143e-06, + "loss": 1.2693, + "step": 367 + }, + { + "epoch": 0.42, + "learning_rate": 6.499101355311891e-06, + "loss": 1.2744, + "step": 368 + }, + { + "epoch": 0.42, + "learning_rate": 6.481377904428171e-06, + "loss": 1.3277, + "step": 369 + }, + { + "epoch": 0.42, + "learning_rate": 6.4636340256134224e-06, + "loss": 1.3469, + "step": 370 + }, + { + "epoch": 0.42, + "learning_rate": 6.445869963552496e-06, + "loss": 1.3045, + "step": 371 + }, + { + "epoch": 0.43, + "learning_rate": 6.428085963208567e-06, + "loss": 1.2607, + "step": 372 + }, + { + "epoch": 0.43, + "learning_rate": 6.410282269819756e-06, + "loss": 1.2707, + "step": 373 + }, + { + "epoch": 0.43, + "learning_rate": 6.392459128895747e-06, + "loss": 1.2619, + "step": 374 + }, + { + "epoch": 0.43, + "learning_rate": 6.374616786214402e-06, + "loss": 1.2985, + "step": 375 + }, + { + "epoch": 0.43, + "learning_rate": 6.356755487818371e-06, + "loss": 1.3691, + "step": 376 + }, + { + "epoch": 0.43, + "learning_rate": 6.338875480011698e-06, + "loss": 1.3009, + "step": 377 + }, + { + "epoch": 0.43, + "learning_rate": 6.3209770093564315e-06, + "loss": 1.3129, + "step": 378 + }, + { + "epoch": 0.43, + "learning_rate": 6.303060322669214e-06, + "loss": 1.2962, + "step": 379 + }, + { + "epoch": 0.44, + "learning_rate": 6.285125667017886e-06, + "loss": 1.3026, + "step": 380 + }, + { + "epoch": 0.44, + "learning_rate": 6.267173289718079e-06, + "loss": 1.2494, + "step": 381 + }, + { + "epoch": 0.44, + "learning_rate": 6.249203438329799e-06, + "loss": 1.2837, + "step": 382 + }, + { + "epoch": 0.44, + "learning_rate": 6.23121636065402e-06, + "loss": 1.2899, + "step": 383 + }, + { + "epoch": 0.44, + "learning_rate": 6.213212304729259e-06, + "loss": 1.2559, + "step": 384 + }, + { + "epoch": 0.44, + "learning_rate": 6.195191518828163e-06, + "loss": 1.2479, + "step": 385 + }, + { + "epoch": 0.44, + "learning_rate": 6.177154251454082e-06, + "loss": 1.3153, + "step": 386 + }, + { + "epoch": 0.44, + "learning_rate": 6.1591007513376425e-06, + "loss": 1.3329, + "step": 387 + }, + { + "epoch": 0.44, + "learning_rate": 6.141031267433316e-06, + "loss": 1.2713, + "step": 388 + }, + { + "epoch": 0.45, + "learning_rate": 6.122946048915991e-06, + "loss": 1.3049, + "step": 389 + }, + { + "epoch": 0.45, + "learning_rate": 6.1048453451775305e-06, + "loss": 1.3078, + "step": 390 + }, + { + "epoch": 0.45, + "learning_rate": 6.086729405823335e-06, + "loss": 1.2158, + "step": 391 + }, + { + "epoch": 0.45, + "learning_rate": 6.0685984806689055e-06, + "loss": 1.2278, + "step": 392 + }, + { + "epoch": 0.45, + "learning_rate": 6.05045281973639e-06, + "loss": 1.2554, + "step": 393 + }, + { + "epoch": 0.45, + "learning_rate": 6.032292673251143e-06, + "loss": 1.257, + "step": 394 + }, + { + "epoch": 0.45, + "learning_rate": 6.014118291638272e-06, + "loss": 1.2632, + "step": 395 + }, + { + "epoch": 0.45, + "learning_rate": 5.995929925519181e-06, + "loss": 1.2637, + "step": 396 + }, + { + "epoch": 0.45, + "learning_rate": 5.977727825708123e-06, + "loss": 1.4115, + "step": 397 + }, + { + "epoch": 0.46, + "learning_rate": 5.959512243208732e-06, + "loss": 1.3101, + "step": 398 + }, + { + "epoch": 0.46, + "learning_rate": 5.941283429210568e-06, + "loss": 1.2819, + "step": 399 + }, + { + "epoch": 0.46, + "learning_rate": 5.9230416350856505e-06, + "loss": 1.1701, + "step": 400 + }, + { + "epoch": 0.46, + "learning_rate": 5.904787112384991e-06, + "loss": 1.3451, + "step": 401 + }, + { + "epoch": 0.46, + "learning_rate": 5.886520112835128e-06, + "loss": 1.294, + "step": 402 + }, + { + "epoch": 0.46, + "learning_rate": 5.8682408883346535e-06, + "loss": 1.289, + "step": 403 + }, + { + "epoch": 0.46, + "learning_rate": 5.849949690950736e-06, + "loss": 1.3007, + "step": 404 + }, + { + "epoch": 0.46, + "learning_rate": 5.831646772915651e-06, + "loss": 1.2988, + "step": 405 + }, + { + "epoch": 0.47, + "learning_rate": 5.8133323866233005e-06, + "loss": 1.3402, + "step": 406 + }, + { + "epoch": 0.47, + "learning_rate": 5.795006784625728e-06, + "loss": 1.2917, + "step": 407 + }, + { + "epoch": 0.47, + "learning_rate": 5.776670219629643e-06, + "loss": 1.2742, + "step": 408 + }, + { + "epoch": 0.47, + "learning_rate": 5.75832294449293e-06, + "loss": 1.1589, + "step": 409 + }, + { + "epoch": 0.47, + "learning_rate": 5.739965212221168e-06, + "loss": 1.2647, + "step": 410 + }, + { + "epoch": 0.47, + "learning_rate": 5.7215972759641335e-06, + "loss": 1.2089, + "step": 411 + }, + { + "epoch": 0.47, + "learning_rate": 5.703219389012317e-06, + "loss": 1.2267, + "step": 412 + }, + { + "epoch": 0.47, + "learning_rate": 5.684831804793427e-06, + "loss": 1.2323, + "step": 413 + }, + { + "epoch": 0.47, + "learning_rate": 5.666434776868895e-06, + "loss": 1.2575, + "step": 414 + }, + { + "epoch": 0.48, + "learning_rate": 5.64802855893038e-06, + "loss": 1.2609, + "step": 415 + }, + { + "epoch": 0.48, + "learning_rate": 5.629613404796267e-06, + "loss": 1.2414, + "step": 416 + }, + { + "epoch": 0.48, + "learning_rate": 5.611189568408173e-06, + "loss": 1.2782, + "step": 417 + }, + { + "epoch": 0.48, + "learning_rate": 5.592757303827441e-06, + "loss": 1.2637, + "step": 418 + }, + { + "epoch": 0.48, + "learning_rate": 5.574316865231637e-06, + "loss": 1.2786, + "step": 419 + }, + { + "epoch": 0.48, + "learning_rate": 5.5558685069110444e-06, + "loss": 1.2729, + "step": 420 + }, + { + "epoch": 0.48, + "learning_rate": 5.537412483265156e-06, + "loss": 1.2916, + "step": 421 + }, + { + "epoch": 0.48, + "learning_rate": 5.518949048799176e-06, + "loss": 1.2674, + "step": 422 + }, + { + "epoch": 0.48, + "learning_rate": 5.500478458120493e-06, + "loss": 1.2308, + "step": 423 + }, + { + "epoch": 0.49, + "learning_rate": 5.482000965935182e-06, + "loss": 1.3994, + "step": 424 + }, + { + "epoch": 0.49, + "learning_rate": 5.463516827044492e-06, + "loss": 1.2365, + "step": 425 + }, + { + "epoch": 0.49, + "learning_rate": 5.445026296341325e-06, + "loss": 1.3897, + "step": 426 + }, + { + "epoch": 0.49, + "learning_rate": 5.4265296288067235e-06, + "loss": 1.3042, + "step": 427 + }, + { + "epoch": 0.49, + "learning_rate": 5.408027079506362e-06, + "loss": 1.2559, + "step": 428 + }, + { + "epoch": 0.49, + "learning_rate": 5.389518903587016e-06, + "loss": 1.2159, + "step": 429 + }, + { + "epoch": 0.49, + "learning_rate": 5.371005356273058e-06, + "loss": 1.3298, + "step": 430 + }, + { + "epoch": 0.49, + "learning_rate": 5.352486692862926e-06, + "loss": 1.2856, + "step": 431 + }, + { + "epoch": 0.49, + "learning_rate": 5.3339631687256085e-06, + "loss": 1.3069, + "step": 432 + }, + { + "epoch": 0.5, + "learning_rate": 5.3154350392971245e-06, + "loss": 1.2573, + "step": 433 + }, + { + "epoch": 0.5, + "learning_rate": 5.296902560077e-06, + "loss": 1.322, + "step": 434 + }, + { + "epoch": 0.5, + "learning_rate": 5.278365986624743e-06, + "loss": 1.3452, + "step": 435 + }, + { + "epoch": 0.5, + "learning_rate": 5.259825574556315e-06, + "loss": 1.2812, + "step": 436 + }, + { + "epoch": 0.5, + "learning_rate": 5.241281579540619e-06, + "loss": 1.2764, + "step": 437 + }, + { + "epoch": 0.5, + "learning_rate": 5.222734257295963e-06, + "loss": 1.2183, + "step": 438 + }, + { + "epoch": 0.5, + "learning_rate": 5.2041838635865336e-06, + "loss": 1.4043, + "step": 439 + }, + { + "epoch": 0.5, + "learning_rate": 5.1856306542188805e-06, + "loss": 1.3147, + "step": 440 + }, + { + "epoch": 0.51, + "learning_rate": 5.1670748850383734e-06, + "loss": 1.2799, + "step": 441 + }, + { + "epoch": 0.51, + "learning_rate": 5.148516811925684e-06, + "loss": 1.2555, + "step": 442 + }, + { + "epoch": 0.51, + "learning_rate": 5.129956690793255e-06, + "loss": 1.2698, + "step": 443 + }, + { + "epoch": 0.51, + "learning_rate": 5.111394777581769e-06, + "loss": 1.3233, + "step": 444 + }, + { + "epoch": 0.51, + "learning_rate": 5.0928313282566255e-06, + "loss": 1.2816, + "step": 445 + }, + { + "epoch": 0.51, + "learning_rate": 5.074266598804402e-06, + "loss": 1.2227, + "step": 446 + }, + { + "epoch": 0.51, + "learning_rate": 5.0557008452293275e-06, + "loss": 1.2858, + "step": 447 + }, + { + "epoch": 0.51, + "learning_rate": 5.037134323549763e-06, + "loss": 1.2475, + "step": 448 + }, + { + "epoch": 0.51, + "learning_rate": 5.0185672897946515e-06, + "loss": 1.2544, + "step": 449 + }, + { + "epoch": 0.52, + "learning_rate": 5e-06, + "loss": 1.3282, + "step": 450 + }, + { + "epoch": 0.52, + "learning_rate": 4.981432710205351e-06, + "loss": 1.2523, + "step": 451 + }, + { + "epoch": 0.52, + "learning_rate": 4.962865676450239e-06, + "loss": 1.2541, + "step": 452 + }, + { + "epoch": 0.52, + "learning_rate": 4.944299154770673e-06, + "loss": 1.3198, + "step": 453 + }, + { + "epoch": 0.52, + "learning_rate": 4.925733401195601e-06, + "loss": 1.3546, + "step": 454 + }, + { + "epoch": 0.52, + "learning_rate": 4.907168671743377e-06, + "loss": 1.2943, + "step": 455 + }, + { + "epoch": 0.52, + "learning_rate": 4.888605222418232e-06, + "loss": 1.2895, + "step": 456 + }, + { + "epoch": 0.52, + "learning_rate": 4.8700433092067474e-06, + "loss": 1.3898, + "step": 457 + }, + { + "epoch": 0.52, + "learning_rate": 4.8514831880743175e-06, + "loss": 1.2536, + "step": 458 + }, + { + "epoch": 0.53, + "learning_rate": 4.832925114961629e-06, + "loss": 1.2977, + "step": 459 + }, + { + "epoch": 0.53, + "learning_rate": 4.814369345781121e-06, + "loss": 1.2635, + "step": 460 + }, + { + "epoch": 0.53, + "learning_rate": 4.795816136413467e-06, + "loss": 1.442, + "step": 461 + }, + { + "epoch": 0.53, + "learning_rate": 4.777265742704039e-06, + "loss": 1.2602, + "step": 462 + }, + { + "epoch": 0.53, + "learning_rate": 4.758718420459383e-06, + "loss": 1.3203, + "step": 463 + }, + { + "epoch": 0.53, + "learning_rate": 4.740174425443687e-06, + "loss": 1.2639, + "step": 464 + }, + { + "epoch": 0.53, + "learning_rate": 4.7216340133752604e-06, + "loss": 1.2657, + "step": 465 + }, + { + "epoch": 0.53, + "learning_rate": 4.703097439923e-06, + "loss": 1.2655, + "step": 466 + }, + { + "epoch": 0.53, + "learning_rate": 4.684564960702877e-06, + "loss": 1.287, + "step": 467 + }, + { + "epoch": 0.54, + "learning_rate": 4.666036831274392e-06, + "loss": 1.2951, + "step": 468 + }, + { + "epoch": 0.54, + "learning_rate": 4.647513307137076e-06, + "loss": 1.2133, + "step": 469 + }, + { + "epoch": 0.54, + "learning_rate": 4.628994643726942e-06, + "loss": 1.3232, + "step": 470 + }, + { + "epoch": 0.54, + "learning_rate": 4.610481096412985e-06, + "loss": 1.2329, + "step": 471 + }, + { + "epoch": 0.54, + "learning_rate": 4.591972920493638e-06, + "loss": 1.259, + "step": 472 + }, + { + "epoch": 0.54, + "learning_rate": 4.573470371193277e-06, + "loss": 1.2908, + "step": 473 + }, + { + "epoch": 0.54, + "learning_rate": 4.554973703658676e-06, + "loss": 1.3604, + "step": 474 + }, + { + "epoch": 0.54, + "learning_rate": 4.53648317295551e-06, + "loss": 1.296, + "step": 475 + }, + { + "epoch": 0.55, + "learning_rate": 4.517999034064819e-06, + "loss": 1.3323, + "step": 476 + }, + { + "epoch": 0.55, + "learning_rate": 4.499521541879508e-06, + "loss": 1.2711, + "step": 477 + }, + { + "epoch": 0.55, + "learning_rate": 4.4810509512008245e-06, + "loss": 1.1972, + "step": 478 + }, + { + "epoch": 0.55, + "learning_rate": 4.462587516734845e-06, + "loss": 1.276, + "step": 479 + }, + { + "epoch": 0.55, + "learning_rate": 4.444131493088956e-06, + "loss": 1.3355, + "step": 480 + }, + { + "epoch": 0.55, + "learning_rate": 4.425683134768365e-06, + "loss": 1.3096, + "step": 481 + }, + { + "epoch": 0.55, + "learning_rate": 4.40724269617256e-06, + "loss": 1.2206, + "step": 482 + }, + { + "epoch": 0.55, + "learning_rate": 4.388810431591829e-06, + "loss": 1.2797, + "step": 483 + }, + { + "epoch": 0.55, + "learning_rate": 4.3703865952037354e-06, + "loss": 1.3047, + "step": 484 + }, + { + "epoch": 0.56, + "learning_rate": 4.351971441069622e-06, + "loss": 1.3317, + "step": 485 + }, + { + "epoch": 0.56, + "learning_rate": 4.333565223131107e-06, + "loss": 1.1875, + "step": 486 + }, + { + "epoch": 0.56, + "learning_rate": 4.315168195206574e-06, + "loss": 1.2847, + "step": 487 + }, + { + "epoch": 0.56, + "learning_rate": 4.296780610987685e-06, + "loss": 1.1669, + "step": 488 + }, + { + "epoch": 0.56, + "learning_rate": 4.278402724035868e-06, + "loss": 1.2493, + "step": 489 + }, + { + "epoch": 0.56, + "learning_rate": 4.260034787778833e-06, + "loss": 1.2311, + "step": 490 + }, + { + "epoch": 0.56, + "learning_rate": 4.241677055507071e-06, + "loss": 1.2648, + "step": 491 + }, + { + "epoch": 0.56, + "learning_rate": 4.223329780370359e-06, + "loss": 1.2404, + "step": 492 + }, + { + "epoch": 0.56, + "learning_rate": 4.2049932153742726e-06, + "loss": 1.2094, + "step": 493 + }, + { + "epoch": 0.57, + "learning_rate": 4.186667613376702e-06, + "loss": 1.303, + "step": 494 + }, + { + "epoch": 0.57, + "learning_rate": 4.1683532270843505e-06, + "loss": 1.2948, + "step": 495 + }, + { + "epoch": 0.57, + "learning_rate": 4.150050309049267e-06, + "loss": 1.2527, + "step": 496 + }, + { + "epoch": 0.57, + "learning_rate": 4.131759111665349e-06, + "loss": 1.3017, + "step": 497 + }, + { + "epoch": 0.57, + "learning_rate": 4.113479887164873e-06, + "loss": 1.2143, + "step": 498 + }, + { + "epoch": 0.57, + "learning_rate": 4.09521288761501e-06, + "loss": 1.3148, + "step": 499 + }, + { + "epoch": 0.57, + "learning_rate": 4.076958364914352e-06, + "loss": 1.3612, + "step": 500 + }, + { + "epoch": 0.57, + "learning_rate": 4.0587165707894326e-06, + "loss": 1.1973, + "step": 501 + }, + { + "epoch": 0.58, + "learning_rate": 4.04048775679127e-06, + "loss": 1.3197, + "step": 502 + }, + { + "epoch": 0.58, + "learning_rate": 4.022272174291878e-06, + "loss": 1.2792, + "step": 503 + }, + { + "epoch": 0.58, + "learning_rate": 4.004070074480821e-06, + "loss": 1.2688, + "step": 504 + }, + { + "epoch": 0.58, + "learning_rate": 3.985881708361729e-06, + "loss": 1.2597, + "step": 505 + }, + { + "epoch": 0.58, + "learning_rate": 3.967707326748857e-06, + "loss": 1.2096, + "step": 506 + }, + { + "epoch": 0.58, + "learning_rate": 3.94954718026361e-06, + "loss": 1.3257, + "step": 507 + }, + { + "epoch": 0.58, + "learning_rate": 3.931401519331095e-06, + "loss": 1.2747, + "step": 508 + }, + { + "epoch": 0.58, + "learning_rate": 3.913270594176665e-06, + "loss": 1.2471, + "step": 509 + }, + { + "epoch": 0.58, + "learning_rate": 3.895154654822471e-06, + "loss": 1.1345, + "step": 510 + }, + { + "epoch": 0.59, + "learning_rate": 3.87705395108401e-06, + "loss": 1.2536, + "step": 511 + }, + { + "epoch": 0.59, + "learning_rate": 3.858968732566685e-06, + "loss": 1.219, + "step": 512 + }, + { + "epoch": 0.59, + "learning_rate": 3.840899248662358e-06, + "loss": 1.3678, + "step": 513 + }, + { + "epoch": 0.59, + "learning_rate": 3.822845748545919e-06, + "loss": 1.2971, + "step": 514 + }, + { + "epoch": 0.59, + "learning_rate": 3.8048084811718377e-06, + "loss": 1.2638, + "step": 515 + }, + { + "epoch": 0.59, + "learning_rate": 3.786787695270743e-06, + "loss": 1.254, + "step": 516 + }, + { + "epoch": 0.59, + "learning_rate": 3.7687836393459828e-06, + "loss": 1.2319, + "step": 517 + }, + { + "epoch": 0.59, + "learning_rate": 3.7507965616702015e-06, + "loss": 1.2633, + "step": 518 + }, + { + "epoch": 0.59, + "learning_rate": 3.732826710281923e-06, + "loss": 1.2397, + "step": 519 + }, + { + "epoch": 0.6, + "learning_rate": 3.7148743329821146e-06, + "loss": 1.2457, + "step": 520 + }, + { + "epoch": 0.6, + "learning_rate": 3.6969396773307888e-06, + "loss": 1.1932, + "step": 521 + }, + { + "epoch": 0.6, + "learning_rate": 3.6790229906435706e-06, + "loss": 1.232, + "step": 522 + }, + { + "epoch": 0.6, + "learning_rate": 3.6611245199883037e-06, + "loss": 1.2936, + "step": 523 + }, + { + "epoch": 0.6, + "learning_rate": 3.6432445121816308e-06, + "loss": 1.2693, + "step": 524 + }, + { + "epoch": 0.6, + "learning_rate": 3.6253832137856e-06, + "loss": 1.3185, + "step": 525 + }, + { + "epoch": 0.6, + "learning_rate": 3.6075408711042536e-06, + "loss": 1.2861, + "step": 526 + }, + { + "epoch": 0.6, + "learning_rate": 3.5897177301802455e-06, + "loss": 1.3201, + "step": 527 + }, + { + "epoch": 0.6, + "learning_rate": 3.571914036791435e-06, + "loss": 1.2737, + "step": 528 + }, + { + "epoch": 0.61, + "learning_rate": 3.5541300364475067e-06, + "loss": 1.2632, + "step": 529 + }, + { + "epoch": 0.61, + "learning_rate": 3.5363659743865797e-06, + "loss": 1.2531, + "step": 530 + }, + { + "epoch": 0.61, + "learning_rate": 3.518622095571831e-06, + "loss": 1.2531, + "step": 531 + }, + { + "epoch": 0.61, + "learning_rate": 3.5008986446881088e-06, + "loss": 1.2296, + "step": 532 + }, + { + "epoch": 0.61, + "learning_rate": 3.4831958661385716e-06, + "loss": 1.2491, + "step": 533 + }, + { + "epoch": 0.61, + "learning_rate": 3.465514004041301e-06, + "loss": 1.242, + "step": 534 + }, + { + "epoch": 0.61, + "learning_rate": 3.4478533022259527e-06, + "loss": 1.2202, + "step": 535 + }, + { + "epoch": 0.61, + "learning_rate": 3.4302140042303813e-06, + "loss": 1.2563, + "step": 536 + }, + { + "epoch": 0.62, + "learning_rate": 3.4125963532972878e-06, + "loss": 1.1436, + "step": 537 + }, + { + "epoch": 0.62, + "learning_rate": 3.395000592370864e-06, + "loss": 1.3096, + "step": 538 + }, + { + "epoch": 0.62, + "learning_rate": 3.3774269640934447e-06, + "loss": 1.3111, + "step": 539 + }, + { + "epoch": 0.62, + "learning_rate": 3.3598757108021546e-06, + "loss": 1.237, + "step": 540 + }, + { + "epoch": 0.62, + "learning_rate": 3.342347074525578e-06, + "loss": 1.2538, + "step": 541 + }, + { + "epoch": 0.62, + "learning_rate": 3.3248412969804065e-06, + "loss": 1.1838, + "step": 542 + }, + { + "epoch": 0.62, + "learning_rate": 3.307358619568123e-06, + "loss": 1.2605, + "step": 543 + }, + { + "epoch": 0.62, + "learning_rate": 3.289899283371657e-06, + "loss": 1.2921, + "step": 544 + }, + { + "epoch": 0.62, + "learning_rate": 3.2724635291520697e-06, + "loss": 1.2745, + "step": 545 + }, + { + "epoch": 0.63, + "learning_rate": 3.2550515973452295e-06, + "loss": 1.2481, + "step": 546 + }, + { + "epoch": 0.63, + "learning_rate": 3.2376637280585025e-06, + "loss": 1.2874, + "step": 547 + }, + { + "epoch": 0.63, + "learning_rate": 3.2203001610674322e-06, + "loss": 1.2855, + "step": 548 + }, + { + "epoch": 0.63, + "learning_rate": 3.202961135812437e-06, + "loss": 1.2453, + "step": 549 + }, + { + "epoch": 0.63, + "learning_rate": 3.185646891395514e-06, + "loss": 1.2974, + "step": 550 + }, + { + "epoch": 0.63, + "learning_rate": 3.1683576665769344e-06, + "loss": 1.2085, + "step": 551 + }, + { + "epoch": 0.63, + "learning_rate": 3.1510936997719557e-06, + "loss": 1.192, + "step": 552 + }, + { + "epoch": 0.63, + "learning_rate": 3.1338552290475265e-06, + "loss": 1.2529, + "step": 553 + }, + { + "epoch": 0.63, + "learning_rate": 3.1166424921190174e-06, + "loss": 1.3066, + "step": 554 + }, + { + "epoch": 0.64, + "learning_rate": 3.0994557263469267e-06, + "loss": 1.3012, + "step": 555 + }, + { + "epoch": 0.64, + "learning_rate": 3.0822951687336215e-06, + "loss": 1.2765, + "step": 556 + }, + { + "epoch": 0.64, + "learning_rate": 3.065161055920057e-06, + "loss": 1.1885, + "step": 557 + }, + { + "epoch": 0.64, + "learning_rate": 3.0480536241825263e-06, + "loss": 1.2846, + "step": 558 + }, + { + "epoch": 0.64, + "learning_rate": 3.03097310942939e-06, + "loss": 1.2697, + "step": 559 + }, + { + "epoch": 0.64, + "learning_rate": 3.013919747197832e-06, + "loss": 1.2531, + "step": 560 + }, + { + "epoch": 0.64, + "learning_rate": 2.996893772650602e-06, + "loss": 1.2579, + "step": 561 + }, + { + "epoch": 0.64, + "learning_rate": 2.9798954205727886e-06, + "loss": 1.2048, + "step": 562 + }, + { + "epoch": 0.64, + "learning_rate": 2.96292492536856e-06, + "loss": 1.1587, + "step": 563 + }, + { + "epoch": 0.65, + "learning_rate": 2.9459825210579534e-06, + "loss": 1.2425, + "step": 564 + }, + { + "epoch": 0.65, + "learning_rate": 2.929068441273629e-06, + "loss": 1.2004, + "step": 565 + }, + { + "epoch": 0.65, + "learning_rate": 2.9121829192576647e-06, + "loss": 1.2269, + "step": 566 + }, + { + "epoch": 0.65, + "learning_rate": 2.8953261878583263e-06, + "loss": 1.2296, + "step": 567 + }, + { + "epoch": 0.65, + "learning_rate": 2.8784984795268644e-06, + "loss": 1.2511, + "step": 568 + }, + { + "epoch": 0.65, + "learning_rate": 2.861700026314308e-06, + "loss": 1.2154, + "step": 569 + }, + { + "epoch": 0.65, + "learning_rate": 2.844931059868261e-06, + "loss": 1.2472, + "step": 570 + }, + { + "epoch": 0.65, + "learning_rate": 2.828191811429709e-06, + "loss": 1.2231, + "step": 571 + }, + { + "epoch": 0.66, + "learning_rate": 2.811482511829842e-06, + "loss": 1.2338, + "step": 572 + }, + { + "epoch": 0.66, + "learning_rate": 2.7948033914868415e-06, + "loss": 1.1874, + "step": 573 + }, + { + "epoch": 0.66, + "learning_rate": 2.778154680402745e-06, + "loss": 1.1784, + "step": 574 + }, + { + "epoch": 0.66, + "learning_rate": 2.7615366081602306e-06, + "loss": 1.2601, + "step": 575 + }, + { + "epoch": 0.66, + "learning_rate": 2.74494940391949e-06, + "loss": 1.2372, + "step": 576 + }, + { + "epoch": 0.66, + "learning_rate": 2.7283932964150417e-06, + "loss": 1.2243, + "step": 577 + }, + { + "epoch": 0.66, + "learning_rate": 2.711868513952587e-06, + "loss": 1.2287, + "step": 578 + }, + { + "epoch": 0.66, + "learning_rate": 2.69537528440586e-06, + "loss": 1.2571, + "step": 579 + }, + { + "epoch": 0.66, + "learning_rate": 2.6789138352134885e-06, + "loss": 1.3302, + "step": 580 + }, + { + "epoch": 0.67, + "learning_rate": 2.6624843933758547e-06, + "loss": 1.2508, + "step": 581 + }, + { + "epoch": 0.67, + "learning_rate": 2.6460871854519594e-06, + "loss": 1.3273, + "step": 582 + }, + { + "epoch": 0.67, + "learning_rate": 2.6297224375563126e-06, + "loss": 1.2187, + "step": 583 + }, + { + "epoch": 0.67, + "learning_rate": 2.613390375355801e-06, + "loss": 1.2563, + "step": 584 + }, + { + "epoch": 0.67, + "learning_rate": 2.5970912240665815e-06, + "loss": 1.2393, + "step": 585 + }, + { + "epoch": 0.67, + "learning_rate": 2.5808252084509784e-06, + "loss": 1.2318, + "step": 586 + }, + { + "epoch": 0.67, + "learning_rate": 2.5645925528143778e-06, + "loss": 1.2727, + "step": 587 + }, + { + "epoch": 0.67, + "learning_rate": 2.54839348100214e-06, + "loss": 1.2413, + "step": 588 + }, + { + "epoch": 0.67, + "learning_rate": 2.5322282163965096e-06, + "loss": 1.2198, + "step": 589 + }, + { + "epoch": 0.68, + "learning_rate": 2.5160969819135368e-06, + "loss": 1.2257, + "step": 590 + }, + { + "epoch": 0.68, + "learning_rate": 2.5000000000000015e-06, + "loss": 1.1988, + "step": 591 + }, + { + "epoch": 0.68, + "learning_rate": 2.483937492630345e-06, + "loss": 1.2751, + "step": 592 + }, + { + "epoch": 0.68, + "learning_rate": 2.4679096813036202e-06, + "loss": 1.2558, + "step": 593 + }, + { + "epoch": 0.68, + "learning_rate": 2.4519167870404126e-06, + "loss": 1.2104, + "step": 594 + }, + { + "epoch": 0.68, + "learning_rate": 2.4359590303798243e-06, + "loss": 1.263, + "step": 595 + }, + { + "epoch": 0.68, + "learning_rate": 2.4200366313764e-06, + "loss": 1.3031, + "step": 596 + }, + { + "epoch": 0.68, + "learning_rate": 2.4041498095971253e-06, + "loss": 1.3141, + "step": 597 + }, + { + "epoch": 0.68, + "learning_rate": 2.388298784118366e-06, + "loss": 1.2409, + "step": 598 + }, + { + "epoch": 0.69, + "learning_rate": 2.3724837735228773e-06, + "loss": 1.2706, + "step": 599 + }, + { + "epoch": 0.69, + "learning_rate": 2.356704995896768e-06, + "loss": 1.3215, + "step": 600 + }, + { + "epoch": 0.69, + "learning_rate": 2.340962668826503e-06, + "loss": 1.248, + "step": 601 + }, + { + "epoch": 0.69, + "learning_rate": 2.3252570093959e-06, + "loss": 1.2072, + "step": 602 + }, + { + "epoch": 0.69, + "learning_rate": 2.309588234183137e-06, + "loss": 1.2875, + "step": 603 + }, + { + "epoch": 0.69, + "learning_rate": 2.293956559257766e-06, + "loss": 1.2191, + "step": 604 + }, + { + "epoch": 0.69, + "learning_rate": 2.2783622001777322e-06, + "loss": 1.1606, + "step": 605 + }, + { + "epoch": 0.69, + "learning_rate": 2.262805371986402e-06, + "loss": 1.3171, + "step": 606 + }, + { + "epoch": 0.7, + "learning_rate": 2.247286289209597e-06, + "loss": 1.2794, + "step": 607 + }, + { + "epoch": 0.7, + "learning_rate": 2.231805165852637e-06, + "loss": 1.2513, + "step": 608 + }, + { + "epoch": 0.7, + "learning_rate": 2.216362215397393e-06, + "loss": 1.2643, + "step": 609 + }, + { + "epoch": 0.7, + "learning_rate": 2.2009576507993273e-06, + "loss": 1.2275, + "step": 610 + }, + { + "epoch": 0.7, + "learning_rate": 2.1855916844845827e-06, + "loss": 1.2788, + "step": 611 + }, + { + "epoch": 0.7, + "learning_rate": 2.1702645283470238e-06, + "loss": 1.2536, + "step": 612 + }, + { + "epoch": 0.7, + "learning_rate": 2.1549763937453445e-06, + "loss": 1.2581, + "step": 613 + }, + { + "epoch": 0.7, + "learning_rate": 2.1397274915001254e-06, + "loss": 1.2904, + "step": 614 + }, + { + "epoch": 0.7, + "learning_rate": 2.1245180318909482e-06, + "loss": 1.2637, + "step": 615 + }, + { + "epoch": 0.71, + "learning_rate": 2.1093482246534896e-06, + "loss": 1.2643, + "step": 616 + }, + { + "epoch": 0.71, + "learning_rate": 2.0942182789766174e-06, + "loss": 1.2461, + "step": 617 + }, + { + "epoch": 0.71, + "learning_rate": 2.0791284034995296e-06, + "loss": 1.2346, + "step": 618 + }, + { + "epoch": 0.71, + "learning_rate": 2.064078806308848e-06, + "loss": 1.2702, + "step": 619 + }, + { + "epoch": 0.71, + "learning_rate": 2.0490696949357774e-06, + "loss": 1.2606, + "step": 620 + }, + { + "epoch": 0.71, + "learning_rate": 2.0341012763532243e-06, + "loss": 1.2199, + "step": 621 + }, + { + "epoch": 0.71, + "learning_rate": 2.0191737569729492e-06, + "loss": 1.2485, + "step": 622 + }, + { + "epoch": 0.71, + "learning_rate": 2.004287342642721e-06, + "loss": 1.2254, + "step": 623 + }, + { + "epoch": 0.71, + "learning_rate": 1.989442238643478e-06, + "loss": 1.1819, + "step": 624 + }, + { + "epoch": 0.72, + "learning_rate": 1.974638649686495e-06, + "loss": 1.3182, + "step": 625 + }, + { + "epoch": 0.72, + "learning_rate": 1.959876779910564e-06, + "loss": 1.2954, + "step": 626 + }, + { + "epoch": 0.72, + "learning_rate": 1.945156832879174e-06, + "loss": 1.2334, + "step": 627 + }, + { + "epoch": 0.72, + "learning_rate": 1.930479011577711e-06, + "loss": 1.1988, + "step": 628 + }, + { + "epoch": 0.72, + "learning_rate": 1.91584351841065e-06, + "loss": 1.236, + "step": 629 + }, + { + "epoch": 0.72, + "learning_rate": 1.9012505551987764e-06, + "loss": 1.3011, + "step": 630 + }, + { + "epoch": 0.72, + "learning_rate": 1.8867003231763847e-06, + "loss": 1.2476, + "step": 631 + }, + { + "epoch": 0.72, + "learning_rate": 1.872193022988526e-06, + "loss": 1.2689, + "step": 632 + }, + { + "epoch": 0.73, + "learning_rate": 1.8577288546882167e-06, + "loss": 1.1812, + "step": 633 + }, + { + "epoch": 0.73, + "learning_rate": 1.8433080177337043e-06, + "loss": 1.2817, + "step": 634 + }, + { + "epoch": 0.73, + "learning_rate": 1.8289307109856941e-06, + "loss": 1.2886, + "step": 635 + }, + { + "epoch": 0.73, + "learning_rate": 1.8145971327046274e-06, + "loss": 1.2686, + "step": 636 + }, + { + "epoch": 0.73, + "learning_rate": 1.8003074805479314e-06, + "loss": 1.1913, + "step": 637 + }, + { + "epoch": 0.73, + "learning_rate": 1.7860619515673034e-06, + "loss": 1.3015, + "step": 638 + }, + { + "epoch": 0.73, + "learning_rate": 1.771860742205988e-06, + "loss": 1.3085, + "step": 639 + }, + { + "epoch": 0.73, + "learning_rate": 1.7577040482960723e-06, + "loss": 1.2569, + "step": 640 + }, + { + "epoch": 0.73, + "learning_rate": 1.7435920650557808e-06, + "loss": 1.3083, + "step": 641 + }, + { + "epoch": 0.74, + "learning_rate": 1.7295249870867898e-06, + "loss": 1.2116, + "step": 642 + }, + { + "epoch": 0.74, + "learning_rate": 1.7155030083715362e-06, + "loss": 1.1131, + "step": 643 + }, + { + "epoch": 0.74, + "learning_rate": 1.7015263222705492e-06, + "loss": 1.212, + "step": 644 + }, + { + "epoch": 0.74, + "learning_rate": 1.6875951215197779e-06, + "loss": 1.256, + "step": 645 + }, + { + "epoch": 0.74, + "learning_rate": 1.6737095982279444e-06, + "loss": 1.291, + "step": 646 + }, + { + "epoch": 0.74, + "learning_rate": 1.6598699438738764e-06, + "loss": 1.2236, + "step": 647 + }, + { + "epoch": 0.74, + "learning_rate": 1.646076349303884e-06, + "loss": 1.157, + "step": 648 + }, + { + "epoch": 0.74, + "learning_rate": 1.6323290047291196e-06, + "loss": 1.314, + "step": 649 + }, + { + "epoch": 0.74, + "learning_rate": 1.618628099722957e-06, + "loss": 1.2609, + "step": 650 + }, + { + "epoch": 0.75, + "learning_rate": 1.604973823218376e-06, + "loss": 1.2084, + "step": 651 + }, + { + "epoch": 0.75, + "learning_rate": 1.5913663635053578e-06, + "loss": 1.2334, + "step": 652 + }, + { + "epoch": 0.75, + "learning_rate": 1.5778059082282932e-06, + "loss": 1.2386, + "step": 653 + }, + { + "epoch": 0.75, + "learning_rate": 1.56429264438338e-06, + "loss": 1.3237, + "step": 654 + }, + { + "epoch": 0.75, + "learning_rate": 1.550826758316068e-06, + "loss": 1.2669, + "step": 655 + }, + { + "epoch": 0.75, + "learning_rate": 1.5374084357184621e-06, + "loss": 1.1954, + "step": 656 + }, + { + "epoch": 0.75, + "learning_rate": 1.5240378616267887e-06, + "loss": 1.2389, + "step": 657 + }, + { + "epoch": 0.75, + "learning_rate": 1.510715220418823e-06, + "loss": 1.2534, + "step": 658 + }, + { + "epoch": 0.75, + "learning_rate": 1.4974406958113557e-06, + "loss": 1.1756, + "step": 659 + }, + { + "epoch": 0.76, + "learning_rate": 1.4842144708576606e-06, + "loss": 1.1699, + "step": 660 + }, + { + "epoch": 0.76, + "learning_rate": 1.4710367279449662e-06, + "loss": 1.2823, + "step": 661 + }, + { + "epoch": 0.76, + "learning_rate": 1.457907648791943e-06, + "loss": 1.2844, + "step": 662 + }, + { + "epoch": 0.76, + "learning_rate": 1.4448274144461965e-06, + "loss": 1.2512, + "step": 663 + }, + { + "epoch": 0.76, + "learning_rate": 1.431796205281773e-06, + "loss": 1.2727, + "step": 664 + }, + { + "epoch": 0.76, + "learning_rate": 1.4188142009966689e-06, + "loss": 1.2276, + "step": 665 + }, + { + "epoch": 0.76, + "learning_rate": 1.4058815806103542e-06, + "loss": 1.2762, + "step": 666 + }, + { + "epoch": 0.76, + "learning_rate": 1.3929985224613051e-06, + "loss": 1.3201, + "step": 667 + }, + { + "epoch": 0.77, + "learning_rate": 1.3801652042045416e-06, + "loss": 1.1758, + "step": 668 + }, + { + "epoch": 0.77, + "learning_rate": 1.367381802809185e-06, + "loss": 1.2246, + "step": 669 + }, + { + "epoch": 0.77, + "learning_rate": 1.3546484945560029e-06, + "loss": 1.1833, + "step": 670 + }, + { + "epoch": 0.77, + "learning_rate": 1.3419654550349987e-06, + "loss": 1.1716, + "step": 671 + }, + { + "epoch": 0.77, + "learning_rate": 1.329332859142967e-06, + "loss": 1.2747, + "step": 672 + }, + { + "epoch": 0.77, + "learning_rate": 1.3167508810811058e-06, + "loss": 1.2786, + "step": 673 + }, + { + "epoch": 0.77, + "learning_rate": 1.3042196943525942e-06, + "loss": 1.2917, + "step": 674 + }, + { + "epoch": 0.77, + "learning_rate": 1.2917394717602123e-06, + "loss": 1.2502, + "step": 675 + }, + { + "epoch": 0.77, + "learning_rate": 1.2793103854039518e-06, + "loss": 1.2029, + "step": 676 + }, + { + "epoch": 0.78, + "learning_rate": 1.2669326066786458e-06, + "loss": 1.2311, + "step": 677 + }, + { + "epoch": 0.78, + "learning_rate": 1.2546063062716069e-06, + "loss": 1.1958, + "step": 678 + }, + { + "epoch": 0.78, + "learning_rate": 1.242331654160263e-06, + "loss": 1.313, + "step": 679 + }, + { + "epoch": 0.78, + "learning_rate": 1.2301088196098332e-06, + "loss": 1.2361, + "step": 680 + }, + { + "epoch": 0.78, + "learning_rate": 1.2179379711709738e-06, + "loss": 1.2073, + "step": 681 + }, + { + "epoch": 0.78, + "learning_rate": 1.205819276677464e-06, + "loss": 1.2211, + "step": 682 + }, + { + "epoch": 0.78, + "learning_rate": 1.1937529032438905e-06, + "loss": 1.2087, + "step": 683 + }, + { + "epoch": 0.78, + "learning_rate": 1.1817390172633402e-06, + "loss": 1.2407, + "step": 684 + }, + { + "epoch": 0.78, + "learning_rate": 1.1697777844051105e-06, + "loss": 1.201, + "step": 685 + }, + { + "epoch": 0.79, + "learning_rate": 1.1578693696124193e-06, + "loss": 1.2321, + "step": 686 + }, + { + "epoch": 0.79, + "learning_rate": 1.1460139371001339e-06, + "loss": 1.2191, + "step": 687 + }, + { + "epoch": 0.79, + "learning_rate": 1.1342116503525059e-06, + "loss": 1.2591, + "step": 688 + }, + { + "epoch": 0.79, + "learning_rate": 1.1224626721209141e-06, + "loss": 1.188, + "step": 689 + }, + { + "epoch": 0.79, + "learning_rate": 1.1107671644216305e-06, + "loss": 1.2702, + "step": 690 + }, + { + "epoch": 0.79, + "learning_rate": 1.0991252885335651e-06, + "loss": 1.2203, + "step": 691 + }, + { + "epoch": 0.79, + "learning_rate": 1.0875372049960697e-06, + "loss": 1.235, + "step": 692 + }, + { + "epoch": 0.79, + "learning_rate": 1.0760030736066952e-06, + "loss": 1.2553, + "step": 693 + }, + { + "epoch": 0.79, + "learning_rate": 1.064523053419015e-06, + "loss": 1.1923, + "step": 694 + }, + { + "epoch": 0.8, + "learning_rate": 1.0530973027404073e-06, + "loss": 1.2705, + "step": 695 + }, + { + "epoch": 0.8, + "learning_rate": 1.041725979129894e-06, + "loss": 1.2714, + "step": 696 + }, + { + "epoch": 0.8, + "learning_rate": 1.0304092393959513e-06, + "loss": 1.2139, + "step": 697 + }, + { + "epoch": 0.8, + "learning_rate": 1.0191472395943552e-06, + "loss": 1.2619, + "step": 698 + }, + { + "epoch": 0.8, + "learning_rate": 1.0079401350260288e-06, + "loss": 1.1787, + "step": 699 + }, + { + "epoch": 0.8, + "learning_rate": 9.967880802348989e-07, + "loss": 1.19, + "step": 700 + }, + { + "epoch": 0.8, + "learning_rate": 9.856912290057668e-07, + "loss": 1.2921, + "step": 701 + }, + { + "epoch": 0.8, + "learning_rate": 9.746497343621857e-07, + "loss": 1.1421, + "step": 702 + }, + { + "epoch": 0.81, + "learning_rate": 9.63663748564353e-07, + "loss": 1.24, + "step": 703 + }, + { + "epoch": 0.81, + "learning_rate": 9.527334231070084e-07, + "loss": 1.2213, + "step": 704 + }, + { + "epoch": 0.81, + "learning_rate": 9.418589087173441e-07, + "loss": 1.2802, + "step": 705 + }, + { + "epoch": 0.81, + "learning_rate": 9.310403553529335e-07, + "loss": 1.1891, + "step": 706 + }, + { + "epoch": 0.81, + "learning_rate": 9.20277912199648e-07, + "loss": 1.218, + "step": 707 + }, + { + "epoch": 0.81, + "learning_rate": 9.095717276696214e-07, + "loss": 1.2472, + "step": 708 + }, + { + "epoch": 0.81, + "learning_rate": 8.989219493991791e-07, + "loss": 1.1592, + "step": 709 + }, + { + "epoch": 0.81, + "learning_rate": 8.883287242468242e-07, + "loss": 1.2343, + "step": 710 + }, + { + "epoch": 0.81, + "learning_rate": 8.777921982911996e-07, + "loss": 1.2461, + "step": 711 + }, + { + "epoch": 0.82, + "learning_rate": 8.673125168290713e-07, + "loss": 1.226, + "step": 712 + }, + { + "epoch": 0.82, + "learning_rate": 8.568898243733398e-07, + "loss": 1.2083, + "step": 713 + }, + { + "epoch": 0.82, + "learning_rate": 8.46524264651028e-07, + "loss": 1.2606, + "step": 714 + }, + { + "epoch": 0.82, + "learning_rate": 8.362159806013176e-07, + "loss": 1.2723, + "step": 715 + }, + { + "epoch": 0.82, + "learning_rate": 8.259651143735603e-07, + "loss": 1.1759, + "step": 716 + }, + { + "epoch": 0.82, + "learning_rate": 8.157718073253351e-07, + "loss": 1.3235, + "step": 717 + }, + { + "epoch": 0.82, + "learning_rate": 8.056362000204848e-07, + "loss": 1.1881, + "step": 718 + }, + { + "epoch": 0.82, + "learning_rate": 7.955584322271853e-07, + "loss": 1.2148, + "step": 719 + }, + { + "epoch": 0.82, + "learning_rate": 7.85538642916015e-07, + "loss": 1.2402, + "step": 720 + }, + { + "epoch": 0.83, + "learning_rate": 7.755769702580412e-07, + "loss": 1.2338, + "step": 721 + }, + { + "epoch": 0.83, + "learning_rate": 7.656735516229125e-07, + "loss": 1.2621, + "step": 722 + }, + { + "epoch": 0.83, + "learning_rate": 7.558285235769647e-07, + "loss": 1.1569, + "step": 723 + }, + { + "epoch": 0.83, + "learning_rate": 7.46042021881338e-07, + "loss": 1.2019, + "step": 724 + }, + { + "epoch": 0.83, + "learning_rate": 7.363141814901054e-07, + "loss": 1.2361, + "step": 725 + }, + { + "epoch": 0.83, + "learning_rate": 7.266451365484106e-07, + "loss": 1.2144, + "step": 726 + }, + { + "epoch": 0.83, + "learning_rate": 7.170350203906218e-07, + "loss": 1.2816, + "step": 727 + }, + { + "epoch": 0.83, + "learning_rate": 7.074839655384835e-07, + "loss": 1.1859, + "step": 728 + }, + { + "epoch": 0.84, + "learning_rate": 6.979921036993042e-07, + "loss": 1.304, + "step": 729 + }, + { + "epoch": 0.84, + "learning_rate": 6.885595657641214e-07, + "loss": 1.201, + "step": 730 + }, + { + "epoch": 0.84, + "learning_rate": 6.791864818059179e-07, + "loss": 1.1713, + "step": 731 + }, + { + "epoch": 0.84, + "learning_rate": 6.698729810778065e-07, + "loss": 1.2106, + "step": 732 + }, + { + "epoch": 0.84, + "learning_rate": 6.606191920112664e-07, + "loss": 1.2678, + "step": 733 + }, + { + "epoch": 0.84, + "learning_rate": 6.514252422143591e-07, + "loss": 1.3211, + "step": 734 + }, + { + "epoch": 0.84, + "learning_rate": 6.422912584699753e-07, + "loss": 1.2193, + "step": 735 + }, + { + "epoch": 0.84, + "learning_rate": 6.332173667340841e-07, + "loss": 1.204, + "step": 736 + }, + { + "epoch": 0.84, + "learning_rate": 6.242036921339973e-07, + "loss": 1.2639, + "step": 737 + }, + { + "epoch": 0.85, + "learning_rate": 6.152503589666426e-07, + "loss": 1.2609, + "step": 738 + }, + { + "epoch": 0.85, + "learning_rate": 6.063574906968511e-07, + "loss": 1.2043, + "step": 739 + }, + { + "epoch": 0.85, + "learning_rate": 5.975252099556544e-07, + "loss": 1.2423, + "step": 740 + }, + { + "epoch": 0.85, + "learning_rate": 5.887536385385917e-07, + "loss": 1.2067, + "step": 741 + }, + { + "epoch": 0.85, + "learning_rate": 5.800428974040311e-07, + "loss": 1.2295, + "step": 742 + }, + { + "epoch": 0.85, + "learning_rate": 5.713931066715078e-07, + "loss": 1.1883, + "step": 743 + }, + { + "epoch": 0.85, + "learning_rate": 5.628043856200543e-07, + "loss": 1.196, + "step": 744 + }, + { + "epoch": 0.85, + "learning_rate": 5.542768526865678e-07, + "loss": 1.2536, + "step": 745 + }, + { + "epoch": 0.85, + "learning_rate": 5.458106254641715e-07, + "loss": 1.2407, + "step": 746 + }, + { + "epoch": 0.86, + "learning_rate": 5.374058207005945e-07, + "loss": 1.2345, + "step": 747 + }, + { + "epoch": 0.86, + "learning_rate": 5.290625542965611e-07, + "loss": 1.1855, + "step": 748 + }, + { + "epoch": 0.86, + "learning_rate": 5.207809413041914e-07, + "loss": 1.2697, + "step": 749 + }, + { + "epoch": 0.86, + "learning_rate": 5.125610959254213e-07, + "loss": 1.2528, + "step": 750 + }, + { + "epoch": 0.86, + "learning_rate": 5.044031315104136e-07, + "loss": 1.2428, + "step": 751 + }, + { + "epoch": 0.86, + "learning_rate": 4.963071605560144e-07, + "loss": 1.2263, + "step": 752 + }, + { + "epoch": 0.86, + "learning_rate": 4.882732947041818e-07, + "loss": 1.2617, + "step": 753 + }, + { + "epoch": 0.86, + "learning_rate": 4.803016447404629e-07, + "loss": 1.1969, + "step": 754 + }, + { + "epoch": 0.86, + "learning_rate": 4.723923205924558e-07, + "loss": 1.1646, + "step": 755 + }, + { + "epoch": 0.87, + "learning_rate": 4.6454543132829653e-07, + "loss": 1.2267, + "step": 756 + }, + { + "epoch": 0.87, + "learning_rate": 4.5676108515515684e-07, + "loss": 1.2072, + "step": 757 + }, + { + "epoch": 0.87, + "learning_rate": 4.4903938941775084e-07, + "loss": 1.1877, + "step": 758 + }, + { + "epoch": 0.87, + "learning_rate": 4.413804505968533e-07, + "loss": 1.2038, + "step": 759 + }, + { + "epoch": 0.87, + "learning_rate": 4.3378437430783294e-07, + "loss": 1.2549, + "step": 760 + }, + { + "epoch": 0.87, + "learning_rate": 4.262512652991968e-07, + "loss": 1.268, + "step": 761 + }, + { + "epoch": 0.87, + "learning_rate": 4.187812274511427e-07, + "loss": 1.2731, + "step": 762 + }, + { + "epoch": 0.87, + "learning_rate": 4.113743637741296e-07, + "loss": 1.3252, + "step": 763 + }, + { + "epoch": 0.88, + "learning_rate": 4.040307764074586e-07, + "loss": 1.1899, + "step": 764 + }, + { + "epoch": 0.88, + "learning_rate": 3.9675056661785563e-07, + "loss": 1.2422, + "step": 765 + }, + { + "epoch": 0.88, + "learning_rate": 3.895338347980898e-07, + "loss": 1.2417, + "step": 766 + }, + { + "epoch": 0.88, + "learning_rate": 3.8238068046557276e-07, + "loss": 1.1714, + "step": 767 + }, + { + "epoch": 0.88, + "learning_rate": 3.752912022610006e-07, + "loss": 1.243, + "step": 768 + }, + { + "epoch": 0.88, + "learning_rate": 3.6826549794698074e-07, + "loss": 1.2715, + "step": 769 + }, + { + "epoch": 0.88, + "learning_rate": 3.6130366440669693e-07, + "loss": 1.2184, + "step": 770 + }, + { + "epoch": 0.88, + "learning_rate": 3.544057976425619e-07, + "loss": 1.1844, + "step": 771 + }, + { + "epoch": 0.88, + "learning_rate": 3.4757199277490106e-07, + "loss": 1.1547, + "step": 772 + }, + { + "epoch": 0.89, + "learning_rate": 3.408023440406355e-07, + "loss": 1.211, + "step": 773 + }, + { + "epoch": 0.89, + "learning_rate": 3.340969447919873e-07, + "loss": 1.1662, + "step": 774 + }, + { + "epoch": 0.89, + "learning_rate": 3.2745588749518775e-07, + "loss": 1.3596, + "step": 775 + }, + { + "epoch": 0.89, + "learning_rate": 3.2087926372920577e-07, + "loss": 1.1916, + "step": 776 + }, + { + "epoch": 0.89, + "learning_rate": 3.143671641844831e-07, + "loss": 1.2438, + "step": 777 + }, + { + "epoch": 0.89, + "learning_rate": 3.0791967866168394e-07, + "loss": 1.2936, + "step": 778 + }, + { + "epoch": 0.89, + "learning_rate": 3.015368960704584e-07, + "loss": 1.2453, + "step": 779 + }, + { + "epoch": 0.89, + "learning_rate": 2.9521890442821276e-07, + "loss": 1.2201, + "step": 780 + }, + { + "epoch": 0.89, + "learning_rate": 2.889657908589e-07, + "loss": 1.2094, + "step": 781 + }, + { + "epoch": 0.9, + "learning_rate": 2.8277764159181484e-07, + "loss": 1.1386, + "step": 782 + }, + { + "epoch": 0.9, + "learning_rate": 2.7665454196040665e-07, + "loss": 1.1688, + "step": 783 + }, + { + "epoch": 0.9, + "learning_rate": 2.7059657640110204e-07, + "loss": 1.2446, + "step": 784 + }, + { + "epoch": 0.9, + "learning_rate": 2.6460382845214125e-07, + "loss": 1.2187, + "step": 785 + }, + { + "epoch": 0.9, + "learning_rate": 2.5867638075242454e-07, + "loss": 1.2678, + "step": 786 + }, + { + "epoch": 0.9, + "learning_rate": 2.5281431504037555e-07, + "loss": 1.2246, + "step": 787 + }, + { + "epoch": 0.9, + "learning_rate": 2.470177121528089e-07, + "loss": 1.268, + "step": 788 + }, + { + "epoch": 0.9, + "learning_rate": 2.4128665202382327e-07, + "loss": 1.1303, + "step": 789 + }, + { + "epoch": 0.9, + "learning_rate": 2.356212136836894e-07, + "loss": 1.1405, + "step": 790 + }, + { + "epoch": 0.91, + "learning_rate": 2.3002147525777118e-07, + "loss": 1.2348, + "step": 791 + }, + { + "epoch": 0.91, + "learning_rate": 2.2448751396543788e-07, + "loss": 1.2552, + "step": 792 + }, + { + "epoch": 0.91, + "learning_rate": 2.1901940611900707e-07, + "loss": 1.2936, + "step": 793 + }, + { + "epoch": 0.91, + "learning_rate": 2.1361722712268772e-07, + "loss": 1.289, + "step": 794 + }, + { + "epoch": 0.91, + "learning_rate": 2.0828105147154275e-07, + "loss": 1.2411, + "step": 795 + }, + { + "epoch": 0.91, + "learning_rate": 2.0301095275046145e-07, + "loss": 1.2261, + "step": 796 + }, + { + "epoch": 0.91, + "learning_rate": 1.9780700363314255e-07, + "loss": 1.1747, + "step": 797 + }, + { + "epoch": 0.91, + "learning_rate": 1.926692758810955e-07, + "loss": 1.2051, + "step": 798 + }, + { + "epoch": 0.92, + "learning_rate": 1.8759784034264927e-07, + "loss": 1.2246, + "step": 799 + }, + { + "epoch": 0.92, + "learning_rate": 1.825927669519728e-07, + "loss": 1.2018, + "step": 800 + }, + { + "epoch": 0.92, + "learning_rate": 1.776541247281177e-07, + "loss": 1.1812, + "step": 801 + }, + { + "epoch": 0.92, + "learning_rate": 1.7278198177405614e-07, + "loss": 1.2621, + "step": 802 + }, + { + "epoch": 0.92, + "learning_rate": 1.679764052757532e-07, + "loss": 1.2021, + "step": 803 + }, + { + "epoch": 0.92, + "learning_rate": 1.6323746150123e-07, + "loss": 1.2577, + "step": 804 + }, + { + "epoch": 0.92, + "learning_rate": 1.5856521579965866e-07, + "loss": 1.2777, + "step": 805 + }, + { + "epoch": 0.92, + "learning_rate": 1.5395973260045273e-07, + "loss": 1.274, + "step": 806 + }, + { + "epoch": 0.92, + "learning_rate": 1.4942107541238705e-07, + "loss": 1.2606, + "step": 807 + }, + { + "epoch": 0.93, + "learning_rate": 1.449493068227159e-07, + "loss": 1.1909, + "step": 808 + }, + { + "epoch": 0.93, + "learning_rate": 1.4054448849631087e-07, + "loss": 1.1955, + "step": 809 + }, + { + "epoch": 0.93, + "learning_rate": 1.3620668117481471e-07, + "loss": 1.2731, + "step": 810 + }, + { + "epoch": 0.93, + "learning_rate": 1.319359446757973e-07, + "loss": 1.3342, + "step": 811 + }, + { + "epoch": 0.93, + "learning_rate": 1.2773233789193816e-07, + "loss": 1.1588, + "step": 812 + }, + { + "epoch": 0.93, + "learning_rate": 1.2359591879020528e-07, + "loss": 1.2291, + "step": 813 + }, + { + "epoch": 0.93, + "learning_rate": 1.1952674441106483e-07, + "loss": 1.1973, + "step": 814 + }, + { + "epoch": 0.93, + "learning_rate": 1.1552487086768871e-07, + "loss": 1.2551, + "step": 815 + }, + { + "epoch": 0.93, + "learning_rate": 1.1159035334518343e-07, + "loss": 1.2953, + "step": 816 + }, + { + "epoch": 0.94, + "learning_rate": 1.0772324609982787e-07, + "loss": 1.2416, + "step": 817 + }, + { + "epoch": 0.94, + "learning_rate": 1.03923602458324e-07, + "loss": 1.2722, + "step": 818 + }, + { + "epoch": 0.94, + "learning_rate": 1.0019147481706626e-07, + "loss": 1.1502, + "step": 819 + }, + { + "epoch": 0.94, + "learning_rate": 9.652691464141273e-08, + "loss": 1.2267, + "step": 820 + }, + { + "epoch": 0.94, + "learning_rate": 9.292997246497959e-08, + "loss": 1.2338, + "step": 821 + }, + { + "epoch": 0.94, + "learning_rate": 8.940069788894389e-08, + "loss": 1.3175, + "step": 822 + }, + { + "epoch": 0.94, + "learning_rate": 8.593913958135691e-08, + "loss": 1.2424, + "step": 823 + }, + { + "epoch": 0.94, + "learning_rate": 8.254534527647851e-08, + "loss": 1.2945, + "step": 824 + }, + { + "epoch": 0.95, + "learning_rate": 7.921936177411049e-08, + "loss": 1.1953, + "step": 825 + }, + { + "epoch": 0.95, + "learning_rate": 7.59612349389599e-08, + "loss": 1.226, + "step": 826 + }, + { + "epoch": 0.95, + "learning_rate": 7.277100970000062e-08, + "loss": 1.2159, + "step": 827 + }, + { + "epoch": 0.95, + "learning_rate": 6.964873004985717e-08, + "loss": 1.1667, + "step": 828 + }, + { + "epoch": 0.95, + "learning_rate": 6.659443904419638e-08, + "loss": 1.2288, + "step": 829 + }, + { + "epoch": 0.95, + "learning_rate": 6.360817880113335e-08, + "loss": 1.2918, + "step": 830 + }, + { + "epoch": 0.95, + "learning_rate": 6.06899905006525e-08, + "loss": 1.2764, + "step": 831 + }, + { + "epoch": 0.95, + "learning_rate": 5.783991438403802e-08, + "loss": 1.3111, + "step": 832 + }, + { + "epoch": 0.95, + "learning_rate": 5.505798975331933e-08, + "loss": 1.2598, + "step": 833 + }, + { + "epoch": 0.96, + "learning_rate": 5.234425497072981e-08, + "loss": 1.2023, + "step": 834 + }, + { + "epoch": 0.96, + "learning_rate": 4.9698747458176714e-08, + "loss": 1.2719, + "step": 835 + }, + { + "epoch": 0.96, + "learning_rate": 4.712150369672652e-08, + "loss": 1.198, + "step": 836 + }, + { + "epoch": 0.96, + "learning_rate": 4.461255922609986e-08, + "loss": 1.2503, + "step": 837 + }, + { + "epoch": 0.96, + "learning_rate": 4.217194864418295e-08, + "loss": 1.2348, + "step": 838 + }, + { + "epoch": 0.96, + "learning_rate": 3.979970560655133e-08, + "loss": 1.3072, + "step": 839 + }, + { + "epoch": 0.96, + "learning_rate": 3.749586282600359e-08, + "loss": 1.1894, + "step": 840 + }, + { + "epoch": 0.96, + "learning_rate": 3.526045207211059e-08, + "loss": 1.233, + "step": 841 + }, + { + "epoch": 0.96, + "learning_rate": 3.309350417077972e-08, + "loss": 1.209, + "step": 842 + }, + { + "epoch": 0.97, + "learning_rate": 3.0995049003826325e-08, + "loss": 1.1935, + "step": 843 + }, + { + "epoch": 0.97, + "learning_rate": 2.8965115508564622e-08, + "loss": 1.3144, + "step": 844 + }, + { + "epoch": 0.97, + "learning_rate": 2.700373167740744e-08, + "loss": 1.2205, + "step": 845 + }, + { + "epoch": 0.97, + "learning_rate": 2.511092455747932e-08, + "loss": 1.1731, + "step": 846 + }, + { + "epoch": 0.97, + "learning_rate": 2.3286720250246255e-08, + "loss": 1.087, + "step": 847 + }, + { + "epoch": 0.97, + "learning_rate": 2.153114391115152e-08, + "loss": 1.2086, + "step": 848 + }, + { + "epoch": 0.97, + "learning_rate": 1.984421974927375e-08, + "loss": 1.1868, + "step": 849 + }, + { + "epoch": 0.97, + "learning_rate": 1.8225971026987755e-08, + "loss": 1.2852, + "step": 850 + }, + { + "epoch": 0.97, + "learning_rate": 1.6676420059649756e-08, + "loss": 1.289, + "step": 851 + }, + { + "epoch": 0.98, + "learning_rate": 1.5195588215283773e-08, + "loss": 1.2034, + "step": 852 + }, + { + "epoch": 0.98, + "learning_rate": 1.3783495914291844e-08, + "loss": 1.1606, + "step": 853 + }, + { + "epoch": 0.98, + "learning_rate": 1.244016262916814e-08, + "loss": 1.1817, + "step": 854 + }, + { + "epoch": 0.98, + "learning_rate": 1.1165606884234182e-08, + "loss": 1.1963, + "step": 855 + }, + { + "epoch": 0.98, + "learning_rate": 9.959846255381267e-09, + "loss": 1.23, + "step": 856 + }, + { + "epoch": 0.98, + "learning_rate": 8.822897369827333e-09, + "loss": 1.1343, + "step": 857 + }, + { + "epoch": 0.98, + "learning_rate": 7.754775905891576e-09, + "loss": 1.2412, + "step": 858 + }, + { + "epoch": 0.98, + "learning_rate": 6.755496592773525e-09, + "loss": 1.3113, + "step": 859 + }, + { + "epoch": 0.99, + "learning_rate": 5.825073210352084e-09, + "loss": 1.2007, + "step": 860 + }, + { + "epoch": 0.99, + "learning_rate": 4.9635185889967966e-09, + "loss": 1.172, + "step": 861 + }, + { + "epoch": 0.99, + "learning_rate": 4.170844609387992e-09, + "loss": 1.2584, + "step": 862 + }, + { + "epoch": 0.99, + "learning_rate": 3.4470622023558e-09, + "loss": 1.2319, + "step": 863 + }, + { + "epoch": 0.99, + "learning_rate": 2.792181348726941e-09, + "loss": 1.2648, + "step": 864 + }, + { + "epoch": 0.99, + "learning_rate": 2.20621107918928e-09, + "loss": 1.2215, + "step": 865 + }, + { + "epoch": 0.99, + "learning_rate": 1.6891594741663686e-09, + "loss": 1.3185, + "step": 866 + }, + { + "epoch": 0.99, + "learning_rate": 1.2410336637047604e-09, + "loss": 1.2158, + "step": 867 + }, + { + "epoch": 0.99, + "learning_rate": 8.618398273779749e-10, + "loss": 1.2343, + "step": 868 + }, + { + "epoch": 1.0, + "learning_rate": 5.515831941993455e-10, + "loss": 1.2476, + "step": 869 + }, + { + "epoch": 1.0, + "learning_rate": 3.1026804255207544e-10, + "loss": 1.2134, + "step": 870 + }, + { + "epoch": 1.0, + "learning_rate": 1.378977001276205e-10, + "loss": 1.22, + "step": 871 + }, + { + "epoch": 1.0, + "learning_rate": 3.447454388127991e-11, + "loss": 1.2135, + "step": 872 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 1.1774, + "step": 873 + }, + { + "epoch": 1.0, + "step": 873, + "total_flos": 6.557120667907523e+18, + "train_loss": 1.3115756642777485, + "train_runtime": 2453.1326, + "train_samples_per_second": 91.074, + "train_steps_per_second": 0.356 + } + ], + "logging_steps": 1.0, + "max_steps": 873, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 6.557120667907523e+18, + "trial_name": null, + "trial_params": null +}