{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.695652173913044e-06, "loss": 1.5025, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.739130434782609e-05, "loss": 1.2014, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.608695652173913e-05, "loss": 1.4252, "step": 3 }, { "epoch": 0.01, "learning_rate": 3.478260869565218e-05, "loss": 1.4064, "step": 4 }, { "epoch": 0.01, "learning_rate": 4.347826086956522e-05, "loss": 1.6361, "step": 5 }, { "epoch": 0.01, "learning_rate": 5.217391304347826e-05, "loss": 1.3016, "step": 6 }, { "epoch": 0.01, "learning_rate": 6.086956521739131e-05, "loss": 1.4057, "step": 7 }, { "epoch": 0.01, "learning_rate": 6.956521739130436e-05, "loss": 1.5792, "step": 8 }, { "epoch": 0.01, "learning_rate": 7.82608695652174e-05, "loss": 1.442, "step": 9 }, { "epoch": 0.01, "learning_rate": 8.695652173913044e-05, "loss": 1.3623, "step": 10 }, { "epoch": 0.01, "learning_rate": 9.565217391304348e-05, "loss": 1.2958, "step": 11 }, { "epoch": 0.02, "learning_rate": 0.00010434782608695653, "loss": 1.4166, "step": 12 }, { "epoch": 0.02, "learning_rate": 0.00011304347826086956, "loss": 1.426, "step": 13 }, { "epoch": 0.02, "learning_rate": 0.00012173913043478263, "loss": 1.2285, "step": 14 }, { "epoch": 0.02, "learning_rate": 0.00013043478260869567, "loss": 1.3128, "step": 15 }, { "epoch": 0.02, "learning_rate": 0.0001391304347826087, "loss": 1.2511, "step": 16 }, { "epoch": 0.02, "learning_rate": 0.00014782608695652173, "loss": 1.2222, "step": 17 }, { "epoch": 0.02, "learning_rate": 0.0001565217391304348, "loss": 1.294, "step": 18 }, { "epoch": 0.03, "learning_rate": 0.00016521739130434784, "loss": 1.447, "step": 19 }, { "epoch": 0.03, "learning_rate": 0.00017391304347826088, "loss": 1.2369, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.00018260869565217392, "loss": 1.469, "step": 21 }, { "epoch": 0.03, "learning_rate": 0.00019130434782608697, "loss": 1.2539, "step": 22 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 1.2429, "step": 23 }, { "epoch": 0.03, "learning_rate": 0.0001999990663152786, "loss": 1.2823, "step": 24 }, { "epoch": 0.03, "learning_rate": 0.00019999626527854967, "loss": 1.1539, "step": 25 }, { "epoch": 0.03, "learning_rate": 0.00019999159694211892, "loss": 1.1343, "step": 26 }, { "epoch": 0.04, "learning_rate": 0.0001999850613931615, "loss": 1.0831, "step": 27 }, { "epoch": 0.04, "learning_rate": 0.0001999766587537202, "loss": 1.3442, "step": 28 }, { "epoch": 0.04, "learning_rate": 0.00019996638918070336, "loss": 1.2974, "step": 29 }, { "epoch": 0.04, "learning_rate": 0.00019995425286588184, "loss": 1.2714, "step": 30 }, { "epoch": 0.04, "learning_rate": 0.00019994025003588548, "loss": 1.4062, "step": 31 }, { "epoch": 0.04, "learning_rate": 0.00019992438095219886, "loss": 1.2071, "step": 32 }, { "epoch": 0.04, "learning_rate": 0.00019990664591115634, "loss": 1.2311, "step": 33 }, { "epoch": 0.05, "learning_rate": 0.00019988704524393676, "loss": 1.2589, "step": 34 }, { "epoch": 0.05, "learning_rate": 0.00019986557931655688, "loss": 1.2766, "step": 35 }, { "epoch": 0.05, "learning_rate": 0.00019984224852986494, "loss": 1.2625, "step": 36 }, { "epoch": 0.05, "learning_rate": 0.00019981705331953293, "loss": 1.4641, "step": 37 }, { "epoch": 0.05, "learning_rate": 0.00019978999415604847, "loss": 1.4287, "step": 38 }, { "epoch": 0.05, "learning_rate": 0.0001997610715447061, "loss": 1.336, "step": 39 }, { "epoch": 0.05, "learning_rate": 0.00019973028602559786, "loss": 1.1569, "step": 40 }, { "epoch": 0.05, "learning_rate": 0.00019969763817360314, "loss": 1.1655, "step": 41 }, { "epoch": 0.06, "learning_rate": 0.00019966312859837787, "loss": 1.3616, "step": 42 }, { "epoch": 0.06, "learning_rate": 0.0001996267579443434, "loss": 1.3017, "step": 43 }, { "epoch": 0.06, "learning_rate": 0.0001995885268906742, "loss": 1.3204, "step": 44 }, { "epoch": 0.06, "learning_rate": 0.0001995484361512853, "loss": 1.4284, "step": 45 }, { "epoch": 0.06, "learning_rate": 0.0001995064864748188, "loss": 1.3212, "step": 46 }, { "epoch": 0.06, "learning_rate": 0.00019946267864463026, "loss": 1.2725, "step": 47 }, { "epoch": 0.06, "learning_rate": 0.00019941701347877367, "loss": 1.3024, "step": 48 }, { "epoch": 0.07, "learning_rate": 0.0001993694918299864, "loss": 1.3055, "step": 49 }, { "epoch": 0.07, "learning_rate": 0.00019932011458567315, "loss": 1.2735, "step": 50 }, { "epoch": 0.07, "learning_rate": 0.00019926888266788955, "loss": 1.3213, "step": 51 }, { "epoch": 0.07, "learning_rate": 0.00019921579703332474, "loss": 1.243, "step": 52 }, { "epoch": 0.07, "learning_rate": 0.0001991608586732837, "loss": 1.3062, "step": 53 }, { "epoch": 0.07, "learning_rate": 0.00019910406861366848, "loss": 1.2931, "step": 54 }, { "epoch": 0.07, "learning_rate": 0.00019904542791495937, "loss": 1.4384, "step": 55 }, { "epoch": 0.07, "learning_rate": 0.00019898493767219483, "loss": 1.2831, "step": 56 }, { "epoch": 0.08, "learning_rate": 0.0001989225990149512, "loss": 1.2887, "step": 57 }, { "epoch": 0.08, "learning_rate": 0.0001988584131073215, "loss": 1.2165, "step": 58 }, { "epoch": 0.08, "learning_rate": 0.00019879238114789373, "loss": 1.1418, "step": 59 }, { "epoch": 0.08, "learning_rate": 0.00019872450436972855, "loss": 1.3254, "step": 60 }, { "epoch": 0.08, "learning_rate": 0.00019865478404033617, "loss": 1.2422, "step": 61 }, { "epoch": 0.08, "learning_rate": 0.0001985832214616527, "loss": 1.2131, "step": 62 }, { "epoch": 0.08, "learning_rate": 0.00019850981797001592, "loss": 1.1483, "step": 63 }, { "epoch": 0.09, "learning_rate": 0.00019843457493614014, "loss": 1.271, "step": 64 }, { "epoch": 0.09, "learning_rate": 0.00019835749376509082, "loss": 1.3895, "step": 65 }, { "epoch": 0.09, "learning_rate": 0.00019827857589625817, "loss": 1.3028, "step": 66 }, { "epoch": 0.09, "learning_rate": 0.00019819782280333038, "loss": 1.3495, "step": 67 }, { "epoch": 0.09, "learning_rate": 0.00019811523599426602, "loss": 1.2658, "step": 68 }, { "epoch": 0.09, "learning_rate": 0.0001980308170112659, "loss": 1.1571, "step": 69 }, { "epoch": 0.09, "learning_rate": 0.0001979445674307444, "loss": 1.3985, "step": 70 }, { "epoch": 0.09, "learning_rate": 0.00019785648886329972, "loss": 1.073, "step": 71 }, { "epoch": 0.1, "learning_rate": 0.0001977665829536842, "loss": 1.2254, "step": 72 }, { "epoch": 0.1, "learning_rate": 0.00019767485138077326, "loss": 1.4155, "step": 73 }, { "epoch": 0.1, "learning_rate": 0.0001975812958575343, "loss": 1.3287, "step": 74 }, { "epoch": 0.1, "learning_rate": 0.00019748591813099456, "loss": 1.3901, "step": 75 }, { "epoch": 0.1, "learning_rate": 0.00019738871998220857, "loss": 1.1718, "step": 76 }, { "epoch": 0.1, "learning_rate": 0.00019728970322622485, "loss": 1.2168, "step": 77 }, { "epoch": 0.1, "learning_rate": 0.00019718886971205205, "loss": 1.2009, "step": 78 }, { "epoch": 0.11, "learning_rate": 0.0001970862213226244, "loss": 1.2114, "step": 79 }, { "epoch": 0.11, "learning_rate": 0.00019698175997476657, "loss": 1.5937, "step": 80 }, { "epoch": 0.11, "learning_rate": 0.0001968754876191578, "loss": 1.2369, "step": 81 }, { "epoch": 0.11, "learning_rate": 0.00019676740624029567, "loss": 1.2128, "step": 82 }, { "epoch": 0.11, "learning_rate": 0.00019665751785645874, "loss": 1.2511, "step": 83 }, { "epoch": 0.11, "learning_rate": 0.00019654582451966913, "loss": 1.4469, "step": 84 }, { "epoch": 0.11, "learning_rate": 0.00019643232831565414, "loss": 1.2032, "step": 85 }, { "epoch": 0.11, "learning_rate": 0.00019631703136380714, "loss": 1.24, "step": 86 }, { "epoch": 0.12, "learning_rate": 0.00019619993581714823, "loss": 1.3202, "step": 87 }, { "epoch": 0.12, "learning_rate": 0.00019608104386228378, "loss": 1.2662, "step": 88 }, { "epoch": 0.12, "learning_rate": 0.00019596035771936592, "loss": 1.1238, "step": 89 }, { "epoch": 0.12, "learning_rate": 0.00019583787964205074, "loss": 1.3928, "step": 90 }, { "epoch": 0.12, "learning_rate": 0.00019571361191745646, "loss": 1.5508, "step": 91 }, { "epoch": 0.12, "learning_rate": 0.00019558755686612057, "loss": 1.2687, "step": 92 }, { "epoch": 0.12, "learning_rate": 0.00019545971684195663, "loss": 1.2282, "step": 93 }, { "epoch": 0.13, "learning_rate": 0.00019533009423221013, "loss": 1.3355, "step": 94 }, { "epoch": 0.13, "learning_rate": 0.0001951986914574141, "loss": 1.1111, "step": 95 }, { "epoch": 0.13, "learning_rate": 0.00019506551097134384, "loss": 1.3427, "step": 96 }, { "epoch": 0.13, "learning_rate": 0.000194930555260971, "loss": 1.0872, "step": 97 }, { "epoch": 0.13, "learning_rate": 0.0001947938268464173, "loss": 1.1873, "step": 98 }, { "epoch": 0.13, "learning_rate": 0.00019465532828090735, "loss": 1.2597, "step": 99 }, { "epoch": 0.13, "learning_rate": 0.00019451506215072107, "loss": 1.3552, "step": 100 }, { "epoch": 0.13, "learning_rate": 0.0001943730310751453, "loss": 1.1638, "step": 101 }, { "epoch": 0.14, "learning_rate": 0.00019422923770642494, "loss": 1.2076, "step": 102 }, { "epoch": 0.14, "learning_rate": 0.00019408368472971345, "loss": 1.3398, "step": 103 }, { "epoch": 0.14, "learning_rate": 0.00019393637486302256, "loss": 1.1939, "step": 104 }, { "epoch": 0.14, "learning_rate": 0.0001937873108571718, "loss": 1.3124, "step": 105 }, { "epoch": 0.14, "learning_rate": 0.00019363649549573682, "loss": 1.5443, "step": 106 }, { "epoch": 0.14, "learning_rate": 0.00019348393159499759, "loss": 1.2504, "step": 107 }, { "epoch": 0.14, "learning_rate": 0.00019332962200388577, "loss": 1.1387, "step": 108 }, { "epoch": 0.15, "learning_rate": 0.00019317356960393156, "loss": 1.2406, "step": 109 }, { "epoch": 0.15, "learning_rate": 0.00019301577730920975, "loss": 1.3236, "step": 110 }, { "epoch": 0.15, "learning_rate": 0.00019285624806628543, "loss": 1.2134, "step": 111 }, { "epoch": 0.15, "learning_rate": 0.00019269498485415895, "loss": 1.4447, "step": 112 }, { "epoch": 0.15, "learning_rate": 0.00019253199068421028, "loss": 1.2898, "step": 113 }, { "epoch": 0.15, "learning_rate": 0.00019236726860014268, "loss": 1.1405, "step": 114 }, { "epoch": 0.15, "learning_rate": 0.00019220082167792607, "loss": 1.0123, "step": 115 }, { "epoch": 0.15, "learning_rate": 0.00019203265302573938, "loss": 1.253, "step": 116 }, { "epoch": 0.16, "learning_rate": 0.00019186276578391265, "loss": 1.3768, "step": 117 }, { "epoch": 0.16, "learning_rate": 0.00019169116312486834, "loss": 1.0697, "step": 118 }, { "epoch": 0.16, "learning_rate": 0.00019151784825306205, "loss": 1.1176, "step": 119 }, { "epoch": 0.16, "learning_rate": 0.00019134282440492272, "loss": 1.2583, "step": 120 }, { "epoch": 0.16, "learning_rate": 0.0001911660948487922, "loss": 1.0209, "step": 121 }, { "epoch": 0.16, "learning_rate": 0.00019098766288486426, "loss": 1.2799, "step": 122 }, { "epoch": 0.16, "learning_rate": 0.00019080753184512282, "loss": 1.1244, "step": 123 }, { "epoch": 0.17, "learning_rate": 0.00019062570509327992, "loss": 1.3889, "step": 124 }, { "epoch": 0.17, "learning_rate": 0.00019044218602471275, "loss": 1.2197, "step": 125 }, { "epoch": 0.17, "learning_rate": 0.00019025697806640033, "loss": 1.1068, "step": 126 }, { "epoch": 0.17, "learning_rate": 0.00019007008467685944, "loss": 1.1628, "step": 127 }, { "epoch": 0.17, "learning_rate": 0.00018988150934608013, "loss": 1.2459, "step": 128 }, { "epoch": 0.17, "learning_rate": 0.00018969125559546054, "loss": 1.5482, "step": 129 }, { "epoch": 0.17, "learning_rate": 0.0001894993269777411, "loss": 1.3619, "step": 130 }, { "epoch": 0.17, "learning_rate": 0.0001893057270769381, "loss": 1.2704, "step": 131 }, { "epoch": 0.18, "learning_rate": 0.00018911045950827693, "loss": 1.4129, "step": 132 }, { "epoch": 0.18, "learning_rate": 0.00018891352791812452, "loss": 1.6589, "step": 133 }, { "epoch": 0.18, "learning_rate": 0.00018871493598392124, "loss": 1.1371, "step": 134 }, { "epoch": 0.18, "learning_rate": 0.0001885146874141121, "loss": 1.0249, "step": 135 }, { "epoch": 0.18, "learning_rate": 0.00018831278594807783, "loss": 1.0886, "step": 136 }, { "epoch": 0.18, "learning_rate": 0.0001881092353560646, "loss": 1.2341, "step": 137 }, { "epoch": 0.18, "learning_rate": 0.000187904039439114, "loss": 1.091, "step": 138 }, { "epoch": 0.19, "learning_rate": 0.00018769720202899194, "loss": 1.1046, "step": 139 }, { "epoch": 0.19, "learning_rate": 0.00018748872698811693, "loss": 1.2981, "step": 140 }, { "epoch": 0.19, "learning_rate": 0.0001872786182094882, "loss": 1.1426, "step": 141 }, { "epoch": 0.19, "learning_rate": 0.0001870668796166129, "loss": 1.3405, "step": 142 }, { "epoch": 0.19, "learning_rate": 0.00018685351516343278, "loss": 1.4744, "step": 143 }, { "epoch": 0.19, "learning_rate": 0.00018663852883425046, "loss": 1.3066, "step": 144 }, { "epoch": 0.19, "learning_rate": 0.00018642192464365497, "loss": 1.3448, "step": 145 }, { "epoch": 0.19, "learning_rate": 0.00018620370663644678, "loss": 1.2116, "step": 146 }, { "epoch": 0.2, "learning_rate": 0.00018598387888756223, "loss": 1.3548, "step": 147 }, { "epoch": 0.2, "learning_rate": 0.00018576244550199758, "loss": 1.3036, "step": 148 }, { "epoch": 0.2, "learning_rate": 0.00018553941061473218, "loss": 1.2543, "step": 149 }, { "epoch": 0.2, "learning_rate": 0.0001853147783906514, "loss": 1.446, "step": 150 }, { "epoch": 0.2, "learning_rate": 0.00018508855302446867, "loss": 1.2109, "step": 151 }, { "epoch": 0.2, "learning_rate": 0.00018486073874064745, "loss": 1.2498, "step": 152 }, { "epoch": 0.2, "learning_rate": 0.000184631339793322, "loss": 1.3629, "step": 153 }, { "epoch": 0.21, "learning_rate": 0.00018440036046621816, "loss": 1.0992, "step": 154 }, { "epoch": 0.21, "learning_rate": 0.00018416780507257333, "loss": 1.204, "step": 155 }, { "epoch": 0.21, "learning_rate": 0.00018393367795505587, "loss": 1.331, "step": 156 }, { "epoch": 0.21, "learning_rate": 0.000183697983485684, "loss": 1.1731, "step": 157 }, { "epoch": 0.21, "learning_rate": 0.0001834607260657443, "loss": 1.3034, "step": 158 }, { "epoch": 0.21, "learning_rate": 0.00018322191012570919, "loss": 1.2737, "step": 159 }, { "epoch": 0.21, "learning_rate": 0.0001829815401251547, "loss": 1.4202, "step": 160 }, { "epoch": 0.21, "learning_rate": 0.0001827396205526767, "loss": 1.2023, "step": 161 }, { "epoch": 0.22, "learning_rate": 0.00018249615592580732, "loss": 1.6523, "step": 162 }, { "epoch": 0.22, "learning_rate": 0.0001822511507909307, "loss": 1.3558, "step": 163 }, { "epoch": 0.22, "learning_rate": 0.00018200460972319786, "loss": 1.2573, "step": 164 }, { "epoch": 0.22, "learning_rate": 0.0001817565373264413, "loss": 1.4447, "step": 165 }, { "epoch": 0.22, "learning_rate": 0.00018150693823308913, "loss": 1.0333, "step": 166 }, { "epoch": 0.22, "learning_rate": 0.00018125581710407864, "loss": 1.2761, "step": 167 }, { "epoch": 0.22, "learning_rate": 0.000181003178628769, "loss": 1.2982, "step": 168 }, { "epoch": 0.23, "learning_rate": 0.0001807490275248539, "loss": 1.2358, "step": 169 }, { "epoch": 0.23, "learning_rate": 0.00018049336853827343, "loss": 1.1648, "step": 170 }, { "epoch": 0.23, "learning_rate": 0.00018023620644312539, "loss": 1.2026, "step": 171 }, { "epoch": 0.23, "learning_rate": 0.00017997754604157605, "loss": 1.1491, "step": 172 }, { "epoch": 0.23, "learning_rate": 0.00017971739216377089, "loss": 1.2765, "step": 173 }, { "epoch": 0.23, "learning_rate": 0.00017945574966774376, "loss": 1.3827, "step": 174 }, { "epoch": 0.23, "learning_rate": 0.00017919262343932678, "loss": 1.2097, "step": 175 }, { "epoch": 0.23, "learning_rate": 0.00017892801839205867, "loss": 1.2611, "step": 176 }, { "epoch": 0.24, "learning_rate": 0.00017866193946709327, "loss": 1.179, "step": 177 }, { "epoch": 0.24, "learning_rate": 0.00017839439163310714, "loss": 1.0867, "step": 178 }, { "epoch": 0.24, "learning_rate": 0.00017812537988620675, "loss": 1.2562, "step": 179 }, { "epoch": 0.24, "learning_rate": 0.00017785490924983525, "loss": 1.0158, "step": 180 }, { "epoch": 0.24, "learning_rate": 0.00017758298477467865, "loss": 1.1524, "step": 181 }, { "epoch": 0.24, "learning_rate": 0.00017730961153857155, "loss": 1.2147, "step": 182 }, { "epoch": 0.24, "learning_rate": 0.00017703479464640216, "loss": 1.2079, "step": 183 }, { "epoch": 0.25, "learning_rate": 0.0001767585392300172, "loss": 1.1908, "step": 184 }, { "epoch": 0.25, "learning_rate": 0.0001764808504481259, "loss": 1.0956, "step": 185 }, { "epoch": 0.25, "learning_rate": 0.00017620173348620368, "loss": 1.2362, "step": 186 }, { "epoch": 0.25, "learning_rate": 0.00017592119355639544, "loss": 1.3215, "step": 187 }, { "epoch": 0.25, "learning_rate": 0.00017563923589741806, "loss": 1.3085, "step": 188 }, { "epoch": 0.25, "learning_rate": 0.00017535586577446276, "loss": 1.2376, "step": 189 }, { "epoch": 0.25, "learning_rate": 0.00017507108847909656, "loss": 1.4742, "step": 190 }, { "epoch": 0.25, "learning_rate": 0.0001747849093291637, "loss": 1.5781, "step": 191 }, { "epoch": 0.26, "learning_rate": 0.00017449733366868618, "loss": 1.345, "step": 192 }, { "epoch": 0.26, "learning_rate": 0.000174208366867764, "loss": 1.2734, "step": 193 }, { "epoch": 0.26, "learning_rate": 0.00017391801432247488, "loss": 1.2547, "step": 194 }, { "epoch": 0.26, "learning_rate": 0.00017362628145477354, "loss": 1.1431, "step": 195 }, { "epoch": 0.26, "learning_rate": 0.00017333317371239044, "loss": 1.1941, "step": 196 }, { "epoch": 0.26, "learning_rate": 0.00017303869656872995, "loss": 1.231, "step": 197 }, { "epoch": 0.26, "learning_rate": 0.00017274285552276828, "loss": 1.2727, "step": 198 }, { "epoch": 0.27, "learning_rate": 0.00017244565609895074, "loss": 1.2107, "step": 199 }, { "epoch": 0.27, "learning_rate": 0.0001721471038470885, "loss": 1.2548, "step": 200 }, { "epoch": 0.27, "learning_rate": 0.00017184720434225518, "loss": 1.2215, "step": 201 }, { "epoch": 0.27, "learning_rate": 0.0001715459631846824, "loss": 1.2097, "step": 202 }, { "epoch": 0.27, "learning_rate": 0.0001712433859996555, "loss": 1.2819, "step": 203 }, { "epoch": 0.27, "learning_rate": 0.0001709394784374084, "loss": 1.157, "step": 204 }, { "epoch": 0.27, "learning_rate": 0.00017063424617301805, "loss": 1.4194, "step": 205 }, { "epoch": 0.27, "learning_rate": 0.0001703276949062985, "loss": 1.4027, "step": 206 }, { "epoch": 0.28, "learning_rate": 0.0001700198303616944, "loss": 1.1552, "step": 207 }, { "epoch": 0.28, "learning_rate": 0.00016971065828817424, "loss": 1.1541, "step": 208 }, { "epoch": 0.28, "learning_rate": 0.00016940018445912272, "loss": 1.228, "step": 209 }, { "epoch": 0.28, "learning_rate": 0.00016908841467223336, "loss": 1.3562, "step": 210 }, { "epoch": 0.28, "learning_rate": 0.00016877535474939988, "loss": 1.2066, "step": 211 }, { "epoch": 0.28, "learning_rate": 0.00016846101053660762, "loss": 1.2524, "step": 212 }, { "epoch": 0.28, "learning_rate": 0.0001681453879038243, "loss": 1.2474, "step": 213 }, { "epoch": 0.29, "learning_rate": 0.00016782849274489054, "loss": 1.4695, "step": 214 }, { "epoch": 0.29, "learning_rate": 0.00016751033097740976, "loss": 1.115, "step": 215 }, { "epoch": 0.29, "learning_rate": 0.00016719090854263753, "loss": 1.0784, "step": 216 }, { "epoch": 0.29, "learning_rate": 0.0001668702314053708, "loss": 1.2242, "step": 217 }, { "epoch": 0.29, "learning_rate": 0.00016654830555383647, "loss": 1.2211, "step": 218 }, { "epoch": 0.29, "learning_rate": 0.00016622513699957948, "loss": 1.3118, "step": 219 }, { "epoch": 0.29, "learning_rate": 0.00016590073177735065, "loss": 1.2726, "step": 220 }, { "epoch": 0.29, "learning_rate": 0.00016557509594499403, "loss": 1.1403, "step": 221 }, { "epoch": 0.3, "learning_rate": 0.00016524823558333363, "loss": 1.3207, "step": 222 }, { "epoch": 0.3, "learning_rate": 0.00016492015679605993, "loss": 1.1014, "step": 223 }, { "epoch": 0.3, "learning_rate": 0.00016459086570961594, "loss": 1.2223, "step": 224 }, { "epoch": 0.3, "learning_rate": 0.00016426036847308286, "loss": 1.1968, "step": 225 }, { "epoch": 0.3, "learning_rate": 0.00016392867125806504, "loss": 1.1946, "step": 226 }, { "epoch": 0.3, "learning_rate": 0.00016359578025857495, "loss": 1.227, "step": 227 }, { "epoch": 0.3, "learning_rate": 0.00016326170169091733, "loss": 1.3381, "step": 228 }, { "epoch": 0.31, "learning_rate": 0.00016292644179357336, "loss": 1.2175, "step": 229 }, { "epoch": 0.31, "learning_rate": 0.00016259000682708385, "loss": 1.2369, "step": 230 }, { "epoch": 0.31, "learning_rate": 0.00016225240307393257, "loss": 1.443, "step": 231 }, { "epoch": 0.31, "learning_rate": 0.00016191363683842883, "loss": 1.2366, "step": 232 }, { "epoch": 0.31, "learning_rate": 0.0001615737144465898, "loss": 1.1747, "step": 233 }, { "epoch": 0.31, "learning_rate": 0.00016123264224602245, "loss": 1.1084, "step": 234 }, { "epoch": 0.31, "learning_rate": 0.00016089042660580468, "loss": 1.142, "step": 235 }, { "epoch": 0.31, "learning_rate": 0.0001605470739163669, "loss": 1.0347, "step": 236 }, { "epoch": 0.32, "learning_rate": 0.00016020259058937227, "loss": 1.2157, "step": 237 }, { "epoch": 0.32, "learning_rate": 0.00015985698305759713, "loss": 1.1908, "step": 238 }, { "epoch": 0.32, "learning_rate": 0.00015951025777481096, "loss": 1.3981, "step": 239 }, { "epoch": 0.32, "learning_rate": 0.0001591624212156558, "loss": 1.1152, "step": 240 }, { "epoch": 0.32, "learning_rate": 0.00015881347987552517, "loss": 1.3025, "step": 241 }, { "epoch": 0.32, "learning_rate": 0.00015846344027044306, "loss": 1.1785, "step": 242 }, { "epoch": 0.32, "learning_rate": 0.00015811230893694213, "loss": 1.232, "step": 243 }, { "epoch": 0.33, "learning_rate": 0.00015776009243194158, "loss": 1.2228, "step": 244 }, { "epoch": 0.33, "learning_rate": 0.0001574067973326248, "loss": 1.2446, "step": 245 }, { "epoch": 0.33, "learning_rate": 0.00015705243023631652, "loss": 1.0289, "step": 246 }, { "epoch": 0.33, "learning_rate": 0.00015669699776035958, "loss": 1.193, "step": 247 }, { "epoch": 0.33, "learning_rate": 0.00015634050654199147, "loss": 1.0522, "step": 248 }, { "epoch": 0.33, "learning_rate": 0.00015598296323822024, "loss": 1.0648, "step": 249 }, { "epoch": 0.33, "learning_rate": 0.0001556243745257003, "loss": 1.3198, "step": 250 }, { "epoch": 0.33, "learning_rate": 0.00015526474710060768, "loss": 1.2633, "step": 251 }, { "epoch": 0.34, "learning_rate": 0.00015490408767851506, "loss": 1.2146, "step": 252 }, { "epoch": 0.34, "learning_rate": 0.00015454240299426627, "loss": 1.005, "step": 253 }, { "epoch": 0.34, "learning_rate": 0.00015417969980185053, "loss": 1.1544, "step": 254 }, { "epoch": 0.34, "learning_rate": 0.0001538159848742765, "loss": 1.114, "step": 255 }, { "epoch": 0.34, "learning_rate": 0.00015345126500344554, "loss": 1.0793, "step": 256 }, { "epoch": 0.34, "learning_rate": 0.0001530855470000251, "loss": 1.2889, "step": 257 }, { "epoch": 0.34, "learning_rate": 0.00015271883769332143, "loss": 1.2251, "step": 258 }, { "epoch": 0.35, "learning_rate": 0.00015235114393115202, "loss": 1.1177, "step": 259 }, { "epoch": 0.35, "learning_rate": 0.00015198247257971787, "loss": 1.2651, "step": 260 }, { "epoch": 0.35, "learning_rate": 0.00015161283052347516, "loss": 0.9811, "step": 261 }, { "epoch": 0.35, "learning_rate": 0.00015124222466500665, "loss": 1.28, "step": 262 }, { "epoch": 0.35, "learning_rate": 0.00015087066192489288, "loss": 1.058, "step": 263 }, { "epoch": 0.35, "learning_rate": 0.00015049814924158298, "loss": 0.8838, "step": 264 }, { "epoch": 0.35, "learning_rate": 0.00015012469357126494, "loss": 1.1265, "step": 265 }, { "epoch": 0.35, "learning_rate": 0.00014975030188773585, "loss": 1.4656, "step": 266 }, { "epoch": 0.36, "learning_rate": 0.00014937498118227155, "loss": 1.3269, "step": 267 }, { "epoch": 0.36, "learning_rate": 0.00014899873846349626, "loss": 1.1389, "step": 268 }, { "epoch": 0.36, "learning_rate": 0.0001486215807572515, "loss": 1.0299, "step": 269 }, { "epoch": 0.36, "learning_rate": 0.00014824351510646507, "loss": 1.1693, "step": 270 }, { "epoch": 0.36, "learning_rate": 0.00014786454857101939, "loss": 1.2748, "step": 271 }, { "epoch": 0.36, "learning_rate": 0.00014748468822761975, "loss": 1.2396, "step": 272 }, { "epoch": 0.36, "learning_rate": 0.00014710394116966205, "loss": 0.9683, "step": 273 }, { "epoch": 0.37, "learning_rate": 0.00014672231450710066, "loss": 1.1646, "step": 274 }, { "epoch": 0.37, "learning_rate": 0.00014633981536631512, "loss": 1.1627, "step": 275 }, { "epoch": 0.37, "learning_rate": 0.00014595645088997757, "loss": 1.1887, "step": 276 }, { "epoch": 0.37, "learning_rate": 0.00014557222823691912, "loss": 1.2117, "step": 277 }, { "epoch": 0.37, "learning_rate": 0.0001451871545819961, "loss": 1.1973, "step": 278 }, { "epoch": 0.37, "learning_rate": 0.00014480123711595636, "loss": 1.2492, "step": 279 }, { "epoch": 0.37, "learning_rate": 0.00014441448304530467, "loss": 1.088, "step": 280 }, { "epoch": 0.37, "learning_rate": 0.00014402689959216845, "loss": 1.3454, "step": 281 }, { "epoch": 0.38, "learning_rate": 0.00014363849399416255, "loss": 1.4685, "step": 282 }, { "epoch": 0.38, "learning_rate": 0.0001432492735042545, "loss": 1.2552, "step": 283 }, { "epoch": 0.38, "learning_rate": 0.00014285924539062878, "loss": 1.2045, "step": 284 }, { "epoch": 0.38, "learning_rate": 0.0001424684169365512, "loss": 1.2046, "step": 285 }, { "epoch": 0.38, "learning_rate": 0.0001420767954402329, "loss": 1.315, "step": 286 }, { "epoch": 0.38, "learning_rate": 0.000141684388214694, "loss": 1.349, "step": 287 }, { "epoch": 0.38, "learning_rate": 0.0001412912025876272, "loss": 1.3101, "step": 288 }, { "epoch": 0.39, "learning_rate": 0.0001408972459012606, "loss": 1.0353, "step": 289 }, { "epoch": 0.39, "learning_rate": 0.00014050252551222115, "loss": 1.1797, "step": 290 }, { "epoch": 0.39, "learning_rate": 0.0001401070487913967, "loss": 1.1902, "step": 291 }, { "epoch": 0.39, "learning_rate": 0.00013971082312379864, "loss": 1.3881, "step": 292 }, { "epoch": 0.39, "learning_rate": 0.00013931385590842412, "loss": 1.2216, "step": 293 }, { "epoch": 0.39, "learning_rate": 0.0001389161545581175, "loss": 1.1883, "step": 294 }, { "epoch": 0.39, "learning_rate": 0.00013851772649943237, "loss": 1.3729, "step": 295 }, { "epoch": 0.39, "learning_rate": 0.00013811857917249253, "loss": 1.1422, "step": 296 }, { "epoch": 0.4, "learning_rate": 0.00013771872003085316, "loss": 1.1766, "step": 297 }, { "epoch": 0.4, "learning_rate": 0.00013731815654136168, "loss": 1.2579, "step": 298 }, { "epoch": 0.4, "learning_rate": 0.00013691689618401835, "loss": 1.1001, "step": 299 }, { "epoch": 0.4, "learning_rate": 0.0001365149464518364, "loss": 1.1681, "step": 300 }, { "epoch": 0.4, "learning_rate": 0.00013611231485070231, "loss": 1.1191, "step": 301 }, { "epoch": 0.4, "learning_rate": 0.00013570900889923565, "loss": 1.2803, "step": 302 }, { "epoch": 0.4, "learning_rate": 0.00013530503612864847, "loss": 1.1689, "step": 303 }, { "epoch": 0.41, "learning_rate": 0.0001349004040826048, "loss": 1.1942, "step": 304 }, { "epoch": 0.41, "learning_rate": 0.00013449512031707986, "loss": 1.6546, "step": 305 }, { "epoch": 0.41, "learning_rate": 0.00013408919240021888, "loss": 1.0284, "step": 306 }, { "epoch": 0.41, "learning_rate": 0.00013368262791219567, "loss": 1.0765, "step": 307 }, { "epoch": 0.41, "learning_rate": 0.00013327543444507134, "loss": 1.3107, "step": 308 }, { "epoch": 0.41, "learning_rate": 0.00013286761960265214, "loss": 1.2215, "step": 309 }, { "epoch": 0.41, "learning_rate": 0.00013245919100034794, "loss": 1.0344, "step": 310 }, { "epoch": 0.41, "learning_rate": 0.00013205015626502957, "loss": 1.0257, "step": 311 }, { "epoch": 0.42, "learning_rate": 0.00013164052303488673, "loss": 1.0787, "step": 312 }, { "epoch": 0.42, "learning_rate": 0.00013123029895928515, "loss": 1.2161, "step": 313 }, { "epoch": 0.42, "learning_rate": 0.00013081949169862396, "loss": 1.0499, "step": 314 }, { "epoch": 0.42, "learning_rate": 0.0001304081089241923, "loss": 1.1542, "step": 315 }, { "epoch": 0.42, "learning_rate": 0.00012999615831802647, "loss": 1.3226, "step": 316 }, { "epoch": 0.42, "learning_rate": 0.00012958364757276614, "loss": 1.2494, "step": 317 }, { "epoch": 0.42, "learning_rate": 0.00012917058439151102, "loss": 1.009, "step": 318 }, { "epoch": 0.43, "learning_rate": 0.00012875697648767663, "loss": 1.0185, "step": 319 }, { "epoch": 0.43, "learning_rate": 0.00012834283158485063, "loss": 1.1762, "step": 320 }, { "epoch": 0.43, "learning_rate": 0.00012792815741664837, "loss": 1.3182, "step": 321 }, { "epoch": 0.43, "learning_rate": 0.00012751296172656862, "loss": 1.0991, "step": 322 }, { "epoch": 0.43, "learning_rate": 0.00012709725226784873, "loss": 0.9753, "step": 323 }, { "epoch": 0.43, "learning_rate": 0.00012668103680332012, "loss": 1.4911, "step": 324 }, { "epoch": 0.43, "learning_rate": 0.0001262643231052632, "loss": 1.0702, "step": 325 }, { "epoch": 0.43, "learning_rate": 0.00012584711895526226, "loss": 1.166, "step": 326 }, { "epoch": 0.44, "learning_rate": 0.0001254294321440601, "loss": 1.1387, "step": 327 }, { "epoch": 0.44, "learning_rate": 0.0001250112704714126, "loss": 1.12, "step": 328 }, { "epoch": 0.44, "learning_rate": 0.00012459264174594304, "loss": 1.2133, "step": 329 }, { "epoch": 0.44, "learning_rate": 0.0001241735537849963, "loss": 1.2153, "step": 330 }, { "epoch": 0.44, "learning_rate": 0.00012375401441449294, "loss": 1.2934, "step": 331 }, { "epoch": 0.44, "learning_rate": 0.000123334031468783, "loss": 1.0571, "step": 332 }, { "epoch": 0.44, "learning_rate": 0.0001229136127904996, "loss": 1.2117, "step": 333 }, { "epoch": 0.45, "learning_rate": 0.00012249276623041267, "loss": 1.1282, "step": 334 }, { "epoch": 0.45, "learning_rate": 0.00012207149964728236, "loss": 1.3015, "step": 335 }, { "epoch": 0.45, "learning_rate": 0.00012164982090771202, "loss": 1.3017, "step": 336 }, { "epoch": 0.45, "learning_rate": 0.00012122773788600163, "loss": 1.3005, "step": 337 }, { "epoch": 0.45, "learning_rate": 0.00012080525846400055, "loss": 1.0432, "step": 338 }, { "epoch": 0.45, "learning_rate": 0.00012038239053096038, "loss": 1.2335, "step": 339 }, { "epoch": 0.45, "learning_rate": 0.00011995914198338776, "loss": 1.0483, "step": 340 }, { "epoch": 0.45, "learning_rate": 0.00011953552072489666, "loss": 1.2439, "step": 341 }, { "epoch": 0.46, "learning_rate": 0.00011911153466606104, "loss": 1.1537, "step": 342 }, { "epoch": 0.46, "learning_rate": 0.00011868719172426703, "loss": 1.2969, "step": 343 }, { "epoch": 0.46, "learning_rate": 0.00011826249982356501, "loss": 1.0627, "step": 344 }, { "epoch": 0.46, "learning_rate": 0.00011783746689452177, "loss": 1.2375, "step": 345 }, { "epoch": 0.46, "learning_rate": 0.0001174121008740724, "loss": 1.1943, "step": 346 }, { "epoch": 0.46, "learning_rate": 0.00011698640970537193, "loss": 1.2505, "step": 347 }, { "epoch": 0.46, "learning_rate": 0.0001165604013376472, "loss": 1.1273, "step": 348 }, { "epoch": 0.47, "learning_rate": 0.00011613408372604825, "loss": 1.2728, "step": 349 }, { "epoch": 0.47, "learning_rate": 0.00011570746483149997, "loss": 1.08, "step": 350 }, { "epoch": 0.47, "learning_rate": 0.00011528055262055317, "loss": 1.2419, "step": 351 }, { "epoch": 0.47, "learning_rate": 0.000114853355065236, "loss": 1.0556, "step": 352 }, { "epoch": 0.47, "learning_rate": 0.00011442588014290512, "loss": 1.1469, "step": 353 }, { "epoch": 0.47, "learning_rate": 0.0001139981358360966, "loss": 1.1669, "step": 354 }, { "epoch": 0.47, "learning_rate": 0.00011357013013237689, "loss": 1.0307, "step": 355 }, { "epoch": 0.47, "learning_rate": 0.00011314187102419374, "loss": 1.1989, "step": 356 }, { "epoch": 0.48, "learning_rate": 0.00011271336650872685, "loss": 1.2265, "step": 357 }, { "epoch": 0.48, "learning_rate": 0.00011228462458773865, "loss": 1.1262, "step": 358 }, { "epoch": 0.48, "learning_rate": 0.00011185565326742473, "loss": 1.0212, "step": 359 }, { "epoch": 0.48, "learning_rate": 0.00011142646055826442, "loss": 1.3271, "step": 360 }, { "epoch": 0.48, "learning_rate": 0.00011099705447487128, "loss": 1.0989, "step": 361 }, { "epoch": 0.48, "learning_rate": 0.00011056744303584321, "loss": 1.0225, "step": 362 }, { "epoch": 0.48, "learning_rate": 0.00011013763426361303, "loss": 1.2502, "step": 363 }, { "epoch": 0.49, "learning_rate": 0.0001097076361842984, "loss": 1.1363, "step": 364 }, { "epoch": 0.49, "learning_rate": 0.00010927745682755201, "loss": 1.1234, "step": 365 }, { "epoch": 0.49, "learning_rate": 0.0001088471042264118, "loss": 1.1054, "step": 366 }, { "epoch": 0.49, "learning_rate": 0.00010841658641715065, "loss": 1.1446, "step": 367 }, { "epoch": 0.49, "learning_rate": 0.0001079859114391266, "loss": 1.0292, "step": 368 }, { "epoch": 0.49, "learning_rate": 0.00010755508733463265, "loss": 1.2314, "step": 369 }, { "epoch": 0.49, "learning_rate": 0.00010712412214874639, "loss": 1.1457, "step": 370 }, { "epoch": 0.49, "learning_rate": 0.00010669302392918005, "loss": 1.2095, "step": 371 }, { "epoch": 0.5, "learning_rate": 0.0001062618007261301, "loss": 1.1903, "step": 372 }, { "epoch": 0.5, "learning_rate": 0.00010583046059212678, "loss": 1.1705, "step": 373 }, { "epoch": 0.5, "learning_rate": 0.00010539901158188398, "loss": 1.0966, "step": 374 }, { "epoch": 0.5, "learning_rate": 0.00010496746175214868, "loss": 1.1144, "step": 375 }, { "epoch": 0.5, "learning_rate": 0.00010453581916155059, "loss": 1.14, "step": 376 }, { "epoch": 0.5, "learning_rate": 0.00010410409187045145, "loss": 1.1596, "step": 377 }, { "epoch": 0.5, "learning_rate": 0.00010367228794079482, "loss": 1.3328, "step": 378 }, { "epoch": 0.51, "learning_rate": 0.00010324041543595535, "loss": 1.1764, "step": 379 }, { "epoch": 0.51, "learning_rate": 0.00010280848242058819, "loss": 1.1665, "step": 380 }, { "epoch": 0.51, "learning_rate": 0.0001023764969604785, "loss": 1.0002, "step": 381 }, { "epoch": 0.51, "learning_rate": 0.00010194446712239076, "loss": 1.1748, "step": 382 }, { "epoch": 0.51, "learning_rate": 0.00010151240097391819, "loss": 1.1904, "step": 383 }, { "epoch": 0.51, "learning_rate": 0.00010108030658333192, "loss": 1.2239, "step": 384 }, { "epoch": 0.51, "learning_rate": 0.00010064819201943066, "loss": 1.0239, "step": 385 }, { "epoch": 0.51, "learning_rate": 0.00010021606535138965, "loss": 1.2411, "step": 386 }, { "epoch": 0.52, "learning_rate": 9.978393464861035e-05, "loss": 1.1617, "step": 387 }, { "epoch": 0.52, "learning_rate": 9.935180798056935e-05, "loss": 1.1307, "step": 388 }, { "epoch": 0.52, "learning_rate": 9.891969341666809e-05, "loss": 1.1023, "step": 389 }, { "epoch": 0.52, "learning_rate": 9.848759902608186e-05, "loss": 1.0405, "step": 390 }, { "epoch": 0.52, "learning_rate": 9.805553287760922e-05, "loss": 1.203, "step": 391 }, { "epoch": 0.52, "learning_rate": 9.76235030395215e-05, "loss": 1.3004, "step": 392 }, { "epoch": 0.52, "learning_rate": 9.719151757941184e-05, "loss": 1.026, "step": 393 }, { "epoch": 0.53, "learning_rate": 9.675958456404467e-05, "loss": 1.2303, "step": 394 }, { "epoch": 0.53, "learning_rate": 9.632771205920519e-05, "loss": 1.0618, "step": 395 }, { "epoch": 0.53, "learning_rate": 9.589590812954857e-05, "loss": 1.0282, "step": 396 }, { "epoch": 0.53, "learning_rate": 9.546418083844943e-05, "loss": 1.3016, "step": 397 }, { "epoch": 0.53, "learning_rate": 9.503253824785133e-05, "loss": 1.2357, "step": 398 }, { "epoch": 0.53, "learning_rate": 9.460098841811601e-05, "loss": 1.0257, "step": 399 }, { "epoch": 0.53, "learning_rate": 9.416953940787324e-05, "loss": 1.4958, "step": 400 }, { "epoch": 0.53, "learning_rate": 9.373819927386995e-05, "loss": 1.1874, "step": 401 }, { "epoch": 0.54, "learning_rate": 9.330697607081994e-05, "loss": 1.0308, "step": 402 }, { "epoch": 0.54, "learning_rate": 9.287587785125364e-05, "loss": 1.2861, "step": 403 }, { "epoch": 0.54, "learning_rate": 9.24449126653674e-05, "loss": 1.2222, "step": 404 }, { "epoch": 0.54, "learning_rate": 9.20140885608734e-05, "loss": 0.9737, "step": 405 }, { "epoch": 0.54, "learning_rate": 9.158341358284939e-05, "loss": 1.0326, "step": 406 }, { "epoch": 0.54, "learning_rate": 9.115289577358825e-05, "loss": 1.1184, "step": 407 }, { "epoch": 0.54, "learning_rate": 9.072254317244801e-05, "loss": 1.2239, "step": 408 }, { "epoch": 0.55, "learning_rate": 9.029236381570161e-05, "loss": 1.0769, "step": 409 }, { "epoch": 0.55, "learning_rate": 8.986236573638696e-05, "loss": 1.1572, "step": 410 }, { "epoch": 0.55, "learning_rate": 8.94325569641568e-05, "loss": 1.3555, "step": 411 }, { "epoch": 0.55, "learning_rate": 8.900294552512877e-05, "loss": 1.1645, "step": 412 }, { "epoch": 0.55, "learning_rate": 8.857353944173559e-05, "loss": 0.9806, "step": 413 }, { "epoch": 0.55, "learning_rate": 8.81443467325753e-05, "loss": 1.1617, "step": 414 }, { "epoch": 0.55, "learning_rate": 8.771537541226139e-05, "loss": 1.2285, "step": 415 }, { "epoch": 0.55, "learning_rate": 8.728663349127314e-05, "loss": 1.0941, "step": 416 }, { "epoch": 0.56, "learning_rate": 8.685812897580629e-05, "loss": 1.0698, "step": 417 }, { "epoch": 0.56, "learning_rate": 8.642986986762315e-05, "loss": 0.9959, "step": 418 }, { "epoch": 0.56, "learning_rate": 8.600186416390342e-05, "loss": 0.9897, "step": 419 }, { "epoch": 0.56, "learning_rate": 8.55741198570949e-05, "loss": 1.0812, "step": 420 }, { "epoch": 0.56, "learning_rate": 8.514664493476401e-05, "loss": 1.1732, "step": 421 }, { "epoch": 0.56, "learning_rate": 8.471944737944686e-05, "loss": 1.1076, "step": 422 }, { "epoch": 0.56, "learning_rate": 8.429253516850005e-05, "loss": 1.1697, "step": 423 }, { "epoch": 0.57, "learning_rate": 8.386591627395173e-05, "loss": 1.1698, "step": 424 }, { "epoch": 0.57, "learning_rate": 8.343959866235283e-05, "loss": 1.0579, "step": 425 }, { "epoch": 0.57, "learning_rate": 8.301359029462809e-05, "loss": 1.1315, "step": 426 }, { "epoch": 0.57, "learning_rate": 8.25878991259276e-05, "loss": 1.2361, "step": 427 }, { "epoch": 0.57, "learning_rate": 8.216253310547824e-05, "loss": 1.0225, "step": 428 }, { "epoch": 0.57, "learning_rate": 8.173750017643504e-05, "loss": 0.9499, "step": 429 }, { "epoch": 0.57, "learning_rate": 8.1312808275733e-05, "loss": 1.168, "step": 430 }, { "epoch": 0.57, "learning_rate": 8.088846533393899e-05, "loss": 1.2618, "step": 431 }, { "epoch": 0.58, "learning_rate": 8.046447927510335e-05, "loss": 1.0638, "step": 432 }, { "epoch": 0.58, "learning_rate": 8.004085801661227e-05, "loss": 1.0386, "step": 433 }, { "epoch": 0.58, "learning_rate": 7.961760946903963e-05, "loss": 0.9807, "step": 434 }, { "epoch": 0.58, "learning_rate": 7.919474153599949e-05, "loss": 1.0623, "step": 435 }, { "epoch": 0.58, "learning_rate": 7.877226211399839e-05, "loss": 1.3777, "step": 436 }, { "epoch": 0.58, "learning_rate": 7.835017909228802e-05, "loss": 1.0209, "step": 437 }, { "epoch": 0.58, "learning_rate": 7.792850035271768e-05, "loss": 1.1633, "step": 438 }, { "epoch": 0.59, "learning_rate": 7.750723376958733e-05, "loss": 1.2194, "step": 439 }, { "epoch": 0.59, "learning_rate": 7.708638720950044e-05, "loss": 1.1162, "step": 440 }, { "epoch": 0.59, "learning_rate": 7.666596853121702e-05, "loss": 1.1514, "step": 441 }, { "epoch": 0.59, "learning_rate": 7.624598558550706e-05, "loss": 1.2077, "step": 442 }, { "epoch": 0.59, "learning_rate": 7.582644621500368e-05, "loss": 1.022, "step": 443 }, { "epoch": 0.59, "learning_rate": 7.540735825405699e-05, "loss": 1.1484, "step": 444 }, { "epoch": 0.59, "learning_rate": 7.498872952858744e-05, "loss": 1.3568, "step": 445 }, { "epoch": 0.59, "learning_rate": 7.45705678559399e-05, "loss": 1.371, "step": 446 }, { "epoch": 0.6, "learning_rate": 7.415288104473774e-05, "loss": 1.1064, "step": 447 }, { "epoch": 0.6, "learning_rate": 7.373567689473683e-05, "loss": 1.0975, "step": 448 }, { "epoch": 0.6, "learning_rate": 7.33189631966799e-05, "loss": 1.1722, "step": 449 }, { "epoch": 0.6, "learning_rate": 7.290274773215132e-05, "loss": 1.0057, "step": 450 }, { "epoch": 0.6, "learning_rate": 7.248703827343142e-05, "loss": 1.1114, "step": 451 }, { "epoch": 0.6, "learning_rate": 7.207184258335162e-05, "loss": 1.1438, "step": 452 }, { "epoch": 0.6, "learning_rate": 7.165716841514939e-05, "loss": 0.8642, "step": 453 }, { "epoch": 0.61, "learning_rate": 7.124302351232336e-05, "loss": 1.1709, "step": 454 }, { "epoch": 0.61, "learning_rate": 7.0829415608489e-05, "loss": 1.1788, "step": 455 }, { "epoch": 0.61, "learning_rate": 7.041635242723385e-05, "loss": 1.172, "step": 456 }, { "epoch": 0.61, "learning_rate": 7.000384168197354e-05, "loss": 1.0772, "step": 457 }, { "epoch": 0.61, "learning_rate": 6.95918910758077e-05, "loss": 1.1322, "step": 458 }, { "epoch": 0.61, "learning_rate": 6.918050830137609e-05, "loss": 1.0205, "step": 459 }, { "epoch": 0.61, "learning_rate": 6.876970104071482e-05, "loss": 1.0369, "step": 460 }, { "epoch": 0.61, "learning_rate": 6.83594769651133e-05, "loss": 1.1918, "step": 461 }, { "epoch": 0.62, "learning_rate": 6.794984373497048e-05, "loss": 1.3306, "step": 462 }, { "epoch": 0.62, "learning_rate": 6.754080899965208e-05, "loss": 1.0853, "step": 463 }, { "epoch": 0.62, "learning_rate": 6.713238039734788e-05, "loss": 1.0349, "step": 464 }, { "epoch": 0.62, "learning_rate": 6.67245655549287e-05, "loss": 1.2025, "step": 465 }, { "epoch": 0.62, "learning_rate": 6.631737208780433e-05, "loss": 1.219, "step": 466 }, { "epoch": 0.62, "learning_rate": 6.591080759978113e-05, "loss": 1.1701, "step": 467 }, { "epoch": 0.62, "learning_rate": 6.550487968292012e-05, "loss": 1.0878, "step": 468 }, { "epoch": 0.63, "learning_rate": 6.509959591739522e-05, "loss": 0.983, "step": 469 }, { "epoch": 0.63, "learning_rate": 6.469496387135158e-05, "loss": 0.9932, "step": 470 }, { "epoch": 0.63, "learning_rate": 6.429099110076435e-05, "loss": 1.3622, "step": 471 }, { "epoch": 0.63, "learning_rate": 6.388768514929768e-05, "loss": 1.1379, "step": 472 }, { "epoch": 0.63, "learning_rate": 6.348505354816364e-05, "loss": 1.1296, "step": 473 }, { "epoch": 0.63, "learning_rate": 6.308310381598168e-05, "loss": 0.87, "step": 474 }, { "epoch": 0.63, "learning_rate": 6.268184345863835e-05, "loss": 1.1147, "step": 475 }, { "epoch": 0.63, "learning_rate": 6.228127996914686e-05, "loss": 1.034, "step": 476 }, { "epoch": 0.64, "learning_rate": 6.18814208275075e-05, "loss": 1.1915, "step": 477 }, { "epoch": 0.64, "learning_rate": 6.148227350056763e-05, "loss": 1.113, "step": 478 }, { "epoch": 0.64, "learning_rate": 6.10838454418825e-05, "loss": 1.3938, "step": 479 }, { "epoch": 0.64, "learning_rate": 6.068614409157591e-05, "loss": 1.3238, "step": 480 }, { "epoch": 0.64, "learning_rate": 6.028917687620138e-05, "loss": 0.9361, "step": 481 }, { "epoch": 0.64, "learning_rate": 5.989295120860334e-05, "loss": 1.0367, "step": 482 }, { "epoch": 0.64, "learning_rate": 5.949747448777889e-05, "loss": 1.1468, "step": 483 }, { "epoch": 0.65, "learning_rate": 5.9102754098739424e-05, "loss": 0.9248, "step": 484 }, { "epoch": 0.65, "learning_rate": 5.870879741237285e-05, "loss": 1.078, "step": 485 }, { "epoch": 0.65, "learning_rate": 5.831561178530602e-05, "loss": 0.8219, "step": 486 }, { "epoch": 0.65, "learning_rate": 5.792320455976714e-05, "loss": 1.014, "step": 487 }, { "epoch": 0.65, "learning_rate": 5.7531583063448816e-05, "loss": 0.9519, "step": 488 }, { "epoch": 0.65, "learning_rate": 5.714075460937125e-05, "loss": 1.1882, "step": 489 }, { "epoch": 0.65, "learning_rate": 5.675072649574551e-05, "loss": 1.0922, "step": 490 }, { "epoch": 0.65, "learning_rate": 5.6361506005837475e-05, "loss": 0.9829, "step": 491 }, { "epoch": 0.66, "learning_rate": 5.597310040783161e-05, "loss": 1.2343, "step": 492 }, { "epoch": 0.66, "learning_rate": 5.5585516954695316e-05, "loss": 0.9906, "step": 493 }, { "epoch": 0.66, "learning_rate": 5.5198762884043666e-05, "loss": 1.1951, "step": 494 }, { "epoch": 0.66, "learning_rate": 5.481284541800391e-05, "loss": 1.1001, "step": 495 }, { "epoch": 0.66, "learning_rate": 5.442777176308089e-05, "loss": 0.9636, "step": 496 }, { "epoch": 0.66, "learning_rate": 5.404354911002243e-05, "loss": 1.2158, "step": 497 }, { "epoch": 0.66, "learning_rate": 5.36601846336849e-05, "loss": 1.0944, "step": 498 }, { "epoch": 0.67, "learning_rate": 5.327768549289934e-05, "loss": 1.1395, "step": 499 }, { "epoch": 0.67, "learning_rate": 5.289605883033792e-05, "loss": 1.0332, "step": 500 }, { "epoch": 0.67, "learning_rate": 5.251531177238029e-05, "loss": 0.9977, "step": 501 }, { "epoch": 0.67, "learning_rate": 5.213545142898061e-05, "loss": 1.2525, "step": 502 }, { "epoch": 0.67, "learning_rate": 5.175648489353493e-05, "loss": 1.3495, "step": 503 }, { "epoch": 0.67, "learning_rate": 5.137841924274851e-05, "loss": 0.9494, "step": 504 }, { "epoch": 0.67, "learning_rate": 5.1001261536503787e-05, "loss": 1.0448, "step": 505 }, { "epoch": 0.67, "learning_rate": 5.06250188177285e-05, "loss": 1.1775, "step": 506 }, { "epoch": 0.68, "learning_rate": 5.024969811226419e-05, "loss": 0.9847, "step": 507 }, { "epoch": 0.68, "learning_rate": 4.9875306428735094e-05, "loss": 1.1366, "step": 508 }, { "epoch": 0.68, "learning_rate": 4.9501850758417056e-05, "loss": 1.1707, "step": 509 }, { "epoch": 0.68, "learning_rate": 4.912933807510713e-05, "loss": 1.3234, "step": 510 }, { "epoch": 0.68, "learning_rate": 4.875777533499339e-05, "loss": 1.1704, "step": 511 }, { "epoch": 0.68, "learning_rate": 4.838716947652485e-05, "loss": 0.9302, "step": 512 }, { "epoch": 0.68, "learning_rate": 4.8017527420282135e-05, "loss": 1.066, "step": 513 }, { "epoch": 0.69, "learning_rate": 4.7648856068848e-05, "loss": 0.9668, "step": 514 }, { "epoch": 0.69, "learning_rate": 4.7281162306678585e-05, "loss": 1.212, "step": 515 }, { "epoch": 0.69, "learning_rate": 4.691445299997491e-05, "loss": 1.1384, "step": 516 }, { "epoch": 0.69, "learning_rate": 4.6548734996554486e-05, "loss": 1.0717, "step": 517 }, { "epoch": 0.69, "learning_rate": 4.618401512572351e-05, "loss": 1.1552, "step": 518 }, { "epoch": 0.69, "learning_rate": 4.582030019814948e-05, "loss": 0.9568, "step": 519 }, { "epoch": 0.69, "learning_rate": 4.545759700573378e-05, "loss": 1.1076, "step": 520 }, { "epoch": 0.69, "learning_rate": 4.509591232148495e-05, "loss": 1.225, "step": 521 }, { "epoch": 0.7, "learning_rate": 4.473525289939233e-05, "loss": 0.9351, "step": 522 }, { "epoch": 0.7, "learning_rate": 4.437562547429971e-05, "loss": 0.8681, "step": 523 }, { "epoch": 0.7, "learning_rate": 4.4017036761779787e-05, "loss": 1.1195, "step": 524 }, { "epoch": 0.7, "learning_rate": 4.365949345800856e-05, "loss": 1.1276, "step": 525 }, { "epoch": 0.7, "learning_rate": 4.330300223964042e-05, "loss": 1.1984, "step": 526 }, { "epoch": 0.7, "learning_rate": 4.2947569763683506e-05, "loss": 1.0645, "step": 527 }, { "epoch": 0.7, "learning_rate": 4.259320266737522e-05, "loss": 1.2742, "step": 528 }, { "epoch": 0.71, "learning_rate": 4.223990756805841e-05, "loss": 1.128, "step": 529 }, { "epoch": 0.71, "learning_rate": 4.188769106305787e-05, "loss": 1.0449, "step": 530 }, { "epoch": 0.71, "learning_rate": 4.1536559729556944e-05, "loss": 1.0549, "step": 531 }, { "epoch": 0.71, "learning_rate": 4.118652012447486e-05, "loss": 1.0476, "step": 532 }, { "epoch": 0.71, "learning_rate": 4.083757878434422e-05, "loss": 1.2761, "step": 533 }, { "epoch": 0.71, "learning_rate": 4.048974222518904e-05, "loss": 1.1075, "step": 534 }, { "epoch": 0.71, "learning_rate": 4.01430169424029e-05, "loss": 1.1601, "step": 535 }, { "epoch": 0.71, "learning_rate": 3.9797409410627794e-05, "loss": 1.1012, "step": 536 }, { "epoch": 0.72, "learning_rate": 3.945292608363313e-05, "loss": 1.1574, "step": 537 }, { "epoch": 0.72, "learning_rate": 3.910957339419533e-05, "loss": 1.0068, "step": 538 }, { "epoch": 0.72, "learning_rate": 3.8767357753977596e-05, "loss": 1.1855, "step": 539 }, { "epoch": 0.72, "learning_rate": 3.842628555341018e-05, "loss": 1.1743, "step": 540 }, { "epoch": 0.72, "learning_rate": 3.808636316157119e-05, "loss": 1.1359, "step": 541 }, { "epoch": 0.72, "learning_rate": 3.7747596926067484e-05, "loss": 0.9914, "step": 542 }, { "epoch": 0.72, "learning_rate": 3.740999317291618e-05, "loss": 1.1649, "step": 543 }, { "epoch": 0.73, "learning_rate": 3.7073558206426675e-05, "loss": 0.9311, "step": 544 }, { "epoch": 0.73, "learning_rate": 3.673829830908266e-05, "loss": 1.0966, "step": 545 }, { "epoch": 0.73, "learning_rate": 3.640421974142508e-05, "loss": 1.3163, "step": 546 }, { "epoch": 0.73, "learning_rate": 3.607132874193499e-05, "loss": 1.06, "step": 547 }, { "epoch": 0.73, "learning_rate": 3.573963152691715e-05, "loss": 1.213, "step": 548 }, { "epoch": 0.73, "learning_rate": 3.540913429038407e-05, "loss": 1.2059, "step": 549 }, { "epoch": 0.73, "learning_rate": 3.507984320394012e-05, "loss": 1.0737, "step": 550 }, { "epoch": 0.73, "learning_rate": 3.47517644166664e-05, "loss": 1.163, "step": 551 }, { "epoch": 0.74, "learning_rate": 3.442490405500598e-05, "loss": 0.9526, "step": 552 }, { "epoch": 0.74, "learning_rate": 3.409926822264937e-05, "loss": 1.2319, "step": 553 }, { "epoch": 0.74, "learning_rate": 3.377486300042054e-05, "loss": 1.1492, "step": 554 }, { "epoch": 0.74, "learning_rate": 3.3451694446163553e-05, "loss": 1.0427, "step": 555 }, { "epoch": 0.74, "learning_rate": 3.3129768594629186e-05, "loss": 0.9605, "step": 556 }, { "epoch": 0.74, "learning_rate": 3.280909145736246e-05, "loss": 1.0737, "step": 557 }, { "epoch": 0.74, "learning_rate": 3.2489669022590244e-05, "loss": 0.9318, "step": 558 }, { "epoch": 0.75, "learning_rate": 3.217150725510946e-05, "loss": 1.1213, "step": 559 }, { "epoch": 0.75, "learning_rate": 3.185461209617571e-05, "loss": 0.8458, "step": 560 }, { "epoch": 0.75, "learning_rate": 3.153898946339241e-05, "loss": 1.1041, "step": 561 }, { "epoch": 0.75, "learning_rate": 3.122464525060013e-05, "loss": 1.0573, "step": 562 }, { "epoch": 0.75, "learning_rate": 3.091158532776666e-05, "loss": 1.0406, "step": 563 }, { "epoch": 0.75, "learning_rate": 3.059981554087732e-05, "loss": 1.2047, "step": 564 }, { "epoch": 0.75, "learning_rate": 3.0289341711825815e-05, "loss": 1.1553, "step": 565 }, { "epoch": 0.75, "learning_rate": 2.998016963830562e-05, "loss": 0.8904, "step": 566 }, { "epoch": 0.76, "learning_rate": 2.9672305093701503e-05, "loss": 1.1387, "step": 567 }, { "epoch": 0.76, "learning_rate": 2.9365753826981947e-05, "loss": 1.1298, "step": 568 }, { "epoch": 0.76, "learning_rate": 2.9060521562591624e-05, "loss": 1.1261, "step": 569 }, { "epoch": 0.76, "learning_rate": 2.875661400034452e-05, "loss": 1.0706, "step": 570 }, { "epoch": 0.76, "learning_rate": 2.8454036815317642e-05, "loss": 1.1102, "step": 571 }, { "epoch": 0.76, "learning_rate": 2.815279565774488e-05, "loss": 0.9629, "step": 572 }, { "epoch": 0.76, "learning_rate": 2.78528961529115e-05, "loss": 1.1748, "step": 573 }, { "epoch": 0.77, "learning_rate": 2.7554343901049294e-05, "loss": 1.0437, "step": 574 }, { "epoch": 0.77, "learning_rate": 2.7257144477231756e-05, "loss": 1.156, "step": 575 }, { "epoch": 0.77, "learning_rate": 2.6961303431270068e-05, "loss": 1.0216, "step": 576 }, { "epoch": 0.77, "learning_rate": 2.666682628760958e-05, "loss": 1.0136, "step": 577 }, { "epoch": 0.77, "learning_rate": 2.6373718545226445e-05, "loss": 1.2247, "step": 578 }, { "epoch": 0.77, "learning_rate": 2.608198567752512e-05, "loss": 1.1009, "step": 579 }, { "epoch": 0.77, "learning_rate": 2.5791633132236025e-05, "loss": 1.0293, "step": 580 }, { "epoch": 0.77, "learning_rate": 2.550266633131382e-05, "loss": 1.2516, "step": 581 }, { "epoch": 0.78, "learning_rate": 2.5215090670836306e-05, "loss": 1.2131, "step": 582 }, { "epoch": 0.78, "learning_rate": 2.4928911520903465e-05, "loss": 0.9738, "step": 583 }, { "epoch": 0.78, "learning_rate": 2.4644134225537264e-05, "loss": 1.0539, "step": 584 }, { "epoch": 0.78, "learning_rate": 2.4360764102581945e-05, "loss": 1.0488, "step": 585 }, { "epoch": 0.78, "learning_rate": 2.4078806443604596e-05, "loss": 1.2307, "step": 586 }, { "epoch": 0.78, "learning_rate": 2.3798266513796317e-05, "loss": 1.0795, "step": 587 }, { "epoch": 0.78, "learning_rate": 2.351914955187412e-05, "loss": 0.9543, "step": 588 }, { "epoch": 0.79, "learning_rate": 2.3241460769982814e-05, "loss": 1.0224, "step": 589 }, { "epoch": 0.79, "learning_rate": 2.2965205353597863e-05, "loss": 1.2327, "step": 590 }, { "epoch": 0.79, "learning_rate": 2.2690388461428468e-05, "loss": 1.0083, "step": 591 }, { "epoch": 0.79, "learning_rate": 2.241701522532136e-05, "loss": 1.0704, "step": 592 }, { "epoch": 0.79, "learning_rate": 2.2145090750164777e-05, "loss": 1.1573, "step": 593 }, { "epoch": 0.79, "learning_rate": 2.1874620113793286e-05, "loss": 0.9994, "step": 594 }, { "epoch": 0.79, "learning_rate": 2.160560836689286e-05, "loss": 1.0597, "step": 595 }, { "epoch": 0.79, "learning_rate": 2.1338060532906736e-05, "loss": 1.2347, "step": 596 }, { "epoch": 0.8, "learning_rate": 2.107198160794136e-05, "loss": 0.9029, "step": 597 }, { "epoch": 0.8, "learning_rate": 2.0807376560673254e-05, "loss": 1.087, "step": 598 }, { "epoch": 0.8, "learning_rate": 2.0544250332256276e-05, "loss": 1.0703, "step": 599 }, { "epoch": 0.8, "learning_rate": 2.028260783622914e-05, "loss": 0.9767, "step": 600 }, { "epoch": 0.8, "learning_rate": 2.0022453958423938e-05, "loss": 1.0907, "step": 601 }, { "epoch": 0.8, "learning_rate": 1.9763793556874655e-05, "loss": 1.0953, "step": 602 }, { "epoch": 0.8, "learning_rate": 1.9506631461726566e-05, "loss": 0.8641, "step": 603 }, { "epoch": 0.81, "learning_rate": 1.925097247514609e-05, "loss": 1.2929, "step": 604 }, { "epoch": 0.81, "learning_rate": 1.8996821371231022e-05, "loss": 0.9507, "step": 605 }, { "epoch": 0.81, "learning_rate": 1.8744182895921368e-05, "loss": 1.3895, "step": 606 }, { "epoch": 0.81, "learning_rate": 1.849306176691088e-05, "loss": 1.1314, "step": 607 }, { "epoch": 0.81, "learning_rate": 1.8243462673558755e-05, "loss": 1.1839, "step": 608 }, { "epoch": 0.81, "learning_rate": 1.799539027680216e-05, "loss": 1.1373, "step": 609 }, { "epoch": 0.81, "learning_rate": 1.7748849209069286e-05, "loss": 1.2087, "step": 610 }, { "epoch": 0.81, "learning_rate": 1.7503844074192653e-05, "loss": 1.0104, "step": 611 }, { "epoch": 0.82, "learning_rate": 1.7260379447323328e-05, "loss": 1.1559, "step": 612 }, { "epoch": 0.82, "learning_rate": 1.7018459874845326e-05, "loss": 1.1308, "step": 613 }, { "epoch": 0.82, "learning_rate": 1.677808987429079e-05, "loss": 1.0858, "step": 614 }, { "epoch": 0.82, "learning_rate": 1.6539273934255727e-05, "loss": 1.0542, "step": 615 }, { "epoch": 0.82, "learning_rate": 1.6302016514315998e-05, "loss": 1.0631, "step": 616 }, { "epoch": 0.82, "learning_rate": 1.6066322044944126e-05, "loss": 1.0598, "step": 617 }, { "epoch": 0.82, "learning_rate": 1.5832194927426668e-05, "loss": 1.2746, "step": 618 }, { "epoch": 0.83, "learning_rate": 1.5599639533781853e-05, "loss": 1.1003, "step": 619 }, { "epoch": 0.83, "learning_rate": 1.5368660206678033e-05, "loss": 1.2666, "step": 620 }, { "epoch": 0.83, "learning_rate": 1.5139261259352589e-05, "loss": 1.2522, "step": 621 }, { "epoch": 0.83, "learning_rate": 1.4911446975531328e-05, "loss": 0.9772, "step": 622 }, { "epoch": 0.83, "learning_rate": 1.4685221609348632e-05, "loss": 1.0075, "step": 623 }, { "epoch": 0.83, "learning_rate": 1.4460589385267842e-05, "loss": 1.005, "step": 624 }, { "epoch": 0.83, "learning_rate": 1.4237554498002425e-05, "loss": 1.1105, "step": 625 }, { "epoch": 0.83, "learning_rate": 1.4016121112437786e-05, "loss": 1.3069, "step": 626 }, { "epoch": 0.84, "learning_rate": 1.379629336355326e-05, "loss": 0.9892, "step": 627 }, { "epoch": 0.84, "learning_rate": 1.3578075356345044e-05, "loss": 1.0631, "step": 628 }, { "epoch": 0.84, "learning_rate": 1.3361471165749562e-05, "loss": 1.0555, "step": 629 }, { "epoch": 0.84, "learning_rate": 1.3146484836567264e-05, "loss": 1.1469, "step": 630 }, { "epoch": 0.84, "learning_rate": 1.2933120383387132e-05, "loss": 1.1518, "step": 631 }, { "epoch": 0.84, "learning_rate": 1.2721381790511832e-05, "loss": 1.1602, "step": 632 }, { "epoch": 0.84, "learning_rate": 1.2511273011883096e-05, "loss": 0.9754, "step": 633 }, { "epoch": 0.85, "learning_rate": 1.2302797971008085e-05, "loss": 1.0898, "step": 634 }, { "epoch": 0.85, "learning_rate": 1.2095960560886e-05, "loss": 1.1445, "step": 635 }, { "epoch": 0.85, "learning_rate": 1.1890764643935393e-05, "loss": 1.2429, "step": 636 }, { "epoch": 0.85, "learning_rate": 1.168721405192218e-05, "loss": 1.1659, "step": 637 }, { "epoch": 0.85, "learning_rate": 1.1485312585887886e-05, "loss": 0.8704, "step": 638 }, { "epoch": 0.85, "learning_rate": 1.1285064016078784e-05, "loss": 1.1806, "step": 639 }, { "epoch": 0.85, "learning_rate": 1.1086472081875487e-05, "loss": 1.1452, "step": 640 }, { "epoch": 0.85, "learning_rate": 1.0889540491723105e-05, "loss": 0.9456, "step": 641 }, { "epoch": 0.86, "learning_rate": 1.0694272923061932e-05, "loss": 1.029, "step": 642 }, { "epoch": 0.86, "learning_rate": 1.0500673022258922e-05, "loss": 1.055, "step": 643 }, { "epoch": 0.86, "learning_rate": 1.030874440453944e-05, "loss": 1.0806, "step": 644 }, { "epoch": 0.86, "learning_rate": 1.0118490653919855e-05, "loss": 0.9064, "step": 645 }, { "epoch": 0.86, "learning_rate": 9.929915323140571e-06, "loss": 0.7556, "step": 646 }, { "epoch": 0.86, "learning_rate": 9.743021933599695e-06, "loss": 1.1699, "step": 647 }, { "epoch": 0.86, "learning_rate": 9.557813975287267e-06, "loss": 1.217, "step": 648 }, { "epoch": 0.87, "learning_rate": 9.374294906720082e-06, "loss": 1.0701, "step": 649 }, { "epoch": 0.87, "learning_rate": 9.192468154877187e-06, "loss": 1.1156, "step": 650 }, { "epoch": 0.87, "learning_rate": 9.012337115135772e-06, "loss": 1.1668, "step": 651 }, { "epoch": 0.87, "learning_rate": 8.833905151207833e-06, "loss": 1.0837, "step": 652 }, { "epoch": 0.87, "learning_rate": 8.657175595077316e-06, "loss": 0.9721, "step": 653 }, { "epoch": 0.87, "learning_rate": 8.482151746937982e-06, "loss": 1.0993, "step": 654 }, { "epoch": 0.87, "learning_rate": 8.308836875131665e-06, "loss": 1.0196, "step": 655 }, { "epoch": 0.87, "learning_rate": 8.137234216087353e-06, "loss": 0.9373, "step": 656 }, { "epoch": 0.88, "learning_rate": 7.967346974260625e-06, "loss": 0.906, "step": 657 }, { "epoch": 0.88, "learning_rate": 7.79917832207394e-06, "loss": 1.1333, "step": 658 }, { "epoch": 0.88, "learning_rate": 7.63273139985733e-06, "loss": 1.0379, "step": 659 }, { "epoch": 0.88, "learning_rate": 7.468009315789748e-06, "loss": 1.2477, "step": 660 }, { "epoch": 0.88, "learning_rate": 7.305015145841054e-06, "loss": 0.9208, "step": 661 }, { "epoch": 0.88, "learning_rate": 7.143751933714582e-06, "loss": 1.15, "step": 662 }, { "epoch": 0.88, "learning_rate": 6.9842226907902766e-06, "loss": 1.2247, "step": 663 }, { "epoch": 0.89, "learning_rate": 6.8264303960684506e-06, "loss": 1.0735, "step": 664 }, { "epoch": 0.89, "learning_rate": 6.67037799611423e-06, "loss": 0.8792, "step": 665 }, { "epoch": 0.89, "learning_rate": 6.51606840500244e-06, "loss": 1.0715, "step": 666 }, { "epoch": 0.89, "learning_rate": 6.3635045042632066e-06, "loss": 0.9893, "step": 667 }, { "epoch": 0.89, "learning_rate": 6.212689142828232e-06, "loss": 1.2205, "step": 668 }, { "epoch": 0.89, "learning_rate": 6.063625136977447e-06, "loss": 1.2093, "step": 669 }, { "epoch": 0.89, "learning_rate": 5.916315270286588e-06, "loss": 1.0735, "step": 670 }, { "epoch": 0.89, "learning_rate": 5.770762293575083e-06, "loss": 1.0884, "step": 671 }, { "epoch": 0.9, "learning_rate": 5.626968924854714e-06, "loss": 1.064, "step": 672 }, { "epoch": 0.9, "learning_rate": 5.4849378492789374e-06, "loss": 1.0008, "step": 673 }, { "epoch": 0.9, "learning_rate": 5.344671719092664e-06, "loss": 1.2432, "step": 674 }, { "epoch": 0.9, "learning_rate": 5.206173153582705e-06, "loss": 0.9597, "step": 675 }, { "epoch": 0.9, "learning_rate": 5.069444739029006e-06, "loss": 0.9729, "step": 676 }, { "epoch": 0.9, "learning_rate": 4.934489028656164e-06, "loss": 0.937, "step": 677 }, { "epoch": 0.9, "learning_rate": 4.801308542585892e-06, "loss": 0.999, "step": 678 }, { "epoch": 0.91, "learning_rate": 4.669905767789884e-06, "loss": 0.9253, "step": 679 }, { "epoch": 0.91, "learning_rate": 4.540283158043391e-06, "loss": 1.0677, "step": 680 }, { "epoch": 0.91, "learning_rate": 4.41244313387944e-06, "loss": 1.1795, "step": 681 }, { "epoch": 0.91, "learning_rate": 4.286388082543569e-06, "loss": 1.0223, "step": 682 }, { "epoch": 0.91, "learning_rate": 4.162120357949284e-06, "loss": 1.0694, "step": 683 }, { "epoch": 0.91, "learning_rate": 4.0396422806341036e-06, "loss": 1.0328, "step": 684 }, { "epoch": 0.91, "learning_rate": 3.918956137716235e-06, "loss": 0.974, "step": 685 }, { "epoch": 0.91, "learning_rate": 3.8000641828518036e-06, "loss": 1.0735, "step": 686 }, { "epoch": 0.92, "learning_rate": 3.682968636192863e-06, "loss": 1.0635, "step": 687 }, { "epoch": 0.92, "learning_rate": 3.567671684345875e-06, "loss": 1.0855, "step": 688 }, { "epoch": 0.92, "learning_rate": 3.454175480330857e-06, "loss": 1.2279, "step": 689 }, { "epoch": 0.92, "learning_rate": 3.342482143541281e-06, "loss": 0.8907, "step": 690 }, { "epoch": 0.92, "learning_rate": 3.23259375970435e-06, "loss": 1.1258, "step": 691 }, { "epoch": 0.92, "learning_rate": 3.124512380842204e-06, "loss": 0.9176, "step": 692 }, { "epoch": 0.92, "learning_rate": 3.018240025233465e-06, "loss": 1.2405, "step": 693 }, { "epoch": 0.93, "learning_rate": 2.9137786773756117e-06, "loss": 1.0521, "step": 694 }, { "epoch": 0.93, "learning_rate": 2.8111302879479717e-06, "loss": 0.9603, "step": 695 }, { "epoch": 0.93, "learning_rate": 2.710296773775167e-06, "loss": 1.0522, "step": 696 }, { "epoch": 0.93, "learning_rate": 2.6112800177914398e-06, "loss": 1.2222, "step": 697 }, { "epoch": 0.93, "learning_rate": 2.514081869005458e-06, "loss": 1.103, "step": 698 }, { "epoch": 0.93, "learning_rate": 2.418704142465722e-06, "loss": 1.1205, "step": 699 }, { "epoch": 0.93, "learning_rate": 2.3251486192267578e-06, "loss": 0.8992, "step": 700 }, { "epoch": 0.93, "learning_rate": 2.233417046315822e-06, "loss": 1.046, "step": 701 }, { "epoch": 0.94, "learning_rate": 2.1435111367002824e-06, "loss": 0.9887, "step": 702 }, { "epoch": 0.94, "learning_rate": 2.055432569255622e-06, "loss": 0.9442, "step": 703 }, { "epoch": 0.94, "learning_rate": 1.9691829887340864e-06, "loss": 0.9405, "step": 704 }, { "epoch": 0.94, "learning_rate": 1.8847640057339965e-06, "loss": 0.9889, "step": 705 }, { "epoch": 0.94, "learning_rate": 1.8021771966696189e-06, "loss": 1.1377, "step": 706 }, { "epoch": 0.94, "learning_rate": 1.721424103741831e-06, "loss": 1.0913, "step": 707 }, { "epoch": 0.94, "learning_rate": 1.6425062349091913e-06, "loss": 0.9902, "step": 708 }, { "epoch": 0.95, "learning_rate": 1.56542506385986e-06, "loss": 1.2601, "step": 709 }, { "epoch": 0.95, "learning_rate": 1.4901820299840997e-06, "loss": 1.0899, "step": 710 }, { "epoch": 0.95, "learning_rate": 1.4167785383472854e-06, "loss": 1.0721, "step": 711 }, { "epoch": 0.95, "learning_rate": 1.345215959663837e-06, "loss": 0.9274, "step": 712 }, { "epoch": 0.95, "learning_rate": 1.2754956302714615e-06, "loss": 1.061, "step": 713 }, { "epoch": 0.95, "learning_rate": 1.2076188521062847e-06, "loss": 1.0961, "step": 714 }, { "epoch": 0.95, "learning_rate": 1.1415868926785256e-06, "loss": 1.2141, "step": 715 }, { "epoch": 0.95, "learning_rate": 1.0774009850488153e-06, "loss": 1.124, "step": 716 }, { "epoch": 0.96, "learning_rate": 1.0150623278051718e-06, "loss": 1.1232, "step": 717 }, { "epoch": 0.96, "learning_rate": 9.545720850406504e-07, "loss": 1.1324, "step": 718 }, { "epoch": 0.96, "learning_rate": 8.959313863315389e-07, "loss": 1.0871, "step": 719 }, { "epoch": 0.96, "learning_rate": 8.391413267163417e-07, "loss": 1.135, "step": 720 }, { "epoch": 0.96, "learning_rate": 7.842029666752626e-07, "loss": 0.9644, "step": 721 }, { "epoch": 0.96, "learning_rate": 7.311173321104647e-07, "loss": 1.3016, "step": 722 }, { "epoch": 0.96, "learning_rate": 6.798854143268641e-07, "loss": 1.2784, "step": 723 }, { "epoch": 0.97, "learning_rate": 6.305081700136328e-07, "loss": 1.0005, "step": 724 }, { "epoch": 0.97, "learning_rate": 5.829865212263474e-07, "loss": 1.0055, "step": 725 }, { "epoch": 0.97, "learning_rate": 5.373213553697576e-07, "loss": 1.0099, "step": 726 }, { "epoch": 0.97, "learning_rate": 4.935135251811996e-07, "loss": 1.1788, "step": 727 }, { "epoch": 0.97, "learning_rate": 4.515638487147311e-07, "loss": 1.1793, "step": 728 }, { "epoch": 0.97, "learning_rate": 4.114731093257884e-07, "loss": 1.136, "step": 729 }, { "epoch": 0.97, "learning_rate": 3.732420556565752e-07, "loss": 1.0169, "step": 730 }, { "epoch": 0.97, "learning_rate": 3.368714016221186e-07, "loss": 1.0661, "step": 731 }, { "epoch": 0.98, "learning_rate": 3.023618263968797e-07, "loss": 1.0488, "step": 732 }, { "epoch": 0.98, "learning_rate": 2.697139744021415e-07, "loss": 1.1705, "step": 733 }, { "epoch": 0.98, "learning_rate": 2.389284552939075e-07, "loss": 1.1367, "step": 734 }, { "epoch": 0.98, "learning_rate": 2.1000584395155509e-07, "loss": 0.8443, "step": 735 }, { "epoch": 0.98, "learning_rate": 1.8294668046708873e-07, "loss": 0.9826, "step": 736 }, { "epoch": 0.98, "learning_rate": 1.577514701350591e-07, "loss": 1.1442, "step": 737 }, { "epoch": 0.98, "learning_rate": 1.3442068344312608e-07, "loss": 1.1088, "step": 738 }, { "epoch": 0.99, "learning_rate": 1.129547560632771e-07, "loss": 0.9073, "step": 739 }, { "epoch": 0.99, "learning_rate": 9.33540888436668e-08, "loss": 1.1139, "step": 740 }, { "epoch": 0.99, "learning_rate": 7.561904780116758e-08, "loss": 0.9704, "step": 741 }, { "epoch": 0.99, "learning_rate": 5.97499641145416e-08, "loss": 0.9565, "step": 742 }, { "epoch": 0.99, "learning_rate": 4.574713411816811e-08, "loss": 1.0933, "step": 743 }, { "epoch": 0.99, "learning_rate": 3.361081929664778e-08, "loss": 1.0756, "step": 744 }, { "epoch": 0.99, "learning_rate": 2.3341246279806607e-08, "loss": 1.0339, "step": 745 }, { "epoch": 0.99, "learning_rate": 1.4938606838510448e-08, "loss": 1.1196, "step": 746 }, { "epoch": 1.0, "learning_rate": 8.403057881067877e-09, "loss": 1.0981, "step": 747 }, { "epoch": 1.0, "learning_rate": 3.734721450343592e-09, "loss": 1.0525, "step": 748 }, { "epoch": 1.0, "learning_rate": 9.336847214269639e-10, "loss": 1.0861, "step": 749 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.1944, "step": 750 }, { "epoch": 1.0, "step": 750, "total_flos": 2.2873055182821786e+17, "train_loss": 1.167799865881602, "train_runtime": 10360.645, "train_samples_per_second": 1.158, "train_steps_per_second": 0.072 } ], "logging_steps": 1.0, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "total_flos": 2.2873055182821786e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }