diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9997153128579279, + "epoch": 0.9997086941553455, "global_step": 302000, "is_hyper_param_search": false, "is_local_process_zero": true, @@ -9,18126 +9,18126 @@ "log_history": [ { "epoch": 0.0, - "learning_rate": 4.9983430431007635e-05, - "loss": 2.8017, + "learning_rate": 4.998344853155372e-05, + "loss": 2.7721, "step": 100 }, { "epoch": 0.0, - "learning_rate": 4.996686086201526e-05, - "loss": 2.8069, + "learning_rate": 4.996689706310744e-05, + "loss": 2.7878, "step": 200 }, { "epoch": 0.0, - "learning_rate": 4.995029129302289e-05, - "loss": 2.8092, + "learning_rate": 4.9950345594661166e-05, + "loss": 2.7908, "step": 300 }, { "epoch": 0.0, - "learning_rate": 4.993372172403052e-05, - "loss": 2.8122, + "learning_rate": 4.993379412621488e-05, + "loss": 2.8032, "step": 400 }, { "epoch": 0.0, - "learning_rate": 4.9917152155038145e-05, - "loss": 2.81, + "learning_rate": 4.99172426577686e-05, + "loss": 2.8096, "step": 500 }, { "epoch": 0.0, - "learning_rate": 4.990069053183531e-05, - "loss": 2.8143, + "learning_rate": 4.9900691189322316e-05, + "loss": 2.7972, "step": 600 }, { "epoch": 0.0, - "learning_rate": 4.988413895380786e-05, - "loss": 2.8392, + "learning_rate": 4.9884139720876035e-05, + "loss": 2.797, "step": 700 }, { "epoch": 0.0, - "learning_rate": 4.986758737578041e-05, - "loss": 2.8283, + "learning_rate": 4.986758825242976e-05, + "loss": 2.8063, "step": 800 }, { "epoch": 0.0, - "learning_rate": 4.985103579775296e-05, - "loss": 2.8453, + "learning_rate": 4.985103678398348e-05, + "loss": 2.8167, "step": 900 }, { "epoch": 0.0, - "learning_rate": 4.983448421972551e-05, - "loss": 2.8275, + "learning_rate": 4.98344853155372e-05, + "loss": 2.8002, "step": 1000 }, { "epoch": 0.0, - "learning_rate": 4.981793264169806e-05, - "loss": 2.833, + "learning_rate": 4.981793384709091e-05, + "loss": 2.8002, "step": 1100 }, { "epoch": 0.0, - "learning_rate": 4.980138106367061e-05, - "loss": 2.834, + "learning_rate": 4.980138237864464e-05, + "loss": 2.8049, "step": 1200 }, { "epoch": 0.0, - "learning_rate": 4.9784829485643166e-05, - "loss": 2.8299, + "learning_rate": 4.9784830910198356e-05, + "loss": 2.8064, "step": 1300 }, { "epoch": 0.0, - "learning_rate": 4.976827790761571e-05, - "loss": 2.8227, + "learning_rate": 4.9768279441752075e-05, + "loss": 2.8052, "step": 1400 }, { "epoch": 0.0, - "learning_rate": 4.975172632958827e-05, - "loss": 2.8306, + "learning_rate": 4.9751727973305794e-05, + "loss": 2.7996, "step": 1500 }, { "epoch": 0.01, - "learning_rate": 4.9735174751560814e-05, - "loss": 2.8427, + "learning_rate": 4.973517650485951e-05, + "loss": 2.807, "step": 1600 }, { "epoch": 0.01, - "learning_rate": 4.9718623173533366e-05, - "loss": 2.8226, + "learning_rate": 4.971862503641324e-05, + "loss": 2.8028, "step": 1700 }, { "epoch": 0.01, - "learning_rate": 4.970207159550592e-05, - "loss": 2.8421, + "learning_rate": 4.970207356796695e-05, + "loss": 2.7891, "step": 1800 }, { "epoch": 0.01, - "learning_rate": 4.968552001747847e-05, - "loss": 2.8378, + "learning_rate": 4.968552209952067e-05, + "loss": 2.7999, "step": 1900 }, { "epoch": 0.01, - "learning_rate": 4.966896843945102e-05, - "loss": 2.8386, + "learning_rate": 4.966897063107439e-05, + "loss": 2.7993, "step": 2000 }, { "epoch": 0.01, - "learning_rate": 4.965241686142357e-05, - "loss": 2.8267, + "learning_rate": 4.965241916262811e-05, + "loss": 2.8064, "step": 2100 }, { "epoch": 0.01, - "learning_rate": 4.9635865283396124e-05, - "loss": 2.8567, + "learning_rate": 4.9635867694181834e-05, + "loss": 2.8121, "step": 2200 }, { "epoch": 0.01, - "learning_rate": 4.961931370536867e-05, - "loss": 2.8344, + "learning_rate": 4.961931622573555e-05, + "loss": 2.8099, "step": 2300 }, { "epoch": 0.01, - "learning_rate": 4.960276212734122e-05, - "loss": 2.8342, + "learning_rate": 4.960276475728927e-05, + "loss": 2.8138, "step": 2400 }, { "epoch": 0.01, - "learning_rate": 4.958621054931377e-05, - "loss": 2.8368, + "learning_rate": 4.9586213288842984e-05, + "loss": 2.8153, "step": 2500 }, { "epoch": 0.01, - "learning_rate": 4.9569658971286324e-05, - "loss": 2.8459, + "learning_rate": 4.956966182039671e-05, + "loss": 2.8168, "step": 2600 }, { "epoch": 0.01, - "learning_rate": 4.9553107393258875e-05, - "loss": 2.8355, + "learning_rate": 4.955311035195043e-05, + "loss": 2.808, "step": 2700 }, { "epoch": 0.01, - "learning_rate": 4.953655581523143e-05, - "loss": 2.8357, + "learning_rate": 4.953655888350415e-05, + "loss": 2.8072, "step": 2800 }, { "epoch": 0.01, - "learning_rate": 4.952000423720398e-05, - "loss": 2.8337, + "learning_rate": 4.9520007415057866e-05, + "loss": 2.8262, "step": 2900 }, { "epoch": 0.01, - "learning_rate": 4.950345265917653e-05, - "loss": 2.8252, + "learning_rate": 4.9503455946611585e-05, + "loss": 2.8076, "step": 3000 }, { "epoch": 0.01, - "learning_rate": 4.948690108114908e-05, - "loss": 2.8353, + "learning_rate": 4.948690447816531e-05, + "loss": 2.8218, "step": 3100 }, { "epoch": 0.01, - "learning_rate": 4.947034950312163e-05, - "loss": 2.8416, + "learning_rate": 4.947035300971902e-05, + "loss": 2.8215, "step": 3200 }, { "epoch": 0.01, - "learning_rate": 4.945379792509418e-05, - "loss": 2.8523, + "learning_rate": 4.945380154127274e-05, + "loss": 2.8039, "step": 3300 }, { "epoch": 0.01, - "learning_rate": 4.943724634706673e-05, - "loss": 2.8434, + "learning_rate": 4.943725007282646e-05, + "loss": 2.8078, "step": 3400 }, { "epoch": 0.01, - "learning_rate": 4.942069476903928e-05, - "loss": 2.8458, + "learning_rate": 4.942069860438018e-05, + "loss": 2.8186, "step": 3500 }, { "epoch": 0.01, - "learning_rate": 4.940414319101183e-05, - "loss": 2.8366, + "learning_rate": 4.9404147135933906e-05, + "loss": 2.8091, "step": 3600 }, { "epoch": 0.01, - "learning_rate": 4.9387591612984384e-05, - "loss": 2.8351, + "learning_rate": 4.9387595667487625e-05, + "loss": 2.7946, "step": 3700 }, { "epoch": 0.01, - "learning_rate": 4.9371040034956936e-05, - "loss": 2.8309, + "learning_rate": 4.937104419904134e-05, + "loss": 2.8129, "step": 3800 }, { "epoch": 0.01, - "learning_rate": 4.935448845692949e-05, - "loss": 2.8493, + "learning_rate": 4.9354492730595056e-05, + "loss": 2.8099, "step": 3900 }, { "epoch": 0.01, - "learning_rate": 4.933793687890203e-05, - "loss": 2.8409, + "learning_rate": 4.933794126214878e-05, + "loss": 2.8073, "step": 4000 }, { "epoch": 0.01, - "learning_rate": 4.932138530087459e-05, - "loss": 2.8533, + "learning_rate": 4.93213897937025e-05, + "loss": 2.8009, "step": 4100 }, { "epoch": 0.01, - "learning_rate": 4.9304833722847136e-05, - "loss": 2.8363, + "learning_rate": 4.930483832525622e-05, + "loss": 2.8141, "step": 4200 }, { "epoch": 0.01, - "learning_rate": 4.9288282144819694e-05, - "loss": 2.8345, + "learning_rate": 4.928828685680994e-05, + "loss": 2.8163, "step": 4300 }, { "epoch": 0.01, - "learning_rate": 4.927173056679224e-05, - "loss": 2.8417, + "learning_rate": 4.927173538836366e-05, + "loss": 2.8123, "step": 4400 }, { "epoch": 0.01, - "learning_rate": 4.925517898876479e-05, - "loss": 2.8521, + "learning_rate": 4.925518391991738e-05, + "loss": 2.8308, "step": 4500 }, { "epoch": 0.02, - "learning_rate": 4.923862741073734e-05, - "loss": 2.8455, + "learning_rate": 4.9238632451471096e-05, + "loss": 2.8078, "step": 4600 }, { "epoch": 0.02, - "learning_rate": 4.9222075832709894e-05, - "loss": 2.8291, + "learning_rate": 4.9222080983024815e-05, + "loss": 2.8176, "step": 4700 }, { "epoch": 0.02, - "learning_rate": 4.9205524254682445e-05, - "loss": 2.8411, + "learning_rate": 4.9205529514578534e-05, + "loss": 2.8254, "step": 4800 }, { "epoch": 0.02, - "learning_rate": 4.918897267665499e-05, - "loss": 2.8452, + "learning_rate": 4.918897804613225e-05, + "loss": 2.8092, "step": 4900 }, { "epoch": 0.02, - "learning_rate": 4.917242109862755e-05, - "loss": 2.8467, + "learning_rate": 4.917242657768598e-05, + "loss": 2.7964, "step": 5000 }, { "epoch": 0.02, - "learning_rate": 4.915586952060009e-05, - "loss": 2.8484, + "learning_rate": 4.91558751092397e-05, + "loss": 2.8133, "step": 5100 }, { "epoch": 0.02, - "learning_rate": 4.913931794257265e-05, - "loss": 2.8335, + "learning_rate": 4.913932364079341e-05, + "loss": 2.814, "step": 5200 }, { "epoch": 0.02, - "learning_rate": 4.9122766364545197e-05, - "loss": 2.8415, + "learning_rate": 4.912277217234713e-05, + "loss": 2.8066, "step": 5300 }, { "epoch": 0.02, - "learning_rate": 4.910621478651775e-05, - "loss": 2.835, + "learning_rate": 4.9106220703900855e-05, + "loss": 2.8021, "step": 5400 }, { "epoch": 0.02, - "learning_rate": 4.90896632084903e-05, - "loss": 2.842, + "learning_rate": 4.9089669235454574e-05, + "loss": 2.8167, "step": 5500 }, { "epoch": 0.02, - "learning_rate": 4.907311163046285e-05, - "loss": 2.8311, + "learning_rate": 4.907311776700829e-05, + "loss": 2.8132, "step": 5600 }, { "epoch": 0.02, - "learning_rate": 4.90565600524354e-05, - "loss": 2.8388, + "learning_rate": 4.905656629856201e-05, + "loss": 2.8201, "step": 5700 }, { "epoch": 0.02, - "learning_rate": 4.904000847440795e-05, - "loss": 2.8548, + "learning_rate": 4.904001483011573e-05, + "loss": 2.8111, "step": 5800 }, { "epoch": 0.02, - "learning_rate": 4.9023456896380506e-05, - "loss": 2.8435, + "learning_rate": 4.902346336166945e-05, + "loss": 2.8159, "step": 5900 }, { "epoch": 0.02, - "learning_rate": 4.900690531835305e-05, - "loss": 2.8327, + "learning_rate": 4.900691189322317e-05, + "loss": 2.809, "step": 6000 }, { "epoch": 0.02, - "learning_rate": 4.899035374032561e-05, - "loss": 2.8308, + "learning_rate": 4.899036042477689e-05, + "loss": 2.809, "step": 6100 }, { "epoch": 0.02, - "learning_rate": 4.8973802162298154e-05, - "loss": 2.8264, + "learning_rate": 4.8973808956330606e-05, + "loss": 2.8167, "step": 6200 }, { "epoch": 0.02, - "learning_rate": 4.8957250584270706e-05, - "loss": 2.8588, + "learning_rate": 4.8957257487884325e-05, + "loss": 2.8271, "step": 6300 }, { "epoch": 0.02, - "learning_rate": 4.894069900624326e-05, - "loss": 2.8451, + "learning_rate": 4.894070601943805e-05, + "loss": 2.8136, "step": 6400 }, { "epoch": 0.02, - "learning_rate": 4.892414742821581e-05, - "loss": 2.8426, + "learning_rate": 4.892415455099177e-05, + "loss": 2.809, "step": 6500 }, { "epoch": 0.02, - "learning_rate": 4.890759585018836e-05, - "loss": 2.8265, + "learning_rate": 4.890760308254548e-05, + "loss": 2.8097, "step": 6600 }, { "epoch": 0.02, - "learning_rate": 4.8891044272160905e-05, - "loss": 2.8479, + "learning_rate": 4.88910516140992e-05, + "loss": 2.7963, "step": 6700 }, { "epoch": 0.02, - "learning_rate": 4.8874492694133464e-05, - "loss": 2.846, + "learning_rate": 4.887450014565293e-05, + "loss": 2.8047, "step": 6800 }, { "epoch": 0.02, - "learning_rate": 4.885794111610601e-05, - "loss": 2.8454, + "learning_rate": 4.8857948677206646e-05, + "loss": 2.8084, "step": 6900 }, { "epoch": 0.02, - "learning_rate": 4.884138953807856e-05, - "loss": 2.8374, + "learning_rate": 4.8841397208760365e-05, + "loss": 2.8134, "step": 7000 }, { "epoch": 0.02, - "learning_rate": 4.882483796005111e-05, - "loss": 2.8363, + "learning_rate": 4.8824845740314084e-05, + "loss": 2.8075, "step": 7100 }, { "epoch": 0.02, - "learning_rate": 4.8808286382023663e-05, - "loss": 2.8428, + "learning_rate": 4.88082942718678e-05, + "loss": 2.7972, "step": 7200 }, { "epoch": 0.02, - "learning_rate": 4.8791734803996215e-05, - "loss": 2.8419, + "learning_rate": 4.879174280342152e-05, + "loss": 2.8139, "step": 7300 }, { "epoch": 0.02, - "learning_rate": 4.8775183225968767e-05, - "loss": 2.8369, + "learning_rate": 4.877519133497524e-05, + "loss": 2.8022, "step": 7400 }, { "epoch": 0.02, - "learning_rate": 4.875863164794132e-05, - "loss": 2.8411, + "learning_rate": 4.875863986652896e-05, + "loss": 2.8181, "step": 7500 }, { "epoch": 0.03, - "learning_rate": 4.874208006991387e-05, - "loss": 2.8427, + "learning_rate": 4.874208839808268e-05, + "loss": 2.8265, "step": 7600 }, { "epoch": 0.03, - "learning_rate": 4.872552849188642e-05, - "loss": 2.846, + "learning_rate": 4.87255369296364e-05, + "loss": 2.8182, "step": 7700 }, { "epoch": 0.03, - "learning_rate": 4.8708976913858966e-05, - "loss": 2.8483, + "learning_rate": 4.8708985461190124e-05, + "loss": 2.8139, "step": 7800 }, { "epoch": 0.03, - "learning_rate": 4.869242533583152e-05, - "loss": 2.8329, + "learning_rate": 4.869243399274384e-05, + "loss": 2.8045, "step": 7900 }, { "epoch": 0.03, - "learning_rate": 4.867587375780407e-05, - "loss": 2.8567, + "learning_rate": 4.8675882524297555e-05, + "loss": 2.7956, "step": 8000 }, { "epoch": 0.03, - "learning_rate": 4.865932217977662e-05, - "loss": 2.8305, + "learning_rate": 4.8659331055851274e-05, + "loss": 2.8211, "step": 8100 }, { "epoch": 0.03, - "learning_rate": 4.864277060174917e-05, - "loss": 2.8468, + "learning_rate": 4.8642779587405e-05, + "loss": 2.8181, "step": 8200 }, { "epoch": 0.03, - "learning_rate": 4.8626219023721724e-05, - "loss": 2.8374, + "learning_rate": 4.862622811895872e-05, + "loss": 2.7978, "step": 8300 }, { "epoch": 0.03, - "learning_rate": 4.8609667445694276e-05, - "loss": 2.8385, + "learning_rate": 4.860967665051244e-05, + "loss": 2.8014, "step": 8400 }, { "epoch": 0.03, - "learning_rate": 4.859311586766683e-05, - "loss": 2.8313, + "learning_rate": 4.8593125182066157e-05, + "loss": 2.8099, "step": 8500 }, { "epoch": 0.03, - "learning_rate": 4.857656428963938e-05, - "loss": 2.8438, + "learning_rate": 4.8576573713619876e-05, + "loss": 2.819, "step": 8600 }, { "epoch": 0.03, - "learning_rate": 4.856001271161193e-05, - "loss": 2.8463, + "learning_rate": 4.8560022245173595e-05, + "loss": 2.8252, "step": 8700 }, { "epoch": 0.03, - "learning_rate": 4.8543461133584475e-05, - "loss": 2.851, + "learning_rate": 4.8543470776727314e-05, + "loss": 2.8039, "step": 8800 }, { "epoch": 0.03, - "learning_rate": 4.8526909555557034e-05, - "loss": 2.8528, + "learning_rate": 4.852691930828103e-05, + "loss": 2.8084, "step": 8900 }, { "epoch": 0.03, - "learning_rate": 4.851035797752958e-05, - "loss": 2.8353, + "learning_rate": 4.851036783983475e-05, + "loss": 2.8251, "step": 9000 }, { "epoch": 0.03, - "learning_rate": 4.849380639950213e-05, - "loss": 2.8469, + "learning_rate": 4.849381637138847e-05, + "loss": 2.8183, "step": 9100 }, { "epoch": 0.03, - "learning_rate": 4.847725482147468e-05, - "loss": 2.8376, + "learning_rate": 4.8477264902942196e-05, + "loss": 2.8023, "step": 9200 }, { "epoch": 0.03, - "learning_rate": 4.8460703243447233e-05, - "loss": 2.8278, + "learning_rate": 4.8460713434495915e-05, + "loss": 2.8056, "step": 9300 }, { "epoch": 0.03, - "learning_rate": 4.8444151665419785e-05, - "loss": 2.8475, + "learning_rate": 4.844416196604963e-05, + "loss": 2.8341, "step": 9400 }, { "epoch": 0.03, - "learning_rate": 4.842760008739233e-05, - "loss": 2.8338, + "learning_rate": 4.8427610497603346e-05, + "loss": 2.8251, "step": 9500 }, { "epoch": 0.03, - "learning_rate": 4.841104850936489e-05, - "loss": 2.8471, + "learning_rate": 4.8411059029157065e-05, + "loss": 2.8124, "step": 9600 }, { "epoch": 0.03, - "learning_rate": 4.839449693133743e-05, - "loss": 2.8453, + "learning_rate": 4.839450756071079e-05, + "loss": 2.8115, "step": 9700 }, { "epoch": 0.03, - "learning_rate": 4.837794535330999e-05, - "loss": 2.8296, + "learning_rate": 4.837795609226451e-05, + "loss": 2.8074, "step": 9800 }, { "epoch": 0.03, - "learning_rate": 4.8361393775282536e-05, - "loss": 2.8461, + "learning_rate": 4.836140462381823e-05, + "loss": 2.8205, "step": 9900 }, { "epoch": 0.03, - "learning_rate": 4.8344842197255095e-05, - "loss": 2.8325, + "learning_rate": 4.834485315537194e-05, + "loss": 2.8196, "step": 10000 }, { "epoch": 0.03, - "learning_rate": 4.832829061922764e-05, - "loss": 2.8487, + "learning_rate": 4.832830168692567e-05, + "loss": 2.8264, "step": 10100 }, { "epoch": 0.03, - "learning_rate": 4.831173904120019e-05, - "loss": 2.8405, + "learning_rate": 4.8311750218479386e-05, + "loss": 2.8211, "step": 10200 }, { "epoch": 0.03, - "learning_rate": 4.829518746317274e-05, - "loss": 2.845, + "learning_rate": 4.8295198750033105e-05, + "loss": 2.8174, "step": 10300 }, { "epoch": 0.03, - "learning_rate": 4.827863588514529e-05, - "loss": 2.8454, + "learning_rate": 4.8278647281586824e-05, + "loss": 2.8217, "step": 10400 }, { "epoch": 0.03, - "learning_rate": 4.8262084307117846e-05, - "loss": 2.8379, + "learning_rate": 4.826209581314054e-05, + "loss": 2.805, "step": 10500 }, { "epoch": 0.04, - "learning_rate": 4.824553272909039e-05, - "loss": 2.8463, + "learning_rate": 4.824554434469427e-05, + "loss": 2.8154, "step": 10600 }, { "epoch": 0.04, - "learning_rate": 4.822898115106295e-05, - "loss": 2.842, + "learning_rate": 4.822899287624798e-05, + "loss": 2.8108, "step": 10700 }, { "epoch": 0.04, - "learning_rate": 4.8212429573035494e-05, - "loss": 2.8485, + "learning_rate": 4.82124414078017e-05, + "loss": 2.8132, "step": 10800 }, { "epoch": 0.04, - "learning_rate": 4.8195877995008045e-05, - "loss": 2.8273, + "learning_rate": 4.819588993935542e-05, + "loss": 2.812, "step": 10900 }, { "epoch": 0.04, - "learning_rate": 4.81793264169806e-05, - "loss": 2.8423, + "learning_rate": 4.817933847090914e-05, + "loss": 2.8159, "step": 11000 }, { "epoch": 0.04, - "learning_rate": 4.816277483895315e-05, - "loss": 2.8474, + "learning_rate": 4.8162787002462864e-05, + "loss": 2.808, "step": 11100 }, { "epoch": 0.04, - "learning_rate": 4.81462232609257e-05, - "loss": 2.8452, + "learning_rate": 4.814623553401658e-05, + "loss": 2.8049, "step": 11200 }, { "epoch": 0.04, - "learning_rate": 4.8129671682898245e-05, - "loss": 2.844, + "learning_rate": 4.81296840655703e-05, + "loss": 2.8226, "step": 11300 }, { "epoch": 0.04, - "learning_rate": 4.8113120104870803e-05, - "loss": 2.8438, + "learning_rate": 4.8113132597124014e-05, + "loss": 2.8259, "step": 11400 }, { "epoch": 0.04, - "learning_rate": 4.809656852684335e-05, - "loss": 2.8309, + "learning_rate": 4.809658112867774e-05, + "loss": 2.8072, "step": 11500 }, { "epoch": 0.04, - "learning_rate": 4.808001694881591e-05, - "loss": 2.836, + "learning_rate": 4.808002966023146e-05, + "loss": 2.8118, "step": 11600 }, { "epoch": 0.04, - "learning_rate": 4.806346537078845e-05, - "loss": 2.8353, + "learning_rate": 4.806347819178518e-05, + "loss": 2.8244, "step": 11700 }, { "epoch": 0.04, - "learning_rate": 4.8046913792761e-05, - "loss": 2.8435, + "learning_rate": 4.8046926723338897e-05, + "loss": 2.8146, "step": 11800 }, { "epoch": 0.04, - "learning_rate": 4.8030362214733555e-05, - "loss": 2.8532, + "learning_rate": 4.8030375254892616e-05, + "loss": 2.8068, "step": 11900 }, { "epoch": 0.04, - "learning_rate": 4.8013810636706106e-05, - "loss": 2.8437, + "learning_rate": 4.801382378644634e-05, + "loss": 2.8115, "step": 12000 }, { "epoch": 0.04, - "learning_rate": 4.799725905867866e-05, - "loss": 2.8334, + "learning_rate": 4.7997272318000053e-05, + "loss": 2.7999, "step": 12100 }, { "epoch": 0.04, - "learning_rate": 4.798070748065121e-05, - "loss": 2.8572, + "learning_rate": 4.798072084955377e-05, + "loss": 2.8037, "step": 12200 }, { "epoch": 0.04, - "learning_rate": 4.796415590262376e-05, - "loss": 2.8463, + "learning_rate": 4.796416938110749e-05, + "loss": 2.8246, "step": 12300 }, { "epoch": 0.04, - "learning_rate": 4.7947604324596306e-05, - "loss": 2.8434, + "learning_rate": 4.794761791266121e-05, + "loss": 2.8262, "step": 12400 }, { "epoch": 0.04, - "learning_rate": 4.793105274656886e-05, - "loss": 2.8448, + "learning_rate": 4.7931066444214936e-05, + "loss": 2.8186, "step": 12500 }, { "epoch": 0.04, - "learning_rate": 4.791450116854141e-05, - "loss": 2.8438, + "learning_rate": 4.7914514975768655e-05, + "loss": 2.8099, "step": 12600 }, { "epoch": 0.04, - "learning_rate": 4.789794959051396e-05, - "loss": 2.8479, + "learning_rate": 4.7897963507322374e-05, + "loss": 2.8106, "step": 12700 }, { "epoch": 0.04, - "learning_rate": 4.788139801248651e-05, - "loss": 2.8309, + "learning_rate": 4.7881412038876086e-05, + "loss": 2.8141, "step": 12800 }, { "epoch": 0.04, - "learning_rate": 4.7864846434459064e-05, - "loss": 2.8307, + "learning_rate": 4.786486057042981e-05, + "loss": 2.8181, "step": 12900 }, { "epoch": 0.04, - "learning_rate": 4.7848294856431616e-05, - "loss": 2.8501, + "learning_rate": 4.784830910198353e-05, + "loss": 2.8083, "step": 13000 }, { "epoch": 0.04, - "learning_rate": 4.783174327840417e-05, - "loss": 2.8497, + "learning_rate": 4.783175763353725e-05, + "loss": 2.8187, "step": 13100 }, { "epoch": 0.04, - "learning_rate": 4.781519170037672e-05, - "loss": 2.8411, + "learning_rate": 4.781520616509097e-05, + "loss": 2.8075, "step": 13200 }, { "epoch": 0.04, - "learning_rate": 4.779864012234927e-05, - "loss": 2.8514, + "learning_rate": 4.779865469664469e-05, + "loss": 2.8215, "step": 13300 }, { "epoch": 0.04, - "learning_rate": 4.7782088544321815e-05, - "loss": 2.8284, + "learning_rate": 4.7782103228198414e-05, + "loss": 2.8186, "step": 13400 }, { "epoch": 0.04, - "learning_rate": 4.776553696629437e-05, - "loss": 2.8343, + "learning_rate": 4.7765551759752126e-05, + "loss": 2.8136, "step": 13500 }, { "epoch": 0.05, - "learning_rate": 4.774898538826692e-05, - "loss": 2.8454, + "learning_rate": 4.7749000291305845e-05, + "loss": 2.8021, "step": 13600 }, { "epoch": 0.05, - "learning_rate": 4.773243381023947e-05, - "loss": 2.8402, + "learning_rate": 4.7732448822859564e-05, + "loss": 2.7984, "step": 13700 }, { "epoch": 0.05, - "learning_rate": 4.771588223221202e-05, - "loss": 2.8503, + "learning_rate": 4.771589735441328e-05, + "loss": 2.829, "step": 13800 }, { "epoch": 0.05, - "learning_rate": 4.769933065418457e-05, - "loss": 2.8372, + "learning_rate": 4.769934588596701e-05, + "loss": 2.82, "step": 13900 }, { "epoch": 0.05, - "learning_rate": 4.7682779076157125e-05, - "loss": 2.8327, + "learning_rate": 4.768279441752073e-05, + "loss": 2.8201, "step": 14000 }, { "epoch": 0.05, - "learning_rate": 4.7666227498129676e-05, - "loss": 2.8345, + "learning_rate": 4.766624294907445e-05, + "loss": 2.842, "step": 14100 }, { "epoch": 0.05, - "learning_rate": 4.764967592010223e-05, - "loss": 2.8484, + "learning_rate": 4.764969148062816e-05, + "loss": 2.8121, "step": 14200 }, { "epoch": 0.05, - "learning_rate": 4.763312434207477e-05, - "loss": 2.8262, + "learning_rate": 4.7633140012181885e-05, + "loss": 2.8175, "step": 14300 }, { "epoch": 0.05, - "learning_rate": 4.761657276404733e-05, - "loss": 2.8329, + "learning_rate": 4.7616588543735604e-05, + "loss": 2.8192, "step": 14400 }, { "epoch": 0.05, - "learning_rate": 4.7600021186019876e-05, - "loss": 2.8381, + "learning_rate": 4.760003707528932e-05, + "loss": 2.8123, "step": 14500 }, { "epoch": 0.05, - "learning_rate": 4.758346960799243e-05, - "loss": 2.8234, + "learning_rate": 4.758348560684304e-05, + "loss": 2.8076, "step": 14600 }, { "epoch": 0.05, - "learning_rate": 4.756691802996498e-05, - "loss": 2.8568, + "learning_rate": 4.756693413839676e-05, + "loss": 2.8153, "step": 14700 }, { "epoch": 0.05, - "learning_rate": 4.755036645193753e-05, - "loss": 2.8474, + "learning_rate": 4.755038266995048e-05, + "loss": 2.8077, "step": 14800 }, { "epoch": 0.05, - "learning_rate": 4.753381487391008e-05, - "loss": 2.8512, + "learning_rate": 4.75338312015042e-05, + "loss": 2.8155, "step": 14900 }, { "epoch": 0.05, - "learning_rate": 4.751726329588263e-05, - "loss": 2.8348, + "learning_rate": 4.751727973305792e-05, + "loss": 2.8092, "step": 15000 }, { "epoch": 0.05, - "learning_rate": 4.7500711717855186e-05, - "loss": 2.8192, + "learning_rate": 4.7500728264611637e-05, + "loss": 2.8259, "step": 15100 }, { "epoch": 0.05, - "learning_rate": 4.748416013982773e-05, - "loss": 2.8394, + "learning_rate": 4.7484176796165355e-05, + "loss": 2.8017, "step": 15200 }, { "epoch": 0.05, - "learning_rate": 4.746760856180029e-05, - "loss": 2.8362, + "learning_rate": 4.746762532771908e-05, + "loss": 2.818, "step": 15300 }, { "epoch": 0.05, - "learning_rate": 4.7451056983772834e-05, - "loss": 2.8483, + "learning_rate": 4.74510738592728e-05, + "loss": 2.8197, "step": 15400 }, { "epoch": 0.05, - "learning_rate": 4.7434505405745385e-05, - "loss": 2.8399, + "learning_rate": 4.743452239082652e-05, + "loss": 2.8164, "step": 15500 }, { "epoch": 0.05, - "learning_rate": 4.741795382771794e-05, - "loss": 2.8468, + "learning_rate": 4.741797092238023e-05, + "loss": 2.8274, "step": 15600 }, { "epoch": 0.05, - "learning_rate": 4.740140224969049e-05, - "loss": 2.848, + "learning_rate": 4.740141945393396e-05, + "loss": 2.8307, "step": 15700 }, { "epoch": 0.05, - "learning_rate": 4.738485067166304e-05, - "loss": 2.8336, + "learning_rate": 4.7384867985487676e-05, + "loss": 2.8148, "step": 15800 }, { "epoch": 0.05, - "learning_rate": 4.7368299093635585e-05, - "loss": 2.8456, + "learning_rate": 4.7368316517041395e-05, + "loss": 2.8272, "step": 15900 }, { "epoch": 0.05, - "learning_rate": 4.735174751560814e-05, - "loss": 2.8644, + "learning_rate": 4.7351765048595114e-05, + "loss": 2.8161, "step": 16000 }, { "epoch": 0.05, - "learning_rate": 4.733519593758069e-05, - "loss": 2.8515, + "learning_rate": 4.733521358014883e-05, + "loss": 2.8145, "step": 16100 }, { "epoch": 0.05, - "learning_rate": 4.7318644359553246e-05, - "loss": 2.8556, + "learning_rate": 4.731866211170255e-05, + "loss": 2.8019, "step": 16200 }, { "epoch": 0.05, - "learning_rate": 4.730209278152579e-05, - "loss": 2.8332, + "learning_rate": 4.730211064325627e-05, + "loss": 2.8244, "step": 16300 }, { "epoch": 0.05, - "learning_rate": 4.728554120349834e-05, - "loss": 2.8434, + "learning_rate": 4.728555917480999e-05, + "loss": 2.8194, "step": 16400 }, { "epoch": 0.05, - "learning_rate": 4.7268989625470894e-05, - "loss": 2.847, + "learning_rate": 4.726900770636371e-05, + "loss": 2.8087, "step": 16500 }, { "epoch": 0.05, - "learning_rate": 4.7252438047443446e-05, - "loss": 2.8424, + "learning_rate": 4.725245623791743e-05, + "loss": 2.8109, "step": 16600 }, { "epoch": 0.06, - "learning_rate": 4.7235886469416e-05, - "loss": 2.8478, + "learning_rate": 4.7235904769471154e-05, + "loss": 2.813, "step": 16700 }, { "epoch": 0.06, - "learning_rate": 4.721933489138854e-05, - "loss": 2.8458, + "learning_rate": 4.721935330102487e-05, + "loss": 2.8144, "step": 16800 }, { "epoch": 0.06, - "learning_rate": 4.72027833133611e-05, - "loss": 2.8461, + "learning_rate": 4.7202801832578585e-05, + "loss": 2.8075, "step": 16900 }, { "epoch": 0.06, - "learning_rate": 4.7186231735333646e-05, - "loss": 2.8354, + "learning_rate": 4.7186250364132304e-05, + "loss": 2.8134, "step": 17000 }, { "epoch": 0.06, - "learning_rate": 4.7169680157306204e-05, - "loss": 2.8274, + "learning_rate": 4.716969889568603e-05, + "loss": 2.7994, "step": 17100 }, { "epoch": 0.06, - "learning_rate": 4.715312857927875e-05, - "loss": 2.8402, + "learning_rate": 4.715314742723975e-05, + "loss": 2.807, "step": 17200 }, { "epoch": 0.06, - "learning_rate": 4.71365770012513e-05, - "loss": 2.828, + "learning_rate": 4.713659595879347e-05, + "loss": 2.8123, "step": 17300 }, { "epoch": 0.06, - "learning_rate": 4.712002542322385e-05, - "loss": 2.8362, + "learning_rate": 4.712004449034719e-05, + "loss": 2.8217, "step": 17400 }, { "epoch": 0.06, - "learning_rate": 4.7103473845196404e-05, - "loss": 2.8453, + "learning_rate": 4.7103493021900906e-05, + "loss": 2.8129, "step": 17500 }, { "epoch": 0.06, - "learning_rate": 4.7086922267168955e-05, - "loss": 2.8359, + "learning_rate": 4.7086941553454625e-05, + "loss": 2.7981, "step": 17600 }, { "epoch": 0.06, - "learning_rate": 4.707037068914151e-05, - "loss": 2.8315, + "learning_rate": 4.7070390085008344e-05, + "loss": 2.8116, "step": 17700 }, { "epoch": 0.06, - "learning_rate": 4.705381911111406e-05, - "loss": 2.8487, + "learning_rate": 4.705383861656206e-05, + "loss": 2.8084, "step": 17800 }, { "epoch": 0.06, - "learning_rate": 4.70372675330866e-05, - "loss": 2.8352, + "learning_rate": 4.703728714811578e-05, + "loss": 2.8146, "step": 17900 }, { "epoch": 0.06, - "learning_rate": 4.7020715955059155e-05, - "loss": 2.8415, + "learning_rate": 4.70207356796695e-05, + "loss": 2.802, "step": 18000 }, { "epoch": 0.06, - "learning_rate": 4.7004164377031706e-05, - "loss": 2.8283, + "learning_rate": 4.7004184211223226e-05, + "loss": 2.8217, "step": 18100 }, { "epoch": 0.06, - "learning_rate": 4.698761279900426e-05, - "loss": 2.8301, + "learning_rate": 4.6987632742776945e-05, + "loss": 2.8007, "step": 18200 }, { "epoch": 0.06, - "learning_rate": 4.697106122097681e-05, - "loss": 2.8645, + "learning_rate": 4.697108127433066e-05, + "loss": 2.8053, "step": 18300 }, { "epoch": 0.06, - "learning_rate": 4.695450964294936e-05, - "loss": 2.8379, + "learning_rate": 4.6954529805884376e-05, + "loss": 2.7995, "step": 18400 }, { "epoch": 0.06, - "learning_rate": 4.693795806492191e-05, - "loss": 2.8355, + "learning_rate": 4.6937978337438095e-05, + "loss": 2.8113, "step": 18500 }, { "epoch": 0.06, - "learning_rate": 4.6921406486894465e-05, - "loss": 2.8392, + "learning_rate": 4.692142686899182e-05, + "loss": 2.822, "step": 18600 }, { "epoch": 0.06, - "learning_rate": 4.6904854908867016e-05, - "loss": 2.8531, + "learning_rate": 4.690487540054554e-05, + "loss": 2.8202, "step": 18700 }, { "epoch": 0.06, - "learning_rate": 4.688830333083957e-05, - "loss": 2.8512, + "learning_rate": 4.688832393209926e-05, + "loss": 2.8018, "step": 18800 }, { "epoch": 0.06, - "learning_rate": 4.687175175281211e-05, - "loss": 2.8457, + "learning_rate": 4.687177246365298e-05, + "loss": 2.8097, "step": 18900 }, { "epoch": 0.06, - "learning_rate": 4.6855200174784664e-05, - "loss": 2.8411, + "learning_rate": 4.68552209952067e-05, + "loss": 2.815, "step": 19000 }, { "epoch": 0.06, - "learning_rate": 4.6838648596757216e-05, - "loss": 2.8249, + "learning_rate": 4.6838669526760416e-05, + "loss": 2.8089, "step": 19100 }, { "epoch": 0.06, - "learning_rate": 4.682209701872977e-05, - "loss": 2.8413, + "learning_rate": 4.6822118058314135e-05, + "loss": 2.8112, "step": 19200 }, { "epoch": 0.06, - "learning_rate": 4.680554544070232e-05, - "loss": 2.849, + "learning_rate": 4.6805566589867854e-05, + "loss": 2.8313, "step": 19300 }, { "epoch": 0.06, - "learning_rate": 4.678899386267487e-05, - "loss": 2.8488, + "learning_rate": 4.678901512142157e-05, + "loss": 2.8209, "step": 19400 }, { "epoch": 0.06, - "learning_rate": 4.677244228464742e-05, - "loss": 2.8438, + "learning_rate": 4.67724636529753e-05, + "loss": 2.8056, "step": 19500 }, { "epoch": 0.06, - "learning_rate": 4.675589070661997e-05, - "loss": 2.8409, + "learning_rate": 4.675591218452902e-05, + "loss": 2.8137, "step": 19600 }, { "epoch": 0.07, - "learning_rate": 4.6739339128592525e-05, - "loss": 2.8498, + "learning_rate": 4.673936071608273e-05, + "loss": 2.8136, "step": 19700 }, { "epoch": 0.07, - "learning_rate": 4.672278755056507e-05, - "loss": 2.8324, + "learning_rate": 4.672280924763645e-05, + "loss": 2.8201, "step": 19800 }, { "epoch": 0.07, - "learning_rate": 4.670623597253763e-05, - "loss": 2.8314, + "learning_rate": 4.670625777919017e-05, + "loss": 2.8131, "step": 19900 }, { "epoch": 0.07, - "learning_rate": 4.668968439451017e-05, - "loss": 2.8377, + "learning_rate": 4.6689706310743894e-05, + "loss": 2.8219, "step": 20000 }, { "epoch": 0.07, - "learning_rate": 4.6673132816482725e-05, - "loss": 2.8327, + "learning_rate": 4.667315484229761e-05, + "loss": 2.8149, "step": 20100 }, { "epoch": 0.07, - "learning_rate": 4.6656581238455277e-05, - "loss": 2.8247, + "learning_rate": 4.665660337385133e-05, + "loss": 2.8027, "step": 20200 }, { "epoch": 0.07, - "learning_rate": 4.664002966042783e-05, - "loss": 2.8412, + "learning_rate": 4.664005190540505e-05, + "loss": 2.8364, "step": 20300 }, { "epoch": 0.07, - "learning_rate": 4.662347808240038e-05, - "loss": 2.846, + "learning_rate": 4.662350043695877e-05, + "loss": 2.8238, "step": 20400 }, { "epoch": 0.07, - "learning_rate": 4.6606926504372925e-05, - "loss": 2.8553, + "learning_rate": 4.660694896851249e-05, + "loss": 2.825, "step": 20500 }, { "epoch": 0.07, - "learning_rate": 4.659037492634548e-05, - "loss": 2.8304, + "learning_rate": 4.659039750006621e-05, + "loss": 2.8069, "step": 20600 }, { "epoch": 0.07, - "learning_rate": 4.657382334831803e-05, - "loss": 2.8341, + "learning_rate": 4.657384603161993e-05, + "loss": 2.8114, "step": 20700 }, { "epoch": 0.07, - "learning_rate": 4.6557271770290586e-05, - "loss": 2.8603, + "learning_rate": 4.6557294563173646e-05, + "loss": 2.8192, "step": 20800 }, { "epoch": 0.07, - "learning_rate": 4.654072019226313e-05, - "loss": 2.8495, + "learning_rate": 4.654074309472737e-05, + "loss": 2.8049, "step": 20900 }, { "epoch": 0.07, - "learning_rate": 4.652416861423568e-05, - "loss": 2.8363, + "learning_rate": 4.652419162628109e-05, + "loss": 2.803, "step": 21000 }, { "epoch": 0.07, - "learning_rate": 4.6507617036208234e-05, - "loss": 2.8464, + "learning_rate": 4.65076401578348e-05, + "loss": 2.8005, "step": 21100 }, { "epoch": 0.07, - "learning_rate": 4.6491065458180786e-05, - "loss": 2.8384, + "learning_rate": 4.649108868938852e-05, + "loss": 2.8187, "step": 21200 }, { "epoch": 0.07, - "learning_rate": 4.647451388015334e-05, - "loss": 2.8409, + "learning_rate": 4.647453722094224e-05, + "loss": 2.8054, "step": 21300 }, { "epoch": 0.07, - "learning_rate": 4.645796230212588e-05, - "loss": 2.8328, + "learning_rate": 4.6457985752495966e-05, + "loss": 2.8148, "step": 21400 }, { "epoch": 0.07, - "learning_rate": 4.644141072409844e-05, - "loss": 2.8301, + "learning_rate": 4.6441434284049685e-05, + "loss": 2.8146, "step": 21500 }, { "epoch": 0.07, - "learning_rate": 4.6424859146070985e-05, - "loss": 2.8482, + "learning_rate": 4.6424882815603404e-05, + "loss": 2.8058, "step": 21600 }, { "epoch": 0.07, - "learning_rate": 4.6408307568043544e-05, - "loss": 2.8456, + "learning_rate": 4.640833134715712e-05, + "loss": 2.8261, "step": 21700 }, { "epoch": 0.07, - "learning_rate": 4.639175599001609e-05, - "loss": 2.8448, + "learning_rate": 4.639177987871084e-05, + "loss": 2.8264, "step": 21800 }, { "epoch": 0.07, - "learning_rate": 4.637520441198864e-05, - "loss": 2.8333, + "learning_rate": 4.637522841026456e-05, + "loss": 2.8053, "step": 21900 }, { "epoch": 0.07, - "learning_rate": 4.635865283396119e-05, - "loss": 2.8446, + "learning_rate": 4.635867694181828e-05, + "loss": 2.8113, "step": 22000 }, { "epoch": 0.07, - "learning_rate": 4.6342101255933743e-05, - "loss": 2.8277, + "learning_rate": 4.6342125473372e-05, + "loss": 2.8248, "step": 22100 }, { "epoch": 0.07, - "learning_rate": 4.6325549677906295e-05, - "loss": 2.8431, + "learning_rate": 4.632557400492572e-05, + "loss": 2.8315, "step": 22200 }, { "epoch": 0.07, - "learning_rate": 4.630899809987884e-05, - "loss": 2.8341, + "learning_rate": 4.6309022536479444e-05, + "loss": 2.8085, "step": 22300 }, { "epoch": 0.07, - "learning_rate": 4.62924465218514e-05, - "loss": 2.8317, + "learning_rate": 4.629247106803316e-05, + "loss": 2.8159, "step": 22400 }, { "epoch": 0.07, - "learning_rate": 4.627589494382394e-05, - "loss": 2.8332, + "learning_rate": 4.6275919599586875e-05, + "loss": 2.8176, "step": 22500 }, { "epoch": 0.07, - "learning_rate": 4.62593433657965e-05, - "loss": 2.8353, + "learning_rate": 4.6259368131140594e-05, + "loss": 2.7987, "step": 22600 }, { "epoch": 0.08, - "learning_rate": 4.6242791787769046e-05, - "loss": 2.8443, + "learning_rate": 4.624281666269431e-05, + "loss": 2.8256, "step": 22700 }, { "epoch": 0.08, - "learning_rate": 4.62262402097416e-05, - "loss": 2.8433, + "learning_rate": 4.622626519424804e-05, + "loss": 2.8213, "step": 22800 }, { "epoch": 0.08, - "learning_rate": 4.620968863171415e-05, - "loss": 2.8517, + "learning_rate": 4.620971372580176e-05, + "loss": 2.8096, "step": 22900 }, { "epoch": 0.08, - "learning_rate": 4.61931370536867e-05, - "loss": 2.8487, + "learning_rate": 4.619316225735548e-05, + "loss": 2.8044, "step": 23000 }, { "epoch": 0.08, - "learning_rate": 4.617658547565925e-05, - "loss": 2.8456, + "learning_rate": 4.6176610788909196e-05, + "loss": 2.8182, "step": 23100 }, { "epoch": 0.08, - "learning_rate": 4.6160033897631804e-05, - "loss": 2.8361, + "learning_rate": 4.6160059320462915e-05, + "loss": 2.8114, "step": 23200 }, { "epoch": 0.08, - "learning_rate": 4.6143482319604356e-05, - "loss": 2.8266, + "learning_rate": 4.6143507852016634e-05, + "loss": 2.8297, "step": 23300 }, { "epoch": 0.08, - "learning_rate": 4.61269307415769e-05, - "loss": 2.8385, + "learning_rate": 4.612695638357035e-05, + "loss": 2.8087, "step": 23400 }, { "epoch": 0.08, - "learning_rate": 4.611037916354945e-05, - "loss": 2.8425, + "learning_rate": 4.611040491512407e-05, + "loss": 2.8071, "step": 23500 }, { "epoch": 0.08, - "learning_rate": 4.6093827585522004e-05, - "loss": 2.8464, + "learning_rate": 4.609385344667779e-05, + "loss": 2.8116, "step": 23600 }, { "epoch": 0.08, - "learning_rate": 4.6077276007494555e-05, - "loss": 2.8285, + "learning_rate": 4.607730197823151e-05, + "loss": 2.8134, "step": 23700 }, { "epoch": 0.08, - "learning_rate": 4.606072442946711e-05, - "loss": 2.8404, + "learning_rate": 4.606075050978523e-05, + "loss": 2.8133, "step": 23800 }, { "epoch": 0.08, - "learning_rate": 4.604417285143966e-05, - "loss": 2.8319, + "learning_rate": 4.604419904133895e-05, + "loss": 2.8104, "step": 23900 }, { "epoch": 0.08, - "learning_rate": 4.602762127341221e-05, - "loss": 2.8452, + "learning_rate": 4.6027647572892667e-05, + "loss": 2.8233, "step": 24000 }, { "epoch": 0.08, - "learning_rate": 4.601106969538476e-05, - "loss": 2.8372, + "learning_rate": 4.6011096104446386e-05, + "loss": 2.8129, "step": 24100 }, { "epoch": 0.08, - "learning_rate": 4.5994518117357313e-05, - "loss": 2.8581, + "learning_rate": 4.599454463600011e-05, + "loss": 2.8178, "step": 24200 }, { "epoch": 0.08, - "learning_rate": 4.5977966539329865e-05, - "loss": 2.8396, + "learning_rate": 4.597799316755383e-05, + "loss": 2.7981, "step": 24300 }, { "epoch": 0.08, - "learning_rate": 4.596141496130241e-05, - "loss": 2.8394, + "learning_rate": 4.596144169910755e-05, + "loss": 2.8125, "step": 24400 }, { "epoch": 0.08, - "learning_rate": 4.594486338327496e-05, - "loss": 2.8439, + "learning_rate": 4.594489023066126e-05, + "loss": 2.8201, "step": 24500 }, { "epoch": 0.08, - "learning_rate": 4.592831180524751e-05, - "loss": 2.8511, + "learning_rate": 4.592833876221499e-05, + "loss": 2.804, "step": 24600 }, { "epoch": 0.08, - "learning_rate": 4.5911760227220065e-05, - "loss": 2.8339, + "learning_rate": 4.5911787293768706e-05, + "loss": 2.8178, "step": 24700 }, { "epoch": 0.08, - "learning_rate": 4.5895208649192616e-05, - "loss": 2.8312, + "learning_rate": 4.5895235825322425e-05, + "loss": 2.8102, "step": 24800 }, { "epoch": 0.08, - "learning_rate": 4.587865707116517e-05, - "loss": 2.8335, + "learning_rate": 4.5878684356876144e-05, + "loss": 2.811, "step": 24900 }, { "epoch": 0.08, - "learning_rate": 4.586210549313772e-05, - "loss": 2.8369, + "learning_rate": 4.586213288842986e-05, + "loss": 2.8142, "step": 25000 }, { "epoch": 0.08, - "learning_rate": 4.5845553915110264e-05, - "loss": 2.8413, + "learning_rate": 4.584558141998358e-05, + "loss": 2.8072, "step": 25100 }, { "epoch": 0.08, - "learning_rate": 4.582900233708282e-05, - "loss": 2.8348, + "learning_rate": 4.58290299515373e-05, + "loss": 2.8098, "step": 25200 }, { "epoch": 0.08, - "learning_rate": 4.581245075905537e-05, - "loss": 2.8417, + "learning_rate": 4.581247848309102e-05, + "loss": 2.8195, "step": 25300 }, { "epoch": 0.08, - "learning_rate": 4.5795899181027926e-05, - "loss": 2.8355, + "learning_rate": 4.579592701464474e-05, + "loss": 2.8112, "step": 25400 }, { "epoch": 0.08, - "learning_rate": 4.577934760300047e-05, - "loss": 2.8334, + "learning_rate": 4.577937554619846e-05, + "loss": 2.822, "step": 25500 }, { "epoch": 0.08, - "learning_rate": 4.576279602497303e-05, - "loss": 2.8465, + "learning_rate": 4.5762824077752184e-05, + "loss": 2.8091, "step": 25600 }, { "epoch": 0.09, - "learning_rate": 4.5746244446945574e-05, - "loss": 2.8228, + "learning_rate": 4.57462726093059e-05, + "loss": 2.8281, "step": 25700 }, { "epoch": 0.09, - "learning_rate": 4.5729692868918126e-05, - "loss": 2.838, + "learning_rate": 4.572972114085962e-05, + "loss": 2.8093, "step": 25800 }, { "epoch": 0.09, - "learning_rate": 4.571314129089068e-05, - "loss": 2.8545, + "learning_rate": 4.5713169672413334e-05, + "loss": 2.8143, "step": 25900 }, { "epoch": 0.09, - "learning_rate": 4.569658971286322e-05, - "loss": 2.8359, + "learning_rate": 4.569661820396706e-05, + "loss": 2.8002, "step": 26000 }, { "epoch": 0.09, - "learning_rate": 4.568003813483578e-05, - "loss": 2.8444, + "learning_rate": 4.568006673552078e-05, + "loss": 2.8091, "step": 26100 }, { "epoch": 0.09, - "learning_rate": 4.5663486556808325e-05, - "loss": 2.8226, + "learning_rate": 4.56635152670745e-05, + "loss": 2.8114, "step": 26200 }, { "epoch": 0.09, - "learning_rate": 4.5646934978780884e-05, - "loss": 2.835, + "learning_rate": 4.564696379862822e-05, + "loss": 2.7986, "step": 26300 }, { "epoch": 0.09, - "learning_rate": 4.563038340075343e-05, - "loss": 2.8345, + "learning_rate": 4.5630412330181936e-05, + "loss": 2.8263, "step": 26400 }, { "epoch": 0.09, - "learning_rate": 4.561383182272598e-05, - "loss": 2.8335, + "learning_rate": 4.5613860861735655e-05, + "loss": 2.7956, "step": 26500 }, { "epoch": 0.09, - "learning_rate": 4.559728024469853e-05, - "loss": 2.8487, + "learning_rate": 4.5597309393289374e-05, + "loss": 2.8136, "step": 26600 }, { "epoch": 0.09, - "learning_rate": 4.558072866667108e-05, - "loss": 2.8383, + "learning_rate": 4.558075792484309e-05, + "loss": 2.8219, "step": 26700 }, { "epoch": 0.09, - "learning_rate": 4.5564177088643635e-05, - "loss": 2.8404, + "learning_rate": 4.556420645639681e-05, + "loss": 2.809, "step": 26800 }, { "epoch": 0.09, - "learning_rate": 4.554762551061618e-05, - "loss": 2.8295, + "learning_rate": 4.554765498795053e-05, + "loss": 2.806, "step": 26900 }, { "epoch": 0.09, - "learning_rate": 4.553107393258874e-05, - "loss": 2.8233, + "learning_rate": 4.5531103519504256e-05, + "loss": 2.8139, "step": 27000 }, { "epoch": 0.09, - "learning_rate": 4.551452235456128e-05, - "loss": 2.8334, + "learning_rate": 4.5514552051057975e-05, + "loss": 2.8167, "step": 27100 }, { "epoch": 0.09, - "learning_rate": 4.549797077653384e-05, - "loss": 2.8476, + "learning_rate": 4.5498000582611694e-05, + "loss": 2.8023, "step": 27200 }, { "epoch": 0.09, - "learning_rate": 4.5481419198506386e-05, - "loss": 2.8307, + "learning_rate": 4.5481449114165407e-05, + "loss": 2.7996, "step": 27300 }, { "epoch": 0.09, - "learning_rate": 4.546486762047894e-05, - "loss": 2.831, + "learning_rate": 4.5464897645719126e-05, + "loss": 2.8053, "step": 27400 }, { "epoch": 0.09, - "learning_rate": 4.544831604245149e-05, - "loss": 2.8349, + "learning_rate": 4.544834617727285e-05, + "loss": 2.8116, "step": 27500 }, { "epoch": 0.09, - "learning_rate": 4.543176446442404e-05, - "loss": 2.8302, + "learning_rate": 4.543179470882657e-05, + "loss": 2.8057, "step": 27600 }, { "epoch": 0.09, - "learning_rate": 4.541521288639659e-05, - "loss": 2.8362, + "learning_rate": 4.541524324038029e-05, + "loss": 2.8028, "step": 27700 }, { "epoch": 0.09, - "learning_rate": 4.5398661308369144e-05, - "loss": 2.8498, + "learning_rate": 4.539869177193401e-05, + "loss": 2.8114, "step": 27800 }, { "epoch": 0.09, - "learning_rate": 4.5382109730341696e-05, - "loss": 2.8462, + "learning_rate": 4.538214030348773e-05, + "loss": 2.8049, "step": 27900 }, { "epoch": 0.09, - "learning_rate": 4.536555815231424e-05, - "loss": 2.838, + "learning_rate": 4.5365588835041446e-05, + "loss": 2.8253, "step": 28000 }, { "epoch": 0.09, - "learning_rate": 4.534900657428679e-05, - "loss": 2.8327, + "learning_rate": 4.5349037366595165e-05, + "loss": 2.8078, "step": 28100 }, { "epoch": 0.09, - "learning_rate": 4.5332454996259344e-05, - "loss": 2.8381, + "learning_rate": 4.5332485898148884e-05, + "loss": 2.8083, "step": 28200 }, { "epoch": 0.09, - "learning_rate": 4.5315903418231895e-05, - "loss": 2.8473, + "learning_rate": 4.53159344297026e-05, + "loss": 2.8228, "step": 28300 }, { "epoch": 0.09, - "learning_rate": 4.529935184020445e-05, - "loss": 2.8458, + "learning_rate": 4.529938296125633e-05, + "loss": 2.8095, "step": 28400 }, { "epoch": 0.09, - "learning_rate": 4.5282800262177e-05, - "loss": 2.8251, + "learning_rate": 4.528283149281005e-05, + "loss": 2.822, "step": 28500 }, { "epoch": 0.09, - "learning_rate": 4.526624868414955e-05, - "loss": 2.8279, + "learning_rate": 4.526628002436377e-05, + "loss": 2.8086, "step": 28600 }, { "epoch": 0.1, - "learning_rate": 4.52496971061221e-05, - "loss": 2.8304, + "learning_rate": 4.524972855591748e-05, + "loss": 2.7954, "step": 28700 }, { "epoch": 0.1, - "learning_rate": 4.523314552809465e-05, - "loss": 2.8272, + "learning_rate": 4.52331770874712e-05, + "loss": 2.8052, "step": 28800 }, { "epoch": 0.1, - "learning_rate": 4.5216593950067205e-05, - "loss": 2.8423, + "learning_rate": 4.5216625619024924e-05, + "loss": 2.8118, "step": 28900 }, { "epoch": 0.1, - "learning_rate": 4.520004237203975e-05, - "loss": 2.8424, + "learning_rate": 4.520007415057864e-05, + "loss": 2.8199, "step": 29000 }, { "epoch": 0.1, - "learning_rate": 4.51834907940123e-05, - "loss": 2.8339, + "learning_rate": 4.518352268213236e-05, + "loss": 2.8096, "step": 29100 }, { "epoch": 0.1, - "learning_rate": 4.516693921598485e-05, - "loss": 2.825, + "learning_rate": 4.516697121368608e-05, + "loss": 2.7978, "step": 29200 }, { "epoch": 0.1, - "learning_rate": 4.5150387637957404e-05, - "loss": 2.8379, + "learning_rate": 4.51504197452398e-05, + "loss": 2.8108, "step": 29300 }, { "epoch": 0.1, - "learning_rate": 4.5133836059929956e-05, - "loss": 2.8339, + "learning_rate": 4.513386827679352e-05, + "loss": 2.8116, "step": 29400 }, { "epoch": 0.1, - "learning_rate": 4.511728448190251e-05, - "loss": 2.8378, + "learning_rate": 4.511731680834724e-05, + "loss": 2.8155, "step": 29500 }, { "epoch": 0.1, - "learning_rate": 4.510073290387506e-05, - "loss": 2.8347, + "learning_rate": 4.510076533990096e-05, + "loss": 2.8031, "step": 29600 }, { "epoch": 0.1, - "learning_rate": 4.508418132584761e-05, - "loss": 2.8452, + "learning_rate": 4.5084213871454676e-05, + "loss": 2.8047, "step": 29700 }, { "epoch": 0.1, - "learning_rate": 4.506762974782016e-05, - "loss": 2.8264, + "learning_rate": 4.50676624030084e-05, + "loss": 2.8153, "step": 29800 }, { "epoch": 0.1, - "learning_rate": 4.505107816979271e-05, - "loss": 2.8351, + "learning_rate": 4.505111093456212e-05, + "loss": 2.8112, "step": 29900 }, { "epoch": 0.1, - "learning_rate": 4.5034526591765266e-05, - "loss": 2.8509, + "learning_rate": 4.503455946611583e-05, + "loss": 2.8158, "step": 30000 }, { "epoch": 0.1, - "learning_rate": 4.501797501373781e-05, - "loss": 2.8331, + "learning_rate": 4.501800799766955e-05, + "loss": 2.825, "step": 30100 }, { "epoch": 0.1, - "learning_rate": 4.500142343571036e-05, - "loss": 2.8249, + "learning_rate": 4.500145652922327e-05, + "loss": 2.8182, "step": 30200 }, { "epoch": 0.1, - "learning_rate": 4.4984871857682914e-05, - "loss": 2.842, + "learning_rate": 4.4984905060776996e-05, + "loss": 2.8059, "step": 30300 }, { "epoch": 0.1, - "learning_rate": 4.4968320279655465e-05, - "loss": 2.8321, + "learning_rate": 4.4968353592330715e-05, + "loss": 2.801, "step": 30400 }, { "epoch": 0.1, - "learning_rate": 4.495176870162802e-05, - "loss": 2.8271, + "learning_rate": 4.4951802123884434e-05, + "loss": 2.8125, "step": 30500 }, { "epoch": 0.1, - "learning_rate": 4.493521712360056e-05, - "loss": 2.8319, + "learning_rate": 4.493525065543815e-05, + "loss": 2.8173, "step": 30600 }, { "epoch": 0.1, - "learning_rate": 4.491866554557312e-05, - "loss": 2.8434, + "learning_rate": 4.491869918699187e-05, + "loss": 2.8212, "step": 30700 }, { "epoch": 0.1, - "learning_rate": 4.4902113967545665e-05, - "loss": 2.8417, + "learning_rate": 4.490214771854559e-05, + "loss": 2.8168, "step": 30800 }, { "epoch": 0.1, - "learning_rate": 4.488556238951822e-05, - "loss": 2.8293, + "learning_rate": 4.488559625009931e-05, + "loss": 2.8093, "step": 30900 }, { "epoch": 0.1, - "learning_rate": 4.486901081149077e-05, - "loss": 2.8392, + "learning_rate": 4.486904478165303e-05, + "loss": 2.7983, "step": 31000 }, { "epoch": 0.1, - "learning_rate": 4.4852459233463326e-05, - "loss": 2.8442, + "learning_rate": 4.485249331320675e-05, + "loss": 2.8148, "step": 31100 }, { "epoch": 0.1, - "learning_rate": 4.483590765543587e-05, - "loss": 2.8381, + "learning_rate": 4.4835941844760474e-05, + "loss": 2.8102, "step": 31200 }, { "epoch": 0.1, - "learning_rate": 4.481935607740842e-05, - "loss": 2.8459, + "learning_rate": 4.481939037631419e-05, + "loss": 2.8075, "step": 31300 }, { "epoch": 0.1, - "learning_rate": 4.4802804499380974e-05, - "loss": 2.8357, + "learning_rate": 4.4802838907867905e-05, + "loss": 2.8192, "step": 31400 }, { "epoch": 0.1, - "learning_rate": 4.478625292135352e-05, - "loss": 2.8515, + "learning_rate": 4.4786287439421624e-05, + "loss": 2.8144, "step": 31500 }, { "epoch": 0.1, - "learning_rate": 4.476970134332608e-05, - "loss": 2.8293, + "learning_rate": 4.476973597097534e-05, + "loss": 2.828, "step": 31600 }, { "epoch": 0.1, - "learning_rate": 4.475314976529862e-05, - "loss": 2.8419, + "learning_rate": 4.475318450252907e-05, + "loss": 2.8107, "step": 31700 }, { "epoch": 0.11, - "learning_rate": 4.473659818727118e-05, - "loss": 2.8548, + "learning_rate": 4.473663303408279e-05, + "loss": 2.8072, "step": 31800 }, { "epoch": 0.11, - "learning_rate": 4.4720046609243726e-05, - "loss": 2.8341, + "learning_rate": 4.472008156563651e-05, + "loss": 2.8178, "step": 31900 }, { "epoch": 0.11, - "learning_rate": 4.470349503121628e-05, - "loss": 2.8378, + "learning_rate": 4.4703530097190226e-05, + "loss": 2.82, "step": 32000 }, { "epoch": 0.11, - "learning_rate": 4.468694345318883e-05, - "loss": 2.836, + "learning_rate": 4.4686978628743945e-05, + "loss": 2.8096, "step": 32100 }, { "epoch": 0.11, - "learning_rate": 4.467039187516138e-05, - "loss": 2.8353, + "learning_rate": 4.4670427160297664e-05, + "loss": 2.8131, "step": 32200 }, { "epoch": 0.11, - "learning_rate": 4.465384029713393e-05, - "loss": 2.8336, + "learning_rate": 4.465387569185138e-05, + "loss": 2.8138, "step": 32300 }, { "epoch": 0.11, - "learning_rate": 4.463728871910648e-05, - "loss": 2.8326, + "learning_rate": 4.46373242234051e-05, + "loss": 2.8009, "step": 32400 }, { "epoch": 0.11, - "learning_rate": 4.4620737141079035e-05, - "loss": 2.8353, + "learning_rate": 4.462077275495882e-05, + "loss": 2.828, "step": 32500 }, { "epoch": 0.11, - "learning_rate": 4.460418556305158e-05, - "loss": 2.8394, + "learning_rate": 4.460422128651254e-05, + "loss": 2.8212, "step": 32600 }, { "epoch": 0.11, - "learning_rate": 4.458763398502414e-05, - "loss": 2.8447, + "learning_rate": 4.4587669818066266e-05, + "loss": 2.81, "step": 32700 }, { "epoch": 0.11, - "learning_rate": 4.457108240699668e-05, - "loss": 2.8372, + "learning_rate": 4.457111834961998e-05, + "loss": 2.819, "step": 32800 }, { "epoch": 0.11, - "learning_rate": 4.4554530828969235e-05, - "loss": 2.8416, + "learning_rate": 4.45545668811737e-05, + "loss": 2.8013, "step": 32900 }, { "epoch": 0.11, - "learning_rate": 4.4537979250941787e-05, - "loss": 2.8288, + "learning_rate": 4.4538015412727416e-05, + "loss": 2.8188, "step": 33000 }, { "epoch": 0.11, - "learning_rate": 4.452142767291434e-05, - "loss": 2.8489, + "learning_rate": 4.452146394428114e-05, + "loss": 2.8181, "step": 33100 }, { "epoch": 0.11, - "learning_rate": 4.450487609488689e-05, - "loss": 2.8383, + "learning_rate": 4.450491247583486e-05, + "loss": 2.8019, "step": 33200 }, { "epoch": 0.11, - "learning_rate": 4.448832451685944e-05, - "loss": 2.8324, + "learning_rate": 4.448836100738858e-05, + "loss": 2.8019, "step": 33300 }, { "epoch": 0.11, - "learning_rate": 4.447177293883199e-05, - "loss": 2.8504, + "learning_rate": 4.44718095389423e-05, + "loss": 2.8125, "step": 33400 }, { "epoch": 0.11, - "learning_rate": 4.445522136080454e-05, - "loss": 2.8377, + "learning_rate": 4.445525807049602e-05, + "loss": 2.7891, "step": 33500 }, { "epoch": 0.11, - "learning_rate": 4.443866978277709e-05, - "loss": 2.8291, + "learning_rate": 4.4438706602049736e-05, + "loss": 2.8158, "step": 33600 }, { "epoch": 0.11, - "learning_rate": 4.442211820474964e-05, - "loss": 2.8317, + "learning_rate": 4.4422155133603455e-05, + "loss": 2.8035, "step": 33700 }, { "epoch": 0.11, - "learning_rate": 4.440556662672219e-05, - "loss": 2.8416, + "learning_rate": 4.4405603665157174e-05, + "loss": 2.8154, "step": 33800 }, { "epoch": 0.11, - "learning_rate": 4.4389015048694744e-05, - "loss": 2.8395, + "learning_rate": 4.438905219671089e-05, + "loss": 2.8074, "step": 33900 }, { "epoch": 0.11, - "learning_rate": 4.4372463470667296e-05, - "loss": 2.8322, + "learning_rate": 4.437250072826461e-05, + "loss": 2.8071, "step": 34000 }, { "epoch": 0.11, - "learning_rate": 4.435591189263985e-05, - "loss": 2.833, + "learning_rate": 4.435594925981834e-05, + "loss": 2.8131, "step": 34100 }, { "epoch": 0.11, - "learning_rate": 4.43393603146124e-05, - "loss": 2.8268, + "learning_rate": 4.433939779137205e-05, + "loss": 2.8091, "step": 34200 }, { "epoch": 0.11, - "learning_rate": 4.432280873658495e-05, - "loss": 2.8369, + "learning_rate": 4.432284632292577e-05, + "loss": 2.8209, "step": 34300 }, { "epoch": 0.11, - "learning_rate": 4.43062571585575e-05, - "loss": 2.8451, + "learning_rate": 4.430629485447949e-05, + "loss": 2.8109, "step": 34400 }, { "epoch": 0.11, - "learning_rate": 4.428970558053005e-05, - "loss": 2.8391, + "learning_rate": 4.4289743386033214e-05, + "loss": 2.8085, "step": 34500 }, { "epoch": 0.11, - "learning_rate": 4.42731540025026e-05, - "loss": 2.8308, + "learning_rate": 4.427319191758693e-05, + "loss": 2.8065, "step": 34600 }, { "epoch": 0.11, - "learning_rate": 4.425660242447515e-05, - "loss": 2.8327, + "learning_rate": 4.425664044914065e-05, + "loss": 2.8186, "step": 34700 }, { "epoch": 0.12, - "learning_rate": 4.42400508464477e-05, - "loss": 2.8227, + "learning_rate": 4.424008898069437e-05, + "loss": 2.8115, "step": 34800 }, { "epoch": 0.12, - "learning_rate": 4.422349926842025e-05, - "loss": 2.8327, + "learning_rate": 4.422353751224809e-05, + "loss": 2.8165, "step": 34900 }, { "epoch": 0.12, - "learning_rate": 4.4206947690392805e-05, - "loss": 2.8314, + "learning_rate": 4.420698604380181e-05, + "loss": 2.8051, "step": 35000 }, { "epoch": 0.12, - "learning_rate": 4.4190396112365357e-05, - "loss": 2.8425, + "learning_rate": 4.419043457535553e-05, + "loss": 2.8182, "step": 35100 }, { "epoch": 0.12, - "learning_rate": 4.417384453433791e-05, - "loss": 2.8403, + "learning_rate": 4.417388310690925e-05, + "loss": 2.829, "step": 35200 }, { "epoch": 0.12, - "learning_rate": 4.415729295631046e-05, - "loss": 2.8463, + "learning_rate": 4.4157331638462966e-05, + "loss": 2.8209, "step": 35300 }, { "epoch": 0.12, - "learning_rate": 4.4140741378283005e-05, - "loss": 2.8329, + "learning_rate": 4.4140780170016685e-05, + "loss": 2.8115, "step": 35400 }, { "epoch": 0.12, - "learning_rate": 4.412418980025556e-05, - "loss": 2.8273, + "learning_rate": 4.412422870157041e-05, + "loss": 2.8039, "step": 35500 }, { "epoch": 0.12, - "learning_rate": 4.410763822222811e-05, - "loss": 2.8213, + "learning_rate": 4.410767723312412e-05, + "loss": 2.8209, "step": 35600 }, { "epoch": 0.12, - "learning_rate": 4.409108664420066e-05, - "loss": 2.8446, + "learning_rate": 4.409112576467784e-05, + "loss": 2.8151, "step": 35700 }, { "epoch": 0.12, - "learning_rate": 4.407453506617321e-05, - "loss": 2.8299, + "learning_rate": 4.407457429623156e-05, + "loss": 2.8072, "step": 35800 }, { "epoch": 0.12, - "learning_rate": 4.405798348814576e-05, - "loss": 2.8137, + "learning_rate": 4.4058022827785287e-05, + "loss": 2.8079, "step": 35900 }, { "epoch": 0.12, - "learning_rate": 4.4041431910118314e-05, - "loss": 2.8412, + "learning_rate": 4.4041471359339005e-05, + "loss": 2.8155, "step": 36000 }, { "epoch": 0.12, - "learning_rate": 4.402488033209086e-05, - "loss": 2.8399, + "learning_rate": 4.4024919890892724e-05, + "loss": 2.827, "step": 36100 }, { "epoch": 0.12, - "learning_rate": 4.400832875406342e-05, - "loss": 2.8283, + "learning_rate": 4.4008368422446443e-05, + "loss": 2.8087, "step": 36200 }, { "epoch": 0.12, - "learning_rate": 4.399177717603596e-05, - "loss": 2.8321, + "learning_rate": 4.3991816954000156e-05, + "loss": 2.8112, "step": 36300 }, { "epoch": 0.12, - "learning_rate": 4.397522559800852e-05, - "loss": 2.8226, + "learning_rate": 4.397526548555388e-05, + "loss": 2.8066, "step": 36400 }, { "epoch": 0.12, - "learning_rate": 4.3958674019981065e-05, - "loss": 2.8405, + "learning_rate": 4.39587140171076e-05, + "loss": 2.7995, "step": 36500 }, { "epoch": 0.12, - "learning_rate": 4.394212244195362e-05, - "loss": 2.8338, + "learning_rate": 4.394216254866132e-05, + "loss": 2.8124, "step": 36600 }, { "epoch": 0.12, - "learning_rate": 4.392557086392617e-05, - "loss": 2.8359, + "learning_rate": 4.392561108021504e-05, + "loss": 2.8112, "step": 36700 }, { "epoch": 0.12, - "learning_rate": 4.390901928589872e-05, - "loss": 2.8252, + "learning_rate": 4.390905961176876e-05, + "loss": 2.8205, "step": 36800 }, { "epoch": 0.12, - "learning_rate": 4.389246770787127e-05, - "loss": 2.8263, + "learning_rate": 4.3892508143322476e-05, + "loss": 2.8132, "step": 36900 }, { "epoch": 0.12, - "learning_rate": 4.387591612984382e-05, - "loss": 2.8286, + "learning_rate": 4.3875956674876195e-05, + "loss": 2.81, "step": 37000 }, { "epoch": 0.12, - "learning_rate": 4.3859364551816375e-05, - "loss": 2.8263, + "learning_rate": 4.3859405206429914e-05, + "loss": 2.8004, "step": 37100 }, { "epoch": 0.12, - "learning_rate": 4.384281297378892e-05, - "loss": 2.8268, + "learning_rate": 4.384285373798363e-05, + "loss": 2.8055, "step": 37200 }, { "epoch": 0.12, - "learning_rate": 4.382626139576148e-05, - "loss": 2.8397, + "learning_rate": 4.382630226953736e-05, + "loss": 2.8111, "step": 37300 }, { "epoch": 0.12, - "learning_rate": 4.380970981773402e-05, - "loss": 2.8376, + "learning_rate": 4.380975080109108e-05, + "loss": 2.8189, "step": 37400 }, { "epoch": 0.12, - "learning_rate": 4.3793158239706575e-05, - "loss": 2.843, + "learning_rate": 4.37931993326448e-05, + "loss": 2.8021, "step": 37500 }, { "epoch": 0.12, - "learning_rate": 4.3776606661679126e-05, - "loss": 2.8407, + "learning_rate": 4.377664786419851e-05, + "loss": 2.7961, "step": 37600 }, { "epoch": 0.12, - "learning_rate": 4.376005508365168e-05, - "loss": 2.8377, + "learning_rate": 4.376009639575223e-05, + "loss": 2.8059, "step": 37700 }, { "epoch": 0.13, - "learning_rate": 4.374350350562423e-05, - "loss": 2.8302, + "learning_rate": 4.3743544927305954e-05, + "loss": 2.8151, "step": 37800 }, { "epoch": 0.13, - "learning_rate": 4.3726951927596774e-05, - "loss": 2.8415, + "learning_rate": 4.372699345885967e-05, + "loss": 2.815, "step": 37900 }, { "epoch": 0.13, - "learning_rate": 4.371040034956933e-05, - "loss": 2.8311, + "learning_rate": 4.371044199041339e-05, + "loss": 2.8215, "step": 38000 }, { "epoch": 0.13, - "learning_rate": 4.369384877154188e-05, - "loss": 2.8321, + "learning_rate": 4.369389052196711e-05, + "loss": 2.8042, "step": 38100 }, { "epoch": 0.13, - "learning_rate": 4.3677297193514436e-05, - "loss": 2.8399, + "learning_rate": 4.367733905352083e-05, + "loss": 2.808, "step": 38200 }, { "epoch": 0.13, - "learning_rate": 4.366074561548698e-05, - "loss": 2.8252, + "learning_rate": 4.366078758507455e-05, + "loss": 2.7963, "step": 38300 }, { "epoch": 0.13, - "learning_rate": 4.364419403745953e-05, - "loss": 2.8264, + "learning_rate": 4.364423611662827e-05, + "loss": 2.8151, "step": 38400 }, { "epoch": 0.13, - "learning_rate": 4.3627642459432084e-05, - "loss": 2.8285, + "learning_rate": 4.362768464818199e-05, + "loss": 2.8098, "step": 38500 }, { "epoch": 0.13, - "learning_rate": 4.3611090881404635e-05, - "loss": 2.8244, + "learning_rate": 4.3611133179735706e-05, + "loss": 2.8169, "step": 38600 }, { "epoch": 0.13, - "learning_rate": 4.359453930337719e-05, - "loss": 2.8326, + "learning_rate": 4.359458171128943e-05, + "loss": 2.8037, "step": 38700 }, { "epoch": 0.13, - "learning_rate": 4.357798772534974e-05, - "loss": 2.8334, + "learning_rate": 4.357803024284315e-05, + "loss": 2.8016, "step": 38800 }, { "epoch": 0.13, - "learning_rate": 4.356143614732229e-05, - "loss": 2.8198, + "learning_rate": 4.356147877439687e-05, + "loss": 2.802, "step": 38900 }, { "epoch": 0.13, - "learning_rate": 4.3544884569294835e-05, - "loss": 2.8377, + "learning_rate": 4.354492730595058e-05, + "loss": 2.8094, "step": 39000 }, { "epoch": 0.13, - "learning_rate": 4.352833299126739e-05, - "loss": 2.8376, + "learning_rate": 4.35283758375043e-05, + "loss": 2.8136, "step": 39100 }, { "epoch": 0.13, - "learning_rate": 4.351178141323994e-05, - "loss": 2.8276, + "learning_rate": 4.3511824369058026e-05, + "loss": 2.8157, "step": 39200 }, { "epoch": 0.13, - "learning_rate": 4.349522983521249e-05, - "loss": 2.8243, + "learning_rate": 4.3495272900611745e-05, + "loss": 2.8079, "step": 39300 }, { "epoch": 0.13, - "learning_rate": 4.347867825718504e-05, - "loss": 2.8298, + "learning_rate": 4.3478721432165464e-05, + "loss": 2.8003, "step": 39400 }, { "epoch": 0.13, - "learning_rate": 4.346212667915759e-05, - "loss": 2.8346, + "learning_rate": 4.3462169963719183e-05, + "loss": 2.8123, "step": 39500 }, { "epoch": 0.13, - "learning_rate": 4.3445575101130145e-05, - "loss": 2.8386, + "learning_rate": 4.34456184952729e-05, + "loss": 2.8088, "step": 39600 }, { "epoch": 0.13, - "learning_rate": 4.3429023523102696e-05, - "loss": 2.8269, + "learning_rate": 4.342906702682662e-05, + "loss": 2.8015, "step": 39700 }, { "epoch": 0.13, - "learning_rate": 4.341247194507525e-05, - "loss": 2.8197, + "learning_rate": 4.341251555838034e-05, + "loss": 2.8109, "step": 39800 }, { "epoch": 0.13, - "learning_rate": 4.33959203670478e-05, - "loss": 2.8278, + "learning_rate": 4.339596408993406e-05, + "loss": 2.7914, "step": 39900 }, { "epoch": 0.13, - "learning_rate": 4.3379368789020344e-05, - "loss": 2.8352, + "learning_rate": 4.337941262148778e-05, + "loss": 2.8155, "step": 40000 }, { "epoch": 0.13, - "learning_rate": 4.3362817210992896e-05, - "loss": 2.8478, + "learning_rate": 4.3362861153041504e-05, + "loss": 2.8105, "step": 40100 }, { "epoch": 0.13, - "learning_rate": 4.334626563296545e-05, - "loss": 2.8221, + "learning_rate": 4.334630968459522e-05, + "loss": 2.7998, "step": 40200 }, { "epoch": 0.13, - "learning_rate": 4.3329714054938e-05, - "loss": 2.8267, + "learning_rate": 4.332975821614894e-05, + "loss": 2.8032, "step": 40300 }, { "epoch": 0.13, - "learning_rate": 4.331316247691055e-05, - "loss": 2.8447, + "learning_rate": 4.3313206747702654e-05, + "loss": 2.8343, "step": 40400 }, { "epoch": 0.13, - "learning_rate": 4.32966108988831e-05, - "loss": 2.8235, + "learning_rate": 4.329665527925637e-05, + "loss": 2.8116, "step": 40500 }, { "epoch": 0.13, - "learning_rate": 4.3280059320855654e-05, - "loss": 2.8403, + "learning_rate": 4.32801038108101e-05, + "loss": 2.8113, "step": 40600 }, { "epoch": 0.13, - "learning_rate": 4.32635077428282e-05, - "loss": 2.8261, + "learning_rate": 4.326355234236382e-05, + "loss": 2.8066, "step": 40700 }, { "epoch": 0.14, - "learning_rate": 4.324695616480076e-05, - "loss": 2.838, + "learning_rate": 4.324700087391754e-05, + "loss": 2.8092, "step": 40800 }, { "epoch": 0.14, - "learning_rate": 4.32304045867733e-05, - "loss": 2.8264, + "learning_rate": 4.3230449405471256e-05, + "loss": 2.8082, "step": 40900 }, { "epoch": 0.14, - "learning_rate": 4.321385300874586e-05, - "loss": 2.8339, + "learning_rate": 4.3213897937024975e-05, + "loss": 2.8049, "step": 41000 }, { "epoch": 0.14, - "learning_rate": 4.3197301430718405e-05, - "loss": 2.8241, + "learning_rate": 4.3197346468578694e-05, + "loss": 2.8015, "step": 41100 }, { "epoch": 0.14, - "learning_rate": 4.318074985269096e-05, - "loss": 2.8266, + "learning_rate": 4.318079500013241e-05, + "loss": 2.8148, "step": 41200 }, { "epoch": 0.14, - "learning_rate": 4.316419827466351e-05, - "loss": 2.8349, + "learning_rate": 4.316424353168613e-05, + "loss": 2.8122, "step": 41300 }, { "epoch": 0.14, - "learning_rate": 4.314764669663606e-05, - "loss": 2.848, + "learning_rate": 4.314769206323985e-05, + "loss": 2.8091, "step": 41400 }, { "epoch": 0.14, - "learning_rate": 4.313109511860861e-05, - "loss": 2.8332, + "learning_rate": 4.313114059479357e-05, + "loss": 2.8106, "step": 41500 }, { "epoch": 0.14, - "learning_rate": 4.3114543540581156e-05, - "loss": 2.8408, + "learning_rate": 4.3114589126347296e-05, + "loss": 2.8122, "step": 41600 }, { "epoch": 0.14, - "learning_rate": 4.3097991962553715e-05, - "loss": 2.831, + "learning_rate": 4.3098037657901015e-05, + "loss": 2.8174, "step": 41700 }, { "epoch": 0.14, - "learning_rate": 4.308144038452626e-05, - "loss": 2.8369, + "learning_rate": 4.308148618945473e-05, + "loss": 2.816, "step": 41800 }, { "epoch": 0.14, - "learning_rate": 4.306488880649882e-05, - "loss": 2.8181, + "learning_rate": 4.3064934721008446e-05, + "loss": 2.8264, "step": 41900 }, { "epoch": 0.14, - "learning_rate": 4.304833722847136e-05, - "loss": 2.8316, + "learning_rate": 4.304838325256217e-05, + "loss": 2.8113, "step": 42000 }, { "epoch": 0.14, - "learning_rate": 4.3031785650443914e-05, - "loss": 2.8301, + "learning_rate": 4.303183178411589e-05, + "loss": 2.7861, "step": 42100 }, { "epoch": 0.14, - "learning_rate": 4.3015234072416466e-05, - "loss": 2.8268, + "learning_rate": 4.301528031566961e-05, + "loss": 2.8134, "step": 42200 }, { "epoch": 0.14, - "learning_rate": 4.299868249438902e-05, - "loss": 2.8283, + "learning_rate": 4.299872884722333e-05, + "loss": 2.8011, "step": 42300 }, { "epoch": 0.14, - "learning_rate": 4.298213091636157e-05, - "loss": 2.8261, + "learning_rate": 4.298217737877705e-05, + "loss": 2.7928, "step": 42400 }, { "epoch": 0.14, - "learning_rate": 4.2965579338334114e-05, - "loss": 2.8374, + "learning_rate": 4.2965625910330766e-05, + "loss": 2.7977, "step": 42500 }, { "epoch": 0.14, - "learning_rate": 4.294902776030667e-05, - "loss": 2.8423, + "learning_rate": 4.2949074441884485e-05, + "loss": 2.8288, "step": 42600 }, { "epoch": 0.14, - "learning_rate": 4.293247618227922e-05, - "loss": 2.8345, + "learning_rate": 4.2932522973438204e-05, + "loss": 2.7978, "step": 42700 }, { "epoch": 0.14, - "learning_rate": 4.2915924604251776e-05, - "loss": 2.8194, + "learning_rate": 4.2915971504991923e-05, + "loss": 2.8003, "step": 42800 }, { "epoch": 0.14, - "learning_rate": 4.289937302622432e-05, - "loss": 2.8366, + "learning_rate": 4.289942003654564e-05, + "loss": 2.7981, "step": 42900 }, { "epoch": 0.14, - "learning_rate": 4.288282144819687e-05, - "loss": 2.8393, + "learning_rate": 4.288286856809937e-05, + "loss": 2.7947, "step": 43000 }, { "epoch": 0.14, - "learning_rate": 4.2866269870169424e-05, - "loss": 2.8492, + "learning_rate": 4.286631709965308e-05, + "loss": 2.8156, "step": 43100 }, { "epoch": 0.14, - "learning_rate": 4.2849718292141975e-05, - "loss": 2.8272, + "learning_rate": 4.28497656312068e-05, + "loss": 2.8202, "step": 43200 }, { "epoch": 0.14, - "learning_rate": 4.283316671411453e-05, - "loss": 2.8413, + "learning_rate": 4.283321416276052e-05, + "loss": 2.8246, "step": 43300 }, { "epoch": 0.14, - "learning_rate": 4.281661513608708e-05, - "loss": 2.8357, + "learning_rate": 4.2816662694314244e-05, + "loss": 2.8088, "step": 43400 }, { "epoch": 0.14, - "learning_rate": 4.280006355805963e-05, - "loss": 2.8239, + "learning_rate": 4.280011122586796e-05, + "loss": 2.8038, "step": 43500 }, { "epoch": 0.14, - "learning_rate": 4.2783511980032175e-05, - "loss": 2.8282, + "learning_rate": 4.278355975742168e-05, + "loss": 2.8082, "step": 43600 }, { "epoch": 0.14, - "learning_rate": 4.276696040200473e-05, - "loss": 2.8454, + "learning_rate": 4.27670082889754e-05, + "loss": 2.802, "step": 43700 }, { "epoch": 0.14, - "learning_rate": 4.275040882397728e-05, - "loss": 2.8512, + "learning_rate": 4.275045682052912e-05, + "loss": 2.8142, "step": 43800 }, { "epoch": 0.15, - "learning_rate": 4.273385724594983e-05, - "loss": 2.8311, + "learning_rate": 4.273390535208284e-05, + "loss": 2.8056, "step": 43900 }, { "epoch": 0.15, - "learning_rate": 4.271730566792238e-05, - "loss": 2.836, + "learning_rate": 4.271735388363656e-05, + "loss": 2.8144, "step": 44000 }, { "epoch": 0.15, - "learning_rate": 4.270075408989493e-05, - "loss": 2.8308, + "learning_rate": 4.270080241519028e-05, + "loss": 2.7998, "step": 44100 }, { "epoch": 0.15, - "learning_rate": 4.2684202511867484e-05, - "loss": 2.8309, + "learning_rate": 4.2684250946743996e-05, + "loss": 2.8136, "step": 44200 }, { "epoch": 0.15, - "learning_rate": 4.2667650933840036e-05, - "loss": 2.831, + "learning_rate": 4.2667699478297715e-05, + "loss": 2.8057, "step": 44300 }, { "epoch": 0.15, - "learning_rate": 4.265109935581259e-05, - "loss": 2.8348, + "learning_rate": 4.265114800985144e-05, + "loss": 2.8254, "step": 44400 }, { "epoch": 0.15, - "learning_rate": 4.263454777778514e-05, - "loss": 2.8223, + "learning_rate": 4.263459654140515e-05, + "loss": 2.7837, "step": 44500 }, { "epoch": 0.15, - "learning_rate": 4.2617996199757684e-05, - "loss": 2.8266, + "learning_rate": 4.261804507295887e-05, + "loss": 2.8068, "step": 44600 }, { "epoch": 0.15, - "learning_rate": 4.2601444621730236e-05, - "loss": 2.838, + "learning_rate": 4.260149360451259e-05, + "loss": 2.8084, "step": 44700 }, { "epoch": 0.15, - "learning_rate": 4.258489304370279e-05, - "loss": 2.8418, + "learning_rate": 4.2584942136066317e-05, + "loss": 2.828, "step": 44800 }, { "epoch": 0.15, - "learning_rate": 4.256834146567534e-05, - "loss": 2.8413, + "learning_rate": 4.2568390667620036e-05, + "loss": 2.8163, "step": 44900 }, { "epoch": 0.15, - "learning_rate": 4.255178988764789e-05, - "loss": 2.8112, + "learning_rate": 4.2551839199173755e-05, + "loss": 2.8021, "step": 45000 }, { "epoch": 0.15, - "learning_rate": 4.253523830962044e-05, - "loss": 2.8354, + "learning_rate": 4.2535287730727474e-05, + "loss": 2.81, "step": 45100 }, { "epoch": 0.15, - "learning_rate": 4.2518686731592994e-05, - "loss": 2.8282, + "learning_rate": 4.2518736262281186e-05, + "loss": 2.8128, "step": 45200 }, { "epoch": 0.15, - "learning_rate": 4.2502135153565545e-05, - "loss": 2.8438, + "learning_rate": 4.250218479383491e-05, + "loss": 2.808, "step": 45300 }, { "epoch": 0.15, - "learning_rate": 4.24855835755381e-05, - "loss": 2.8433, + "learning_rate": 4.248563332538863e-05, + "loss": 2.8042, "step": 45400 }, { "epoch": 0.15, - "learning_rate": 4.246903199751064e-05, - "loss": 2.8147, + "learning_rate": 4.246908185694235e-05, + "loss": 2.8202, "step": 45500 }, { "epoch": 0.15, - "learning_rate": 4.24524804194832e-05, - "loss": 2.8321, + "learning_rate": 4.245253038849607e-05, + "loss": 2.813, "step": 45600 }, { "epoch": 0.15, - "learning_rate": 4.2435928841455745e-05, - "loss": 2.8348, + "learning_rate": 4.243597892004979e-05, + "loss": 2.8098, "step": 45700 }, { "epoch": 0.15, - "learning_rate": 4.2419377263428296e-05, - "loss": 2.8373, + "learning_rate": 4.241942745160351e-05, + "loss": 2.8086, "step": 45800 }, { "epoch": 0.15, - "learning_rate": 4.240282568540085e-05, - "loss": 2.8435, + "learning_rate": 4.2402875983157225e-05, + "loss": 2.8128, "step": 45900 }, { "epoch": 0.15, - "learning_rate": 4.23862741073734e-05, - "loss": 2.8158, + "learning_rate": 4.2386324514710944e-05, + "loss": 2.7975, "step": 46000 }, { "epoch": 0.15, - "learning_rate": 4.236972252934595e-05, - "loss": 2.8201, + "learning_rate": 4.236977304626466e-05, + "loss": 2.8128, "step": 46100 }, { "epoch": 0.15, - "learning_rate": 4.2353170951318496e-05, - "loss": 2.8264, + "learning_rate": 4.235322157781839e-05, + "loss": 2.8199, "step": 46200 }, { "epoch": 0.15, - "learning_rate": 4.2336619373291054e-05, - "loss": 2.8361, + "learning_rate": 4.233667010937211e-05, + "loss": 2.8186, "step": 46300 }, { "epoch": 0.15, - "learning_rate": 4.23200677952636e-05, - "loss": 2.8284, + "learning_rate": 4.232011864092583e-05, + "loss": 2.8012, "step": 46400 }, { "epoch": 0.15, - "learning_rate": 4.230351621723616e-05, - "loss": 2.832, + "learning_rate": 4.2303567172479546e-05, + "loss": 2.809, "step": 46500 }, { "epoch": 0.15, - "learning_rate": 4.22869646392087e-05, - "loss": 2.8216, + "learning_rate": 4.228701570403326e-05, + "loss": 2.8052, "step": 46600 }, { "epoch": 0.15, - "learning_rate": 4.227041306118126e-05, - "loss": 2.8196, + "learning_rate": 4.2270464235586984e-05, + "loss": 2.8122, "step": 46700 }, { "epoch": 0.15, - "learning_rate": 4.2253861483153806e-05, - "loss": 2.8237, + "learning_rate": 4.22539127671407e-05, + "loss": 2.8034, "step": 46800 }, { "epoch": 0.16, - "learning_rate": 4.223730990512636e-05, - "loss": 2.8331, + "learning_rate": 4.223736129869442e-05, + "loss": 2.8148, "step": 46900 }, { "epoch": 0.16, - "learning_rate": 4.222075832709891e-05, - "loss": 2.8345, + "learning_rate": 4.222080983024814e-05, + "loss": 2.8127, "step": 47000 }, { "epoch": 0.16, - "learning_rate": 4.2204206749071454e-05, - "loss": 2.8442, + "learning_rate": 4.220425836180186e-05, + "loss": 2.8045, "step": 47100 }, { "epoch": 0.16, - "learning_rate": 4.218765517104401e-05, - "loss": 2.8392, + "learning_rate": 4.2187706893355586e-05, + "loss": 2.7989, "step": 47200 }, { "epoch": 0.16, - "learning_rate": 4.217110359301656e-05, - "loss": 2.84, + "learning_rate": 4.21711554249093e-05, + "loss": 2.8028, "step": 47300 }, { "epoch": 0.16, - "learning_rate": 4.2154552014989115e-05, - "loss": 2.83, + "learning_rate": 4.215460395646302e-05, + "loss": 2.7974, "step": 47400 }, { "epoch": 0.16, - "learning_rate": 4.213800043696166e-05, - "loss": 2.8247, + "learning_rate": 4.2138052488016736e-05, + "loss": 2.8092, "step": 47500 }, { "epoch": 0.16, - "learning_rate": 4.212144885893421e-05, - "loss": 2.8233, + "learning_rate": 4.212150101957046e-05, + "loss": 2.7943, "step": 47600 }, { "epoch": 0.16, - "learning_rate": 4.210489728090676e-05, - "loss": 2.8257, + "learning_rate": 4.210494955112418e-05, + "loss": 2.804, "step": 47700 }, { "epoch": 0.16, - "learning_rate": 4.2088345702879315e-05, - "loss": 2.8346, + "learning_rate": 4.20883980826779e-05, + "loss": 2.8135, "step": 47800 }, { "epoch": 0.16, - "learning_rate": 4.2071794124851867e-05, - "loss": 2.836, + "learning_rate": 4.207184661423162e-05, + "loss": 2.7891, "step": 47900 }, { "epoch": 0.16, - "learning_rate": 4.205524254682441e-05, - "loss": 2.8317, + "learning_rate": 4.205529514578533e-05, + "loss": 2.8112, "step": 48000 }, { "epoch": 0.16, - "learning_rate": 4.203869096879697e-05, - "loss": 2.8226, + "learning_rate": 4.2038743677339057e-05, + "loss": 2.8007, "step": 48100 }, { "epoch": 0.16, - "learning_rate": 4.2022139390769515e-05, - "loss": 2.8514, + "learning_rate": 4.2022192208892776e-05, + "loss": 2.8103, "step": 48200 }, { "epoch": 0.16, - "learning_rate": 4.200558781274207e-05, - "loss": 2.8234, + "learning_rate": 4.2005640740446495e-05, + "loss": 2.8088, "step": 48300 }, { "epoch": 0.16, - "learning_rate": 4.198903623471462e-05, - "loss": 2.8362, + "learning_rate": 4.1989089272000214e-05, + "loss": 2.8118, "step": 48400 }, { "epoch": 0.16, - "learning_rate": 4.197248465668717e-05, - "loss": 2.8391, + "learning_rate": 4.197253780355393e-05, + "loss": 2.8217, "step": 48500 }, { "epoch": 0.16, - "learning_rate": 4.195593307865972e-05, - "loss": 2.8297, + "learning_rate": 4.195598633510766e-05, + "loss": 2.8115, "step": 48600 }, { "epoch": 0.16, - "learning_rate": 4.193938150063227e-05, - "loss": 2.836, + "learning_rate": 4.193943486666137e-05, + "loss": 2.8141, "step": 48700 }, { "epoch": 0.16, - "learning_rate": 4.1922829922604824e-05, - "loss": 2.8279, + "learning_rate": 4.192288339821509e-05, + "loss": 2.8035, "step": 48800 }, { "epoch": 0.16, - "learning_rate": 4.1906278344577376e-05, - "loss": 2.8115, + "learning_rate": 4.190633192976881e-05, + "loss": 2.8094, "step": 48900 }, { "epoch": 0.16, - "learning_rate": 4.188972676654993e-05, - "loss": 2.8372, + "learning_rate": 4.1889780461322534e-05, + "loss": 2.7941, "step": 49000 }, { "epoch": 0.16, - "learning_rate": 4.187317518852247e-05, - "loss": 2.8195, + "learning_rate": 4.187322899287625e-05, + "loss": 2.7959, "step": 49100 }, { "epoch": 0.16, - "learning_rate": 4.1856623610495024e-05, - "loss": 2.8306, + "learning_rate": 4.185667752442997e-05, + "loss": 2.8021, "step": 49200 }, { "epoch": 0.16, - "learning_rate": 4.1840072032467575e-05, - "loss": 2.8381, + "learning_rate": 4.184012605598369e-05, + "loss": 2.8077, "step": 49300 }, { "epoch": 0.16, - "learning_rate": 4.182352045444013e-05, - "loss": 2.8229, + "learning_rate": 4.18235745875374e-05, + "loss": 2.8029, "step": 49400 }, { "epoch": 0.16, - "learning_rate": 4.180696887641268e-05, - "loss": 2.8271, + "learning_rate": 4.180702311909113e-05, + "loss": 2.8087, "step": 49500 }, { "epoch": 0.16, - "learning_rate": 4.179041729838523e-05, - "loss": 2.8243, + "learning_rate": 4.179047165064485e-05, + "loss": 2.8156, "step": 49600 }, { "epoch": 0.16, - "learning_rate": 4.177386572035778e-05, - "loss": 2.8172, + "learning_rate": 4.177392018219857e-05, + "loss": 2.8078, "step": 49700 }, { "epoch": 0.16, - "learning_rate": 4.175731414233033e-05, - "loss": 2.8327, + "learning_rate": 4.1757368713752286e-05, + "loss": 2.7915, "step": 49800 }, { "epoch": 0.17, - "learning_rate": 4.1740762564302885e-05, - "loss": 2.8171, + "learning_rate": 4.1740817245306005e-05, + "loss": 2.805, "step": 49900 }, { "epoch": 0.17, - "learning_rate": 4.1724210986275437e-05, - "loss": 2.8319, + "learning_rate": 4.1724265776859724e-05, + "loss": 2.8067, "step": 50000 }, { "epoch": 0.17, - "learning_rate": 4.170765940824798e-05, + "learning_rate": 4.170771430841344e-05, "loss": 2.816, "step": 50100 }, { "epoch": 0.17, - "learning_rate": 4.169110783022053e-05, - "loss": 2.8203, + "learning_rate": 4.169116283996716e-05, + "loss": 2.8095, "step": 50200 }, { "epoch": 0.17, - "learning_rate": 4.1674556252193085e-05, - "loss": 2.8313, + "learning_rate": 4.167461137152088e-05, + "loss": 2.8193, "step": 50300 }, { "epoch": 0.17, - "learning_rate": 4.1658004674165636e-05, - "loss": 2.8187, + "learning_rate": 4.16580599030746e-05, + "loss": 2.8174, "step": 50400 }, { "epoch": 0.17, - "learning_rate": 4.164145309613819e-05, - "loss": 2.8135, + "learning_rate": 4.1641508434628326e-05, + "loss": 2.8053, "step": 50500 }, { "epoch": 0.17, - "learning_rate": 4.162490151811074e-05, - "loss": 2.8434, + "learning_rate": 4.1624956966182045e-05, + "loss": 2.8069, "step": 50600 }, { "epoch": 0.17, - "learning_rate": 4.160834994008329e-05, - "loss": 2.8477, + "learning_rate": 4.160840549773576e-05, + "loss": 2.8127, "step": 50700 }, { "epoch": 0.17, - "learning_rate": 4.159179836205584e-05, - "loss": 2.8135, + "learning_rate": 4.1591854029289476e-05, + "loss": 2.819, "step": 50800 }, { "epoch": 0.17, - "learning_rate": 4.1575246784028394e-05, - "loss": 2.8303, + "learning_rate": 4.15753025608432e-05, + "loss": 2.8113, "step": 50900 }, { "epoch": 0.17, - "learning_rate": 4.155869520600094e-05, - "loss": 2.8218, + "learning_rate": 4.155875109239692e-05, + "loss": 2.7989, "step": 51000 }, { "epoch": 0.17, - "learning_rate": 4.15421436279735e-05, - "loss": 2.8445, + "learning_rate": 4.154219962395064e-05, + "loss": 2.816, "step": 51100 }, { "epoch": 0.17, - "learning_rate": 4.152559204994604e-05, - "loss": 2.8357, + "learning_rate": 4.152564815550436e-05, + "loss": 2.7987, "step": 51200 }, { "epoch": 0.17, - "learning_rate": 4.1509040471918594e-05, - "loss": 2.8309, + "learning_rate": 4.150909668705808e-05, + "loss": 2.8008, "step": 51300 }, { "epoch": 0.17, - "learning_rate": 4.1492488893891145e-05, - "loss": 2.8464, + "learning_rate": 4.1492545218611797e-05, + "loss": 2.7955, "step": 51400 }, { "epoch": 0.17, - "learning_rate": 4.14759373158637e-05, - "loss": 2.846, + "learning_rate": 4.1475993750165516e-05, + "loss": 2.8359, "step": 51500 }, { "epoch": 0.17, - "learning_rate": 4.145938573783625e-05, - "loss": 2.8346, + "learning_rate": 4.1459442281719235e-05, + "loss": 2.7892, "step": 51600 }, { "epoch": 0.17, - "learning_rate": 4.1442834159808793e-05, - "loss": 2.8264, + "learning_rate": 4.1442890813272954e-05, + "loss": 2.8003, "step": 51700 }, { "epoch": 0.17, - "learning_rate": 4.142628258178135e-05, - "loss": 2.8172, + "learning_rate": 4.142633934482667e-05, + "loss": 2.8014, "step": 51800 }, { "epoch": 0.17, - "learning_rate": 4.14097310037539e-05, - "loss": 2.8279, + "learning_rate": 4.14097878763804e-05, + "loss": 2.8021, "step": 51900 }, { "epoch": 0.17, - "learning_rate": 4.1393179425726455e-05, - "loss": 2.8331, + "learning_rate": 4.139323640793412e-05, + "loss": 2.8198, "step": 52000 }, { "epoch": 0.17, - "learning_rate": 4.1376627847699e-05, - "loss": 2.8433, + "learning_rate": 4.137668493948783e-05, + "loss": 2.8041, "step": 52100 }, { "epoch": 0.17, - "learning_rate": 4.136007626967156e-05, - "loss": 2.8383, + "learning_rate": 4.136013347104155e-05, + "loss": 2.8143, "step": 52200 }, { "epoch": 0.17, - "learning_rate": 4.13435246916441e-05, - "loss": 2.8341, + "learning_rate": 4.1343582002595274e-05, + "loss": 2.8188, "step": 52300 }, { "epoch": 0.17, - "learning_rate": 4.1326973113616655e-05, - "loss": 2.8292, + "learning_rate": 4.132703053414899e-05, + "loss": 2.7977, "step": 52400 }, { "epoch": 0.17, - "learning_rate": 4.1310421535589206e-05, - "loss": 2.83, + "learning_rate": 4.131047906570271e-05, + "loss": 2.8216, "step": 52500 }, { "epoch": 0.17, - "learning_rate": 4.129386995756175e-05, - "loss": 2.8268, + "learning_rate": 4.129392759725643e-05, + "loss": 2.7988, "step": 52600 }, { "epoch": 0.17, - "learning_rate": 4.127731837953431e-05, - "loss": 2.8179, + "learning_rate": 4.127737612881015e-05, + "loss": 2.8149, "step": 52700 }, { "epoch": 0.17, - "learning_rate": 4.1260766801506854e-05, - "loss": 2.8263, + "learning_rate": 4.126082466036387e-05, + "loss": 2.7965, "step": 52800 }, { "epoch": 0.18, - "learning_rate": 4.124421522347941e-05, - "loss": 2.8302, + "learning_rate": 4.124427319191759e-05, + "loss": 2.8059, "step": 52900 }, { "epoch": 0.18, - "learning_rate": 4.122766364545196e-05, - "loss": 2.8318, + "learning_rate": 4.122772172347131e-05, + "loss": 2.8075, "step": 53000 }, { "epoch": 0.18, - "learning_rate": 4.121111206742451e-05, - "loss": 2.839, + "learning_rate": 4.1211170255025026e-05, + "loss": 2.8086, "step": 53100 }, { "epoch": 0.18, - "learning_rate": 4.119456048939706e-05, - "loss": 2.8147, + "learning_rate": 4.1194618786578745e-05, + "loss": 2.8161, "step": 53200 }, { "epoch": 0.18, - "learning_rate": 4.117800891136961e-05, - "loss": 2.8302, + "learning_rate": 4.117806731813247e-05, + "loss": 2.8058, "step": 53300 }, { "epoch": 0.18, - "learning_rate": 4.1161457333342164e-05, - "loss": 2.8239, + "learning_rate": 4.116151584968619e-05, + "loss": 2.8104, "step": 53400 }, { "epoch": 0.18, - "learning_rate": 4.114490575531471e-05, - "loss": 2.8285, + "learning_rate": 4.11449643812399e-05, + "loss": 2.8105, "step": 53500 }, { "epoch": 0.18, - "learning_rate": 4.112835417728727e-05, - "loss": 2.8377, + "learning_rate": 4.112841291279362e-05, + "loss": 2.8044, "step": 53600 }, { "epoch": 0.18, - "learning_rate": 4.111180259925981e-05, - "loss": 2.8151, + "learning_rate": 4.111186144434735e-05, + "loss": 2.791, "step": 53700 }, { "epoch": 0.18, - "learning_rate": 4.109525102123237e-05, - "loss": 2.8311, + "learning_rate": 4.1095309975901066e-05, + "loss": 2.7971, "step": 53800 }, { "epoch": 0.18, - "learning_rate": 4.1078699443204915e-05, - "loss": 2.8398, + "learning_rate": 4.1078758507454785e-05, + "loss": 2.8085, "step": 53900 }, { "epoch": 0.18, - "learning_rate": 4.106214786517747e-05, - "loss": 2.8286, + "learning_rate": 4.1062207039008504e-05, + "loss": 2.8043, "step": 54000 }, { "epoch": 0.18, - "learning_rate": 4.104559628715002e-05, - "loss": 2.8173, + "learning_rate": 4.104565557056222e-05, + "loss": 2.8138, "step": 54100 }, { "epoch": 0.18, - "learning_rate": 4.102904470912257e-05, - "loss": 2.8304, + "learning_rate": 4.102910410211594e-05, + "loss": 2.8023, "step": 54200 }, { "epoch": 0.18, - "learning_rate": 4.101249313109512e-05, - "loss": 2.8278, + "learning_rate": 4.101255263366966e-05, + "loss": 2.8177, "step": 54300 }, { "epoch": 0.18, - "learning_rate": 4.099594155306767e-05, - "loss": 2.8133, + "learning_rate": 4.099600116522338e-05, + "loss": 2.8152, "step": 54400 }, { "epoch": 0.18, - "learning_rate": 4.0979389975040225e-05, - "loss": 2.8224, + "learning_rate": 4.09794496967771e-05, + "loss": 2.813, "step": 54500 }, { "epoch": 0.18, - "learning_rate": 4.096283839701277e-05, - "loss": 2.8417, + "learning_rate": 4.096289822833082e-05, + "loss": 2.7945, "step": 54600 }, { "epoch": 0.18, - "learning_rate": 4.094628681898532e-05, - "loss": 2.8174, + "learning_rate": 4.094634675988454e-05, + "loss": 2.8041, "step": 54700 }, { "epoch": 0.18, - "learning_rate": 4.092973524095787e-05, - "loss": 2.8235, + "learning_rate": 4.092979529143826e-05, + "loss": 2.8123, "step": 54800 }, { "epoch": 0.18, - "learning_rate": 4.0913183662930424e-05, - "loss": 2.8401, + "learning_rate": 4.0913243822991974e-05, + "loss": 2.8047, "step": 54900 }, { "epoch": 0.18, - "learning_rate": 4.0896632084902976e-05, - "loss": 2.8415, + "learning_rate": 4.0896692354545693e-05, + "loss": 2.8108, "step": 55000 }, { "epoch": 0.18, - "learning_rate": 4.088008050687553e-05, - "loss": 2.8349, + "learning_rate": 4.088014088609942e-05, + "loss": 2.8052, "step": 55100 }, { "epoch": 0.18, - "learning_rate": 4.086352892884808e-05, - "loss": 2.8263, + "learning_rate": 4.086358941765314e-05, + "loss": 2.814, "step": 55200 }, { "epoch": 0.18, - "learning_rate": 4.084697735082063e-05, - "loss": 2.8378, + "learning_rate": 4.084703794920686e-05, + "loss": 2.8038, "step": 55300 }, { "epoch": 0.18, - "learning_rate": 4.083042577279318e-05, - "loss": 2.828, + "learning_rate": 4.0830486480760576e-05, + "loss": 2.8004, "step": 55400 }, { "epoch": 0.18, - "learning_rate": 4.0813874194765734e-05, - "loss": 2.8397, + "learning_rate": 4.0813935012314295e-05, + "loss": 2.8046, "step": 55500 }, { "epoch": 0.18, - "learning_rate": 4.079732261673828e-05, - "loss": 2.8138, + "learning_rate": 4.0797383543868014e-05, + "loss": 2.8175, "step": 55600 }, { "epoch": 0.18, - "learning_rate": 4.078077103871083e-05, - "loss": 2.8193, + "learning_rate": 4.078083207542173e-05, + "loss": 2.8085, "step": 55700 }, { "epoch": 0.18, - "learning_rate": 4.076421946068338e-05, - "loss": 2.8271, + "learning_rate": 4.076428060697545e-05, + "loss": 2.8076, "step": 55800 }, { "epoch": 0.19, - "learning_rate": 4.0747667882655934e-05, - "loss": 2.8317, + "learning_rate": 4.074772913852917e-05, + "loss": 2.8047, "step": 55900 }, { "epoch": 0.19, - "learning_rate": 4.0731116304628485e-05, - "loss": 2.8341, + "learning_rate": 4.073117767008289e-05, + "loss": 2.8232, "step": 56000 }, { "epoch": 0.19, - "learning_rate": 4.071456472660104e-05, - "loss": 2.8336, + "learning_rate": 4.0714626201636616e-05, + "loss": 2.8034, "step": 56100 }, { "epoch": 0.19, - "learning_rate": 4.069801314857359e-05, - "loss": 2.8301, + "learning_rate": 4.0698074733190335e-05, + "loss": 2.81, "step": 56200 }, { "epoch": 0.19, - "learning_rate": 4.068146157054614e-05, - "loss": 2.8217, + "learning_rate": 4.068152326474405e-05, + "loss": 2.8006, "step": 56300 }, { "epoch": 0.19, - "learning_rate": 4.066490999251869e-05, - "loss": 2.8357, + "learning_rate": 4.0664971796297766e-05, + "loss": 2.8121, "step": 56400 }, { "epoch": 0.19, - "learning_rate": 4.0648358414491236e-05, - "loss": 2.8324, + "learning_rate": 4.064842032785149e-05, + "loss": 2.8019, "step": 56500 }, { "epoch": 0.19, - "learning_rate": 4.0631806836463795e-05, - "loss": 2.8312, + "learning_rate": 4.063186885940521e-05, + "loss": 2.7927, "step": 56600 }, { "epoch": 0.19, - "learning_rate": 4.061525525843634e-05, - "loss": 2.8144, + "learning_rate": 4.061531739095893e-05, + "loss": 2.8002, "step": 56700 }, { "epoch": 0.19, - "learning_rate": 4.059870368040889e-05, - "loss": 2.8157, + "learning_rate": 4.059876592251265e-05, + "loss": 2.8026, "step": 56800 }, { "epoch": 0.19, - "learning_rate": 4.058215210238144e-05, - "loss": 2.8317, + "learning_rate": 4.058221445406636e-05, + "loss": 2.8267, "step": 56900 }, { "epoch": 0.19, - "learning_rate": 4.0565600524353994e-05, - "loss": 2.8221, + "learning_rate": 4.056566298562009e-05, + "loss": 2.7956, "step": 57000 }, { "epoch": 0.19, - "learning_rate": 4.0549048946326546e-05, - "loss": 2.8137, + "learning_rate": 4.0549111517173806e-05, + "loss": 2.7924, "step": 57100 }, { "epoch": 0.19, - "learning_rate": 4.053249736829909e-05, - "loss": 2.8414, + "learning_rate": 4.0532560048727525e-05, + "loss": 2.8085, "step": 57200 }, { "epoch": 0.19, - "learning_rate": 4.051594579027165e-05, - "loss": 2.8178, + "learning_rate": 4.0516008580281244e-05, + "loss": 2.8003, "step": 57300 }, { "epoch": 0.19, - "learning_rate": 4.0499394212244194e-05, - "loss": 2.8364, + "learning_rate": 4.049945711183496e-05, + "loss": 2.8188, "step": 57400 }, { "epoch": 0.19, - "learning_rate": 4.048284263421675e-05, - "loss": 2.8251, + "learning_rate": 4.048290564338869e-05, + "loss": 2.7995, "step": 57500 }, { "epoch": 0.19, - "learning_rate": 4.04662910561893e-05, - "loss": 2.83, + "learning_rate": 4.04663541749424e-05, + "loss": 2.8052, "step": 57600 }, { "epoch": 0.19, - "learning_rate": 4.044973947816185e-05, - "loss": 2.8318, + "learning_rate": 4.044980270649612e-05, + "loss": 2.8037, "step": 57700 }, { "epoch": 0.19, - "learning_rate": 4.04331879001344e-05, - "loss": 2.8341, + "learning_rate": 4.043325123804984e-05, + "loss": 2.7951, "step": 57800 }, { "epoch": 0.19, - "learning_rate": 4.041663632210695e-05, - "loss": 2.824, + "learning_rate": 4.0416699769603564e-05, + "loss": 2.8199, "step": 57900 }, { "epoch": 0.19, - "learning_rate": 4.0400084744079504e-05, - "loss": 2.8264, + "learning_rate": 4.040014830115728e-05, + "loss": 2.8044, "step": 58000 }, { "epoch": 0.19, - "learning_rate": 4.038353316605205e-05, - "loss": 2.8326, + "learning_rate": 4.0383596832711e-05, + "loss": 2.7989, "step": 58100 }, { "epoch": 0.19, - "learning_rate": 4.036698158802461e-05, - "loss": 2.8197, + "learning_rate": 4.036704536426472e-05, + "loss": 2.8152, "step": 58200 }, { "epoch": 0.19, - "learning_rate": 4.035043000999715e-05, - "loss": 2.8325, + "learning_rate": 4.0350493895818433e-05, + "loss": 2.8057, "step": 58300 }, { "epoch": 0.19, - "learning_rate": 4.033387843196971e-05, - "loss": 2.8238, + "learning_rate": 4.033394242737216e-05, + "loss": 2.8312, "step": 58400 }, { "epoch": 0.19, - "learning_rate": 4.0317326853942255e-05, - "loss": 2.8371, + "learning_rate": 4.031739095892588e-05, + "loss": 2.8065, "step": 58500 }, { "epoch": 0.19, - "learning_rate": 4.0300775275914806e-05, - "loss": 2.8284, + "learning_rate": 4.03008394904796e-05, + "loss": 2.8048, "step": 58600 }, { "epoch": 0.19, - "learning_rate": 4.028422369788736e-05, - "loss": 2.8293, + "learning_rate": 4.0284288022033316e-05, + "loss": 2.8145, "step": 58700 }, { "epoch": 0.19, - "learning_rate": 4.026767211985991e-05, - "loss": 2.826, + "learning_rate": 4.0267736553587035e-05, + "loss": 2.8112, "step": 58800 }, { "epoch": 0.19, - "learning_rate": 4.025112054183246e-05, - "loss": 2.8268, + "learning_rate": 4.025118508514076e-05, + "loss": 2.8147, "step": 58900 }, { "epoch": 0.2, - "learning_rate": 4.023456896380501e-05, - "loss": 2.8211, + "learning_rate": 4.023463361669447e-05, + "loss": 2.8039, "step": 59000 }, { "epoch": 0.2, - "learning_rate": 4.0218017385777564e-05, - "loss": 2.8236, + "learning_rate": 4.021808214824819e-05, + "loss": 2.7997, "step": 59100 }, { "epoch": 0.2, - "learning_rate": 4.020146580775011e-05, - "loss": 2.833, + "learning_rate": 4.020153067980191e-05, + "loss": 2.7909, "step": 59200 }, { "epoch": 0.2, - "learning_rate": 4.018491422972267e-05, - "loss": 2.8181, + "learning_rate": 4.018497921135563e-05, + "loss": 2.8021, "step": 59300 }, { "epoch": 0.2, - "learning_rate": 4.016836265169521e-05, - "loss": 2.8275, + "learning_rate": 4.0168427742909356e-05, + "loss": 2.8064, "step": 59400 }, { "epoch": 0.2, - "learning_rate": 4.0151811073667764e-05, - "loss": 2.8314, + "learning_rate": 4.0151876274463075e-05, + "loss": 2.8002, "step": 59500 }, { "epoch": 0.2, - "learning_rate": 4.0135259495640316e-05, - "loss": 2.8284, + "learning_rate": 4.0135324806016794e-05, + "loss": 2.8056, "step": 59600 }, { "epoch": 0.2, - "learning_rate": 4.011870791761287e-05, - "loss": 2.8118, + "learning_rate": 4.0118773337570506e-05, + "loss": 2.8139, "step": 59700 }, { "epoch": 0.2, - "learning_rate": 4.010215633958542e-05, - "loss": 2.8389, + "learning_rate": 4.010222186912423e-05, + "loss": 2.7957, "step": 59800 }, { "epoch": 0.2, - "learning_rate": 4.008560476155797e-05, - "loss": 2.8109, + "learning_rate": 4.008567040067795e-05, + "loss": 2.8015, "step": 59900 }, { "epoch": 0.2, - "learning_rate": 4.006905318353052e-05, - "loss": 2.8206, + "learning_rate": 4.006911893223167e-05, + "loss": 2.809, "step": 60000 }, { "epoch": 0.2, - "learning_rate": 4.0052501605503074e-05, - "loss": 2.8343, + "learning_rate": 4.005256746378539e-05, + "loss": 2.8058, "step": 60100 }, { "epoch": 0.2, - "learning_rate": 4.003595002747562e-05, - "loss": 2.8292, + "learning_rate": 4.003601599533911e-05, + "loss": 2.817, "step": 60200 }, { "epoch": 0.2, - "learning_rate": 4.001939844944817e-05, - "loss": 2.8236, + "learning_rate": 4.0019464526892833e-05, + "loss": 2.8093, "step": 60300 }, { "epoch": 0.2, - "learning_rate": 4.000284687142072e-05, - "loss": 2.8184, + "learning_rate": 4.0002913058446546e-05, + "loss": 2.7949, "step": 60400 }, { "epoch": 0.2, - "learning_rate": 3.998629529339327e-05, - "loss": 2.8229, + "learning_rate": 3.9986361590000265e-05, + "loss": 2.7912, "step": 60500 }, { "epoch": 0.2, - "learning_rate": 3.9969743715365825e-05, - "loss": 2.8167, + "learning_rate": 3.9969810121553984e-05, + "loss": 2.8034, "step": 60600 }, { "epoch": 0.2, - "learning_rate": 3.9953192137338376e-05, - "loss": 2.8354, + "learning_rate": 3.99532586531077e-05, + "loss": 2.8178, "step": 60700 }, { "epoch": 0.2, - "learning_rate": 3.993664055931093e-05, - "loss": 2.8205, + "learning_rate": 3.993670718466143e-05, + "loss": 2.7983, "step": 60800 }, { "epoch": 0.2, - "learning_rate": 3.992008898128348e-05, - "loss": 2.8164, + "learning_rate": 3.992015571621515e-05, + "loss": 2.8039, "step": 60900 }, { "epoch": 0.2, - "learning_rate": 3.990353740325603e-05, - "loss": 2.8259, + "learning_rate": 3.9903604247768866e-05, + "loss": 2.8196, "step": 61000 }, { "epoch": 0.2, - "learning_rate": 3.9886985825228576e-05, - "loss": 2.826, + "learning_rate": 3.988705277932258e-05, + "loss": 2.8025, "step": 61100 }, { "epoch": 0.2, - "learning_rate": 3.9870434247201134e-05, - "loss": 2.8359, + "learning_rate": 3.9870501310876304e-05, + "loss": 2.7947, "step": 61200 }, { "epoch": 0.2, - "learning_rate": 3.985388266917368e-05, - "loss": 2.8304, + "learning_rate": 3.985394984243002e-05, + "loss": 2.8098, "step": 61300 }, { "epoch": 0.2, - "learning_rate": 3.983733109114623e-05, - "loss": 2.8132, + "learning_rate": 3.983739837398374e-05, + "loss": 2.8016, "step": 61400 }, { "epoch": 0.2, - "learning_rate": 3.982077951311878e-05, - "loss": 2.8347, + "learning_rate": 3.982084690553746e-05, + "loss": 2.8038, "step": 61500 }, { "epoch": 0.2, - "learning_rate": 3.9804227935091334e-05, - "loss": 2.8242, + "learning_rate": 3.980429543709118e-05, + "loss": 2.818, "step": 61600 }, { "epoch": 0.2, - "learning_rate": 3.9787676357063886e-05, - "loss": 2.8201, + "learning_rate": 3.9787743968644906e-05, + "loss": 2.8017, "step": 61700 }, { "epoch": 0.2, - "learning_rate": 3.977112477903643e-05, - "loss": 2.8227, + "learning_rate": 3.977119250019862e-05, + "loss": 2.801, "step": 61800 }, { "epoch": 0.2, - "learning_rate": 3.975457320100899e-05, - "loss": 2.8309, + "learning_rate": 3.975464103175234e-05, + "loss": 2.8037, "step": 61900 }, { "epoch": 0.21, - "learning_rate": 3.9738021622981534e-05, - "loss": 2.8135, + "learning_rate": 3.9738089563306056e-05, + "loss": 2.7792, "step": 62000 }, { "epoch": 0.21, - "learning_rate": 3.972147004495409e-05, - "loss": 2.8111, + "learning_rate": 3.9721538094859775e-05, + "loss": 2.799, "step": 62100 }, { "epoch": 0.21, - "learning_rate": 3.970491846692664e-05, - "loss": 2.8085, + "learning_rate": 3.97049866264135e-05, + "loss": 2.7958, "step": 62200 }, { "epoch": 0.21, - "learning_rate": 3.9688366888899195e-05, - "loss": 2.8285, + "learning_rate": 3.968843515796722e-05, + "loss": 2.8078, "step": 62300 }, { "epoch": 0.21, - "learning_rate": 3.967181531087174e-05, - "loss": 2.8154, + "learning_rate": 3.967188368952094e-05, + "loss": 2.7961, "step": 62400 }, { "epoch": 0.21, - "learning_rate": 3.965526373284429e-05, - "loss": 2.8307, + "learning_rate": 3.965533222107465e-05, + "loss": 2.7925, "step": 62500 }, { "epoch": 0.21, - "learning_rate": 3.963871215481684e-05, - "loss": 2.837, + "learning_rate": 3.963878075262838e-05, + "loss": 2.8041, "step": 62600 }, { "epoch": 0.21, - "learning_rate": 3.962216057678939e-05, - "loss": 2.8178, + "learning_rate": 3.9622229284182096e-05, + "loss": 2.8071, "step": 62700 }, { "epoch": 0.21, - "learning_rate": 3.9605608998761947e-05, - "loss": 2.827, + "learning_rate": 3.9605677815735815e-05, + "loss": 2.7948, "step": 62800 }, { "epoch": 0.21, - "learning_rate": 3.958905742073449e-05, - "loss": 2.8003, + "learning_rate": 3.9589126347289534e-05, + "loss": 2.7875, "step": 62900 }, { "epoch": 0.21, - "learning_rate": 3.957250584270705e-05, - "loss": 2.8224, + "learning_rate": 3.957257487884325e-05, + "loss": 2.8135, "step": 63000 }, { "epoch": 0.21, - "learning_rate": 3.9555954264679595e-05, - "loss": 2.8149, + "learning_rate": 3.955602341039697e-05, + "loss": 2.8226, "step": 63100 }, { "epoch": 0.21, - "learning_rate": 3.9539402686652146e-05, - "loss": 2.8243, + "learning_rate": 3.953947194195069e-05, + "loss": 2.8012, "step": 63200 }, { "epoch": 0.21, - "learning_rate": 3.95228511086247e-05, - "loss": 2.8195, + "learning_rate": 3.952292047350441e-05, + "loss": 2.795, "step": 63300 }, { "epoch": 0.21, - "learning_rate": 3.950629953059725e-05, - "loss": 2.8286, + "learning_rate": 3.950636900505813e-05, + "loss": 2.8081, "step": 63400 }, { "epoch": 0.21, - "learning_rate": 3.94897479525698e-05, - "loss": 2.8341, + "learning_rate": 3.948981753661185e-05, + "loss": 2.8029, "step": 63500 }, { "epoch": 0.21, - "learning_rate": 3.9473196374542346e-05, - "loss": 2.8186, + "learning_rate": 3.9473266068165573e-05, + "loss": 2.8052, "step": 63600 }, { "epoch": 0.21, - "learning_rate": 3.9456644796514904e-05, - "loss": 2.8155, + "learning_rate": 3.945671459971929e-05, + "loss": 2.8015, "step": 63700 }, { "epoch": 0.21, - "learning_rate": 3.944009321848745e-05, - "loss": 2.8313, + "learning_rate": 3.9440163131273005e-05, + "loss": 2.7956, "step": 63800 }, { "epoch": 0.21, - "learning_rate": 3.942354164046001e-05, - "loss": 2.8086, + "learning_rate": 3.9423611662826724e-05, + "loss": 2.787, "step": 63900 }, { "epoch": 0.21, - "learning_rate": 3.940699006243255e-05, - "loss": 2.8348, + "learning_rate": 3.940706019438045e-05, + "loss": 2.7956, "step": 64000 }, { "epoch": 0.21, - "learning_rate": 3.9390438484405104e-05, - "loss": 2.8339, + "learning_rate": 3.939050872593417e-05, + "loss": 2.7939, "step": 64100 }, { "epoch": 0.21, - "learning_rate": 3.9373886906377655e-05, - "loss": 2.8341, + "learning_rate": 3.937395725748789e-05, + "loss": 2.7985, "step": 64200 }, { "epoch": 0.21, - "learning_rate": 3.935733532835021e-05, - "loss": 2.8196, + "learning_rate": 3.9357405789041606e-05, + "loss": 2.7921, "step": 64300 }, { "epoch": 0.21, - "learning_rate": 3.934078375032276e-05, - "loss": 2.8391, + "learning_rate": 3.9340854320595325e-05, + "loss": 2.8069, "step": 64400 }, { "epoch": 0.21, - "learning_rate": 3.932423217229531e-05, - "loss": 2.8166, + "learning_rate": 3.9324302852149044e-05, + "loss": 2.8148, "step": 64500 }, { "epoch": 0.21, - "learning_rate": 3.930768059426786e-05, + "learning_rate": 3.930775138370276e-05, "loss": 2.8064, "step": 64600 }, { "epoch": 0.21, - "learning_rate": 3.9291129016240407e-05, - "loss": 2.8277, + "learning_rate": 3.929119991525648e-05, + "loss": 2.8038, "step": 64700 }, { "epoch": 0.21, - "learning_rate": 3.9274577438212965e-05, - "loss": 2.8302, + "learning_rate": 3.92746484468102e-05, + "loss": 2.7962, "step": 64800 }, { "epoch": 0.21, - "learning_rate": 3.925802586018551e-05, - "loss": 2.8243, + "learning_rate": 3.925809697836392e-05, + "loss": 2.8021, "step": 64900 }, { "epoch": 0.22, - "learning_rate": 3.924147428215806e-05, - "loss": 2.8426, + "learning_rate": 3.9241545509917646e-05, + "loss": 2.8189, "step": 65000 }, { "epoch": 0.22, - "learning_rate": 3.922492270413061e-05, - "loss": 2.8199, + "learning_rate": 3.9224994041471365e-05, + "loss": 2.7926, "step": 65100 }, { "epoch": 0.22, - "learning_rate": 3.9208371126103165e-05, - "loss": 2.8083, + "learning_rate": 3.920844257302508e-05, + "loss": 2.7796, "step": 65200 }, { "epoch": 0.22, - "learning_rate": 3.9191819548075716e-05, - "loss": 2.8252, + "learning_rate": 3.9191891104578796e-05, + "loss": 2.7977, "step": 65300 }, { "epoch": 0.22, - "learning_rate": 3.917526797004827e-05, - "loss": 2.8184, + "learning_rate": 3.917533963613252e-05, + "loss": 2.7965, "step": 65400 }, { "epoch": 0.22, - "learning_rate": 3.915871639202082e-05, - "loss": 2.8296, + "learning_rate": 3.915878816768624e-05, + "loss": 2.7975, "step": 65500 }, { "epoch": 0.22, - "learning_rate": 3.914216481399337e-05, - "loss": 2.816, + "learning_rate": 3.914223669923996e-05, + "loss": 2.7907, "step": 65600 }, { "epoch": 0.22, - "learning_rate": 3.9125613235965916e-05, - "loss": 2.8174, + "learning_rate": 3.912568523079368e-05, + "loss": 2.7874, "step": 65700 }, { "epoch": 0.22, - "learning_rate": 3.910906165793847e-05, - "loss": 2.8261, + "learning_rate": 3.91091337623474e-05, + "loss": 2.8194, "step": 65800 }, { "epoch": 0.22, - "learning_rate": 3.909251007991102e-05, - "loss": 2.8285, + "learning_rate": 3.909258229390112e-05, + "loss": 2.8091, "step": 65900 }, { "epoch": 0.22, - "learning_rate": 3.907595850188357e-05, - "loss": 2.8285, + "learning_rate": 3.9076030825454836e-05, + "loss": 2.7992, "step": 66000 }, { "epoch": 0.22, - "learning_rate": 3.905940692385612e-05, - "loss": 2.8263, + "learning_rate": 3.9059479357008555e-05, + "loss": 2.8089, "step": 66100 }, { "epoch": 0.22, - "learning_rate": 3.9042855345828674e-05, - "loss": 2.8228, + "learning_rate": 3.9042927888562274e-05, + "loss": 2.8013, "step": 66200 }, { "epoch": 0.22, - "learning_rate": 3.9026303767801225e-05, - "loss": 2.8255, + "learning_rate": 3.902637642011599e-05, + "loss": 2.8114, "step": 66300 }, { "epoch": 0.22, - "learning_rate": 3.900975218977378e-05, - "loss": 2.8131, + "learning_rate": 3.900982495166972e-05, + "loss": 2.7981, "step": 66400 }, { "epoch": 0.22, - "learning_rate": 3.899320061174633e-05, - "loss": 2.8264, + "learning_rate": 3.899327348322344e-05, + "loss": 2.7984, "step": 66500 }, { "epoch": 0.22, - "learning_rate": 3.8976649033718873e-05, - "loss": 2.8249, + "learning_rate": 3.897672201477715e-05, + "loss": 2.8112, "step": 66600 }, { "epoch": 0.22, - "learning_rate": 3.896009745569143e-05, - "loss": 2.816, + "learning_rate": 3.896017054633087e-05, + "loss": 2.8126, "step": 66700 }, { "epoch": 0.22, - "learning_rate": 3.894354587766398e-05, - "loss": 2.8365, + "learning_rate": 3.8943619077884594e-05, + "loss": 2.7928, "step": 66800 }, { "epoch": 0.22, - "learning_rate": 3.892699429963653e-05, - "loss": 2.8214, + "learning_rate": 3.892706760943831e-05, + "loss": 2.8067, "step": 66900 }, { "epoch": 0.22, - "learning_rate": 3.891044272160908e-05, - "loss": 2.8171, + "learning_rate": 3.891051614099203e-05, + "loss": 2.7965, "step": 67000 }, { "epoch": 0.22, - "learning_rate": 3.889389114358163e-05, - "loss": 2.8229, + "learning_rate": 3.889396467254575e-05, + "loss": 2.7896, "step": 67100 }, { "epoch": 0.22, - "learning_rate": 3.887733956555418e-05, - "loss": 2.8276, + "learning_rate": 3.887741320409947e-05, + "loss": 2.8063, "step": 67200 }, { "epoch": 0.22, - "learning_rate": 3.886078798752673e-05, - "loss": 2.8332, + "learning_rate": 3.886086173565319e-05, + "loss": 2.806, "step": 67300 }, { "epoch": 0.22, - "learning_rate": 3.8844236409499286e-05, - "loss": 2.8422, + "learning_rate": 3.884431026720691e-05, + "loss": 2.8023, "step": 67400 }, { "epoch": 0.22, - "learning_rate": 3.882768483147183e-05, - "loss": 2.8198, + "learning_rate": 3.882775879876063e-05, + "loss": 2.7984, "step": 67500 }, { "epoch": 0.22, - "learning_rate": 3.881113325344439e-05, - "loss": 2.8112, + "learning_rate": 3.8811207330314346e-05, + "loss": 2.8007, "step": 67600 }, { "epoch": 0.22, - "learning_rate": 3.8794581675416934e-05, - "loss": 2.8272, + "learning_rate": 3.8794655861868065e-05, + "loss": 2.8216, "step": 67700 }, { "epoch": 0.22, - "learning_rate": 3.877803009738949e-05, - "loss": 2.8187, + "learning_rate": 3.877810439342179e-05, + "loss": 2.7899, "step": 67800 }, { "epoch": 0.22, - "learning_rate": 3.876147851936204e-05, - "loss": 2.8157, + "learning_rate": 3.876155292497551e-05, + "loss": 2.8003, "step": 67900 }, { "epoch": 0.23, - "learning_rate": 3.874492694133459e-05, - "loss": 2.8207, + "learning_rate": 3.874500145652922e-05, + "loss": 2.7922, "step": 68000 }, { "epoch": 0.23, - "learning_rate": 3.872837536330714e-05, - "loss": 2.8192, + "learning_rate": 3.872844998808294e-05, + "loss": 2.8048, "step": 68100 }, { "epoch": 0.23, - "learning_rate": 3.8711823785279685e-05, - "loss": 2.8279, + "learning_rate": 3.871189851963666e-05, + "loss": 2.8043, "step": 68200 }, { "epoch": 0.23, - "learning_rate": 3.8695272207252244e-05, - "loss": 2.8242, + "learning_rate": 3.8695347051190386e-05, + "loss": 2.7971, "step": 68300 }, { "epoch": 0.23, - "learning_rate": 3.867872062922479e-05, - "loss": 2.83, + "learning_rate": 3.8678795582744105e-05, + "loss": 2.8044, "step": 68400 }, { "epoch": 0.23, - "learning_rate": 3.866216905119735e-05, - "loss": 2.81, + "learning_rate": 3.8662244114297824e-05, + "loss": 2.7983, "step": 68500 }, { "epoch": 0.23, - "learning_rate": 3.864561747316989e-05, - "loss": 2.8154, + "learning_rate": 3.864569264585154e-05, + "loss": 2.7938, "step": 68600 }, { "epoch": 0.23, - "learning_rate": 3.8629065895142443e-05, - "loss": 2.8225, + "learning_rate": 3.862914117740526e-05, + "loss": 2.7924, "step": 68700 }, { "epoch": 0.23, - "learning_rate": 3.8612514317114995e-05, - "loss": 2.8305, + "learning_rate": 3.861258970895898e-05, + "loss": 2.7966, "step": 68800 }, { "epoch": 0.23, - "learning_rate": 3.859596273908755e-05, - "loss": 2.8281, + "learning_rate": 3.85960382405127e-05, + "loss": 2.8047, "step": 68900 }, { "epoch": 0.23, - "learning_rate": 3.85794111610601e-05, - "loss": 2.8171, + "learning_rate": 3.857948677206642e-05, + "loss": 2.8105, "step": 69000 }, { "epoch": 0.23, - "learning_rate": 3.856285958303264e-05, - "loss": 2.8309, + "learning_rate": 3.856293530362014e-05, + "loss": 2.8037, "step": 69100 }, { "epoch": 0.23, - "learning_rate": 3.85463080050052e-05, - "loss": 2.8152, + "learning_rate": 3.8546383835173864e-05, + "loss": 2.7932, "step": 69200 }, { "epoch": 0.23, - "learning_rate": 3.8529756426977746e-05, - "loss": 2.8146, + "learning_rate": 3.852983236672758e-05, + "loss": 2.7984, "step": 69300 }, { "epoch": 0.23, - "learning_rate": 3.8513204848950305e-05, - "loss": 2.8119, + "learning_rate": 3.8513280898281295e-05, + "loss": 2.7906, "step": 69400 }, { "epoch": 0.23, - "learning_rate": 3.849665327092285e-05, - "loss": 2.8135, + "learning_rate": 3.8496729429835014e-05, + "loss": 2.8004, "step": 69500 }, { "epoch": 0.23, - "learning_rate": 3.84801016928954e-05, - "loss": 2.8125, + "learning_rate": 3.848017796138873e-05, + "loss": 2.8039, "step": 69600 }, { "epoch": 0.23, - "learning_rate": 3.846355011486795e-05, - "loss": 2.8322, + "learning_rate": 3.846362649294246e-05, + "loss": 2.7961, "step": 69700 }, { "epoch": 0.23, - "learning_rate": 3.8446998536840504e-05, - "loss": 2.8382, + "learning_rate": 3.844707502449618e-05, + "loss": 2.812, "step": 69800 }, { "epoch": 0.23, - "learning_rate": 3.8430446958813056e-05, - "loss": 2.8142, + "learning_rate": 3.8430523556049896e-05, + "loss": 2.793, "step": 69900 }, { "epoch": 0.23, - "learning_rate": 3.841389538078561e-05, - "loss": 2.8252, + "learning_rate": 3.841397208760361e-05, + "loss": 2.8116, "step": 70000 }, { "epoch": 0.23, - "learning_rate": 3.839734380275816e-05, - "loss": 2.8362, + "learning_rate": 3.8397420619157334e-05, + "loss": 2.8052, "step": 70100 }, { "epoch": 0.23, - "learning_rate": 3.8380792224730704e-05, - "loss": 2.8371, + "learning_rate": 3.838086915071105e-05, + "loss": 2.8059, "step": 70200 }, { "epoch": 0.23, - "learning_rate": 3.8364240646703256e-05, - "loss": 2.8127, + "learning_rate": 3.836431768226477e-05, + "loss": 2.8099, "step": 70300 }, { "epoch": 0.23, - "learning_rate": 3.834768906867581e-05, - "loss": 2.824, + "learning_rate": 3.834776621381849e-05, + "loss": 2.8017, "step": 70400 }, { "epoch": 0.23, - "learning_rate": 3.833113749064836e-05, - "loss": 2.8213, + "learning_rate": 3.833121474537221e-05, + "loss": 2.8093, "step": 70500 }, { "epoch": 0.23, - "learning_rate": 3.831458591262091e-05, - "loss": 2.8028, + "learning_rate": 3.8314663276925936e-05, + "loss": 2.7935, "step": 70600 }, { "epoch": 0.23, - "learning_rate": 3.829803433459346e-05, - "loss": 2.8236, + "learning_rate": 3.829811180847965e-05, + "loss": 2.7994, "step": 70700 }, { "epoch": 0.23, - "learning_rate": 3.8281482756566014e-05, - "loss": 2.8198, + "learning_rate": 3.828156034003337e-05, + "loss": 2.7913, "step": 70800 }, { "epoch": 0.23, - "learning_rate": 3.8264931178538565e-05, - "loss": 2.8252, + "learning_rate": 3.8265008871587086e-05, + "loss": 2.8116, "step": 70900 }, { "epoch": 0.24, - "learning_rate": 3.824837960051112e-05, - "loss": 2.8175, + "learning_rate": 3.8248457403140805e-05, + "loss": 2.793, "step": 71000 }, { "epoch": 0.24, - "learning_rate": 3.823182802248367e-05, - "loss": 2.8225, + "learning_rate": 3.823190593469453e-05, + "loss": 2.7858, "step": 71100 }, { "epoch": 0.24, - "learning_rate": 3.821527644445621e-05, - "loss": 2.826, + "learning_rate": 3.821535446624825e-05, + "loss": 2.8034, "step": 71200 }, { "epoch": 0.24, - "learning_rate": 3.8198724866428765e-05, - "loss": 2.8309, + "learning_rate": 3.819880299780197e-05, + "loss": 2.8032, "step": 71300 }, { "epoch": 0.24, - "learning_rate": 3.8182173288401316e-05, - "loss": 2.8209, + "learning_rate": 3.818225152935568e-05, + "loss": 2.8033, "step": 71400 }, { "epoch": 0.24, - "learning_rate": 3.816562171037387e-05, - "loss": 2.8275, + "learning_rate": 3.816570006090941e-05, + "loss": 2.7903, "step": 71500 }, { "epoch": 0.24, - "learning_rate": 3.814907013234642e-05, - "loss": 2.8365, + "learning_rate": 3.8149148592463126e-05, + "loss": 2.7951, "step": 71600 }, { "epoch": 0.24, - "learning_rate": 3.813251855431897e-05, - "loss": 2.8138, + "learning_rate": 3.8132597124016845e-05, + "loss": 2.7933, "step": 71700 }, { "epoch": 0.24, - "learning_rate": 3.811596697629152e-05, - "loss": 2.8352, + "learning_rate": 3.8116045655570564e-05, + "loss": 2.7944, "step": 71800 }, { "epoch": 0.24, - "learning_rate": 3.8099415398264074e-05, - "loss": 2.825, + "learning_rate": 3.809949418712428e-05, + "loss": 2.7994, "step": 71900 }, { "epoch": 0.24, - "learning_rate": 3.8082863820236626e-05, - "loss": 2.8312, + "learning_rate": 3.808294271867801e-05, + "loss": 2.7814, "step": 72000 }, { "epoch": 0.24, - "learning_rate": 3.806631224220917e-05, - "loss": 2.824, + "learning_rate": 3.806639125023172e-05, + "loss": 2.8116, "step": 72100 }, { "epoch": 0.24, - "learning_rate": 3.804976066418173e-05, - "loss": 2.8123, + "learning_rate": 3.804983978178544e-05, + "loss": 2.8044, "step": 72200 }, { "epoch": 0.24, - "learning_rate": 3.8033209086154274e-05, - "loss": 2.825, + "learning_rate": 3.803328831333916e-05, + "loss": 2.7912, "step": 72300 }, { "epoch": 0.24, - "learning_rate": 3.8016657508126826e-05, - "loss": 2.8144, + "learning_rate": 3.801673684489288e-05, + "loss": 2.7934, "step": 72400 }, { "epoch": 0.24, - "learning_rate": 3.800010593009938e-05, - "loss": 2.8123, + "learning_rate": 3.8000185376446604e-05, + "loss": 2.7945, "step": 72500 }, { "epoch": 0.24, - "learning_rate": 3.798355435207193e-05, - "loss": 2.8355, + "learning_rate": 3.798363390800032e-05, + "loss": 2.7875, "step": 72600 }, { "epoch": 0.24, - "learning_rate": 3.796700277404448e-05, - "loss": 2.8087, + "learning_rate": 3.796708243955404e-05, + "loss": 2.7922, "step": 72700 }, { "epoch": 0.24, - "learning_rate": 3.7950451196017025e-05, - "loss": 2.8272, + "learning_rate": 3.7950530971107754e-05, + "loss": 2.8091, "step": 72800 }, { "epoch": 0.24, - "learning_rate": 3.7933899617989584e-05, - "loss": 2.8179, + "learning_rate": 3.793397950266148e-05, + "loss": 2.7993, "step": 72900 }, { "epoch": 0.24, - "learning_rate": 3.791734803996213e-05, - "loss": 2.8276, + "learning_rate": 3.79174280342152e-05, + "loss": 2.7959, "step": 73000 }, { "epoch": 0.24, - "learning_rate": 3.790079646193469e-05, - "loss": 2.8211, + "learning_rate": 3.790087656576892e-05, + "loss": 2.7881, "step": 73100 }, { "epoch": 0.24, - "learning_rate": 3.788424488390723e-05, - "loss": 2.8123, + "learning_rate": 3.7884325097322636e-05, + "loss": 2.7973, "step": 73200 }, { "epoch": 0.24, - "learning_rate": 3.786769330587979e-05, - "loss": 2.8119, + "learning_rate": 3.7867773628876355e-05, + "loss": 2.7978, "step": 73300 }, { "epoch": 0.24, - "learning_rate": 3.7851141727852335e-05, - "loss": 2.8226, + "learning_rate": 3.7851222160430074e-05, + "loss": 2.8087, "step": 73400 }, { "epoch": 0.24, - "learning_rate": 3.7834590149824886e-05, - "loss": 2.8292, + "learning_rate": 3.783467069198379e-05, + "loss": 2.8027, "step": 73500 }, { "epoch": 0.24, - "learning_rate": 3.781803857179744e-05, - "loss": 2.817, + "learning_rate": 3.781811922353751e-05, + "loss": 2.7975, "step": 73600 }, { "epoch": 0.24, - "learning_rate": 3.780148699376998e-05, - "loss": 2.8097, + "learning_rate": 3.780156775509123e-05, + "loss": 2.7962, "step": 73700 }, { "epoch": 0.24, - "learning_rate": 3.778493541574254e-05, - "loss": 2.8224, + "learning_rate": 3.778501628664495e-05, + "loss": 2.8015, "step": 73800 }, { "epoch": 0.24, - "learning_rate": 3.7768383837715086e-05, - "loss": 2.8304, + "learning_rate": 3.7768464818198676e-05, + "loss": 2.8002, "step": 73900 }, { "epoch": 0.24, - "learning_rate": 3.7751832259687644e-05, - "loss": 2.8303, + "learning_rate": 3.7751913349752395e-05, + "loss": 2.8134, "step": 74000 }, { "epoch": 0.25, - "learning_rate": 3.773528068166019e-05, - "loss": 2.8262, + "learning_rate": 3.7735361881306114e-05, + "loss": 2.8049, "step": 74100 }, { "epoch": 0.25, - "learning_rate": 3.771872910363274e-05, - "loss": 2.837, + "learning_rate": 3.7718810412859826e-05, + "loss": 2.7857, "step": 74200 }, { "epoch": 0.25, - "learning_rate": 3.770217752560529e-05, - "loss": 2.8226, + "learning_rate": 3.770225894441355e-05, + "loss": 2.8082, "step": 74300 }, { "epoch": 0.25, - "learning_rate": 3.7685625947577844e-05, - "loss": 2.8174, + "learning_rate": 3.768570747596727e-05, + "loss": 2.7961, "step": 74400 }, { "epoch": 0.25, - "learning_rate": 3.7669074369550396e-05, - "loss": 2.8293, + "learning_rate": 3.766915600752099e-05, + "loss": 2.7975, "step": 74500 }, { "epoch": 0.25, - "learning_rate": 3.765252279152295e-05, - "loss": 2.8269, + "learning_rate": 3.765260453907471e-05, + "loss": 2.8033, "step": 74600 }, { "epoch": 0.25, - "learning_rate": 3.76359712134955e-05, - "loss": 2.8292, + "learning_rate": 3.763605307062843e-05, + "loss": 2.789, "step": 74700 }, { "epoch": 0.25, - "learning_rate": 3.7619419635468044e-05, - "loss": 2.8157, + "learning_rate": 3.761950160218215e-05, + "loss": 2.8044, "step": 74800 }, { "epoch": 0.25, - "learning_rate": 3.76028680574406e-05, - "loss": 2.8149, + "learning_rate": 3.7602950133735866e-05, + "loss": 2.8039, "step": 74900 }, { "epoch": 0.25, - "learning_rate": 3.758631647941315e-05, - "loss": 2.8346, + "learning_rate": 3.7586398665289585e-05, + "loss": 2.7816, "step": 75000 }, { "epoch": 0.25, - "learning_rate": 3.75697649013857e-05, - "loss": 2.811, + "learning_rate": 3.7569847196843304e-05, + "loss": 2.7964, "step": 75100 }, { "epoch": 0.25, - "learning_rate": 3.755321332335825e-05, - "loss": 2.828, + "learning_rate": 3.755329572839702e-05, + "loss": 2.8026, "step": 75200 }, { "epoch": 0.25, - "learning_rate": 3.75366617453308e-05, - "loss": 2.8138, + "learning_rate": 3.753674425995075e-05, + "loss": 2.7933, "step": 75300 }, { "epoch": 0.25, - "learning_rate": 3.752011016730335e-05, - "loss": 2.8169, + "learning_rate": 3.752019279150447e-05, + "loss": 2.791, "step": 75400 }, { "epoch": 0.25, - "learning_rate": 3.7503558589275905e-05, - "loss": 2.817, + "learning_rate": 3.7503641323058187e-05, + "loss": 2.8032, "step": 75500 }, { "epoch": 0.25, - "learning_rate": 3.7487007011248456e-05, - "loss": 2.8267, + "learning_rate": 3.74870898546119e-05, + "loss": 2.7881, "step": 75600 }, { "epoch": 0.25, - "learning_rate": 3.747045543322101e-05, - "loss": 2.8087, + "learning_rate": 3.7470538386165624e-05, + "loss": 2.8178, "step": 75700 }, { "epoch": 0.25, - "learning_rate": 3.745390385519355e-05, - "loss": 2.8283, + "learning_rate": 3.7453986917719343e-05, + "loss": 2.7971, "step": 75800 }, { "epoch": 0.25, - "learning_rate": 3.7437352277166104e-05, - "loss": 2.8198, + "learning_rate": 3.743743544927306e-05, + "loss": 2.7892, "step": 75900 }, { "epoch": 0.25, - "learning_rate": 3.7420800699138656e-05, - "loss": 2.8144, + "learning_rate": 3.742088398082678e-05, + "loss": 2.7943, "step": 76000 }, { "epoch": 0.25, - "learning_rate": 3.740424912111121e-05, - "loss": 2.8259, + "learning_rate": 3.74043325123805e-05, + "loss": 2.7954, "step": 76100 }, { "epoch": 0.25, - "learning_rate": 3.738769754308376e-05, - "loss": 2.8099, + "learning_rate": 3.738778104393422e-05, + "loss": 2.8102, "step": 76200 }, { "epoch": 0.25, - "learning_rate": 3.737114596505631e-05, - "loss": 2.8183, + "learning_rate": 3.737122957548794e-05, + "loss": 2.7908, "step": 76300 }, { "epoch": 0.25, - "learning_rate": 3.735459438702886e-05, - "loss": 2.8269, + "learning_rate": 3.735467810704166e-05, + "loss": 2.8037, "step": 76400 }, { "epoch": 0.25, - "learning_rate": 3.7338042809001414e-05, - "loss": 2.8298, + "learning_rate": 3.7338126638595376e-05, + "loss": 2.8037, "step": 76500 }, { "epoch": 0.25, - "learning_rate": 3.7321491230973966e-05, - "loss": 2.8137, + "learning_rate": 3.7321575170149095e-05, + "loss": 2.7986, "step": 76600 }, { "epoch": 0.25, - "learning_rate": 3.730493965294651e-05, - "loss": 2.823, + "learning_rate": 3.730502370170282e-05, + "loss": 2.8097, "step": 76700 }, { "epoch": 0.25, - "learning_rate": 3.728838807491907e-05, - "loss": 2.8337, + "learning_rate": 3.728847223325654e-05, + "loss": 2.7869, "step": 76800 }, { "epoch": 0.25, - "learning_rate": 3.7271836496891614e-05, - "loss": 2.8206, + "learning_rate": 3.727192076481025e-05, + "loss": 2.803, "step": 76900 }, { "epoch": 0.25, - "learning_rate": 3.7255284918864165e-05, - "loss": 2.8232, + "learning_rate": 3.725536929636397e-05, + "loss": 2.7874, "step": 77000 }, { "epoch": 0.26, - "learning_rate": 3.723873334083672e-05, - "loss": 2.8237, + "learning_rate": 3.723881782791769e-05, + "loss": 2.793, "step": 77100 }, { "epoch": 0.26, - "learning_rate": 3.722218176280927e-05, - "loss": 2.8214, + "learning_rate": 3.7222266359471416e-05, + "loss": 2.7934, "step": 77200 }, { "epoch": 0.26, - "learning_rate": 3.720563018478182e-05, - "loss": 2.8101, + "learning_rate": 3.7205714891025135e-05, + "loss": 2.7955, "step": 77300 }, { "epoch": 0.26, - "learning_rate": 3.718907860675437e-05, - "loss": 2.8157, + "learning_rate": 3.7189163422578854e-05, + "loss": 2.8036, "step": 77400 }, { "epoch": 0.26, - "learning_rate": 3.717252702872692e-05, - "loss": 2.8214, + "learning_rate": 3.717261195413257e-05, + "loss": 2.7991, "step": 77500 }, { "epoch": 0.26, - "learning_rate": 3.715597545069947e-05, - "loss": 2.8119, + "learning_rate": 3.715606048568629e-05, + "loss": 2.7935, "step": 77600 }, { "epoch": 0.26, - "learning_rate": 3.7139423872672027e-05, - "loss": 2.8187, + "learning_rate": 3.713950901724001e-05, + "loss": 2.8034, "step": 77700 }, { "epoch": 0.26, - "learning_rate": 3.712287229464457e-05, - "loss": 2.8183, + "learning_rate": 3.712295754879373e-05, + "loss": 2.7982, "step": 77800 }, { "epoch": 0.26, - "learning_rate": 3.710632071661713e-05, - "loss": 2.8303, + "learning_rate": 3.710640608034745e-05, + "loss": 2.7929, "step": 77900 }, { "epoch": 0.26, - "learning_rate": 3.7089769138589675e-05, - "loss": 2.8283, + "learning_rate": 3.708985461190117e-05, + "loss": 2.8051, "step": 78000 }, { "epoch": 0.26, - "learning_rate": 3.7073217560562226e-05, - "loss": 2.8367, + "learning_rate": 3.7073303143454894e-05, + "loss": 2.7848, "step": 78100 }, { "epoch": 0.26, - "learning_rate": 3.705666598253478e-05, - "loss": 2.8051, + "learning_rate": 3.705675167500861e-05, + "loss": 2.8122, "step": 78200 }, { "epoch": 0.26, - "learning_rate": 3.704011440450732e-05, - "loss": 2.8211, + "learning_rate": 3.7040200206562325e-05, + "loss": 2.7852, "step": 78300 }, { "epoch": 0.26, - "learning_rate": 3.702356282647988e-05, - "loss": 2.8177, + "learning_rate": 3.7023648738116044e-05, + "loss": 2.8042, "step": 78400 }, { "epoch": 0.26, - "learning_rate": 3.7007011248452426e-05, - "loss": 2.8233, + "learning_rate": 3.700709726966976e-05, + "loss": 2.8095, "step": 78500 }, { "epoch": 0.26, - "learning_rate": 3.6990459670424984e-05, - "loss": 2.8313, + "learning_rate": 3.699054580122349e-05, + "loss": 2.7797, "step": 78600 }, { "epoch": 0.26, - "learning_rate": 3.697390809239753e-05, - "loss": 2.8295, + "learning_rate": 3.697399433277721e-05, + "loss": 2.8006, "step": 78700 }, { "epoch": 0.26, - "learning_rate": 3.695735651437008e-05, - "loss": 2.8294, + "learning_rate": 3.6957442864330927e-05, + "loss": 2.7917, "step": 78800 }, { "epoch": 0.26, - "learning_rate": 3.694080493634263e-05, - "loss": 2.8139, + "learning_rate": 3.6940891395884645e-05, + "loss": 2.8105, "step": 78900 }, { "epoch": 0.26, - "learning_rate": 3.6924253358315184e-05, - "loss": 2.8172, + "learning_rate": 3.6924339927438364e-05, + "loss": 2.7962, "step": 79000 }, { "epoch": 0.26, - "learning_rate": 3.6907701780287735e-05, - "loss": 2.8221, + "learning_rate": 3.6907788458992083e-05, + "loss": 2.8003, "step": 79100 }, { "epoch": 0.26, - "learning_rate": 3.689115020226028e-05, - "loss": 2.8347, + "learning_rate": 3.68912369905458e-05, + "loss": 2.7943, "step": 79200 }, { "epoch": 0.26, - "learning_rate": 3.687459862423284e-05, - "loss": 2.8117, + "learning_rate": 3.687468552209952e-05, + "loss": 2.7884, "step": 79300 }, { "epoch": 0.26, - "learning_rate": 3.6858047046205383e-05, - "loss": 2.819, + "learning_rate": 3.685813405365324e-05, + "loss": 2.7898, "step": 79400 }, { "epoch": 0.26, - "learning_rate": 3.684149546817794e-05, - "loss": 2.8279, + "learning_rate": 3.6841582585206966e-05, + "loss": 2.7911, "step": 79500 }, { "epoch": 0.26, - "learning_rate": 3.6824943890150487e-05, - "loss": 2.8345, + "learning_rate": 3.6825031116760685e-05, + "loss": 2.8001, "step": 79600 }, { "epoch": 0.26, - "learning_rate": 3.680839231212304e-05, - "loss": 2.8234, + "learning_rate": 3.68084796483144e-05, + "loss": 2.8035, "step": 79700 }, { "epoch": 0.26, - "learning_rate": 3.679184073409559e-05, - "loss": 2.8303, + "learning_rate": 3.6791928179868116e-05, + "loss": 2.7965, "step": 79800 }, { "epoch": 0.26, - "learning_rate": 3.677528915606814e-05, - "loss": 2.8238, + "learning_rate": 3.6775376711421835e-05, + "loss": 2.7912, "step": 79900 }, { "epoch": 0.26, - "learning_rate": 3.675873757804069e-05, - "loss": 2.8062, + "learning_rate": 3.675882524297556e-05, + "loss": 2.806, "step": 80000 }, { "epoch": 0.27, - "learning_rate": 3.6742186000013245e-05, - "loss": 2.8327, + "learning_rate": 3.674227377452928e-05, + "loss": 2.7825, "step": 80100 }, { "epoch": 0.27, - "learning_rate": 3.6725634421985796e-05, - "loss": 2.8246, + "learning_rate": 3.6725722306083e-05, + "loss": 2.8045, "step": 80200 }, { "epoch": 0.27, - "learning_rate": 3.670908284395834e-05, - "loss": 2.804, + "learning_rate": 3.670917083763672e-05, + "loss": 2.8077, "step": 80300 }, { "epoch": 0.27, - "learning_rate": 3.66925312659309e-05, - "loss": 2.8122, + "learning_rate": 3.669261936919044e-05, + "loss": 2.799, "step": 80400 }, { "epoch": 0.27, - "learning_rate": 3.6675979687903444e-05, - "loss": 2.8263, + "learning_rate": 3.6676067900744156e-05, + "loss": 2.795, "step": 80500 }, { "epoch": 0.27, - "learning_rate": 3.6659428109875996e-05, - "loss": 2.8107, + "learning_rate": 3.6659516432297875e-05, + "loss": 2.7984, "step": 80600 }, { "epoch": 0.27, - "learning_rate": 3.664287653184855e-05, - "loss": 2.8181, + "learning_rate": 3.6642964963851594e-05, + "loss": 2.799, "step": 80700 }, { "epoch": 0.27, - "learning_rate": 3.66263249538211e-05, - "loss": 2.8229, + "learning_rate": 3.662641349540531e-05, + "loss": 2.7995, "step": 80800 }, { "epoch": 0.27, - "learning_rate": 3.660977337579365e-05, - "loss": 2.8225, + "learning_rate": 3.660986202695904e-05, + "loss": 2.7957, "step": 80900 }, { "epoch": 0.27, - "learning_rate": 3.65932217977662e-05, - "loss": 2.8089, + "learning_rate": 3.659331055851276e-05, + "loss": 2.7811, "step": 81000 }, { "epoch": 0.27, - "learning_rate": 3.6576670219738754e-05, - "loss": 2.8221, + "learning_rate": 3.657675909006647e-05, + "loss": 2.7886, "step": 81100 }, { "epoch": 0.27, - "learning_rate": 3.6560118641711305e-05, - "loss": 2.8213, + "learning_rate": 3.656020762162019e-05, + "loss": 2.8242, "step": 81200 }, { "epoch": 0.27, - "learning_rate": 3.654356706368385e-05, - "loss": 2.8199, + "learning_rate": 3.654365615317391e-05, + "loss": 2.7909, "step": 81300 }, { "epoch": 0.27, - "learning_rate": 3.65270154856564e-05, - "loss": 2.8193, + "learning_rate": 3.6527104684727634e-05, + "loss": 2.798, "step": 81400 }, { "epoch": 0.27, - "learning_rate": 3.6510463907628953e-05, - "loss": 2.8088, + "learning_rate": 3.651055321628135e-05, + "loss": 2.7922, "step": 81500 }, { "epoch": 0.27, - "learning_rate": 3.6493912329601505e-05, - "loss": 2.8199, + "learning_rate": 3.649400174783507e-05, + "loss": 2.7939, "step": 81600 }, { "epoch": 0.27, - "learning_rate": 3.647736075157406e-05, - "loss": 2.8204, + "learning_rate": 3.647745027938879e-05, + "loss": 2.8081, "step": 81700 }, { "epoch": 0.27, - "learning_rate": 3.646080917354661e-05, - "loss": 2.8348, + "learning_rate": 3.646089881094251e-05, + "loss": 2.7998, "step": 81800 }, { "epoch": 0.27, - "learning_rate": 3.644425759551916e-05, - "loss": 2.8123, + "learning_rate": 3.644434734249623e-05, + "loss": 2.8032, "step": 81900 }, { "epoch": 0.27, - "learning_rate": 3.642770601749171e-05, - "loss": 2.8272, + "learning_rate": 3.642779587404995e-05, + "loss": 2.7968, "step": 82000 }, { "epoch": 0.27, - "learning_rate": 3.641115443946426e-05, - "loss": 2.8206, + "learning_rate": 3.6411244405603666e-05, + "loss": 2.7985, "step": 82100 }, { "epoch": 0.27, - "learning_rate": 3.639460286143681e-05, - "loss": 2.8284, + "learning_rate": 3.6394692937157385e-05, + "loss": 2.7965, "step": 82200 }, { "epoch": 0.27, - "learning_rate": 3.6378051283409366e-05, - "loss": 2.8113, + "learning_rate": 3.6378141468711104e-05, + "loss": 2.7992, "step": 82300 }, { "epoch": 0.27, - "learning_rate": 3.636149970538191e-05, - "loss": 2.8417, + "learning_rate": 3.636159000026483e-05, + "loss": 2.8059, "step": 82400 }, { "epoch": 0.27, - "learning_rate": 3.634494812735446e-05, - "loss": 2.8024, + "learning_rate": 3.634503853181854e-05, + "loss": 2.7912, "step": 82500 }, { "epoch": 0.27, - "learning_rate": 3.6328396549327014e-05, - "loss": 2.8073, + "learning_rate": 3.632848706337226e-05, + "loss": 2.7922, "step": 82600 }, { "epoch": 0.27, - "learning_rate": 3.6311844971299566e-05, - "loss": 2.8203, + "learning_rate": 3.631193559492598e-05, + "loss": 2.7988, "step": 82700 }, { "epoch": 0.27, - "learning_rate": 3.629529339327212e-05, - "loss": 2.8288, + "learning_rate": 3.6295384126479706e-05, + "loss": 2.8027, "step": 82800 }, { "epoch": 0.27, - "learning_rate": 3.627874181524466e-05, + "learning_rate": 3.6278832658033425e-05, "loss": 2.8087, "step": 82900 }, { "epoch": 0.27, - "learning_rate": 3.626219023721722e-05, - "loss": 2.7993, + "learning_rate": 3.6262281189587144e-05, + "loss": 2.7949, "step": 83000 }, { "epoch": 0.28, - "learning_rate": 3.6245638659189765e-05, - "loss": 2.8234, + "learning_rate": 3.6245729721140856e-05, + "loss": 2.7839, "step": 83100 }, { "epoch": 0.28, - "learning_rate": 3.6229087081162324e-05, - "loss": 2.8182, + "learning_rate": 3.622917825269458e-05, + "loss": 2.798, "step": 83200 }, { "epoch": 0.28, - "learning_rate": 3.621253550313487e-05, - "loss": 2.8256, + "learning_rate": 3.62126267842483e-05, + "loss": 2.8024, "step": 83300 }, { "epoch": 0.28, - "learning_rate": 3.619598392510743e-05, - "loss": 2.8285, + "learning_rate": 3.619607531580202e-05, + "loss": 2.7996, "step": 83400 }, { "epoch": 0.28, - "learning_rate": 3.617943234707997e-05, - "loss": 2.813, + "learning_rate": 3.617952384735574e-05, + "loss": 2.7864, "step": 83500 }, { "epoch": 0.28, - "learning_rate": 3.6162880769052523e-05, - "loss": 2.8364, + "learning_rate": 3.616297237890946e-05, + "loss": 2.7975, "step": 83600 }, { "epoch": 0.28, - "learning_rate": 3.6146329191025075e-05, - "loss": 2.83, + "learning_rate": 3.614642091046318e-05, + "loss": 2.8074, "step": 83700 }, { "epoch": 0.28, - "learning_rate": 3.612977761299762e-05, - "loss": 2.8121, + "learning_rate": 3.6129869442016896e-05, + "loss": 2.7986, "step": 83800 }, { "epoch": 0.28, - "learning_rate": 3.611322603497018e-05, - "loss": 2.8203, + "learning_rate": 3.6113317973570615e-05, + "loss": 2.7989, "step": 83900 }, { "epoch": 0.28, - "learning_rate": 3.609667445694272e-05, - "loss": 2.7977, + "learning_rate": 3.6096766505124334e-05, + "loss": 2.7948, "step": 84000 }, { "epoch": 0.28, - "learning_rate": 3.608012287891528e-05, - "loss": 2.818, + "learning_rate": 3.608021503667805e-05, + "loss": 2.7916, "step": 84100 }, { "epoch": 0.28, - "learning_rate": 3.6063571300887826e-05, - "loss": 2.8206, + "learning_rate": 3.606366356823178e-05, + "loss": 2.7937, "step": 84200 }, { "epoch": 0.28, - "learning_rate": 3.604701972286038e-05, - "loss": 2.7989, + "learning_rate": 3.60471120997855e-05, + "loss": 2.7929, "step": 84300 }, { "epoch": 0.28, - "learning_rate": 3.603046814483293e-05, - "loss": 2.8098, + "learning_rate": 3.603056063133922e-05, + "loss": 2.7914, "step": 84400 }, { "epoch": 0.28, - "learning_rate": 3.601391656680548e-05, - "loss": 2.8111, + "learning_rate": 3.601400916289293e-05, + "loss": 2.7831, "step": 84500 }, { "epoch": 0.28, - "learning_rate": 3.599736498877803e-05, - "loss": 2.8119, + "learning_rate": 3.5997457694446655e-05, + "loss": 2.78, "step": 84600 }, { "epoch": 0.28, - "learning_rate": 3.598081341075058e-05, - "loss": 2.8284, + "learning_rate": 3.5980906226000374e-05, + "loss": 2.809, "step": 84700 }, { "epoch": 0.28, - "learning_rate": 3.5964261832723136e-05, - "loss": 2.8136, + "learning_rate": 3.596435475755409e-05, + "loss": 2.7987, "step": 84800 }, { "epoch": 0.28, - "learning_rate": 3.594771025469568e-05, - "loss": 2.8239, + "learning_rate": 3.594780328910781e-05, + "loss": 2.7889, "step": 84900 }, { "epoch": 0.28, - "learning_rate": 3.593115867666824e-05, - "loss": 2.8094, + "learning_rate": 3.593125182066153e-05, + "loss": 2.806, "step": 85000 }, { "epoch": 0.28, - "learning_rate": 3.5914607098640784e-05, - "loss": 2.8071, + "learning_rate": 3.591470035221525e-05, + "loss": 2.8064, "step": 85100 }, { "epoch": 0.28, - "learning_rate": 3.5898055520613336e-05, - "loss": 2.8304, + "learning_rate": 3.589814888376897e-05, + "loss": 2.7939, "step": 85200 }, { "epoch": 0.28, - "learning_rate": 3.588150394258589e-05, - "loss": 2.8058, + "learning_rate": 3.588159741532269e-05, + "loss": 2.7968, "step": 85300 }, { "epoch": 0.28, - "learning_rate": 3.586495236455844e-05, - "loss": 2.8181, + "learning_rate": 3.5865045946876406e-05, + "loss": 2.8035, "step": 85400 }, { "epoch": 0.28, - "learning_rate": 3.584840078653099e-05, - "loss": 2.8105, + "learning_rate": 3.5848494478430125e-05, + "loss": 2.7947, "step": 85500 }, { "epoch": 0.28, - "learning_rate": 3.583184920850354e-05, - "loss": 2.8194, + "learning_rate": 3.583194300998385e-05, + "loss": 2.79, "step": 85600 }, { "epoch": 0.28, - "learning_rate": 3.5815297630476094e-05, - "loss": 2.8251, + "learning_rate": 3.581539154153757e-05, + "loss": 2.7937, "step": 85700 }, { "epoch": 0.28, - "learning_rate": 3.579874605244864e-05, - "loss": 2.8225, + "learning_rate": 3.579884007309129e-05, + "loss": 2.7986, "step": 85800 }, { "epoch": 0.28, - "learning_rate": 3.57821944744212e-05, - "loss": 2.8093, + "learning_rate": 3.5782288604645e-05, + "loss": 2.7959, "step": 85900 }, { "epoch": 0.28, - "learning_rate": 3.576564289639374e-05, - "loss": 2.8158, + "learning_rate": 3.576573713619872e-05, + "loss": 2.7817, "step": 86000 }, { "epoch": 0.29, - "learning_rate": 3.574909131836629e-05, - "loss": 2.8041, + "learning_rate": 3.5749185667752446e-05, + "loss": 2.7887, "step": 86100 }, { "epoch": 0.29, - "learning_rate": 3.5732539740338845e-05, - "loss": 2.822, + "learning_rate": 3.5732634199306165e-05, + "loss": 2.8046, "step": 86200 }, { "epoch": 0.29, - "learning_rate": 3.5715988162311396e-05, - "loss": 2.8219, + "learning_rate": 3.5716082730859884e-05, + "loss": 2.8154, "step": 86300 }, { "epoch": 0.29, - "learning_rate": 3.569943658428395e-05, - "loss": 2.812, + "learning_rate": 3.56995312624136e-05, + "loss": 2.7992, "step": 86400 }, { "epoch": 0.29, - "learning_rate": 3.56828850062565e-05, - "loss": 2.8077, + "learning_rate": 3.568297979396732e-05, + "loss": 2.7906, "step": 86500 }, { "epoch": 0.29, - "learning_rate": 3.566633342822905e-05, - "loss": 2.8205, + "learning_rate": 3.566642832552104e-05, + "loss": 2.7971, "step": 86600 }, { "epoch": 0.29, - "learning_rate": 3.56497818502016e-05, - "loss": 2.833, + "learning_rate": 3.564987685707476e-05, + "loss": 2.8006, "step": 86700 }, { "epoch": 0.29, - "learning_rate": 3.563323027217415e-05, - "loss": 2.8045, + "learning_rate": 3.563332538862848e-05, + "loss": 2.7867, "step": 86800 }, { "epoch": 0.29, - "learning_rate": 3.56166786941467e-05, - "loss": 2.8233, + "learning_rate": 3.56167739201822e-05, + "loss": 2.8029, "step": 86900 }, { "epoch": 0.29, - "learning_rate": 3.560012711611925e-05, - "loss": 2.823, + "learning_rate": 3.5600222451735924e-05, + "loss": 2.8045, "step": 87000 }, { "epoch": 0.29, - "learning_rate": 3.55835755380918e-05, - "loss": 2.8174, + "learning_rate": 3.558367098328964e-05, + "loss": 2.7958, "step": 87100 }, { "epoch": 0.29, - "learning_rate": 3.5567023960064354e-05, - "loss": 2.8256, + "learning_rate": 3.556711951484336e-05, + "loss": 2.7877, "step": 87200 }, { "epoch": 0.29, - "learning_rate": 3.5550472382036906e-05, - "loss": 2.8138, + "learning_rate": 3.5550568046397074e-05, + "loss": 2.7957, "step": 87300 }, { "epoch": 0.29, - "learning_rate": 3.553392080400946e-05, - "loss": 2.8049, + "learning_rate": 3.553401657795079e-05, + "loss": 2.7879, "step": 87400 }, { "epoch": 0.29, - "learning_rate": 3.551736922598201e-05, - "loss": 2.802, + "learning_rate": 3.551746510950452e-05, + "loss": 2.7893, "step": 87500 }, { "epoch": 0.29, - "learning_rate": 3.550081764795456e-05, - "loss": 2.8099, + "learning_rate": 3.550091364105824e-05, + "loss": 2.7988, "step": 87600 }, { "epoch": 0.29, - "learning_rate": 3.5484266069927105e-05, - "loss": 2.8038, + "learning_rate": 3.5484362172611957e-05, + "loss": 2.7894, "step": 87700 }, { "epoch": 0.29, - "learning_rate": 3.5467714491899664e-05, - "loss": 2.8119, + "learning_rate": 3.5467810704165676e-05, + "loss": 2.7894, "step": 87800 }, { "epoch": 0.29, - "learning_rate": 3.545116291387221e-05, - "loss": 2.8169, + "learning_rate": 3.5451259235719395e-05, + "loss": 2.7947, "step": 87900 }, { "epoch": 0.29, - "learning_rate": 3.543461133584476e-05, - "loss": 2.818, + "learning_rate": 3.5434707767273114e-05, + "loss": 2.7858, "step": 88000 }, { "epoch": 0.29, - "learning_rate": 3.541805975781731e-05, - "loss": 2.802, + "learning_rate": 3.541815629882683e-05, + "loss": 2.7793, "step": 88100 }, { "epoch": 0.29, - "learning_rate": 3.540150817978986e-05, - "loss": 2.8132, + "learning_rate": 3.540160483038055e-05, + "loss": 2.7998, "step": 88200 }, { "epoch": 0.29, - "learning_rate": 3.5384956601762415e-05, - "loss": 2.8137, + "learning_rate": 3.538505336193427e-05, + "loss": 2.7966, "step": 88300 }, { "epoch": 0.29, - "learning_rate": 3.536840502373496e-05, - "loss": 2.8196, + "learning_rate": 3.5368501893487996e-05, + "loss": 2.7911, "step": 88400 }, { "epoch": 0.29, - "learning_rate": 3.535185344570752e-05, - "loss": 2.8158, + "learning_rate": 3.5351950425041715e-05, + "loss": 2.7925, "step": 88500 }, { "epoch": 0.29, - "learning_rate": 3.533530186768006e-05, - "loss": 2.8068, + "learning_rate": 3.5335398956595434e-05, + "loss": 2.7874, "step": 88600 }, { "epoch": 0.29, - "learning_rate": 3.531875028965262e-05, - "loss": 2.7977, + "learning_rate": 3.5318847488149146e-05, + "loss": 2.792, "step": 88700 }, { "epoch": 0.29, - "learning_rate": 3.5302198711625166e-05, - "loss": 2.8049, + "learning_rate": 3.5302296019702865e-05, + "loss": 2.798, "step": 88800 }, { "epoch": 0.29, - "learning_rate": 3.5285647133597724e-05, - "loss": 2.8167, + "learning_rate": 3.528574455125659e-05, + "loss": 2.8084, "step": 88900 }, { "epoch": 0.29, - "learning_rate": 3.526909555557027e-05, - "loss": 2.807, + "learning_rate": 3.526919308281031e-05, + "loss": 2.8082, "step": 89000 }, { "epoch": 0.29, - "learning_rate": 3.525254397754282e-05, - "loss": 2.8163, + "learning_rate": 3.525264161436403e-05, + "loss": 2.7882, "step": 89100 }, { "epoch": 0.3, - "learning_rate": 3.523599239951537e-05, - "loss": 2.8135, + "learning_rate": 3.523609014591775e-05, + "loss": 2.7884, "step": 89200 }, { "epoch": 0.3, - "learning_rate": 3.521944082148792e-05, - "loss": 2.8198, + "learning_rate": 3.521953867747147e-05, + "loss": 2.7991, "step": 89300 }, { "epoch": 0.3, - "learning_rate": 3.5202889243460476e-05, - "loss": 2.813, + "learning_rate": 3.5202987209025186e-05, + "loss": 2.7927, "step": 89400 }, { "epoch": 0.3, - "learning_rate": 3.518633766543302e-05, - "loss": 2.8167, + "learning_rate": 3.5186435740578905e-05, + "loss": 2.8031, "step": 89500 }, { "epoch": 0.3, - "learning_rate": 3.516978608740558e-05, - "loss": 2.8099, + "learning_rate": 3.5169884272132624e-05, + "loss": 2.7861, "step": 89600 }, { "epoch": 0.3, - "learning_rate": 3.5153234509378124e-05, - "loss": 2.8235, + "learning_rate": 3.515333280368634e-05, + "loss": 2.7839, "step": 89700 }, { "epoch": 0.3, - "learning_rate": 3.5136682931350675e-05, - "loss": 2.8269, + "learning_rate": 3.513678133524007e-05, + "loss": 2.7954, "step": 89800 }, { "epoch": 0.3, - "learning_rate": 3.512013135332323e-05, - "loss": 2.8034, + "learning_rate": 3.512022986679379e-05, + "loss": 2.8076, "step": 89900 }, { "epoch": 0.3, - "learning_rate": 3.510357977529578e-05, - "loss": 2.8264, + "learning_rate": 3.51036783983475e-05, + "loss": 2.7951, "step": 90000 }, { "epoch": 0.3, - "learning_rate": 3.508702819726833e-05, - "loss": 2.825, + "learning_rate": 3.508712692990122e-05, + "loss": 2.8056, "step": 90100 }, { "epoch": 0.3, - "learning_rate": 3.5070476619240875e-05, - "loss": 2.8259, + "learning_rate": 3.507057546145494e-05, + "loss": 2.784, "step": 90200 }, { "epoch": 0.3, - "learning_rate": 3.505392504121343e-05, - "loss": 2.8093, + "learning_rate": 3.5054023993008664e-05, + "loss": 2.7861, "step": 90300 }, { "epoch": 0.3, - "learning_rate": 3.503737346318598e-05, - "loss": 2.8142, + "learning_rate": 3.503747252456238e-05, + "loss": 2.8018, "step": 90400 }, { "epoch": 0.3, - "learning_rate": 3.5020821885158536e-05, - "loss": 2.812, + "learning_rate": 3.50209210561161e-05, + "loss": 2.7928, "step": 90500 }, { "epoch": 0.3, - "learning_rate": 3.500427030713108e-05, - "loss": 2.8219, + "learning_rate": 3.500436958766982e-05, + "loss": 2.8092, "step": 90600 }, { "epoch": 0.3, - "learning_rate": 3.498771872910363e-05, - "loss": 2.8255, + "learning_rate": 3.498781811922354e-05, + "loss": 2.8036, "step": 90700 }, { "epoch": 0.3, - "learning_rate": 3.4971167151076184e-05, - "loss": 2.8156, + "learning_rate": 3.497126665077726e-05, + "loss": 2.8082, "step": 90800 }, { "epoch": 0.3, - "learning_rate": 3.4954615573048736e-05, - "loss": 2.8154, + "learning_rate": 3.495471518233098e-05, + "loss": 2.7942, "step": 90900 }, { "epoch": 0.3, - "learning_rate": 3.493806399502129e-05, - "loss": 2.8149, + "learning_rate": 3.4938163713884697e-05, + "loss": 2.7854, "step": 91000 }, { "epoch": 0.3, - "learning_rate": 3.492151241699384e-05, - "loss": 2.798, + "learning_rate": 3.4921612245438416e-05, + "loss": 2.7866, "step": 91100 }, { "epoch": 0.3, - "learning_rate": 3.490496083896639e-05, - "loss": 2.8208, + "learning_rate": 3.4905060776992135e-05, + "loss": 2.7999, "step": 91200 }, { "epoch": 0.3, - "learning_rate": 3.488840926093894e-05, - "loss": 2.8249, + "learning_rate": 3.488850930854586e-05, + "loss": 2.7915, "step": 91300 }, { "epoch": 0.3, - "learning_rate": 3.487185768291149e-05, - "loss": 2.8209, + "learning_rate": 3.487195784009957e-05, + "loss": 2.7863, "step": 91400 }, { "epoch": 0.3, - "learning_rate": 3.485530610488404e-05, - "loss": 2.8102, + "learning_rate": 3.485540637165329e-05, + "loss": 2.7946, "step": 91500 }, { "epoch": 0.3, - "learning_rate": 3.483875452685659e-05, - "loss": 2.8164, + "learning_rate": 3.483885490320701e-05, + "loss": 2.8056, "step": 91600 }, { "epoch": 0.3, - "learning_rate": 3.482220294882914e-05, - "loss": 2.8143, + "learning_rate": 3.4822303434760736e-05, + "loss": 2.8009, "step": 91700 }, { "epoch": 0.3, - "learning_rate": 3.4805651370801694e-05, - "loss": 2.8136, + "learning_rate": 3.4805751966314455e-05, + "loss": 2.7764, "step": 91800 }, { "epoch": 0.3, - "learning_rate": 3.4789099792774245e-05, - "loss": 2.8163, + "learning_rate": 3.4789200497868174e-05, + "loss": 2.7902, "step": 91900 }, { "epoch": 0.3, - "learning_rate": 3.47725482147468e-05, - "loss": 2.8238, + "learning_rate": 3.477264902942189e-05, + "loss": 2.798, "step": 92000 }, { "epoch": 0.3, - "learning_rate": 3.475599663671935e-05, - "loss": 2.8191, + "learning_rate": 3.475609756097561e-05, + "loss": 2.8087, "step": 92100 }, { "epoch": 0.31, - "learning_rate": 3.47394450586919e-05, - "loss": 2.8224, + "learning_rate": 3.473954609252933e-05, + "loss": 2.8151, "step": 92200 }, { "epoch": 0.31, - "learning_rate": 3.4722893480664445e-05, - "loss": 2.8104, + "learning_rate": 3.472299462408305e-05, + "loss": 2.7908, "step": 92300 }, { "epoch": 0.31, - "learning_rate": 3.4706341902637e-05, - "loss": 2.829, + "learning_rate": 3.470644315563677e-05, + "loss": 2.7867, "step": 92400 }, { "epoch": 0.31, - "learning_rate": 3.468979032460955e-05, - "loss": 2.829, + "learning_rate": 3.468989168719049e-05, + "loss": 2.7974, "step": 92500 }, { "epoch": 0.31, - "learning_rate": 3.46732387465821e-05, - "loss": 2.8201, + "learning_rate": 3.467334021874421e-05, + "loss": 2.7866, "step": 92600 }, { "epoch": 0.31, - "learning_rate": 3.465668716855465e-05, - "loss": 2.8195, + "learning_rate": 3.465678875029793e-05, + "loss": 2.7805, "step": 92700 }, { "epoch": 0.31, - "learning_rate": 3.46401355905272e-05, - "loss": 2.8034, + "learning_rate": 3.4640237281851645e-05, + "loss": 2.7888, "step": 92800 }, { "epoch": 0.31, - "learning_rate": 3.4623584012499755e-05, - "loss": 2.8166, + "learning_rate": 3.4623685813405364e-05, + "loss": 2.8069, "step": 92900 }, { "epoch": 0.31, - "learning_rate": 3.4607032434472306e-05, - "loss": 2.8231, + "learning_rate": 3.460713434495908e-05, + "loss": 2.7875, "step": 93000 }, { "epoch": 0.31, - "learning_rate": 3.459048085644486e-05, - "loss": 2.8119, + "learning_rate": 3.459058287651281e-05, + "loss": 2.7935, "step": 93100 }, { "epoch": 0.31, - "learning_rate": 3.45739292784174e-05, - "loss": 2.8089, + "learning_rate": 3.457403140806653e-05, + "loss": 2.7907, "step": 93200 }, { "epoch": 0.31, - "learning_rate": 3.455737770038996e-05, - "loss": 2.8274, + "learning_rate": 3.455747993962025e-05, + "loss": 2.8058, "step": 93300 }, { "epoch": 0.31, - "learning_rate": 3.4540826122362506e-05, - "loss": 2.8133, + "learning_rate": 3.4540928471173966e-05, + "loss": 2.8035, "step": 93400 }, { "epoch": 0.31, - "learning_rate": 3.4524274544335064e-05, - "loss": 2.8078, + "learning_rate": 3.4524377002727685e-05, + "loss": 2.7926, "step": 93500 }, { "epoch": 0.31, - "learning_rate": 3.450772296630761e-05, - "loss": 2.8129, + "learning_rate": 3.4507825534281404e-05, + "loss": 2.7808, "step": 93600 }, { "epoch": 0.31, - "learning_rate": 3.449117138828016e-05, - "loss": 2.8085, + "learning_rate": 3.449127406583512e-05, + "loss": 2.7932, "step": 93700 }, { "epoch": 0.31, - "learning_rate": 3.447461981025271e-05, - "loss": 2.8094, + "learning_rate": 3.447472259738884e-05, + "loss": 2.7944, "step": 93800 }, { "epoch": 0.31, - "learning_rate": 3.445806823222526e-05, - "loss": 2.8188, + "learning_rate": 3.445817112894256e-05, + "loss": 2.781, "step": 93900 }, { "epoch": 0.31, - "learning_rate": 3.4441516654197815e-05, - "loss": 2.832, + "learning_rate": 3.444161966049628e-05, + "loss": 2.8042, "step": 94000 }, { "epoch": 0.31, - "learning_rate": 3.442496507617036e-05, - "loss": 2.8094, + "learning_rate": 3.4425068192050005e-05, + "loss": 2.7935, "step": 94100 }, { "epoch": 0.31, - "learning_rate": 3.440841349814292e-05, - "loss": 2.804, + "learning_rate": 3.440851672360372e-05, + "loss": 2.7941, "step": 94200 }, { "epoch": 0.31, - "learning_rate": 3.4391861920115463e-05, - "loss": 2.8104, + "learning_rate": 3.4391965255157437e-05, + "loss": 2.7841, "step": 94300 }, { "epoch": 0.31, - "learning_rate": 3.437531034208802e-05, - "loss": 2.8287, + "learning_rate": 3.4375413786711156e-05, + "loss": 2.7892, "step": 94400 }, { "epoch": 0.31, - "learning_rate": 3.4358758764060567e-05, - "loss": 2.8207, + "learning_rate": 3.435886231826488e-05, + "loss": 2.7938, "step": 94500 }, { "epoch": 0.31, - "learning_rate": 3.434220718603312e-05, - "loss": 2.8125, + "learning_rate": 3.43423108498186e-05, + "loss": 2.7926, "step": 94600 }, { "epoch": 0.31, - "learning_rate": 3.432565560800567e-05, - "loss": 2.8115, + "learning_rate": 3.432575938137232e-05, + "loss": 2.793, "step": 94700 }, { "epoch": 0.31, - "learning_rate": 3.4309104029978215e-05, - "loss": 2.8201, + "learning_rate": 3.430920791292604e-05, + "loss": 2.7951, "step": 94800 }, { "epoch": 0.31, - "learning_rate": 3.429255245195077e-05, - "loss": 2.8093, + "learning_rate": 3.429265644447975e-05, + "loss": 2.8048, "step": 94900 }, { "epoch": 0.31, - "learning_rate": 3.427600087392332e-05, - "loss": 2.8085, + "learning_rate": 3.4276104976033476e-05, + "loss": 2.7899, "step": 95000 }, { "epoch": 0.31, - "learning_rate": 3.4259449295895876e-05, - "loss": 2.8029, + "learning_rate": 3.4259553507587195e-05, + "loss": 2.7918, "step": 95100 }, { "epoch": 0.32, - "learning_rate": 3.424289771786842e-05, - "loss": 2.8175, + "learning_rate": 3.4243002039140914e-05, + "loss": 2.7899, "step": 95200 }, { "epoch": 0.32, - "learning_rate": 3.422634613984097e-05, - "loss": 2.8096, + "learning_rate": 3.422645057069463e-05, + "loss": 2.7859, "step": 95300 }, { "epoch": 0.32, - "learning_rate": 3.4209794561813524e-05, - "loss": 2.8182, + "learning_rate": 3.420989910224835e-05, + "loss": 2.7945, "step": 95400 }, { "epoch": 0.32, - "learning_rate": 3.4193242983786076e-05, - "loss": 2.8073, + "learning_rate": 3.419334763380208e-05, + "loss": 2.7848, "step": 95500 }, { "epoch": 0.32, - "learning_rate": 3.417669140575863e-05, - "loss": 2.8121, + "learning_rate": 3.417679616535579e-05, + "loss": 2.7842, "step": 95600 }, { "epoch": 0.32, - "learning_rate": 3.416013982773118e-05, - "loss": 2.8117, + "learning_rate": 3.416024469690951e-05, + "loss": 2.7983, "step": 95700 }, { "epoch": 0.32, - "learning_rate": 3.414358824970373e-05, - "loss": 2.8116, + "learning_rate": 3.414369322846323e-05, + "loss": 2.8021, "step": 95800 }, { "epoch": 0.32, - "learning_rate": 3.4127036671676275e-05, - "loss": 2.8264, + "learning_rate": 3.4127141760016954e-05, + "loss": 2.7948, "step": 95900 }, { "epoch": 0.32, - "learning_rate": 3.4110485093648834e-05, - "loss": 2.8057, + "learning_rate": 3.411059029157067e-05, + "loss": 2.7876, "step": 96000 }, { "epoch": 0.32, - "learning_rate": 3.409393351562138e-05, - "loss": 2.8057, + "learning_rate": 3.409403882312439e-05, + "loss": 2.7933, "step": 96100 }, { "epoch": 0.32, - "learning_rate": 3.407738193759393e-05, - "loss": 2.8038, + "learning_rate": 3.4077487354678104e-05, + "loss": 2.7829, "step": 96200 }, { "epoch": 0.32, - "learning_rate": 3.406083035956648e-05, - "loss": 2.8014, + "learning_rate": 3.406093588623182e-05, + "loss": 2.8034, "step": 96300 }, { "epoch": 0.32, - "learning_rate": 3.4044278781539033e-05, - "loss": 2.8196, + "learning_rate": 3.404438441778555e-05, + "loss": 2.7833, "step": 96400 }, { "epoch": 0.32, - "learning_rate": 3.4027727203511585e-05, - "loss": 2.8168, + "learning_rate": 3.402783294933927e-05, + "loss": 2.8065, "step": 96500 }, { "epoch": 0.32, - "learning_rate": 3.401117562548414e-05, - "loss": 2.8057, + "learning_rate": 3.401128148089299e-05, + "loss": 2.7941, "step": 96600 }, { "epoch": 0.32, - "learning_rate": 3.399462404745669e-05, - "loss": 2.8097, + "learning_rate": 3.3994730012446706e-05, + "loss": 2.7911, "step": 96700 }, { "epoch": 0.32, - "learning_rate": 3.397807246942924e-05, - "loss": 2.8137, + "learning_rate": 3.3978178544000425e-05, + "loss": 2.8107, "step": 96800 }, { "epoch": 0.32, - "learning_rate": 3.3961520891401785e-05, - "loss": 2.8116, + "learning_rate": 3.3961627075554144e-05, + "loss": 2.7953, "step": 96900 }, { "epoch": 0.32, - "learning_rate": 3.3944969313374336e-05, - "loss": 2.7983, + "learning_rate": 3.394507560710786e-05, + "loss": 2.7856, "step": 97000 }, { "epoch": 0.32, - "learning_rate": 3.392841773534689e-05, - "loss": 2.8096, + "learning_rate": 3.392852413866158e-05, + "loss": 2.7887, "step": 97100 }, { "epoch": 0.32, - "learning_rate": 3.391186615731944e-05, - "loss": 2.793, + "learning_rate": 3.39119726702153e-05, + "loss": 2.7941, "step": 97200 }, { "epoch": 0.32, - "learning_rate": 3.389531457929199e-05, - "loss": 2.8234, + "learning_rate": 3.3895421201769026e-05, + "loss": 2.7983, "step": 97300 }, { "epoch": 0.32, - "learning_rate": 3.387876300126454e-05, - "loss": 2.8147, + "learning_rate": 3.3878869733322745e-05, + "loss": 2.791, "step": 97400 }, { "epoch": 0.32, - "learning_rate": 3.3862211423237094e-05, - "loss": 2.8088, + "learning_rate": 3.3862318264876464e-05, + "loss": 2.7934, "step": 97500 }, { "epoch": 0.32, - "learning_rate": 3.3845659845209646e-05, - "loss": 2.8178, + "learning_rate": 3.3845766796430177e-05, + "loss": 2.7918, "step": 97600 }, { "epoch": 0.32, - "learning_rate": 3.38291082671822e-05, - "loss": 2.8301, + "learning_rate": 3.3829215327983896e-05, + "loss": 2.7978, "step": 97700 }, { "epoch": 0.32, - "learning_rate": 3.381255668915474e-05, - "loss": 2.824, + "learning_rate": 3.381266385953762e-05, + "loss": 2.7978, "step": 97800 }, { "epoch": 0.32, - "learning_rate": 3.37960051111273e-05, - "loss": 2.8186, + "learning_rate": 3.379611239109134e-05, + "loss": 2.8136, "step": 97900 }, { "epoch": 0.32, - "learning_rate": 3.3779453533099845e-05, - "loss": 2.8083, + "learning_rate": 3.377956092264506e-05, + "loss": 2.7875, "step": 98000 }, { "epoch": 0.32, - "learning_rate": 3.37629019550724e-05, - "loss": 2.8157, + "learning_rate": 3.376300945419878e-05, + "loss": 2.7835, "step": 98100 }, { "epoch": 0.33, - "learning_rate": 3.374635037704495e-05, - "loss": 2.829, + "learning_rate": 3.37464579857525e-05, + "loss": 2.7872, "step": 98200 }, { "epoch": 0.33, - "learning_rate": 3.37297987990175e-05, - "loss": 2.8202, + "learning_rate": 3.3729906517306216e-05, + "loss": 2.7989, "step": 98300 }, { "epoch": 0.33, - "learning_rate": 3.371324722099005e-05, - "loss": 2.8175, + "learning_rate": 3.3713355048859935e-05, + "loss": 2.7881, "step": 98400 }, { "epoch": 0.33, - "learning_rate": 3.3696695642962603e-05, - "loss": 2.8132, + "learning_rate": 3.3696803580413654e-05, + "loss": 2.7989, "step": 98500 }, { "epoch": 0.33, - "learning_rate": 3.3680144064935155e-05, - "loss": 2.8108, + "learning_rate": 3.368025211196737e-05, + "loss": 2.7989, "step": 98600 }, { "epoch": 0.33, - "learning_rate": 3.36635924869077e-05, - "loss": 2.7982, + "learning_rate": 3.36637006435211e-05, + "loss": 2.8182, "step": 98700 }, { "epoch": 0.33, - "learning_rate": 3.364704090888026e-05, - "loss": 2.8052, + "learning_rate": 3.364714917507482e-05, + "loss": 2.7951, "step": 98800 }, { "epoch": 0.33, - "learning_rate": 3.36304893308528e-05, - "loss": 2.8156, + "learning_rate": 3.363059770662854e-05, + "loss": 2.7792, "step": 98900 }, { "epoch": 0.33, - "learning_rate": 3.361393775282536e-05, - "loss": 2.8046, + "learning_rate": 3.361404623818225e-05, + "loss": 2.7931, "step": 99000 }, { "epoch": 0.33, - "learning_rate": 3.3597386174797906e-05, - "loss": 2.807, + "learning_rate": 3.359749476973597e-05, + "loss": 2.8, "step": 99100 }, { "epoch": 0.33, - "learning_rate": 3.358083459677046e-05, - "loss": 2.8166, + "learning_rate": 3.3580943301289694e-05, + "loss": 2.7939, "step": 99200 }, { "epoch": 0.33, - "learning_rate": 3.356428301874301e-05, - "loss": 2.8194, + "learning_rate": 3.356439183284341e-05, + "loss": 2.7996, "step": 99300 }, { "epoch": 0.33, - "learning_rate": 3.3547731440715554e-05, - "loss": 2.803, + "learning_rate": 3.354784036439713e-05, + "loss": 2.8009, "step": 99400 }, { "epoch": 0.33, - "learning_rate": 3.353117986268811e-05, - "loss": 2.8254, + "learning_rate": 3.353128889595085e-05, + "loss": 2.7943, "step": 99500 }, { "epoch": 0.33, - "learning_rate": 3.351462828466066e-05, - "loss": 2.8114, + "learning_rate": 3.351473742750457e-05, + "loss": 2.7853, "step": 99600 }, { "epoch": 0.33, - "learning_rate": 3.3498076706633216e-05, - "loss": 2.8137, + "learning_rate": 3.349818595905829e-05, + "loss": 2.7853, "step": 99700 }, { "epoch": 0.33, - "learning_rate": 3.348152512860576e-05, - "loss": 2.8166, + "learning_rate": 3.348163449061201e-05, + "loss": 2.8004, "step": 99800 }, { "epoch": 0.33, - "learning_rate": 3.346497355057832e-05, - "loss": 2.8018, + "learning_rate": 3.346508302216573e-05, + "loss": 2.7958, "step": 99900 }, { "epoch": 0.33, - "learning_rate": 3.3448421972550864e-05, - "loss": 2.8283, + "learning_rate": 3.3448531553719446e-05, + "loss": 2.7812, "step": 100000 }, { "epoch": 0.33, - "learning_rate": 3.3431870394523416e-05, - "loss": 2.8038, + "learning_rate": 3.3431980085273165e-05, + "loss": 2.7962, "step": 100100 }, { "epoch": 0.33, - "learning_rate": 3.341531881649597e-05, - "loss": 2.8066, + "learning_rate": 3.341542861682689e-05, + "loss": 2.788, "step": 100200 }, { "epoch": 0.33, - "learning_rate": 3.339876723846851e-05, - "loss": 2.8106, + "learning_rate": 3.339887714838061e-05, + "loss": 2.7938, "step": 100300 }, { "epoch": 0.33, - "learning_rate": 3.338221566044107e-05, - "loss": 2.8192, + "learning_rate": 3.338232567993432e-05, + "loss": 2.7806, "step": 100400 }, { "epoch": 0.33, - "learning_rate": 3.3365664082413615e-05, - "loss": 2.8167, + "learning_rate": 3.336577421148804e-05, + "loss": 2.7939, "step": 100500 }, { "epoch": 0.33, - "learning_rate": 3.3349112504386174e-05, - "loss": 2.8185, + "learning_rate": 3.3349222743041766e-05, + "loss": 2.7813, "step": 100600 }, { "epoch": 0.33, - "learning_rate": 3.333256092635872e-05, - "loss": 2.808, + "learning_rate": 3.3332671274595485e-05, + "loss": 2.8064, "step": 100700 }, { "epoch": 0.33, - "learning_rate": 3.331600934833127e-05, - "loss": 2.8211, + "learning_rate": 3.3316119806149204e-05, + "loss": 2.7841, "step": 100800 }, { "epoch": 0.33, - "learning_rate": 3.329945777030382e-05, - "loss": 2.8057, + "learning_rate": 3.329956833770292e-05, + "loss": 2.8049, "step": 100900 }, { "epoch": 0.33, - "learning_rate": 3.328290619227637e-05, - "loss": 2.8159, + "learning_rate": 3.328301686925664e-05, + "loss": 2.8005, "step": 101000 }, { "epoch": 0.33, - "learning_rate": 3.3266354614248925e-05, - "loss": 2.8267, + "learning_rate": 3.326646540081036e-05, + "loss": 2.7827, "step": 101100 }, { "epoch": 0.34, - "learning_rate": 3.3249803036221476e-05, - "loss": 2.7896, + "learning_rate": 3.324991393236408e-05, + "loss": 2.7936, "step": 101200 }, { "epoch": 0.34, - "learning_rate": 3.323325145819403e-05, - "loss": 2.8037, + "learning_rate": 3.32333624639178e-05, + "loss": 2.7981, "step": 101300 }, { "epoch": 0.34, - "learning_rate": 3.321669988016657e-05, - "loss": 2.8183, + "learning_rate": 3.321681099547152e-05, + "loss": 2.7851, "step": 101400 }, { "epoch": 0.34, - "learning_rate": 3.320014830213913e-05, - "loss": 2.8159, + "learning_rate": 3.320025952702524e-05, + "loss": 2.8006, "step": 101500 }, { "epoch": 0.34, - "learning_rate": 3.3183596724111676e-05, - "loss": 2.8086, + "learning_rate": 3.318370805857896e-05, + "loss": 2.7972, "step": 101600 }, { "epoch": 0.34, - "learning_rate": 3.316704514608423e-05, - "loss": 2.8099, + "learning_rate": 3.316715659013268e-05, + "loss": 2.7888, "step": 101700 }, { "epoch": 0.34, - "learning_rate": 3.315049356805678e-05, - "loss": 2.8087, + "learning_rate": 3.3150605121686394e-05, + "loss": 2.7833, "step": 101800 }, { "epoch": 0.34, - "learning_rate": 3.313394199002933e-05, - "loss": 2.8098, + "learning_rate": 3.313405365324011e-05, + "loss": 2.7792, "step": 101900 }, { "epoch": 0.34, - "learning_rate": 3.311739041200188e-05, - "loss": 2.8027, + "learning_rate": 3.311750218479384e-05, + "loss": 2.788, "step": 102000 }, { "epoch": 0.34, - "learning_rate": 3.3100838833974434e-05, - "loss": 2.8072, + "learning_rate": 3.310095071634756e-05, + "loss": 2.7767, "step": 102100 }, { "epoch": 0.34, - "learning_rate": 3.3084287255946986e-05, - "loss": 2.8202, + "learning_rate": 3.308439924790128e-05, + "loss": 2.7829, "step": 102200 }, { "epoch": 0.34, - "learning_rate": 3.306773567791954e-05, - "loss": 2.8134, + "learning_rate": 3.3067847779454996e-05, + "loss": 2.797, "step": 102300 }, { "epoch": 0.34, - "learning_rate": 3.305118409989208e-05, - "loss": 2.8091, + "learning_rate": 3.3051296311008715e-05, + "loss": 2.7872, "step": 102400 }, { "epoch": 0.34, - "learning_rate": 3.3034632521864634e-05, - "loss": 2.82, + "learning_rate": 3.3034744842562434e-05, + "loss": 2.7898, "step": 102500 }, { "epoch": 0.34, - "learning_rate": 3.3018080943837185e-05, - "loss": 2.8074, + "learning_rate": 3.301819337411615e-05, + "loss": 2.7898, "step": 102600 }, { "epoch": 0.34, - "learning_rate": 3.300152936580974e-05, - "loss": 2.8191, + "learning_rate": 3.300164190566987e-05, + "loss": 2.7948, "step": 102700 }, { "epoch": 0.34, - "learning_rate": 3.298497778778229e-05, - "loss": 2.8161, + "learning_rate": 3.298509043722359e-05, + "loss": 2.7868, "step": 102800 }, { "epoch": 0.34, - "learning_rate": 3.296842620975484e-05, - "loss": 2.8123, + "learning_rate": 3.296853896877731e-05, + "loss": 2.803, "step": 102900 }, { "epoch": 0.34, - "learning_rate": 3.295187463172739e-05, - "loss": 2.8195, + "learning_rate": 3.2951987500331035e-05, + "loss": 2.7888, "step": 103000 }, { "epoch": 0.34, - "learning_rate": 3.293532305369994e-05, - "loss": 2.8082, + "learning_rate": 3.293543603188475e-05, + "loss": 2.7972, "step": 103100 }, { "epoch": 0.34, - "learning_rate": 3.2918771475672495e-05, - "loss": 2.8131, + "learning_rate": 3.291888456343847e-05, + "loss": 2.7844, "step": 103200 }, { "epoch": 0.34, - "learning_rate": 3.290221989764504e-05, - "loss": 2.8141, + "learning_rate": 3.2902333094992186e-05, + "loss": 2.7916, "step": 103300 }, { "epoch": 0.34, - "learning_rate": 3.28856683196176e-05, - "loss": 2.8168, + "learning_rate": 3.288578162654591e-05, + "loss": 2.7793, "step": 103400 }, { "epoch": 0.34, - "learning_rate": 3.286911674159014e-05, - "loss": 2.8117, + "learning_rate": 3.286923015809963e-05, + "loss": 2.8114, "step": 103500 }, { "epoch": 0.34, - "learning_rate": 3.2852565163562694e-05, - "loss": 2.8153, + "learning_rate": 3.285267868965335e-05, + "loss": 2.8014, "step": 103600 }, { "epoch": 0.34, - "learning_rate": 3.2836013585535246e-05, - "loss": 2.8152, + "learning_rate": 3.283612722120707e-05, + "loss": 2.7956, "step": 103700 }, { "epoch": 0.34, - "learning_rate": 3.28194620075078e-05, - "loss": 2.8062, + "learning_rate": 3.281957575276078e-05, + "loss": 2.7915, "step": 103800 }, { "epoch": 0.34, - "learning_rate": 3.280291042948035e-05, - "loss": 2.8156, + "learning_rate": 3.2803024284314506e-05, + "loss": 2.783, "step": 103900 }, { "epoch": 0.34, - "learning_rate": 3.2786358851452894e-05, - "loss": 2.8086, + "learning_rate": 3.2786472815868225e-05, + "loss": 2.792, "step": 104000 }, { "epoch": 0.34, - "learning_rate": 3.276980727342545e-05, - "loss": 2.8118, + "learning_rate": 3.2769921347421944e-05, + "loss": 2.7838, "step": 104100 }, { "epoch": 0.34, - "learning_rate": 3.2753255695398e-05, - "loss": 2.8091, + "learning_rate": 3.275336987897566e-05, + "loss": 2.7865, "step": 104200 }, { "epoch": 0.35, - "learning_rate": 3.2736704117370556e-05, - "loss": 2.8174, + "learning_rate": 3.273681841052938e-05, + "loss": 2.7995, "step": 104300 }, { "epoch": 0.35, - "learning_rate": 3.27201525393431e-05, - "loss": 2.8141, + "learning_rate": 3.272026694208311e-05, + "loss": 2.7825, "step": 104400 }, { "epoch": 0.35, - "learning_rate": 3.270360096131566e-05, - "loss": 2.8085, + "learning_rate": 3.270371547363682e-05, + "loss": 2.7923, "step": 104500 }, { "epoch": 0.35, - "learning_rate": 3.2687049383288204e-05, - "loss": 2.8146, + "learning_rate": 3.268716400519054e-05, + "loss": 2.7852, "step": 104600 }, { "epoch": 0.35, - "learning_rate": 3.2670497805260755e-05, - "loss": 2.8109, + "learning_rate": 3.267061253674426e-05, + "loss": 2.7987, "step": 104700 }, { "epoch": 0.35, - "learning_rate": 3.265394622723331e-05, - "loss": 2.8261, + "learning_rate": 3.2654061068297984e-05, + "loss": 2.7958, "step": 104800 }, { "epoch": 0.35, - "learning_rate": 3.263739464920585e-05, - "loss": 2.8031, + "learning_rate": 3.26375095998517e-05, + "loss": 2.7904, "step": 104900 }, { "epoch": 0.35, - "learning_rate": 3.262084307117841e-05, - "loss": 2.8173, + "learning_rate": 3.262095813140542e-05, + "loss": 2.8019, "step": 105000 }, { "epoch": 0.35, - "learning_rate": 3.2604291493150955e-05, - "loss": 2.8155, + "learning_rate": 3.260440666295914e-05, + "loss": 2.7867, "step": 105100 }, { "epoch": 0.35, - "learning_rate": 3.258773991512351e-05, - "loss": 2.8125, + "learning_rate": 3.258785519451285e-05, + "loss": 2.786, "step": 105200 }, { "epoch": 0.35, - "learning_rate": 3.257118833709606e-05, - "loss": 2.8062, + "learning_rate": 3.257130372606658e-05, + "loss": 2.7963, "step": 105300 }, { "epoch": 0.35, - "learning_rate": 3.255463675906861e-05, - "loss": 2.8128, + "learning_rate": 3.25547522576203e-05, + "loss": 2.7889, "step": 105400 }, { "epoch": 0.35, - "learning_rate": 3.253808518104116e-05, - "loss": 2.815, + "learning_rate": 3.253820078917402e-05, + "loss": 2.7809, "step": 105500 }, { "epoch": 0.35, - "learning_rate": 3.252153360301371e-05, - "loss": 2.8102, + "learning_rate": 3.2521649320727736e-05, + "loss": 2.7993, "step": 105600 }, { "epoch": 0.35, - "learning_rate": 3.2504982024986264e-05, - "loss": 2.8117, + "learning_rate": 3.2505097852281455e-05, + "loss": 2.778, "step": 105700 }, { "epoch": 0.35, - "learning_rate": 3.248843044695881e-05, - "loss": 2.8154, + "learning_rate": 3.248854638383518e-05, + "loss": 2.7981, "step": 105800 }, { "epoch": 0.35, - "learning_rate": 3.247187886893137e-05, - "loss": 2.8076, + "learning_rate": 3.247199491538889e-05, + "loss": 2.796, "step": 105900 }, { "epoch": 0.35, - "learning_rate": 3.245532729090391e-05, - "loss": 2.8165, + "learning_rate": 3.245544344694261e-05, + "loss": 2.7921, "step": 106000 }, { "epoch": 0.35, - "learning_rate": 3.243877571287647e-05, - "loss": 2.8194, + "learning_rate": 3.243889197849633e-05, + "loss": 2.8061, "step": 106100 }, { "epoch": 0.35, - "learning_rate": 3.2422224134849016e-05, - "loss": 2.8317, + "learning_rate": 3.2422340510050056e-05, + "loss": 2.7927, "step": 106200 }, { "epoch": 0.35, - "learning_rate": 3.240567255682157e-05, - "loss": 2.801, + "learning_rate": 3.2405789041603775e-05, + "loss": 2.7969, "step": 106300 }, { "epoch": 0.35, - "learning_rate": 3.238912097879412e-05, - "loss": 2.813, + "learning_rate": 3.2389237573157494e-05, + "loss": 2.7894, "step": 106400 }, { "epoch": 0.35, - "learning_rate": 3.237256940076667e-05, - "loss": 2.7996, + "learning_rate": 3.2372686104711213e-05, + "loss": 2.798, "step": 106500 }, { "epoch": 0.35, - "learning_rate": 3.235601782273922e-05, - "loss": 2.8047, + "learning_rate": 3.2356134636264926e-05, + "loss": 2.7968, "step": 106600 }, { "epoch": 0.35, - "learning_rate": 3.2339466244711774e-05, - "loss": 2.8138, + "learning_rate": 3.233958316781865e-05, + "loss": 2.796, "step": 106700 }, { "epoch": 0.35, - "learning_rate": 3.2322914666684325e-05, - "loss": 2.8071, + "learning_rate": 3.232303169937237e-05, + "loss": 2.7892, "step": 106800 }, { "epoch": 0.35, - "learning_rate": 3.230636308865687e-05, - "loss": 2.8006, + "learning_rate": 3.230648023092609e-05, + "loss": 2.801, "step": 106900 }, { "epoch": 0.35, - "learning_rate": 3.228981151062943e-05, - "loss": 2.8147, + "learning_rate": 3.228992876247981e-05, + "loss": 2.7956, "step": 107000 }, { "epoch": 0.35, - "learning_rate": 3.227325993260197e-05, - "loss": 2.8068, + "learning_rate": 3.227337729403353e-05, + "loss": 2.7937, "step": 107100 }, { "epoch": 0.35, - "learning_rate": 3.2256708354574525e-05, - "loss": 2.8081, + "learning_rate": 3.225682582558725e-05, + "loss": 2.799, "step": 107200 }, { "epoch": 0.36, - "learning_rate": 3.2240156776547077e-05, - "loss": 2.8007, + "learning_rate": 3.2240274357140965e-05, + "loss": 2.7903, "step": 107300 }, { "epoch": 0.36, - "learning_rate": 3.222360519851963e-05, - "loss": 2.8114, + "learning_rate": 3.2223722888694684e-05, + "loss": 2.7842, "step": 107400 }, { "epoch": 0.36, - "learning_rate": 3.220705362049218e-05, - "loss": 2.8228, + "learning_rate": 3.22071714202484e-05, + "loss": 2.8027, "step": 107500 }, { "epoch": 0.36, - "learning_rate": 3.219050204246473e-05, - "loss": 2.8191, + "learning_rate": 3.219061995180213e-05, + "loss": 2.8145, "step": 107600 }, { "epoch": 0.36, - "learning_rate": 3.217395046443728e-05, - "loss": 2.8089, + "learning_rate": 3.217406848335585e-05, + "loss": 2.794, "step": 107700 }, { "epoch": 0.36, - "learning_rate": 3.2157398886409835e-05, - "loss": 2.807, + "learning_rate": 3.215751701490957e-05, + "loss": 2.7794, "step": 107800 }, { "epoch": 0.36, - "learning_rate": 3.214084730838238e-05, - "loss": 2.8074, + "learning_rate": 3.2140965546463286e-05, + "loss": 2.7773, "step": 107900 }, { "epoch": 0.36, - "learning_rate": 3.212429573035494e-05, - "loss": 2.8104, + "learning_rate": 3.2124414078017e-05, + "loss": 2.7884, "step": 108000 }, { "epoch": 0.36, - "learning_rate": 3.210774415232748e-05, - "loss": 2.8107, + "learning_rate": 3.2107862609570724e-05, + "loss": 2.7808, "step": 108100 }, { "epoch": 0.36, - "learning_rate": 3.2091192574300034e-05, - "loss": 2.8026, + "learning_rate": 3.209131114112444e-05, + "loss": 2.8017, "step": 108200 }, { "epoch": 0.36, - "learning_rate": 3.2074640996272586e-05, - "loss": 2.815, + "learning_rate": 3.207475967267816e-05, + "loss": 2.805, "step": 108300 }, { "epoch": 0.36, - "learning_rate": 3.205808941824514e-05, - "loss": 2.7987, + "learning_rate": 3.205820820423188e-05, + "loss": 2.7912, "step": 108400 }, { "epoch": 0.36, - "learning_rate": 3.204153784021769e-05, - "loss": 2.8136, + "learning_rate": 3.20416567357856e-05, + "loss": 2.792, "step": 108500 }, { "epoch": 0.36, - "learning_rate": 3.202498626219024e-05, - "loss": 2.8023, + "learning_rate": 3.2025105267339326e-05, + "loss": 2.7913, "step": 108600 }, { "epoch": 0.36, - "learning_rate": 3.200843468416279e-05, - "loss": 2.8256, + "learning_rate": 3.200855379889304e-05, + "loss": 2.7977, "step": 108700 }, { "epoch": 0.36, - "learning_rate": 3.199188310613534e-05, - "loss": 2.8226, + "learning_rate": 3.199200233044676e-05, + "loss": 2.8004, "step": 108800 }, { "epoch": 0.36, - "learning_rate": 3.1975331528107895e-05, - "loss": 2.8159, + "learning_rate": 3.1975450862000476e-05, + "loss": 2.7981, "step": 108900 }, { "epoch": 0.36, - "learning_rate": 3.195877995008044e-05, - "loss": 2.8049, + "learning_rate": 3.1958899393554195e-05, + "loss": 2.7803, "step": 109000 }, { "epoch": 0.36, - "learning_rate": 3.1942228372053e-05, - "loss": 2.803, + "learning_rate": 3.194234792510792e-05, + "loss": 2.7979, "step": 109100 }, { "epoch": 0.36, - "learning_rate": 3.1925676794025543e-05, - "loss": 2.8157, + "learning_rate": 3.192579645666164e-05, + "loss": 2.7885, "step": 109200 }, { "epoch": 0.36, - "learning_rate": 3.1909125215998095e-05, - "loss": 2.804, + "learning_rate": 3.190924498821535e-05, + "loss": 2.8018, "step": 109300 }, { "epoch": 0.36, - "learning_rate": 3.1892573637970647e-05, - "loss": 2.8059, + "learning_rate": 3.189269351976907e-05, + "loss": 2.8063, "step": 109400 }, { "epoch": 0.36, - "learning_rate": 3.187602205994319e-05, - "loss": 2.8045, + "learning_rate": 3.1876142051322796e-05, + "loss": 2.804, "step": 109500 }, { "epoch": 0.36, - "learning_rate": 3.185947048191575e-05, - "loss": 2.8239, + "learning_rate": 3.1859590582876515e-05, + "loss": 2.799, "step": 109600 }, { "epoch": 0.36, - "learning_rate": 3.1842918903888295e-05, - "loss": 2.798, + "learning_rate": 3.1843039114430234e-05, + "loss": 2.7842, "step": 109700 }, { "epoch": 0.36, - "learning_rate": 3.182636732586085e-05, - "loss": 2.8172, + "learning_rate": 3.182648764598395e-05, + "loss": 2.7896, "step": 109800 }, { "epoch": 0.36, - "learning_rate": 3.18098157478334e-05, - "loss": 2.8089, + "learning_rate": 3.180993617753767e-05, + "loss": 2.7947, "step": 109900 }, { "epoch": 0.36, - "learning_rate": 3.1793264169805956e-05, - "loss": 2.8078, + "learning_rate": 3.179338470909139e-05, + "loss": 2.7942, "step": 110000 }, { "epoch": 0.36, - "learning_rate": 3.17767125917785e-05, - "loss": 2.8138, + "learning_rate": 3.177683324064511e-05, + "loss": 2.7778, "step": 110100 }, { "epoch": 0.36, - "learning_rate": 3.176016101375105e-05, - "loss": 2.817, + "learning_rate": 3.176028177219883e-05, + "loss": 2.7983, "step": 110200 }, { "epoch": 0.37, - "learning_rate": 3.1743609435723604e-05, - "loss": 2.8112, + "learning_rate": 3.174373030375255e-05, + "loss": 2.7934, "step": 110300 }, { "epoch": 0.37, - "learning_rate": 3.172705785769615e-05, - "loss": 2.8168, + "learning_rate": 3.172717883530627e-05, + "loss": 2.7849, "step": 110400 }, { "epoch": 0.37, - "learning_rate": 3.171050627966871e-05, - "loss": 2.8012, + "learning_rate": 3.171062736685999e-05, + "loss": 2.7911, "step": 110500 }, { "epoch": 0.37, - "learning_rate": 3.169395470164125e-05, - "loss": 2.8051, + "learning_rate": 3.169407589841371e-05, + "loss": 2.7987, "step": 110600 }, { "epoch": 0.37, - "learning_rate": 3.167740312361381e-05, - "loss": 2.8053, + "learning_rate": 3.1677524429967424e-05, + "loss": 2.782, "step": 110700 }, { "epoch": 0.37, - "learning_rate": 3.1660851545586355e-05, - "loss": 2.8176, + "learning_rate": 3.166097296152114e-05, + "loss": 2.7827, "step": 110800 }, { "epoch": 0.37, - "learning_rate": 3.164429996755891e-05, - "loss": 2.8013, + "learning_rate": 3.164442149307487e-05, + "loss": 2.7702, "step": 110900 }, { "epoch": 0.37, - "learning_rate": 3.162774838953146e-05, - "loss": 2.8224, + "learning_rate": 3.162787002462859e-05, + "loss": 2.7883, "step": 111000 }, { "epoch": 0.37, - "learning_rate": 3.161119681150401e-05, - "loss": 2.786, + "learning_rate": 3.161131855618231e-05, + "loss": 2.7813, "step": 111100 }, { "epoch": 0.37, - "learning_rate": 3.159464523347656e-05, - "loss": 2.8069, + "learning_rate": 3.1594767087736026e-05, + "loss": 2.788, "step": 111200 }, { "epoch": 0.37, - "learning_rate": 3.1578093655449113e-05, - "loss": 2.8066, + "learning_rate": 3.1578215619289745e-05, + "loss": 2.7826, "step": 111300 }, { "epoch": 0.37, - "learning_rate": 3.1561542077421665e-05, - "loss": 2.8134, + "learning_rate": 3.1561664150843464e-05, + "loss": 2.7912, "step": 111400 }, { "epoch": 0.37, - "learning_rate": 3.154499049939421e-05, - "loss": 2.8063, + "learning_rate": 3.154511268239718e-05, + "loss": 2.776, "step": 111500 }, { "epoch": 0.37, - "learning_rate": 3.152843892136677e-05, - "loss": 2.8092, + "learning_rate": 3.15285612139509e-05, + "loss": 2.7785, "step": 111600 }, { "epoch": 0.37, - "learning_rate": 3.151188734333931e-05, - "loss": 2.8035, + "learning_rate": 3.151200974550462e-05, + "loss": 2.7956, "step": 111700 }, { "epoch": 0.37, - "learning_rate": 3.1495335765311865e-05, - "loss": 2.8095, + "learning_rate": 3.149545827705834e-05, + "loss": 2.7874, "step": 111800 }, { "epoch": 0.37, - "learning_rate": 3.1478784187284416e-05, - "loss": 2.798, + "learning_rate": 3.1478906808612066e-05, + "loss": 2.792, "step": 111900 }, { "epoch": 0.37, - "learning_rate": 3.146223260925697e-05, - "loss": 2.8112, + "learning_rate": 3.1462355340165785e-05, + "loss": 2.798, "step": 112000 }, { "epoch": 0.37, - "learning_rate": 3.144568103122952e-05, - "loss": 2.8113, + "learning_rate": 3.14458038717195e-05, + "loss": 2.7935, "step": 112100 }, { "epoch": 0.37, - "learning_rate": 3.142912945320207e-05, - "loss": 2.8064, + "learning_rate": 3.1429252403273216e-05, + "loss": 2.7692, "step": 112200 }, { "epoch": 0.37, - "learning_rate": 3.141257787517462e-05, - "loss": 2.8269, + "learning_rate": 3.141270093482694e-05, + "loss": 2.7896, "step": 112300 }, { "epoch": 0.37, - "learning_rate": 3.1396026297147174e-05, - "loss": 2.8041, + "learning_rate": 3.139614946638066e-05, + "loss": 2.794, "step": 112400 }, { "epoch": 0.37, - "learning_rate": 3.137947471911972e-05, - "loss": 2.8107, + "learning_rate": 3.137959799793438e-05, + "loss": 2.7887, "step": 112500 }, { "epoch": 0.37, - "learning_rate": 3.136292314109227e-05, - "loss": 2.8108, + "learning_rate": 3.13630465294881e-05, + "loss": 2.785, "step": 112600 }, { "epoch": 0.37, - "learning_rate": 3.134637156306482e-05, - "loss": 2.8109, + "learning_rate": 3.134649506104182e-05, + "loss": 2.7814, "step": 112700 }, { "epoch": 0.37, - "learning_rate": 3.1329819985037374e-05, - "loss": 2.8135, + "learning_rate": 3.1329943592595536e-05, + "loss": 2.7795, "step": 112800 }, { "epoch": 0.37, - "learning_rate": 3.1313268407009925e-05, - "loss": 2.8092, + "learning_rate": 3.1313392124149255e-05, + "loss": 2.7815, "step": 112900 }, { "epoch": 0.37, - "learning_rate": 3.129671682898248e-05, - "loss": 2.8045, + "learning_rate": 3.1296840655702974e-05, + "loss": 2.7926, "step": 113000 }, { "epoch": 0.37, - "learning_rate": 3.128016525095503e-05, - "loss": 2.8011, + "learning_rate": 3.128028918725669e-05, + "loss": 2.7787, "step": 113100 }, { "epoch": 0.37, - "learning_rate": 3.126361367292758e-05, - "loss": 2.8121, + "learning_rate": 3.126373771881041e-05, + "loss": 2.7863, "step": 113200 }, { "epoch": 0.38, - "learning_rate": 3.124706209490013e-05, - "loss": 2.7899, + "learning_rate": 3.124718625036414e-05, + "loss": 2.7841, "step": 113300 }, { "epoch": 0.38, - "learning_rate": 3.123051051687268e-05, - "loss": 2.8168, + "learning_rate": 3.123063478191786e-05, + "loss": 2.7931, "step": 113400 }, { "epoch": 0.38, - "learning_rate": 3.1213958938845235e-05, - "loss": 2.7913, + "learning_rate": 3.121408331347157e-05, + "loss": 2.7927, "step": 113500 }, { "epoch": 0.38, - "learning_rate": 3.119740736081778e-05, - "loss": 2.8056, + "learning_rate": 3.119753184502529e-05, + "loss": 2.7846, "step": 113600 }, { "epoch": 0.38, - "learning_rate": 3.118085578279033e-05, - "loss": 2.8002, + "learning_rate": 3.1180980376579014e-05, + "loss": 2.7902, "step": 113700 }, { "epoch": 0.38, - "learning_rate": 3.116430420476288e-05, - "loss": 2.8031, + "learning_rate": 3.116442890813273e-05, + "loss": 2.789, "step": 113800 }, { "epoch": 0.38, - "learning_rate": 3.1147752626735435e-05, - "loss": 2.8144, + "learning_rate": 3.114787743968645e-05, + "loss": 2.7942, "step": 113900 }, { "epoch": 0.38, - "learning_rate": 3.1131201048707986e-05, - "loss": 2.8118, + "learning_rate": 3.113132597124017e-05, + "loss": 2.7986, "step": 114000 }, { "epoch": 0.38, - "learning_rate": 3.111464947068054e-05, - "loss": 2.8072, + "learning_rate": 3.111477450279389e-05, + "loss": 2.7819, "step": 114100 }, { "epoch": 0.38, - "learning_rate": 3.109809789265309e-05, - "loss": 2.8091, + "learning_rate": 3.109822303434761e-05, + "loss": 2.7942, "step": 114200 }, { "epoch": 0.38, - "learning_rate": 3.1081546314625634e-05, - "loss": 2.8047, + "learning_rate": 3.108167156590133e-05, + "loss": 2.7762, "step": 114300 }, { "epoch": 0.38, - "learning_rate": 3.106499473659819e-05, - "loss": 2.8065, + "learning_rate": 3.106512009745505e-05, + "loss": 2.774, "step": 114400 }, { "epoch": 0.38, - "learning_rate": 3.104844315857074e-05, - "loss": 2.8209, + "learning_rate": 3.1048568629008766e-05, + "loss": 2.7817, "step": 114500 }, { "epoch": 0.38, - "learning_rate": 3.1031891580543296e-05, - "loss": 2.8182, + "learning_rate": 3.1032017160562485e-05, + "loss": 2.7936, "step": 114600 }, { "epoch": 0.38, - "learning_rate": 3.101534000251584e-05, - "loss": 2.8251, + "learning_rate": 3.101546569211621e-05, + "loss": 2.7957, "step": 114700 }, { "epoch": 0.38, - "learning_rate": 3.099878842448839e-05, - "loss": 2.8, + "learning_rate": 3.099891422366993e-05, + "loss": 2.7702, "step": 114800 }, { "epoch": 0.38, - "learning_rate": 3.0982236846460944e-05, - "loss": 2.8058, + "learning_rate": 3.098236275522364e-05, + "loss": 2.7816, "step": 114900 }, { "epoch": 0.38, - "learning_rate": 3.096568526843349e-05, - "loss": 2.7979, + "learning_rate": 3.096581128677736e-05, + "loss": 2.7776, "step": 115000 }, { "epoch": 0.38, - "learning_rate": 3.094913369040605e-05, - "loss": 2.821, + "learning_rate": 3.0949259818331087e-05, + "loss": 2.7865, "step": 115100 }, { "epoch": 0.38, - "learning_rate": 3.093258211237859e-05, - "loss": 2.8085, + "learning_rate": 3.0932708349884806e-05, + "loss": 2.794, "step": 115200 }, { "epoch": 0.38, - "learning_rate": 3.091603053435115e-05, - "loss": 2.8016, + "learning_rate": 3.0916156881438525e-05, + "loss": 2.7873, "step": 115300 }, { "epoch": 0.38, - "learning_rate": 3.0899478956323695e-05, - "loss": 2.8095, + "learning_rate": 3.0899605412992244e-05, + "loss": 2.7894, "step": 115400 }, { "epoch": 0.38, - "learning_rate": 3.0882927378296254e-05, - "loss": 2.7999, + "learning_rate": 3.088305394454596e-05, + "loss": 2.7833, "step": 115500 }, { "epoch": 0.38, - "learning_rate": 3.08663758002688e-05, - "loss": 2.816, + "learning_rate": 3.086650247609968e-05, + "loss": 2.7832, "step": 115600 }, { "epoch": 0.38, - "learning_rate": 3.084982422224135e-05, - "loss": 2.8056, + "learning_rate": 3.08499510076534e-05, + "loss": 2.7865, "step": 115700 }, { "epoch": 0.38, - "learning_rate": 3.08332726442139e-05, - "loss": 2.8035, + "learning_rate": 3.083339953920712e-05, + "loss": 2.7856, "step": 115800 }, { "epoch": 0.38, - "learning_rate": 3.0816721066186446e-05, - "loss": 2.7915, + "learning_rate": 3.081684807076084e-05, + "loss": 2.7897, "step": 115900 }, { "epoch": 0.38, - "learning_rate": 3.0800169488159005e-05, - "loss": 2.7957, + "learning_rate": 3.080029660231456e-05, + "loss": 2.7917, "step": 116000 }, { "epoch": 0.38, - "learning_rate": 3.078361791013155e-05, - "loss": 2.8147, + "learning_rate": 3.078374513386828e-05, + "loss": 2.788, "step": 116100 }, { "epoch": 0.38, - "learning_rate": 3.076706633210411e-05, - "loss": 2.795, + "learning_rate": 3.0767193665421995e-05, + "loss": 2.7895, "step": 116200 }, { "epoch": 0.38, - "learning_rate": 3.075051475407665e-05, - "loss": 2.8028, + "learning_rate": 3.0750642196975714e-05, + "loss": 2.7844, "step": 116300 }, { "epoch": 0.39, - "learning_rate": 3.0733963176049204e-05, - "loss": 2.8082, + "learning_rate": 3.073409072852943e-05, + "loss": 2.7912, "step": 116400 }, { "epoch": 0.39, - "learning_rate": 3.0717411598021756e-05, - "loss": 2.8264, + "learning_rate": 3.071753926008316e-05, + "loss": 2.7877, "step": 116500 }, { "epoch": 0.39, - "learning_rate": 3.070086001999431e-05, - "loss": 2.8118, + "learning_rate": 3.070098779163688e-05, + "loss": 2.7909, "step": 116600 }, { "epoch": 0.39, - "learning_rate": 3.068430844196686e-05, - "loss": 2.8071, + "learning_rate": 3.06844363231906e-05, + "loss": 2.7926, "step": 116700 }, { "epoch": 0.39, - "learning_rate": 3.066775686393941e-05, - "loss": 2.816, + "learning_rate": 3.0667884854744316e-05, + "loss": 2.7806, "step": 116800 }, { "epoch": 0.39, - "learning_rate": 3.065120528591196e-05, - "loss": 2.7917, + "learning_rate": 3.065133338629803e-05, + "loss": 2.7841, "step": 116900 }, { "epoch": 0.39, - "learning_rate": 3.063465370788451e-05, - "loss": 2.8143, + "learning_rate": 3.0634781917851754e-05, + "loss": 2.787, "step": 117000 }, { "epoch": 0.39, - "learning_rate": 3.0618102129857066e-05, - "loss": 2.8048, + "learning_rate": 3.061823044940547e-05, + "loss": 2.7789, "step": 117100 }, { "epoch": 0.39, - "learning_rate": 3.060155055182961e-05, - "loss": 2.8149, + "learning_rate": 3.060167898095919e-05, + "loss": 2.7702, "step": 117200 }, { "epoch": 0.39, - "learning_rate": 3.058499897380216e-05, - "loss": 2.8092, + "learning_rate": 3.058512751251291e-05, + "loss": 2.7936, "step": 117300 }, { "epoch": 0.39, - "learning_rate": 3.0568447395774714e-05, - "loss": 2.8126, + "learning_rate": 3.056857604406663e-05, + "loss": 2.7896, "step": 117400 }, { "epoch": 0.39, - "learning_rate": 3.0551895817747265e-05, - "loss": 2.8076, + "learning_rate": 3.0552024575620356e-05, + "loss": 2.7848, "step": 117500 }, { "epoch": 0.39, - "learning_rate": 3.053534423971982e-05, - "loss": 2.7898, + "learning_rate": 3.053547310717407e-05, + "loss": 2.7803, "step": 117600 }, { "epoch": 0.39, - "learning_rate": 3.051879266169237e-05, - "loss": 2.8043, + "learning_rate": 3.051892163872779e-05, + "loss": 2.7919, "step": 117700 }, { "epoch": 0.39, - "learning_rate": 3.0502241083664917e-05, - "loss": 2.8165, + "learning_rate": 3.050237017028151e-05, + "loss": 2.7847, "step": 117800 }, { "epoch": 0.39, - "learning_rate": 3.048568950563747e-05, - "loss": 2.8109, + "learning_rate": 3.0485818701835228e-05, + "loss": 2.7884, "step": 117900 }, { "epoch": 0.39, - "learning_rate": 3.046913792761002e-05, - "loss": 2.813, + "learning_rate": 3.0469267233388947e-05, + "loss": 2.8017, "step": 118000 }, { "epoch": 0.39, - "learning_rate": 3.0452586349582568e-05, - "loss": 2.8049, + "learning_rate": 3.045271576494267e-05, + "loss": 2.7902, "step": 118100 }, { "epoch": 0.39, - "learning_rate": 3.0436034771555123e-05, - "loss": 2.8088, + "learning_rate": 3.043616429649639e-05, + "loss": 2.7662, "step": 118200 }, { "epoch": 0.39, - "learning_rate": 3.041948319352767e-05, - "loss": 2.8101, + "learning_rate": 3.0419612828050104e-05, + "loss": 2.7879, "step": 118300 }, { "epoch": 0.39, - "learning_rate": 3.0402931615500223e-05, - "loss": 2.807, + "learning_rate": 3.0403061359603823e-05, + "loss": 2.7693, "step": 118400 }, { "epoch": 0.39, - "learning_rate": 3.038638003747277e-05, - "loss": 2.8173, + "learning_rate": 3.0386509891157546e-05, + "loss": 2.781, "step": 118500 }, { "epoch": 0.39, - "learning_rate": 3.0369828459445326e-05, - "loss": 2.7936, + "learning_rate": 3.0369958422711264e-05, + "loss": 2.7978, "step": 118600 }, { "epoch": 0.39, - "learning_rate": 3.0353276881417874e-05, - "loss": 2.8076, + "learning_rate": 3.0353406954264983e-05, + "loss": 2.7619, "step": 118700 }, { "epoch": 0.39, - "learning_rate": 3.033672530339043e-05, - "loss": 2.8081, + "learning_rate": 3.0336855485818706e-05, + "loss": 2.7885, "step": 118800 }, { "epoch": 0.39, - "learning_rate": 3.0320173725362977e-05, - "loss": 2.8104, + "learning_rate": 3.0320304017372425e-05, + "loss": 2.7805, "step": 118900 }, { "epoch": 0.39, - "learning_rate": 3.0303622147335532e-05, - "loss": 2.7959, + "learning_rate": 3.030375254892614e-05, + "loss": 2.7908, "step": 119000 }, { "epoch": 0.39, - "learning_rate": 3.028707056930808e-05, - "loss": 2.8227, + "learning_rate": 3.028720108047986e-05, + "loss": 2.7909, "step": 119100 }, { "epoch": 0.39, - "learning_rate": 3.027051899128063e-05, - "loss": 2.8016, + "learning_rate": 3.0270649612033582e-05, + "loss": 2.7909, "step": 119200 }, { "epoch": 0.39, - "learning_rate": 3.025396741325318e-05, - "loss": 2.8102, + "learning_rate": 3.02540981435873e-05, + "loss": 2.7831, "step": 119300 }, { "epoch": 0.4, - "learning_rate": 3.023741583522573e-05, - "loss": 2.8155, + "learning_rate": 3.023754667514102e-05, + "loss": 2.7889, "step": 119400 }, { "epoch": 0.4, - "learning_rate": 3.0220864257198284e-05, - "loss": 2.8122, + "learning_rate": 3.0220995206694742e-05, + "loss": 2.7773, "step": 119500 }, { "epoch": 0.4, - "learning_rate": 3.0204312679170832e-05, - "loss": 2.8077, + "learning_rate": 3.020444373824846e-05, + "loss": 2.8016, "step": 119600 }, { "epoch": 0.4, - "learning_rate": 3.0187761101143387e-05, - "loss": 2.8094, + "learning_rate": 3.0187892269802177e-05, + "loss": 2.7952, "step": 119700 }, { "epoch": 0.4, - "learning_rate": 3.0171209523115935e-05, - "loss": 2.7915, + "learning_rate": 3.0171340801355896e-05, + "loss": 2.7855, "step": 119800 }, { "epoch": 0.4, - "learning_rate": 3.0154657945088487e-05, - "loss": 2.8025, + "learning_rate": 3.0154789332909618e-05, + "loss": 2.7886, "step": 119900 }, { "epoch": 0.4, - "learning_rate": 3.0138106367061035e-05, - "loss": 2.8126, + "learning_rate": 3.0138237864463337e-05, + "loss": 2.7865, "step": 120000 }, { "epoch": 0.4, - "learning_rate": 3.012155478903359e-05, - "loss": 2.7977, + "learning_rate": 3.0121686396017056e-05, + "loss": 2.7916, "step": 120100 }, { "epoch": 0.4, - "learning_rate": 3.0105003211006138e-05, - "loss": 2.8004, + "learning_rate": 3.010513492757078e-05, + "loss": 2.7964, "step": 120200 }, { "epoch": 0.4, - "learning_rate": 3.0088451632978686e-05, - "loss": 2.8008, + "learning_rate": 3.0088583459124497e-05, + "loss": 2.795, "step": 120300 }, { "epoch": 0.4, - "learning_rate": 3.007190005495124e-05, - "loss": 2.8039, + "learning_rate": 3.0072031990678213e-05, + "loss": 2.7841, "step": 120400 }, { "epoch": 0.4, - "learning_rate": 3.005534847692379e-05, - "loss": 2.8076, + "learning_rate": 3.0055480522231932e-05, + "loss": 2.7781, "step": 120500 }, { "epoch": 0.4, - "learning_rate": 3.0038796898896344e-05, - "loss": 2.8322, + "learning_rate": 3.0038929053785654e-05, + "loss": 2.8013, "step": 120600 }, { "epoch": 0.4, - "learning_rate": 3.0022245320868893e-05, - "loss": 2.8005, + "learning_rate": 3.0022377585339373e-05, + "loss": 2.7758, "step": 120700 }, { "epoch": 0.4, - "learning_rate": 3.0005693742841444e-05, - "loss": 2.8192, + "learning_rate": 3.0005826116893092e-05, + "loss": 2.7948, "step": 120800 }, { "epoch": 0.4, - "learning_rate": 2.9989142164813993e-05, - "loss": 2.8058, + "learning_rate": 2.9989274648446815e-05, + "loss": 2.7904, "step": 120900 }, { "epoch": 0.4, - "learning_rate": 2.9972590586786548e-05, - "loss": 2.801, + "learning_rate": 2.9972723180000534e-05, + "loss": 2.7814, "step": 121000 }, { "epoch": 0.4, - "learning_rate": 2.9956039008759096e-05, - "loss": 2.7984, + "learning_rate": 2.995617171155425e-05, + "loss": 2.7874, "step": 121100 }, { "epoch": 0.4, - "learning_rate": 2.993948743073165e-05, - "loss": 2.8151, + "learning_rate": 2.9939620243107968e-05, + "loss": 2.7753, "step": 121200 }, { "epoch": 0.4, - "learning_rate": 2.99229358527042e-05, - "loss": 2.8035, + "learning_rate": 2.992306877466169e-05, + "loss": 2.7826, "step": 121300 }, { "epoch": 0.4, - "learning_rate": 2.9906384274676747e-05, - "loss": 2.8062, + "learning_rate": 2.990651730621541e-05, + "loss": 2.7654, "step": 121400 }, { "epoch": 0.4, - "learning_rate": 2.98898326966493e-05, - "loss": 2.8015, + "learning_rate": 2.988996583776913e-05, + "loss": 2.789, "step": 121500 }, { "epoch": 0.4, - "learning_rate": 2.9873281118621847e-05, - "loss": 2.8086, + "learning_rate": 2.987341436932285e-05, + "loss": 2.7849, "step": 121600 }, { "epoch": 0.4, - "learning_rate": 2.9856729540594402e-05, - "loss": 2.8128, + "learning_rate": 2.985686290087657e-05, + "loss": 2.7842, "step": 121700 }, { "epoch": 0.4, - "learning_rate": 2.984017796256695e-05, - "loss": 2.8111, + "learning_rate": 2.9840311432430285e-05, + "loss": 2.795, "step": 121800 }, { "epoch": 0.4, - "learning_rate": 2.9823626384539505e-05, - "loss": 2.8093, + "learning_rate": 2.9823759963984004e-05, + "loss": 2.7939, "step": 121900 }, { "epoch": 0.4, - "learning_rate": 2.9807074806512053e-05, - "loss": 2.8167, + "learning_rate": 2.9807208495537723e-05, + "loss": 2.7912, "step": 122000 }, { "epoch": 0.4, - "learning_rate": 2.979052322848461e-05, - "loss": 2.821, + "learning_rate": 2.9790657027091446e-05, + "loss": 2.7731, "step": 122100 }, { "epoch": 0.4, - "learning_rate": 2.9773971650457157e-05, - "loss": 2.7971, + "learning_rate": 2.9774105558645165e-05, + "loss": 2.7808, "step": 122200 }, { "epoch": 0.4, - "learning_rate": 2.9757420072429708e-05, - "loss": 2.8163, + "learning_rate": 2.9757554090198887e-05, + "loss": 2.7876, "step": 122300 }, { "epoch": 0.41, - "learning_rate": 2.9740868494402256e-05, - "loss": 2.8089, + "learning_rate": 2.97410026217526e-05, + "loss": 2.7896, "step": 122400 }, { "epoch": 0.41, - "learning_rate": 2.9724316916374805e-05, - "loss": 2.8084, + "learning_rate": 2.9724451153306322e-05, + "loss": 2.7935, "step": 122500 }, { "epoch": 0.41, - "learning_rate": 2.970776533834736e-05, - "loss": 2.8118, + "learning_rate": 2.970789968486004e-05, + "loss": 2.7737, "step": 122600 }, { "epoch": 0.41, - "learning_rate": 2.9691213760319908e-05, - "loss": 2.8044, + "learning_rate": 2.969134821641376e-05, + "loss": 2.7898, "step": 122700 }, { "epoch": 0.41, - "learning_rate": 2.9674662182292463e-05, - "loss": 2.8016, + "learning_rate": 2.9674796747967482e-05, + "loss": 2.7938, "step": 122800 }, { "epoch": 0.41, - "learning_rate": 2.965811060426501e-05, - "loss": 2.8074, + "learning_rate": 2.96582452795212e-05, + "loss": 2.7695, "step": 122900 }, { "epoch": 0.41, - "learning_rate": 2.9641559026237563e-05, - "loss": 2.8067, + "learning_rate": 2.9641693811074923e-05, + "loss": 2.7848, "step": 123000 }, { "epoch": 0.41, - "learning_rate": 2.9625007448210114e-05, - "loss": 2.8026, + "learning_rate": 2.9625142342628636e-05, + "loss": 2.7643, "step": 123100 }, { "epoch": 0.41, - "learning_rate": 2.9608455870182666e-05, - "loss": 2.7914, + "learning_rate": 2.9608590874182358e-05, + "loss": 2.7844, "step": 123200 }, { "epoch": 0.41, - "learning_rate": 2.9591904292155214e-05, - "loss": 2.8077, + "learning_rate": 2.9592039405736077e-05, + "loss": 2.7749, "step": 123300 }, { "epoch": 0.41, - "learning_rate": 2.957535271412777e-05, - "loss": 2.8034, + "learning_rate": 2.9575487937289796e-05, + "loss": 2.8042, "step": 123400 }, { "epoch": 0.41, - "learning_rate": 2.9558801136100317e-05, - "loss": 2.8051, + "learning_rate": 2.955893646884352e-05, + "loss": 2.7863, "step": 123500 }, { "epoch": 0.41, - "learning_rate": 2.9542249558072865e-05, - "loss": 2.7906, + "learning_rate": 2.9542385000397237e-05, + "loss": 2.7736, "step": 123600 }, { "epoch": 0.41, - "learning_rate": 2.952569798004542e-05, - "loss": 2.8012, + "learning_rate": 2.952583353195096e-05, + "loss": 2.7848, "step": 123700 }, { "epoch": 0.41, - "learning_rate": 2.950914640201797e-05, - "loss": 2.8069, + "learning_rate": 2.9509282063504672e-05, + "loss": 2.7822, "step": 123800 }, { "epoch": 0.41, - "learning_rate": 2.949259482399052e-05, - "loss": 2.8001, + "learning_rate": 2.9492730595058394e-05, + "loss": 2.7827, "step": 123900 }, { "epoch": 0.41, - "learning_rate": 2.947604324596307e-05, - "loss": 2.7992, + "learning_rate": 2.9476179126612113e-05, + "loss": 2.7772, "step": 124000 }, { "epoch": 0.41, - "learning_rate": 2.9459491667935623e-05, - "loss": 2.8004, + "learning_rate": 2.9459627658165832e-05, + "loss": 2.7733, "step": 124100 }, { "epoch": 0.41, - "learning_rate": 2.944294008990817e-05, - "loss": 2.8019, + "learning_rate": 2.9443076189719555e-05, + "loss": 2.7775, "step": 124200 }, { "epoch": 0.41, - "learning_rate": 2.9426388511880727e-05, - "loss": 2.8012, + "learning_rate": 2.9426524721273274e-05, + "loss": 2.7931, "step": 124300 }, { "epoch": 0.41, - "learning_rate": 2.9409836933853275e-05, - "loss": 2.8143, + "learning_rate": 2.9409973252826996e-05, + "loss": 2.7846, "step": 124400 }, { "epoch": 0.41, - "learning_rate": 2.9393285355825826e-05, - "loss": 2.8162, + "learning_rate": 2.9393421784380708e-05, + "loss": 2.7969, "step": 124500 }, { "epoch": 0.41, - "learning_rate": 2.9376733777798378e-05, - "loss": 2.8062, + "learning_rate": 2.937687031593443e-05, + "loss": 2.7884, "step": 124600 }, { "epoch": 0.41, - "learning_rate": 2.936018219977093e-05, - "loss": 2.8091, + "learning_rate": 2.936031884748815e-05, + "loss": 2.783, "step": 124700 }, { "epoch": 0.41, - "learning_rate": 2.9343630621743478e-05, - "loss": 2.8125, + "learning_rate": 2.934376737904187e-05, + "loss": 2.7943, "step": 124800 }, { "epoch": 0.41, - "learning_rate": 2.9327079043716026e-05, - "loss": 2.8002, + "learning_rate": 2.932721591059559e-05, + "loss": 2.7843, "step": 124900 }, { "epoch": 0.41, - "learning_rate": 2.931052746568858e-05, - "loss": 2.8116, + "learning_rate": 2.931066444214931e-05, + "loss": 2.7877, "step": 125000 }, { "epoch": 0.41, - "learning_rate": 2.929397588766113e-05, - "loss": 2.8091, + "learning_rate": 2.9294112973703032e-05, + "loss": 2.7661, "step": 125100 }, { "epoch": 0.41, - "learning_rate": 2.9277424309633684e-05, - "loss": 2.8073, + "learning_rate": 2.9277561505256744e-05, + "loss": 2.791, "step": 125200 }, { "epoch": 0.41, - "learning_rate": 2.9260872731606232e-05, - "loss": 2.8001, + "learning_rate": 2.9261010036810467e-05, + "loss": 2.7752, "step": 125300 }, { "epoch": 0.42, - "learning_rate": 2.9244321153578784e-05, - "loss": 2.7961, + "learning_rate": 2.9244458568364186e-05, + "loss": 2.7788, "step": 125400 }, { "epoch": 0.42, - "learning_rate": 2.9227769575551332e-05, - "loss": 2.7983, + "learning_rate": 2.9227907099917905e-05, + "loss": 2.7977, "step": 125500 }, { "epoch": 0.42, - "learning_rate": 2.9211217997523887e-05, - "loss": 2.7885, + "learning_rate": 2.9211355631471627e-05, + "loss": 2.7815, "step": 125600 }, { "epoch": 0.42, - "learning_rate": 2.9194666419496435e-05, - "loss": 2.7952, + "learning_rate": 2.9194804163025346e-05, + "loss": 2.7868, "step": 125700 }, { "epoch": 0.42, - "learning_rate": 2.917811484146899e-05, - "loss": 2.8147, + "learning_rate": 2.917825269457907e-05, + "loss": 2.7939, "step": 125800 }, { "epoch": 0.42, - "learning_rate": 2.916156326344154e-05, - "loss": 2.7904, + "learning_rate": 2.916170122613278e-05, + "loss": 2.7874, "step": 125900 }, { "epoch": 0.42, - "learning_rate": 2.9145011685414087e-05, - "loss": 2.8095, + "learning_rate": 2.9145149757686503e-05, + "loss": 2.7769, "step": 126000 }, { "epoch": 0.42, - "learning_rate": 2.9128460107386642e-05, - "loss": 2.8045, + "learning_rate": 2.9128598289240222e-05, + "loss": 2.7846, "step": 126100 }, { "epoch": 0.42, - "learning_rate": 2.911190852935919e-05, - "loss": 2.8099, + "learning_rate": 2.911204682079394e-05, + "loss": 2.7854, "step": 126200 }, { "epoch": 0.42, - "learning_rate": 2.909535695133174e-05, - "loss": 2.8024, + "learning_rate": 2.9095495352347663e-05, + "loss": 2.7891, "step": 126300 }, { "epoch": 0.42, - "learning_rate": 2.907880537330429e-05, - "loss": 2.8015, + "learning_rate": 2.9078943883901382e-05, + "loss": 2.7594, "step": 126400 }, { "epoch": 0.42, - "learning_rate": 2.9062253795276845e-05, - "loss": 2.8066, + "learning_rate": 2.90623924154551e-05, + "loss": 2.7707, "step": 126500 }, { "epoch": 0.42, - "learning_rate": 2.9045702217249393e-05, - "loss": 2.8114, + "learning_rate": 2.9045840947008817e-05, + "loss": 2.8029, "step": 126600 }, { "epoch": 0.42, - "learning_rate": 2.9029150639221948e-05, - "loss": 2.8002, + "learning_rate": 2.902928947856254e-05, + "loss": 2.7806, "step": 126700 }, { "epoch": 0.42, - "learning_rate": 2.9012599061194496e-05, - "loss": 2.8052, + "learning_rate": 2.901273801011626e-05, + "loss": 2.8062, "step": 126800 }, { "epoch": 0.42, - "learning_rate": 2.8996047483167048e-05, - "loss": 2.8027, + "learning_rate": 2.8996186541669977e-05, + "loss": 2.7994, "step": 126900 }, { "epoch": 0.42, - "learning_rate": 2.8979495905139596e-05, - "loss": 2.8116, + "learning_rate": 2.89796350732237e-05, + "loss": 2.8045, "step": 127000 }, { "epoch": 0.42, - "learning_rate": 2.8962944327112144e-05, - "loss": 2.8039, + "learning_rate": 2.896308360477742e-05, + "loss": 2.7918, "step": 127100 }, { "epoch": 0.42, - "learning_rate": 2.89463927490847e-05, - "loss": 2.8143, + "learning_rate": 2.8946532136331138e-05, + "loss": 2.7762, "step": 127200 }, { "epoch": 0.42, - "learning_rate": 2.8929841171057247e-05, - "loss": 2.8046, + "learning_rate": 2.8929980667884853e-05, + "loss": 2.7756, "step": 127300 }, { "epoch": 0.42, - "learning_rate": 2.8913289593029802e-05, - "loss": 2.7823, + "learning_rate": 2.8913429199438576e-05, + "loss": 2.7933, "step": 127400 }, { "epoch": 0.42, - "learning_rate": 2.889673801500235e-05, - "loss": 2.79, + "learning_rate": 2.8896877730992295e-05, + "loss": 2.7812, "step": 127500 }, { "epoch": 0.42, - "learning_rate": 2.8880186436974906e-05, - "loss": 2.816, + "learning_rate": 2.8880326262546014e-05, + "loss": 2.762, "step": 127600 }, { "epoch": 0.42, - "learning_rate": 2.8863634858947454e-05, - "loss": 2.7952, + "learning_rate": 2.8863774794099736e-05, + "loss": 2.7768, "step": 127700 }, { "epoch": 0.42, - "learning_rate": 2.8847083280920005e-05, - "loss": 2.801, + "learning_rate": 2.8847223325653455e-05, + "loss": 2.778, "step": 127800 }, { "epoch": 0.42, - "learning_rate": 2.8830531702892554e-05, - "loss": 2.8058, + "learning_rate": 2.8830671857207174e-05, + "loss": 2.8011, "step": 127900 }, { "epoch": 0.42, - "learning_rate": 2.881398012486511e-05, - "loss": 2.8029, + "learning_rate": 2.881412038876089e-05, + "loss": 2.7789, "step": 128000 }, { "epoch": 0.42, - "learning_rate": 2.8797428546837657e-05, - "loss": 2.8067, + "learning_rate": 2.8797568920314612e-05, + "loss": 2.7836, "step": 128100 }, { "epoch": 0.42, - "learning_rate": 2.8780876968810205e-05, - "loss": 2.8006, + "learning_rate": 2.878101745186833e-05, + "loss": 2.7829, "step": 128200 }, { "epoch": 0.42, - "learning_rate": 2.876432539078276e-05, - "loss": 2.7942, + "learning_rate": 2.876446598342205e-05, + "loss": 2.7892, "step": 128300 }, { "epoch": 0.43, - "learning_rate": 2.874777381275531e-05, - "loss": 2.8058, + "learning_rate": 2.8747914514975772e-05, + "loss": 2.7763, "step": 128400 }, { "epoch": 0.43, - "learning_rate": 2.873122223472786e-05, - "loss": 2.781, + "learning_rate": 2.873136304652949e-05, + "loss": 2.7696, "step": 128500 }, { "epoch": 0.43, - "learning_rate": 2.8714670656700408e-05, - "loss": 2.8035, + "learning_rate": 2.871481157808321e-05, + "loss": 2.7819, "step": 128600 }, { "epoch": 0.43, - "learning_rate": 2.8698119078672963e-05, - "loss": 2.7969, + "learning_rate": 2.8698260109636926e-05, + "loss": 2.7802, "step": 128700 }, { "epoch": 0.43, - "learning_rate": 2.868156750064551e-05, - "loss": 2.7947, + "learning_rate": 2.8681708641190648e-05, + "loss": 2.7636, "step": 128800 }, { "epoch": 0.43, - "learning_rate": 2.8665015922618066e-05, - "loss": 2.7803, + "learning_rate": 2.8665157172744367e-05, + "loss": 2.7766, "step": 128900 }, { "epoch": 0.43, - "learning_rate": 2.8648464344590615e-05, - "loss": 2.8101, + "learning_rate": 2.8648605704298086e-05, + "loss": 2.7798, "step": 129000 }, { "epoch": 0.43, - "learning_rate": 2.863191276656317e-05, - "loss": 2.8057, + "learning_rate": 2.863205423585181e-05, + "loss": 2.7852, "step": 129100 }, { "epoch": 0.43, - "learning_rate": 2.8615361188535718e-05, - "loss": 2.7974, + "learning_rate": 2.8615502767405527e-05, + "loss": 2.7858, "step": 129200 }, { "epoch": 0.43, - "learning_rate": 2.8598809610508266e-05, - "loss": 2.7949, + "learning_rate": 2.8598951298959243e-05, + "loss": 2.7768, "step": 129300 }, { "epoch": 0.43, - "learning_rate": 2.8582258032480818e-05, - "loss": 2.804, + "learning_rate": 2.8582399830512962e-05, + "loss": 2.7866, "step": 129400 }, { "epoch": 0.43, - "learning_rate": 2.8565706454453366e-05, - "loss": 2.804, + "learning_rate": 2.8565848362066684e-05, + "loss": 2.7912, "step": 129500 }, { "epoch": 0.43, - "learning_rate": 2.854915487642592e-05, - "loss": 2.7992, + "learning_rate": 2.8549296893620403e-05, + "loss": 2.7902, "step": 129600 }, { "epoch": 0.43, - "learning_rate": 2.853260329839847e-05, - "loss": 2.8074, + "learning_rate": 2.8532745425174122e-05, + "loss": 2.8026, "step": 129700 }, { "epoch": 0.43, - "learning_rate": 2.8516051720371024e-05, - "loss": 2.7944, + "learning_rate": 2.8516193956727845e-05, + "loss": 2.7745, "step": 129800 }, { "epoch": 0.43, - "learning_rate": 2.8499500142343572e-05, - "loss": 2.791, + "learning_rate": 2.8499642488281564e-05, + "loss": 2.7687, "step": 129900 }, { "epoch": 0.43, - "learning_rate": 2.8482948564316124e-05, - "loss": 2.8026, + "learning_rate": 2.848309101983528e-05, + "loss": 2.7818, "step": 130000 }, { "epoch": 0.43, - "learning_rate": 2.8466396986288675e-05, - "loss": 2.8073, + "learning_rate": 2.8466539551388998e-05, + "loss": 2.7829, "step": 130100 }, { "epoch": 0.43, - "learning_rate": 2.8449845408261227e-05, - "loss": 2.7903, + "learning_rate": 2.844998808294272e-05, + "loss": 2.7766, "step": 130200 }, { "epoch": 0.43, - "learning_rate": 2.8433293830233775e-05, - "loss": 2.7957, + "learning_rate": 2.843343661449644e-05, + "loss": 2.7889, "step": 130300 }, { "epoch": 0.43, - "learning_rate": 2.8416742252206323e-05, - "loss": 2.8007, + "learning_rate": 2.841688514605016e-05, + "loss": 2.7971, "step": 130400 }, { "epoch": 0.43, - "learning_rate": 2.840019067417888e-05, - "loss": 2.8036, + "learning_rate": 2.840033367760388e-05, + "loss": 2.7856, "step": 130500 }, { "epoch": 0.43, - "learning_rate": 2.8383639096151427e-05, - "loss": 2.8099, + "learning_rate": 2.83837822091576e-05, + "loss": 2.7652, "step": 130600 }, { "epoch": 0.43, - "learning_rate": 2.836708751812398e-05, - "loss": 2.8121, + "learning_rate": 2.8367230740711316e-05, + "loss": 2.7974, "step": 130700 }, { "epoch": 0.43, - "learning_rate": 2.835053594009653e-05, - "loss": 2.8009, + "learning_rate": 2.8350679272265035e-05, + "loss": 2.7818, "step": 130800 }, { "epoch": 0.43, - "learning_rate": 2.833398436206908e-05, - "loss": 2.8142, + "learning_rate": 2.8334127803818754e-05, + "loss": 2.7826, "step": 130900 }, { "epoch": 0.43, - "learning_rate": 2.831743278404163e-05, - "loss": 2.8003, + "learning_rate": 2.8317576335372476e-05, + "loss": 2.7694, "step": 131000 }, { "epoch": 0.43, - "learning_rate": 2.8300881206014185e-05, - "loss": 2.7973, + "learning_rate": 2.8301024866926195e-05, + "loss": 2.7688, "step": 131100 }, { "epoch": 0.43, - "learning_rate": 2.8284329627986733e-05, - "loss": 2.7986, + "learning_rate": 2.8284473398479917e-05, + "loss": 2.7764, "step": 131200 }, { "epoch": 0.43, - "learning_rate": 2.8267778049959288e-05, - "loss": 2.8036, + "learning_rate": 2.8267921930033636e-05, + "loss": 2.7901, "step": 131300 }, { "epoch": 0.43, - "learning_rate": 2.8251226471931836e-05, - "loss": 2.8099, + "learning_rate": 2.8251370461587352e-05, + "loss": 2.7953, "step": 131400 }, { "epoch": 0.44, - "learning_rate": 2.8234674893904384e-05, - "loss": 2.8025, + "learning_rate": 2.823481899314107e-05, + "loss": 2.7874, "step": 131500 }, { "epoch": 0.44, - "learning_rate": 2.821812331587694e-05, - "loss": 2.7987, + "learning_rate": 2.821826752469479e-05, + "loss": 2.7741, "step": 131600 }, { "epoch": 0.44, - "learning_rate": 2.8201571737849487e-05, - "loss": 2.7895, + "learning_rate": 2.8201716056248512e-05, + "loss": 2.7774, "step": 131700 }, { "epoch": 0.44, - "learning_rate": 2.818502015982204e-05, - "loss": 2.8032, + "learning_rate": 2.818516458780223e-05, + "loss": 2.7807, "step": 131800 }, { "epoch": 0.44, - "learning_rate": 2.8168468581794587e-05, - "loss": 2.7815, + "learning_rate": 2.8168613119355954e-05, + "loss": 2.789, "step": 131900 }, { "epoch": 0.44, - "learning_rate": 2.8151917003767142e-05, - "loss": 2.7876, + "learning_rate": 2.8152061650909673e-05, + "loss": 2.7807, "step": 132000 }, { "epoch": 0.44, - "learning_rate": 2.813536542573969e-05, - "loss": 2.8044, + "learning_rate": 2.8135510182463388e-05, + "loss": 2.7825, "step": 132100 }, { "epoch": 0.44, - "learning_rate": 2.8118813847712245e-05, - "loss": 2.8078, + "learning_rate": 2.8118958714017107e-05, + "loss": 2.7828, "step": 132200 }, { "epoch": 0.44, - "learning_rate": 2.8102262269684794e-05, - "loss": 2.7846, + "learning_rate": 2.8102407245570826e-05, + "loss": 2.782, "step": 132300 }, { "epoch": 0.44, - "learning_rate": 2.8085710691657345e-05, - "loss": 2.7908, + "learning_rate": 2.808585577712455e-05, + "loss": 2.7815, "step": 132400 }, { "epoch": 0.44, - "learning_rate": 2.8069159113629893e-05, - "loss": 2.8132, + "learning_rate": 2.8069304308678267e-05, + "loss": 2.7775, "step": 132500 }, { "epoch": 0.44, - "learning_rate": 2.805260753560244e-05, - "loss": 2.7996, + "learning_rate": 2.805275284023199e-05, + "loss": 2.7914, "step": 132600 }, { "epoch": 0.44, - "learning_rate": 2.8036055957574997e-05, - "loss": 2.8208, + "learning_rate": 2.803620137178571e-05, + "loss": 2.78, "step": 132700 }, { "epoch": 0.44, - "learning_rate": 2.8019504379547545e-05, - "loss": 2.8057, + "learning_rate": 2.8019649903339424e-05, + "loss": 2.7863, "step": 132800 }, { "epoch": 0.44, - "learning_rate": 2.80029528015201e-05, - "loss": 2.8044, + "learning_rate": 2.8003098434893143e-05, + "loss": 2.7995, "step": 132900 }, { "epoch": 0.44, - "learning_rate": 2.7986401223492648e-05, - "loss": 2.8047, + "learning_rate": 2.7986546966446862e-05, + "loss": 2.7783, "step": 133000 }, { "epoch": 0.44, - "learning_rate": 2.7969849645465203e-05, - "loss": 2.8155, + "learning_rate": 2.7969995498000585e-05, + "loss": 2.7738, "step": 133100 }, { "epoch": 0.44, - "learning_rate": 2.795329806743775e-05, - "loss": 2.795, + "learning_rate": 2.7953444029554304e-05, + "loss": 2.7817, "step": 133200 }, { "epoch": 0.44, - "learning_rate": 2.7936746489410303e-05, - "loss": 2.8083, + "learning_rate": 2.7936892561108026e-05, + "loss": 2.7834, "step": 133300 }, { "epoch": 0.44, - "learning_rate": 2.792019491138285e-05, - "loss": 2.8051, + "learning_rate": 2.7920341092661745e-05, + "loss": 2.784, "step": 133400 }, { "epoch": 0.44, - "learning_rate": 2.7903643333355406e-05, - "loss": 2.7845, + "learning_rate": 2.790378962421546e-05, + "loss": 2.7928, "step": 133500 }, { "epoch": 0.44, - "learning_rate": 2.7887091755327954e-05, - "loss": 2.7974, + "learning_rate": 2.788723815576918e-05, + "loss": 2.7845, "step": 133600 }, { "epoch": 0.44, - "learning_rate": 2.7870540177300502e-05, - "loss": 2.8128, + "learning_rate": 2.78706866873229e-05, + "loss": 2.7788, "step": 133700 }, { "epoch": 0.44, - "learning_rate": 2.7853988599273057e-05, - "loss": 2.8141, + "learning_rate": 2.785413521887662e-05, + "loss": 2.7807, "step": 133800 }, { "epoch": 0.44, - "learning_rate": 2.7837437021245606e-05, - "loss": 2.7992, + "learning_rate": 2.783758375043034e-05, + "loss": 2.7874, "step": 133900 }, { "epoch": 0.44, - "learning_rate": 2.7820885443218157e-05, - "loss": 2.8088, + "learning_rate": 2.7821032281984062e-05, + "loss": 2.7794, "step": 134000 }, { "epoch": 0.44, - "learning_rate": 2.7804333865190705e-05, - "loss": 2.8036, + "learning_rate": 2.780448081353778e-05, + "loss": 2.7964, "step": 134100 }, { "epoch": 0.44, - "learning_rate": 2.778778228716326e-05, - "loss": 2.8064, + "learning_rate": 2.7787929345091497e-05, + "loss": 2.7854, "step": 134200 }, { "epoch": 0.44, - "learning_rate": 2.777123070913581e-05, - "loss": 2.8269, + "learning_rate": 2.7771377876645216e-05, + "loss": 2.7872, "step": 134300 }, { "epoch": 0.44, - "learning_rate": 2.7754679131108364e-05, - "loss": 2.804, + "learning_rate": 2.7754826408198935e-05, + "loss": 2.7848, "step": 134400 }, { "epoch": 0.45, - "learning_rate": 2.7738127553080912e-05, - "loss": 2.7965, + "learning_rate": 2.7738274939752657e-05, + "loss": 2.7696, "step": 134500 }, { "epoch": 0.45, - "learning_rate": 2.7721575975053467e-05, - "loss": 2.787, + "learning_rate": 2.7721723471306376e-05, + "loss": 2.7839, "step": 134600 }, { "epoch": 0.45, - "learning_rate": 2.7705024397026015e-05, - "loss": 2.796, + "learning_rate": 2.77051720028601e-05, + "loss": 2.7744, "step": 134700 }, { "epoch": 0.45, - "learning_rate": 2.7688472818998563e-05, - "loss": 2.8038, + "learning_rate": 2.7688620534413818e-05, + "loss": 2.7838, "step": 134800 }, { "epoch": 0.45, - "learning_rate": 2.7671921240971115e-05, - "loss": 2.7932, + "learning_rate": 2.7672069065967533e-05, + "loss": 2.7841, "step": 134900 }, { "epoch": 0.45, - "learning_rate": 2.7655369662943663e-05, - "loss": 2.7878, + "learning_rate": 2.7655517597521252e-05, + "loss": 2.7769, "step": 135000 }, { "epoch": 0.45, - "learning_rate": 2.7638818084916218e-05, - "loss": 2.7919, + "learning_rate": 2.763896612907497e-05, + "loss": 2.7861, "step": 135100 }, { "epoch": 0.45, - "learning_rate": 2.7622266506888766e-05, - "loss": 2.7972, + "learning_rate": 2.7622414660628694e-05, + "loss": 2.7781, "step": 135200 }, { "epoch": 0.45, - "learning_rate": 2.760571492886132e-05, - "loss": 2.8122, + "learning_rate": 2.7605863192182412e-05, + "loss": 2.7735, "step": 135300 }, { "epoch": 0.45, - "learning_rate": 2.758916335083387e-05, - "loss": 2.8169, + "learning_rate": 2.758931172373613e-05, + "loss": 2.7855, "step": 135400 }, { "epoch": 0.45, - "learning_rate": 2.757261177280642e-05, - "loss": 2.8151, + "learning_rate": 2.7572760255289854e-05, + "loss": 2.7908, "step": 135500 }, { "epoch": 0.45, - "learning_rate": 2.755606019477897e-05, - "loss": 2.7907, + "learning_rate": 2.755620878684357e-05, + "loss": 2.7784, "step": 135600 }, { "epoch": 0.45, - "learning_rate": 2.7539508616751524e-05, - "loss": 2.7802, + "learning_rate": 2.753965731839729e-05, + "loss": 2.7769, "step": 135700 }, { "epoch": 0.45, - "learning_rate": 2.7522957038724073e-05, - "loss": 2.8084, + "learning_rate": 2.7523105849951007e-05, + "loss": 2.7862, "step": 135800 }, { "epoch": 0.45, - "learning_rate": 2.750640546069662e-05, - "loss": 2.7956, + "learning_rate": 2.750655438150473e-05, + "loss": 2.7947, "step": 135900 }, { "epoch": 0.45, - "learning_rate": 2.7489853882669176e-05, - "loss": 2.8046, + "learning_rate": 2.749000291305845e-05, + "loss": 2.782, "step": 136000 }, { "epoch": 0.45, - "learning_rate": 2.7473302304641724e-05, - "loss": 2.8017, + "learning_rate": 2.7473451444612168e-05, + "loss": 2.7997, "step": 136100 }, { "epoch": 0.45, - "learning_rate": 2.745675072661428e-05, - "loss": 2.7996, + "learning_rate": 2.7456899976165883e-05, + "loss": 2.7789, "step": 136200 }, { "epoch": 0.45, - "learning_rate": 2.7440199148586827e-05, - "loss": 2.8007, + "learning_rate": 2.7440348507719606e-05, + "loss": 2.7945, "step": 136300 }, { "epoch": 0.45, - "learning_rate": 2.742364757055938e-05, - "loss": 2.7933, + "learning_rate": 2.7423797039273325e-05, + "loss": 2.7824, "step": 136400 }, { "epoch": 0.45, - "learning_rate": 2.7407095992531927e-05, - "loss": 2.8131, + "learning_rate": 2.7407245570827044e-05, + "loss": 2.7898, "step": 136500 }, { "epoch": 0.45, - "learning_rate": 2.7390544414504482e-05, - "loss": 2.8018, + "learning_rate": 2.7390694102380766e-05, + "loss": 2.7922, "step": 136600 }, { "epoch": 0.45, - "learning_rate": 2.737399283647703e-05, - "loss": 2.7996, + "learning_rate": 2.7374142633934485e-05, + "loss": 2.7762, "step": 136700 }, { "epoch": 0.45, - "learning_rate": 2.7357441258449585e-05, - "loss": 2.8014, + "learning_rate": 2.7357591165488204e-05, + "loss": 2.7785, "step": 136800 }, { "epoch": 0.45, - "learning_rate": 2.7340889680422133e-05, - "loss": 2.7993, + "learning_rate": 2.734103969704192e-05, + "loss": 2.786, "step": 136900 }, { "epoch": 0.45, - "learning_rate": 2.732433810239468e-05, - "loss": 2.8102, + "learning_rate": 2.7324488228595642e-05, + "loss": 2.7985, "step": 137000 }, { "epoch": 0.45, - "learning_rate": 2.7307786524367233e-05, - "loss": 2.8043, + "learning_rate": 2.730793676014936e-05, + "loss": 2.7882, "step": 137100 }, { "epoch": 0.45, - "learning_rate": 2.7291234946339785e-05, - "loss": 2.8065, + "learning_rate": 2.729138529170308e-05, + "loss": 2.7706, "step": 137200 }, { "epoch": 0.45, - "learning_rate": 2.7274683368312336e-05, - "loss": 2.8117, + "learning_rate": 2.7274833823256802e-05, + "loss": 2.7838, "step": 137300 }, { "epoch": 0.45, - "learning_rate": 2.7258131790284885e-05, - "loss": 2.7911, + "learning_rate": 2.725828235481052e-05, + "loss": 2.7664, "step": 137400 }, { "epoch": 0.46, - "learning_rate": 2.724158021225744e-05, - "loss": 2.8053, + "learning_rate": 2.724173088636424e-05, + "loss": 2.7881, "step": 137500 }, { "epoch": 0.46, - "learning_rate": 2.7225028634229988e-05, - "loss": 2.8088, + "learning_rate": 2.7225179417917956e-05, + "loss": 2.787, "step": 137600 }, { "epoch": 0.46, - "learning_rate": 2.7208477056202543e-05, - "loss": 2.8071, + "learning_rate": 2.7208627949471678e-05, + "loss": 2.7782, "step": 137700 }, { "epoch": 0.46, - "learning_rate": 2.719192547817509e-05, - "loss": 2.7977, + "learning_rate": 2.7192076481025397e-05, + "loss": 2.7863, "step": 137800 }, { "epoch": 0.46, - "learning_rate": 2.7175373900147643e-05, - "loss": 2.8085, + "learning_rate": 2.7175525012579116e-05, + "loss": 2.7757, "step": 137900 }, { "epoch": 0.46, - "learning_rate": 2.715882232212019e-05, - "loss": 2.8051, + "learning_rate": 2.715897354413284e-05, + "loss": 2.7758, "step": 138000 }, { "epoch": 0.46, - "learning_rate": 2.714227074409274e-05, - "loss": 2.7923, + "learning_rate": 2.7142422075686558e-05, + "loss": 2.7666, "step": 138100 }, { "epoch": 0.46, - "learning_rate": 2.7125719166065294e-05, - "loss": 2.8005, + "learning_rate": 2.7125870607240277e-05, + "loss": 2.7665, "step": 138200 }, { "epoch": 0.46, - "learning_rate": 2.7109167588037842e-05, - "loss": 2.8018, + "learning_rate": 2.7109319138793992e-05, + "loss": 2.7796, "step": 138300 }, { "epoch": 0.46, - "learning_rate": 2.7092616010010397e-05, - "loss": 2.7954, + "learning_rate": 2.7092767670347715e-05, + "loss": 2.7694, "step": 138400 }, { "epoch": 0.46, - "learning_rate": 2.7076064431982945e-05, - "loss": 2.8132, + "learning_rate": 2.7076216201901433e-05, + "loss": 2.7732, "step": 138500 }, { "epoch": 0.46, - "learning_rate": 2.70595128539555e-05, - "loss": 2.803, + "learning_rate": 2.7059664733455152e-05, + "loss": 2.7804, "step": 138600 }, { "epoch": 0.46, - "learning_rate": 2.704296127592805e-05, - "loss": 2.7983, + "learning_rate": 2.7043113265008875e-05, + "loss": 2.7899, "step": 138700 }, { "epoch": 0.46, - "learning_rate": 2.70264096979006e-05, - "loss": 2.8008, + "learning_rate": 2.7026561796562594e-05, + "loss": 2.7705, "step": 138800 }, { "epoch": 0.46, - "learning_rate": 2.700985811987315e-05, - "loss": 2.8014, + "learning_rate": 2.7010010328116313e-05, + "loss": 2.7996, "step": 138900 }, { "epoch": 0.46, - "learning_rate": 2.6993306541845703e-05, - "loss": 2.7978, + "learning_rate": 2.699345885967003e-05, + "loss": 2.7725, "step": 139000 }, { "epoch": 0.46, - "learning_rate": 2.697675496381825e-05, - "loss": 2.797, + "learning_rate": 2.697690739122375e-05, + "loss": 2.783, "step": 139100 }, { "epoch": 0.46, - "learning_rate": 2.69602033857908e-05, - "loss": 2.8073, + "learning_rate": 2.696035592277747e-05, + "loss": 2.7715, "step": 139200 }, { "epoch": 0.46, - "learning_rate": 2.6943651807763355e-05, - "loss": 2.7872, + "learning_rate": 2.694380445433119e-05, + "loss": 2.7758, "step": 139300 }, { "epoch": 0.46, - "learning_rate": 2.6927100229735903e-05, - "loss": 2.7826, + "learning_rate": 2.692725298588491e-05, + "loss": 2.7751, "step": 139400 }, { "epoch": 0.46, - "learning_rate": 2.6910548651708455e-05, - "loss": 2.7921, + "learning_rate": 2.691070151743863e-05, + "loss": 2.7897, "step": 139500 }, { "epoch": 0.46, - "learning_rate": 2.6893997073681003e-05, - "loss": 2.8042, + "learning_rate": 2.689415004899235e-05, + "loss": 2.7988, "step": 139600 }, { "epoch": 0.46, - "learning_rate": 2.6877445495653558e-05, - "loss": 2.8215, + "learning_rate": 2.6877598580546065e-05, + "loss": 2.7896, "step": 139700 }, { "epoch": 0.46, - "learning_rate": 2.6860893917626106e-05, - "loss": 2.7855, + "learning_rate": 2.6861047112099784e-05, + "loss": 2.7764, "step": 139800 }, { "epoch": 0.46, - "learning_rate": 2.684434233959866e-05, - "loss": 2.8031, + "learning_rate": 2.6844495643653506e-05, + "loss": 2.7989, "step": 139900 }, { "epoch": 0.46, - "learning_rate": 2.682779076157121e-05, - "loss": 2.8039, + "learning_rate": 2.6827944175207225e-05, + "loss": 2.7605, "step": 140000 }, { "epoch": 0.46, - "learning_rate": 2.6811239183543764e-05, - "loss": 2.7946, + "learning_rate": 2.6811392706760947e-05, + "loss": 2.7864, "step": 140100 }, { "epoch": 0.46, - "learning_rate": 2.6794687605516312e-05, - "loss": 2.7835, + "learning_rate": 2.6794841238314666e-05, + "loss": 2.7637, "step": 140200 }, { "epoch": 0.46, - "learning_rate": 2.677813602748886e-05, - "loss": 2.812, + "learning_rate": 2.6778289769868385e-05, + "loss": 2.777, "step": 140300 }, { "epoch": 0.46, - "learning_rate": 2.6761584449461412e-05, - "loss": 2.8114, + "learning_rate": 2.67617383014221e-05, + "loss": 2.7684, "step": 140400 }, { "epoch": 0.47, - "learning_rate": 2.674503287143396e-05, - "loss": 2.7935, + "learning_rate": 2.674518683297582e-05, + "loss": 2.8049, "step": 140500 }, { "epoch": 0.47, - "learning_rate": 2.6728481293406515e-05, - "loss": 2.804, + "learning_rate": 2.6728635364529542e-05, + "loss": 2.7747, "step": 140600 }, { "epoch": 0.47, - "learning_rate": 2.6711929715379064e-05, - "loss": 2.7761, + "learning_rate": 2.671208389608326e-05, + "loss": 2.7763, "step": 140700 }, { "epoch": 0.47, - "learning_rate": 2.669537813735162e-05, - "loss": 2.7875, + "learning_rate": 2.6695532427636984e-05, + "loss": 2.7659, "step": 140800 }, { "epoch": 0.47, - "learning_rate": 2.6678826559324167e-05, - "loss": 2.7876, + "learning_rate": 2.6678980959190703e-05, + "loss": 2.7725, "step": 140900 }, { "epoch": 0.47, - "learning_rate": 2.666227498129672e-05, - "loss": 2.8027, + "learning_rate": 2.666242949074442e-05, + "loss": 2.761, "step": 141000 }, { "epoch": 0.47, - "learning_rate": 2.6645723403269267e-05, - "loss": 2.7894, + "learning_rate": 2.6645878022298137e-05, + "loss": 2.7763, "step": 141100 }, { "epoch": 0.47, - "learning_rate": 2.662917182524182e-05, - "loss": 2.7998, + "learning_rate": 2.6629326553851856e-05, + "loss": 2.7859, "step": 141200 }, { "epoch": 0.47, - "learning_rate": 2.661262024721437e-05, - "loss": 2.7859, + "learning_rate": 2.661277508540558e-05, + "loss": 2.7826, "step": 141300 }, { "epoch": 0.47, - "learning_rate": 2.6596068669186925e-05, - "loss": 2.7907, + "learning_rate": 2.6596223616959298e-05, + "loss": 2.7764, "step": 141400 }, { "epoch": 0.47, - "learning_rate": 2.6579517091159473e-05, - "loss": 2.8002, + "learning_rate": 2.657967214851302e-05, + "loss": 2.789, "step": 141500 }, { "epoch": 0.47, - "learning_rate": 2.656296551313202e-05, - "loss": 2.8047, + "learning_rate": 2.656312068006674e-05, + "loss": 2.7831, "step": 141600 }, { "epoch": 0.47, - "learning_rate": 2.6546413935104576e-05, - "loss": 2.7911, + "learning_rate": 2.6546569211620458e-05, + "loss": 2.7786, "step": 141700 }, { "epoch": 0.47, - "learning_rate": 2.6529862357077124e-05, - "loss": 2.8036, + "learning_rate": 2.6530017743174173e-05, + "loss": 2.7798, "step": 141800 }, { "epoch": 0.47, - "learning_rate": 2.6513310779049676e-05, - "loss": 2.7881, + "learning_rate": 2.6513466274727892e-05, + "loss": 2.7768, "step": 141900 }, { "epoch": 0.47, - "learning_rate": 2.6496759201022224e-05, - "loss": 2.8015, + "learning_rate": 2.6496914806281615e-05, + "loss": 2.7826, "step": 142000 }, { "epoch": 0.47, - "learning_rate": 2.648020762299478e-05, - "loss": 2.7973, + "learning_rate": 2.6480363337835334e-05, + "loss": 2.7907, "step": 142100 }, { "epoch": 0.47, - "learning_rate": 2.6463656044967328e-05, - "loss": 2.7894, + "learning_rate": 2.6463811869389056e-05, + "loss": 2.7842, "step": 142200 }, { "epoch": 0.47, - "learning_rate": 2.6447104466939882e-05, - "loss": 2.7997, + "learning_rate": 2.6447260400942775e-05, + "loss": 2.7853, "step": 142300 }, { "epoch": 0.47, - "learning_rate": 2.643055288891243e-05, - "loss": 2.7983, + "learning_rate": 2.643070893249649e-05, + "loss": 2.79, "step": 142400 }, { "epoch": 0.47, - "learning_rate": 2.6414001310884982e-05, - "loss": 2.801, + "learning_rate": 2.641415746405021e-05, + "loss": 2.7784, "step": 142500 }, { "epoch": 0.47, - "learning_rate": 2.639744973285753e-05, - "loss": 2.7884, + "learning_rate": 2.639760599560393e-05, + "loss": 2.7841, "step": 142600 }, { "epoch": 0.47, - "learning_rate": 2.6380898154830082e-05, - "loss": 2.7991, + "learning_rate": 2.638105452715765e-05, + "loss": 2.7839, "step": 142700 }, { "epoch": 0.47, - "learning_rate": 2.6364346576802634e-05, - "loss": 2.8019, + "learning_rate": 2.636450305871137e-05, + "loss": 2.7875, "step": 142800 }, { "epoch": 0.47, - "learning_rate": 2.6347794998775182e-05, - "loss": 2.7981, + "learning_rate": 2.6347951590265092e-05, + "loss": 2.7778, "step": 142900 }, { "epoch": 0.47, - "learning_rate": 2.6331243420747737e-05, - "loss": 2.8116, + "learning_rate": 2.633140012181881e-05, + "loss": 2.7759, "step": 143000 }, { "epoch": 0.47, - "learning_rate": 2.6314691842720285e-05, - "loss": 2.7918, + "learning_rate": 2.6314848653372527e-05, + "loss": 2.7884, "step": 143100 }, { "epoch": 0.47, - "learning_rate": 2.629814026469284e-05, - "loss": 2.7814, + "learning_rate": 2.6298297184926246e-05, + "loss": 2.796, "step": 143200 }, { "epoch": 0.47, - "learning_rate": 2.628158868666539e-05, - "loss": 2.7907, + "learning_rate": 2.6281745716479965e-05, + "loss": 2.7757, "step": 143300 }, { "epoch": 0.47, - "learning_rate": 2.626503710863794e-05, - "loss": 2.796, + "learning_rate": 2.6265194248033687e-05, + "loss": 2.7815, "step": 143400 }, { "epoch": 0.48, - "learning_rate": 2.6248485530610488e-05, - "loss": 2.8082, + "learning_rate": 2.6248642779587406e-05, + "loss": 2.781, "step": 143500 }, { "epoch": 0.48, - "learning_rate": 2.6231933952583043e-05, - "loss": 2.7956, + "learning_rate": 2.623209131114113e-05, + "loss": 2.7859, "step": 143600 }, { "epoch": 0.48, - "learning_rate": 2.621538237455559e-05, - "loss": 2.7989, + "learning_rate": 2.6215539842694848e-05, + "loss": 2.7894, "step": 143700 }, { "epoch": 0.48, - "learning_rate": 2.619883079652814e-05, - "loss": 2.8037, + "learning_rate": 2.6198988374248563e-05, + "loss": 2.7809, "step": 143800 }, { "epoch": 0.48, - "learning_rate": 2.6182279218500695e-05, - "loss": 2.7961, + "learning_rate": 2.6182436905802282e-05, + "loss": 2.7695, "step": 143900 }, { "epoch": 0.48, - "learning_rate": 2.6165727640473243e-05, - "loss": 2.7906, + "learning_rate": 2.6165885437356e-05, + "loss": 2.7796, "step": 144000 }, { "epoch": 0.48, - "learning_rate": 2.6149176062445794e-05, - "loss": 2.7928, + "learning_rate": 2.6149333968909724e-05, + "loss": 2.7594, "step": 144100 }, { "epoch": 0.48, - "learning_rate": 2.6132624484418346e-05, - "loss": 2.8038, + "learning_rate": 2.6132782500463443e-05, + "loss": 2.7758, "step": 144200 }, { "epoch": 0.48, - "learning_rate": 2.6116072906390898e-05, - "loss": 2.7985, + "learning_rate": 2.611623103201716e-05, + "loss": 2.7983, "step": 144300 }, { "epoch": 0.48, - "learning_rate": 2.6099521328363446e-05, - "loss": 2.793, + "learning_rate": 2.6099679563570884e-05, + "loss": 2.7545, "step": 144400 }, { "epoch": 0.48, - "learning_rate": 2.6082969750336e-05, - "loss": 2.7953, + "learning_rate": 2.60831280951246e-05, + "loss": 2.7757, "step": 144500 }, { "epoch": 0.48, - "learning_rate": 2.606641817230855e-05, - "loss": 2.7914, + "learning_rate": 2.606657662667832e-05, + "loss": 2.7695, "step": 144600 }, { "epoch": 0.48, - "learning_rate": 2.6049866594281104e-05, - "loss": 2.799, + "learning_rate": 2.6050025158232038e-05, + "loss": 2.7797, "step": 144700 }, { "epoch": 0.48, - "learning_rate": 2.6033315016253652e-05, - "loss": 2.7984, + "learning_rate": 2.603347368978576e-05, + "loss": 2.7871, "step": 144800 }, { "epoch": 0.48, - "learning_rate": 2.60167634382262e-05, - "loss": 2.7837, + "learning_rate": 2.601692222133948e-05, + "loss": 2.776, "step": 144900 }, { "epoch": 0.48, - "learning_rate": 2.6000211860198752e-05, - "loss": 2.7956, + "learning_rate": 2.6000370752893198e-05, + "loss": 2.783, "step": 145000 }, { "epoch": 0.48, - "learning_rate": 2.59836602821713e-05, - "loss": 2.7889, + "learning_rate": 2.598381928444692e-05, + "loss": 2.7894, "step": 145100 }, { "epoch": 0.48, - "learning_rate": 2.5967108704143855e-05, - "loss": 2.7957, + "learning_rate": 2.5967267816000636e-05, + "loss": 2.7742, "step": 145200 }, { "epoch": 0.48, - "learning_rate": 2.5950557126116403e-05, - "loss": 2.8074, + "learning_rate": 2.5950716347554355e-05, + "loss": 2.7708, "step": 145300 }, { "epoch": 0.48, - "learning_rate": 2.593400554808896e-05, - "loss": 2.7957, + "learning_rate": 2.5934164879108074e-05, + "loss": 2.7786, "step": 145400 }, { "epoch": 0.48, - "learning_rate": 2.5917453970061507e-05, - "loss": 2.8062, + "learning_rate": 2.5917613410661796e-05, + "loss": 2.7765, "step": 145500 }, { "epoch": 0.48, - "learning_rate": 2.5900902392034058e-05, - "loss": 2.7881, + "learning_rate": 2.5901061942215515e-05, + "loss": 2.785, "step": 145600 }, { "epoch": 0.48, - "learning_rate": 2.588435081400661e-05, - "loss": 2.7942, + "learning_rate": 2.5884510473769234e-05, + "loss": 2.7876, "step": 145700 }, { "epoch": 0.48, - "learning_rate": 2.586779923597916e-05, - "loss": 2.801, + "learning_rate": 2.5867959005322956e-05, + "loss": 2.7776, "step": 145800 }, { "epoch": 0.48, - "learning_rate": 2.585124765795171e-05, - "loss": 2.7872, + "learning_rate": 2.5851407536876672e-05, + "loss": 2.7827, "step": 145900 }, { "epoch": 0.48, - "learning_rate": 2.5834696079924258e-05, - "loss": 2.7961, + "learning_rate": 2.583485606843039e-05, + "loss": 2.7813, "step": 146000 }, { "epoch": 0.48, - "learning_rate": 2.5818144501896813e-05, - "loss": 2.7885, + "learning_rate": 2.581830459998411e-05, + "loss": 2.7732, "step": 146100 }, { "epoch": 0.48, - "learning_rate": 2.580159292386936e-05, - "loss": 2.8036, + "learning_rate": 2.5801753131537832e-05, + "loss": 2.7818, "step": 146200 }, { "epoch": 0.48, - "learning_rate": 2.5785041345841916e-05, - "loss": 2.803, + "learning_rate": 2.578520166309155e-05, + "loss": 2.7636, "step": 146300 }, { "epoch": 0.48, - "learning_rate": 2.5768489767814464e-05, - "loss": 2.796, + "learning_rate": 2.576865019464527e-05, + "loss": 2.7867, "step": 146400 }, { "epoch": 0.48, - "learning_rate": 2.5751938189787016e-05, - "loss": 2.795, + "learning_rate": 2.5752098726198993e-05, + "loss": 2.7743, "step": 146500 }, { "epoch": 0.49, - "learning_rate": 2.5735386611759564e-05, - "loss": 2.7934, + "learning_rate": 2.573554725775271e-05, + "loss": 2.78, "step": 146600 }, { "epoch": 0.49, - "learning_rate": 2.571883503373212e-05, - "loss": 2.7997, + "learning_rate": 2.5718995789306427e-05, + "loss": 2.7681, "step": 146700 }, { "epoch": 0.49, - "learning_rate": 2.5702283455704667e-05, - "loss": 2.7884, + "learning_rate": 2.5702444320860146e-05, + "loss": 2.7787, "step": 146800 }, { "epoch": 0.49, - "learning_rate": 2.5685731877677222e-05, - "loss": 2.799, + "learning_rate": 2.568589285241387e-05, + "loss": 2.776, "step": 146900 }, { "epoch": 0.49, - "learning_rate": 2.566918029964977e-05, - "loss": 2.7889, + "learning_rate": 2.5669341383967588e-05, + "loss": 2.7711, "step": 147000 }, { "epoch": 0.49, - "learning_rate": 2.565262872162232e-05, - "loss": 2.7969, + "learning_rate": 2.5652789915521307e-05, + "loss": 2.7769, "step": 147100 }, { "epoch": 0.49, - "learning_rate": 2.5636077143594874e-05, - "loss": 2.7825, + "learning_rate": 2.563623844707503e-05, + "loss": 2.7751, "step": 147200 }, { "epoch": 0.49, - "learning_rate": 2.5619525565567422e-05, - "loss": 2.8001, + "learning_rate": 2.5619686978628745e-05, + "loss": 2.7804, "step": 147300 }, { "epoch": 0.49, - "learning_rate": 2.5602973987539973e-05, - "loss": 2.7939, + "learning_rate": 2.5603135510182464e-05, + "loss": 2.7816, "step": 147400 }, { "epoch": 0.49, - "learning_rate": 2.558642240951252e-05, - "loss": 2.7947, + "learning_rate": 2.5586584041736183e-05, + "loss": 2.7874, "step": 147500 }, { "epoch": 0.49, - "learning_rate": 2.5569870831485077e-05, - "loss": 2.8042, + "learning_rate": 2.5570032573289905e-05, + "loss": 2.7858, "step": 147600 }, { "epoch": 0.49, - "learning_rate": 2.5553319253457625e-05, - "loss": 2.7929, + "learning_rate": 2.5553481104843624e-05, + "loss": 2.7698, "step": 147700 }, { "epoch": 0.49, - "learning_rate": 2.553676767543018e-05, - "loss": 2.8079, + "learning_rate": 2.5536929636397343e-05, + "loss": 2.7745, "step": 147800 }, { "epoch": 0.49, - "learning_rate": 2.5520216097402728e-05, - "loss": 2.7852, + "learning_rate": 2.5520378167951065e-05, + "loss": 2.7773, "step": 147900 }, { "epoch": 0.49, - "learning_rate": 2.550366451937528e-05, - "loss": 2.8156, + "learning_rate": 2.550382669950478e-05, + "loss": 2.7765, "step": 148000 }, { "epoch": 0.49, - "learning_rate": 2.5487112941347828e-05, - "loss": 2.7925, + "learning_rate": 2.54872752310585e-05, + "loss": 2.7765, "step": 148100 }, { "epoch": 0.49, - "learning_rate": 2.5470561363320376e-05, - "loss": 2.7932, + "learning_rate": 2.547072376261222e-05, + "loss": 2.7636, "step": 148200 }, { "epoch": 0.49, - "learning_rate": 2.545400978529293e-05, - "loss": 2.7889, + "learning_rate": 2.545417229416594e-05, + "loss": 2.7628, "step": 148300 }, { "epoch": 0.49, - "learning_rate": 2.543745820726548e-05, - "loss": 2.7922, + "learning_rate": 2.543762082571966e-05, + "loss": 2.7709, "step": 148400 }, { "epoch": 0.49, - "learning_rate": 2.5420906629238034e-05, - "loss": 2.7935, + "learning_rate": 2.542106935727338e-05, + "loss": 2.7823, "step": 148500 }, { "epoch": 0.49, - "learning_rate": 2.5404355051210582e-05, - "loss": 2.7984, + "learning_rate": 2.54045178888271e-05, + "loss": 2.7758, "step": 148600 }, { "epoch": 0.49, - "learning_rate": 2.5387803473183137e-05, - "loss": 2.8059, + "learning_rate": 2.5387966420380814e-05, + "loss": 2.7836, "step": 148700 }, { "epoch": 0.49, - "learning_rate": 2.5371251895155686e-05, - "loss": 2.8022, + "learning_rate": 2.5371414951934536e-05, + "loss": 2.7923, "step": 148800 }, { "epoch": 0.49, - "learning_rate": 2.5354700317128237e-05, - "loss": 2.7878, + "learning_rate": 2.5354863483488255e-05, + "loss": 2.7784, "step": 148900 }, { "epoch": 0.49, - "learning_rate": 2.5338148739100785e-05, - "loss": 2.8015, + "learning_rate": 2.5338312015041977e-05, + "loss": 2.7932, "step": 149000 }, { "epoch": 0.49, - "learning_rate": 2.532159716107334e-05, - "loss": 2.8112, + "learning_rate": 2.5321760546595696e-05, + "loss": 2.7562, "step": 149100 }, { "epoch": 0.49, - "learning_rate": 2.530504558304589e-05, - "loss": 2.8051, + "learning_rate": 2.5305209078149415e-05, + "loss": 2.7844, "step": 149200 }, { "epoch": 0.49, - "learning_rate": 2.5288494005018437e-05, - "loss": 2.7968, + "learning_rate": 2.528865760970313e-05, + "loss": 2.7901, "step": 149300 }, { "epoch": 0.49, - "learning_rate": 2.5271942426990992e-05, - "loss": 2.7978, + "learning_rate": 2.527210614125685e-05, + "loss": 2.7835, "step": 149400 }, { "epoch": 0.49, - "learning_rate": 2.525539084896354e-05, - "loss": 2.7922, + "learning_rate": 2.5255554672810572e-05, + "loss": 2.7709, "step": 149500 }, { "epoch": 0.5, - "learning_rate": 2.5238839270936092e-05, - "loss": 2.7897, + "learning_rate": 2.523900320436429e-05, + "loss": 2.7873, "step": 149600 }, { "epoch": 0.5, - "learning_rate": 2.522228769290864e-05, - "loss": 2.8041, + "learning_rate": 2.5222451735918014e-05, + "loss": 2.7812, "step": 149700 }, { "epoch": 0.5, - "learning_rate": 2.5205736114881195e-05, - "loss": 2.7849, + "learning_rate": 2.5205900267471733e-05, + "loss": 2.7799, "step": 149800 }, { "epoch": 0.5, - "learning_rate": 2.5189184536853743e-05, - "loss": 2.7934, + "learning_rate": 2.5189348799025452e-05, + "loss": 2.7729, "step": 149900 }, { "epoch": 0.5, - "learning_rate": 2.5172632958826298e-05, - "loss": 2.8009, + "learning_rate": 2.5172797330579167e-05, + "loss": 2.7721, "step": 150000 }, { "epoch": 0.5, - "learning_rate": 2.5156081380798846e-05, - "loss": 2.7935, + "learning_rate": 2.5156245862132886e-05, + "loss": 2.7846, "step": 150100 }, { "epoch": 0.5, - "learning_rate": 2.51395298027714e-05, - "loss": 2.7875, + "learning_rate": 2.513969439368661e-05, + "loss": 2.7772, "step": 150200 }, { "epoch": 0.5, - "learning_rate": 2.512297822474395e-05, - "loss": 2.7978, + "learning_rate": 2.5123142925240328e-05, + "loss": 2.7811, "step": 150300 }, { "epoch": 0.5, - "learning_rate": 2.5106426646716498e-05, - "loss": 2.8039, + "learning_rate": 2.510659145679405e-05, + "loss": 2.7659, "step": 150400 }, { "epoch": 0.5, - "learning_rate": 2.508987506868905e-05, - "loss": 2.7968, + "learning_rate": 2.509003998834777e-05, + "loss": 2.7581, "step": 150500 }, { "epoch": 0.5, - "learning_rate": 2.5073323490661598e-05, - "loss": 2.7957, + "learning_rate": 2.5073488519901488e-05, + "loss": 2.7842, "step": 150600 }, { "epoch": 0.5, - "learning_rate": 2.5056771912634153e-05, - "loss": 2.8161, + "learning_rate": 2.5056937051455204e-05, + "loss": 2.7747, "step": 150700 }, { "epoch": 0.5, - "learning_rate": 2.50402203346067e-05, - "loss": 2.7918, + "learning_rate": 2.5040385583008923e-05, + "loss": 2.7642, "step": 150800 }, { "epoch": 0.5, - "learning_rate": 2.5023668756579256e-05, - "loss": 2.8016, + "learning_rate": 2.5023834114562645e-05, + "loss": 2.7829, "step": 150900 }, { "epoch": 0.5, - "learning_rate": 2.5007117178551804e-05, - "loss": 2.795, + "learning_rate": 2.5007282646116364e-05, + "loss": 2.7744, "step": 151000 }, { "epoch": 0.5, - "learning_rate": 2.4990565600524356e-05, - "loss": 2.8028, + "learning_rate": 2.4990731177670086e-05, + "loss": 2.7909, "step": 151100 }, { "epoch": 0.5, - "learning_rate": 2.4974014022496907e-05, - "loss": 2.7942, + "learning_rate": 2.4974179709223802e-05, + "loss": 2.788, "step": 151200 }, { "epoch": 0.5, - "learning_rate": 2.4957462444469455e-05, - "loss": 2.8005, + "learning_rate": 2.495762824077752e-05, + "loss": 2.7896, "step": 151300 }, { "epoch": 0.5, - "learning_rate": 2.4940910866442007e-05, - "loss": 2.7926, + "learning_rate": 2.4941076772331243e-05, + "loss": 2.7651, "step": 151400 }, { "epoch": 0.5, - "learning_rate": 2.492435928841456e-05, - "loss": 2.7654, + "learning_rate": 2.4924525303884962e-05, + "loss": 2.7675, "step": 151500 }, { "epoch": 0.5, - "learning_rate": 2.490780771038711e-05, - "loss": 2.7901, + "learning_rate": 2.490797383543868e-05, + "loss": 2.7791, "step": 151600 }, { "epoch": 0.5, - "learning_rate": 2.4891256132359662e-05, - "loss": 2.7793, + "learning_rate": 2.48914223669924e-05, + "loss": 2.764, "step": 151700 }, { "epoch": 0.5, - "learning_rate": 2.4874704554332213e-05, - "loss": 2.8031, + "learning_rate": 2.4874870898546123e-05, + "loss": 2.77, "step": 151800 }, { "epoch": 0.5, - "learning_rate": 2.485815297630476e-05, - "loss": 2.7981, + "learning_rate": 2.4858319430099838e-05, + "loss": 2.7764, "step": 151900 }, { "epoch": 0.5, - "learning_rate": 2.4841601398277313e-05, - "loss": 2.7796, + "learning_rate": 2.4841767961653557e-05, + "loss": 2.7905, "step": 152000 }, { "epoch": 0.5, - "learning_rate": 2.482504982024986e-05, - "loss": 2.7915, + "learning_rate": 2.482521649320728e-05, + "loss": 2.7796, "step": 152100 }, { "epoch": 0.5, - "learning_rate": 2.4808498242222413e-05, - "loss": 2.7875, + "learning_rate": 2.4808665024761e-05, + "loss": 2.7783, "step": 152200 }, { "epoch": 0.5, - "learning_rate": 2.4791946664194965e-05, - "loss": 2.8046, + "learning_rate": 2.4792113556314717e-05, + "loss": 2.7838, "step": 152300 }, { "epoch": 0.5, - "learning_rate": 2.4775395086167516e-05, - "loss": 2.8003, + "learning_rate": 2.4775562087868436e-05, + "loss": 2.7647, "step": 152400 }, { "epoch": 0.5, - "learning_rate": 2.4758843508140068e-05, - "loss": 2.8011, + "learning_rate": 2.475901061942216e-05, + "loss": 2.7721, "step": 152500 }, { "epoch": 0.51, - "learning_rate": 2.474229193011262e-05, - "loss": 2.7818, + "learning_rate": 2.4742459150975874e-05, + "loss": 2.7786, "step": 152600 }, { "epoch": 0.51, - "learning_rate": 2.472574035208517e-05, - "loss": 2.7992, + "learning_rate": 2.4725907682529593e-05, + "loss": 2.7852, "step": 152700 }, { "epoch": 0.51, - "learning_rate": 2.470918877405772e-05, - "loss": 2.7956, + "learning_rate": 2.4709356214083316e-05, + "loss": 2.7804, "step": 152800 }, { "epoch": 0.51, - "learning_rate": 2.469263719603027e-05, - "loss": 2.7923, + "learning_rate": 2.4692804745637035e-05, + "loss": 2.7809, "step": 152900 }, { "epoch": 0.51, - "learning_rate": 2.4676085618002822e-05, - "loss": 2.7958, + "learning_rate": 2.4676253277190754e-05, + "loss": 2.7754, "step": 153000 }, { "epoch": 0.51, - "learning_rate": 2.4659534039975374e-05, - "loss": 2.8176, + "learning_rate": 2.4659701808744473e-05, + "loss": 2.7772, "step": 153100 }, { "epoch": 0.51, - "learning_rate": 2.4642982461947922e-05, - "loss": 2.7989, + "learning_rate": 2.464315034029819e-05, + "loss": 2.7783, "step": 153200 }, { "epoch": 0.51, - "learning_rate": 2.4626430883920474e-05, - "loss": 2.7749, + "learning_rate": 2.462659887185191e-05, + "loss": 2.7704, "step": 153300 }, { "epoch": 0.51, - "learning_rate": 2.4609879305893025e-05, - "loss": 2.7988, + "learning_rate": 2.461004740340563e-05, + "loss": 2.7845, "step": 153400 }, { "epoch": 0.51, - "learning_rate": 2.4593327727865577e-05, - "loss": 2.7817, + "learning_rate": 2.4593495934959352e-05, + "loss": 2.7748, "step": 153500 }, { "epoch": 0.51, - "learning_rate": 2.4576776149838125e-05, - "loss": 2.7879, + "learning_rate": 2.457694446651307e-05, + "loss": 2.779, "step": 153600 }, { "epoch": 0.51, - "learning_rate": 2.4560224571810677e-05, - "loss": 2.7868, + "learning_rate": 2.456039299806679e-05, + "loss": 2.7755, "step": 153700 }, { "epoch": 0.51, - "learning_rate": 2.454367299378323e-05, - "loss": 2.8013, + "learning_rate": 2.454384152962051e-05, + "loss": 2.7755, "step": 153800 }, { "epoch": 0.51, - "learning_rate": 2.452712141575578e-05, - "loss": 2.7971, + "learning_rate": 2.4527290061174228e-05, + "loss": 2.7758, "step": 153900 }, { "epoch": 0.51, - "learning_rate": 2.451056983772833e-05, - "loss": 2.7983, + "learning_rate": 2.4510738592727947e-05, + "loss": 2.7733, "step": 154000 }, { "epoch": 0.51, - "learning_rate": 2.4494018259700883e-05, - "loss": 2.8045, + "learning_rate": 2.4494187124281666e-05, + "loss": 2.7738, "step": 154100 }, { "epoch": 0.51, - "learning_rate": 2.4477466681673435e-05, - "loss": 2.778, + "learning_rate": 2.4477635655835388e-05, + "loss": 2.7675, "step": 154200 }, { "epoch": 0.51, - "learning_rate": 2.4460915103645983e-05, - "loss": 2.8098, + "learning_rate": 2.4461084187389104e-05, + "loss": 2.7745, "step": 154300 }, { "epoch": 0.51, - "learning_rate": 2.444436352561853e-05, - "loss": 2.7919, + "learning_rate": 2.4444532718942826e-05, + "loss": 2.7798, "step": 154400 }, { "epoch": 0.51, - "learning_rate": 2.4427811947591083e-05, - "loss": 2.7911, + "learning_rate": 2.4427981250496545e-05, + "loss": 2.7743, "step": 154500 }, { "epoch": 0.51, - "learning_rate": 2.4411260369563634e-05, - "loss": 2.7953, + "learning_rate": 2.4411429782050264e-05, + "loss": 2.7694, "step": 154600 }, { "epoch": 0.51, - "learning_rate": 2.4394708791536186e-05, + "learning_rate": 2.4394878313603983e-05, "loss": 2.7859, "step": 154700 }, { "epoch": 0.51, - "learning_rate": 2.4378157213508738e-05, - "loss": 2.7864, + "learning_rate": 2.4378326845157702e-05, + "loss": 2.7566, "step": 154800 }, { "epoch": 0.51, - "learning_rate": 2.436160563548129e-05, - "loss": 2.7885, + "learning_rate": 2.4361775376711425e-05, + "loss": 2.7756, "step": 154900 }, { "epoch": 0.51, - "learning_rate": 2.434505405745384e-05, - "loss": 2.7968, + "learning_rate": 2.434522390826514e-05, + "loss": 2.7873, "step": 155000 }, { "epoch": 0.51, - "learning_rate": 2.432850247942639e-05, - "loss": 2.7922, + "learning_rate": 2.4328672439818863e-05, + "loss": 2.7751, "step": 155100 }, { "epoch": 0.51, - "learning_rate": 2.431195090139894e-05, - "loss": 2.797, + "learning_rate": 2.431212097137258e-05, + "loss": 2.7782, "step": 155200 }, { "epoch": 0.51, - "learning_rate": 2.4295399323371492e-05, - "loss": 2.7934, + "learning_rate": 2.42955695029263e-05, + "loss": 2.7759, "step": 155300 }, { "epoch": 0.51, - "learning_rate": 2.427884774534404e-05, - "loss": 2.8056, + "learning_rate": 2.427901803448002e-05, + "loss": 2.7655, "step": 155400 }, { "epoch": 0.51, - "learning_rate": 2.4262296167316592e-05, - "loss": 2.7837, + "learning_rate": 2.426246656603374e-05, + "loss": 2.7713, "step": 155500 }, { "epoch": 0.52, - "learning_rate": 2.4245744589289144e-05, - "loss": 2.7987, + "learning_rate": 2.424591509758746e-05, + "loss": 2.7826, "step": 155600 }, { "epoch": 0.52, - "learning_rate": 2.4229193011261695e-05, - "loss": 2.7862, + "learning_rate": 2.4229363629141176e-05, + "loss": 2.7691, "step": 155700 }, { "epoch": 0.52, - "learning_rate": 2.4212641433234247e-05, - "loss": 2.7806, + "learning_rate": 2.42128121606949e-05, + "loss": 2.7695, "step": 155800 }, { "epoch": 0.52, - "learning_rate": 2.4196089855206795e-05, - "loss": 2.8093, + "learning_rate": 2.4196260692248618e-05, + "loss": 2.7703, "step": 155900 }, { "epoch": 0.52, - "learning_rate": 2.4179538277179347e-05, - "loss": 2.7917, + "learning_rate": 2.4179709223802337e-05, + "loss": 2.7866, "step": 156000 }, { "epoch": 0.52, - "learning_rate": 2.4162986699151898e-05, - "loss": 2.7814, + "learning_rate": 2.4163157755356056e-05, + "loss": 2.7742, "step": 156100 }, { "epoch": 0.52, - "learning_rate": 2.414643512112445e-05, - "loss": 2.7945, + "learning_rate": 2.4146606286909775e-05, + "loss": 2.7717, "step": 156200 }, { "epoch": 0.52, - "learning_rate": 2.4129883543097e-05, - "loss": 2.7962, + "learning_rate": 2.4130054818463497e-05, + "loss": 2.7704, "step": 156300 }, { "epoch": 0.52, - "learning_rate": 2.4113331965069553e-05, - "loss": 2.7905, + "learning_rate": 2.4113503350017213e-05, + "loss": 2.7683, "step": 156400 }, { "epoch": 0.52, - "learning_rate": 2.40967803870421e-05, - "loss": 2.789, + "learning_rate": 2.4096951881570935e-05, + "loss": 2.7762, "step": 156500 }, { "epoch": 0.52, - "learning_rate": 2.4080228809014653e-05, - "loss": 2.788, + "learning_rate": 2.4080400413124654e-05, + "loss": 2.7689, "step": 156600 }, { "epoch": 0.52, - "learning_rate": 2.40636772309872e-05, - "loss": 2.7885, + "learning_rate": 2.4063848944678373e-05, + "loss": 2.7627, "step": 156700 }, { "epoch": 0.52, - "learning_rate": 2.4047125652959753e-05, - "loss": 2.8027, + "learning_rate": 2.4047297476232092e-05, + "loss": 2.765, "step": 156800 }, { "epoch": 0.52, - "learning_rate": 2.4030574074932304e-05, - "loss": 2.8116, + "learning_rate": 2.403074600778581e-05, + "loss": 2.7791, "step": 156900 }, { "epoch": 0.52, - "learning_rate": 2.4014022496904856e-05, - "loss": 2.8074, + "learning_rate": 2.4014194539339533e-05, + "loss": 2.7762, "step": 157000 }, { "epoch": 0.52, - "learning_rate": 2.3997470918877408e-05, - "loss": 2.789, + "learning_rate": 2.399764307089325e-05, + "loss": 2.768, "step": 157100 }, { "epoch": 0.52, - "learning_rate": 2.398091934084996e-05, - "loss": 2.7971, + "learning_rate": 2.398109160244697e-05, + "loss": 2.7787, "step": 157200 }, { "epoch": 0.52, - "learning_rate": 2.396436776282251e-05, - "loss": 2.794, + "learning_rate": 2.396454013400069e-05, + "loss": 2.7708, "step": 157300 }, { "epoch": 0.52, - "learning_rate": 2.394781618479506e-05, - "loss": 2.7997, + "learning_rate": 2.394798866555441e-05, + "loss": 2.7643, "step": 157400 }, { "epoch": 0.52, - "learning_rate": 2.393126460676761e-05, - "loss": 2.7939, + "learning_rate": 2.3931437197108128e-05, + "loss": 2.7718, "step": 157500 }, { "epoch": 0.52, - "learning_rate": 2.391471302874016e-05, - "loss": 2.7972, + "learning_rate": 2.3914885728661847e-05, + "loss": 2.7743, "step": 157600 }, { "epoch": 0.52, - "learning_rate": 2.389816145071271e-05, - "loss": 2.7963, + "learning_rate": 2.389833426021557e-05, + "loss": 2.7786, "step": 157700 }, { "epoch": 0.52, - "learning_rate": 2.3881609872685262e-05, - "loss": 2.7948, + "learning_rate": 2.3881782791769285e-05, + "loss": 2.7858, "step": 157800 }, { "epoch": 0.52, - "learning_rate": 2.3865058294657814e-05, - "loss": 2.7908, + "learning_rate": 2.3865231323323008e-05, + "loss": 2.7724, "step": 157900 }, { "epoch": 0.52, - "learning_rate": 2.3848506716630365e-05, - "loss": 2.7979, + "learning_rate": 2.3848679854876727e-05, + "loss": 2.7746, "step": 158000 }, { "epoch": 0.52, - "learning_rate": 2.3831955138602917e-05, - "loss": 2.7867, + "learning_rate": 2.3832128386430446e-05, + "loss": 2.7739, "step": 158100 }, { "epoch": 0.52, - "learning_rate": 2.3815403560575465e-05, - "loss": 2.795, + "learning_rate": 2.3815576917984165e-05, + "loss": 2.7851, "step": 158200 }, { "epoch": 0.52, - "learning_rate": 2.3798851982548017e-05, - "loss": 2.7886, + "learning_rate": 2.3799025449537883e-05, + "loss": 2.7693, "step": 158300 }, { "epoch": 0.52, - "learning_rate": 2.3782300404520568e-05, - "loss": 2.7986, + "learning_rate": 2.3782473981091606e-05, + "loss": 2.7834, "step": 158400 }, { "epoch": 0.52, - "learning_rate": 2.376574882649312e-05, - "loss": 2.7909, + "learning_rate": 2.376592251264532e-05, + "loss": 2.7767, "step": 158500 }, { "epoch": 0.53, - "learning_rate": 2.374919724846567e-05, - "loss": 2.7936, + "learning_rate": 2.3749371044199044e-05, + "loss": 2.7717, "step": 158600 }, { "epoch": 0.53, - "learning_rate": 2.373264567043822e-05, - "loss": 2.7912, + "learning_rate": 2.3732819575752763e-05, + "loss": 2.7751, "step": 158700 }, { "epoch": 0.53, - "learning_rate": 2.371609409241077e-05, - "loss": 2.7844, + "learning_rate": 2.3716268107306482e-05, + "loss": 2.7723, "step": 158800 }, { "epoch": 0.53, - "learning_rate": 2.3699542514383323e-05, - "loss": 2.7965, + "learning_rate": 2.36997166388602e-05, + "loss": 2.7702, "step": 158900 }, { "epoch": 0.53, - "learning_rate": 2.3682990936355874e-05, - "loss": 2.8104, + "learning_rate": 2.368316517041392e-05, + "loss": 2.7705, "step": 159000 }, { "epoch": 0.53, - "learning_rate": 2.3666439358328423e-05, - "loss": 2.7856, + "learning_rate": 2.3666613701967642e-05, + "loss": 2.7683, "step": 159100 }, { "epoch": 0.53, - "learning_rate": 2.3649887780300974e-05, - "loss": 2.7953, + "learning_rate": 2.3650062233521358e-05, + "loss": 2.785, "step": 159200 }, { "epoch": 0.53, - "learning_rate": 2.3633336202273526e-05, - "loss": 2.8108, + "learning_rate": 2.363351076507508e-05, + "loss": 2.7568, "step": 159300 }, { "epoch": 0.53, - "learning_rate": 2.3616784624246077e-05, - "loss": 2.792, + "learning_rate": 2.36169592966288e-05, + "loss": 2.7794, "step": 159400 }, { "epoch": 0.53, - "learning_rate": 2.360023304621863e-05, - "loss": 2.7716, + "learning_rate": 2.3600407828182518e-05, + "loss": 2.7751, "step": 159500 }, { "epoch": 0.53, - "learning_rate": 2.358368146819118e-05, - "loss": 2.7978, + "learning_rate": 2.3583856359736237e-05, + "loss": 2.7912, "step": 159600 }, { "epoch": 0.53, - "learning_rate": 2.3567129890163732e-05, - "loss": 2.8178, + "learning_rate": 2.3567304891289956e-05, + "loss": 2.7772, "step": 159700 }, { "epoch": 0.53, - "learning_rate": 2.355057831213628e-05, - "loss": 2.8042, + "learning_rate": 2.355075342284368e-05, + "loss": 2.7737, "step": 159800 }, { "epoch": 0.53, - "learning_rate": 2.353402673410883e-05, - "loss": 2.7992, + "learning_rate": 2.3534201954397394e-05, + "loss": 2.7799, "step": 159900 }, { "epoch": 0.53, - "learning_rate": 2.351747515608138e-05, - "loss": 2.7971, + "learning_rate": 2.3517650485951116e-05, + "loss": 2.7545, "step": 160000 }, { "epoch": 0.53, - "learning_rate": 2.3500923578053932e-05, - "loss": 2.8003, + "learning_rate": 2.3501099017504835e-05, + "loss": 2.7612, "step": 160100 }, { "epoch": 0.53, - "learning_rate": 2.3484372000026483e-05, - "loss": 2.7822, + "learning_rate": 2.348454754905855e-05, + "loss": 2.7749, "step": 160200 }, { "epoch": 0.53, - "learning_rate": 2.3467820421999035e-05, - "loss": 2.8043, + "learning_rate": 2.3467996080612273e-05, + "loss": 2.7754, "step": 160300 }, { "epoch": 0.53, - "learning_rate": 2.3451268843971587e-05, - "loss": 2.7976, + "learning_rate": 2.3451444612165992e-05, + "loss": 2.789, "step": 160400 }, { "epoch": 0.53, - "learning_rate": 2.3434717265944138e-05, - "loss": 2.7896, + "learning_rate": 2.343489314371971e-05, + "loss": 2.7792, "step": 160500 }, { "epoch": 0.53, - "learning_rate": 2.3418165687916686e-05, - "loss": 2.8011, + "learning_rate": 2.341834167527343e-05, + "loss": 2.7658, "step": 160600 }, { "epoch": 0.53, - "learning_rate": 2.3401614109889238e-05, - "loss": 2.7774, + "learning_rate": 2.3401790206827153e-05, + "loss": 2.799, "step": 160700 }, { "epoch": 0.53, - "learning_rate": 2.338506253186179e-05, - "loss": 2.7947, + "learning_rate": 2.338523873838087e-05, + "loss": 2.7865, "step": 160800 }, { "epoch": 0.53, - "learning_rate": 2.3368510953834338e-05, - "loss": 2.7959, + "learning_rate": 2.3368687269934587e-05, + "loss": 2.7736, "step": 160900 }, { "epoch": 0.53, - "learning_rate": 2.335195937580689e-05, - "loss": 2.7781, + "learning_rate": 2.335213580148831e-05, + "loss": 2.7832, "step": 161000 }, { "epoch": 0.53, - "learning_rate": 2.333540779777944e-05, - "loss": 2.8011, + "learning_rate": 2.333558433304203e-05, + "loss": 2.7767, "step": 161100 }, { "epoch": 0.53, - "learning_rate": 2.3318856219751993e-05, - "loss": 2.7933, + "learning_rate": 2.3319032864595748e-05, + "loss": 2.7661, "step": 161200 }, { "epoch": 0.53, - "learning_rate": 2.3302304641724544e-05, - "loss": 2.7981, + "learning_rate": 2.3302481396149467e-05, + "loss": 2.7658, "step": 161300 }, { "epoch": 0.53, - "learning_rate": 2.3285753063697092e-05, - "loss": 2.8, + "learning_rate": 2.328592992770319e-05, + "loss": 2.7777, "step": 161400 }, { "epoch": 0.53, - "learning_rate": 2.3269201485669644e-05, - "loss": 2.788, + "learning_rate": 2.3269378459256908e-05, + "loss": 2.7651, "step": 161500 }, { "epoch": 0.53, - "learning_rate": 2.3252649907642196e-05, - "loss": 2.783, + "learning_rate": 2.3252826990810623e-05, + "loss": 2.768, "step": 161600 }, { "epoch": 0.54, - "learning_rate": 2.3236098329614747e-05, - "loss": 2.7905, + "learning_rate": 2.3236275522364346e-05, + "loss": 2.7773, "step": 161700 }, { "epoch": 0.54, - "learning_rate": 2.32195467515873e-05, - "loss": 2.8023, + "learning_rate": 2.3219724053918065e-05, + "loss": 2.7782, "step": 161800 }, { "epoch": 0.54, - "learning_rate": 2.320299517355985e-05, - "loss": 2.7838, + "learning_rate": 2.3203172585471784e-05, + "loss": 2.7709, "step": 161900 }, { "epoch": 0.54, - "learning_rate": 2.3186443595532402e-05, - "loss": 2.7919, + "learning_rate": 2.3186621117025503e-05, + "loss": 2.7593, "step": 162000 }, { "epoch": 0.54, - "learning_rate": 2.316989201750495e-05, - "loss": 2.8112, + "learning_rate": 2.3170069648579222e-05, + "loss": 2.7735, "step": 162100 }, { "epoch": 0.54, - "learning_rate": 2.31533404394775e-05, - "loss": 2.8049, + "learning_rate": 2.3153518180132944e-05, + "loss": 2.7657, "step": 162200 }, { "epoch": 0.54, - "learning_rate": 2.313678886145005e-05, - "loss": 2.7997, + "learning_rate": 2.313696671168666e-05, + "loss": 2.7746, "step": 162300 }, { "epoch": 0.54, - "learning_rate": 2.31202372834226e-05, - "loss": 2.7842, + "learning_rate": 2.3120415243240382e-05, + "loss": 2.7657, "step": 162400 }, { "epoch": 0.54, - "learning_rate": 2.3103685705395153e-05, - "loss": 2.7908, + "learning_rate": 2.31038637747941e-05, + "loss": 2.7741, "step": 162500 }, { "epoch": 0.54, - "learning_rate": 2.3087134127367705e-05, - "loss": 2.7995, + "learning_rate": 2.308731230634782e-05, + "loss": 2.7657, "step": 162600 }, { "epoch": 0.54, - "learning_rate": 2.3070582549340256e-05, - "loss": 2.7945, + "learning_rate": 2.307076083790154e-05, + "loss": 2.7794, "step": 162700 }, { "epoch": 0.54, - "learning_rate": 2.3054030971312808e-05, - "loss": 2.7875, + "learning_rate": 2.3054209369455258e-05, + "loss": 2.7656, "step": 162800 }, { "epoch": 0.54, - "learning_rate": 2.3037479393285356e-05, - "loss": 2.8003, + "learning_rate": 2.303765790100898e-05, + "loss": 2.7864, "step": 162900 }, { "epoch": 0.54, - "learning_rate": 2.3020927815257908e-05, - "loss": 2.794, + "learning_rate": 2.3021106432562696e-05, + "loss": 2.7802, "step": 163000 }, { "epoch": 0.54, - "learning_rate": 2.300437623723046e-05, - "loss": 2.7902, + "learning_rate": 2.300455496411642e-05, + "loss": 2.771, "step": 163100 }, { "epoch": 0.54, - "learning_rate": 2.2987824659203008e-05, - "loss": 2.7798, + "learning_rate": 2.2988003495670137e-05, + "loss": 2.7843, "step": 163200 }, { "epoch": 0.54, - "learning_rate": 2.297127308117556e-05, - "loss": 2.8108, + "learning_rate": 2.2971452027223856e-05, + "loss": 2.7701, "step": 163300 }, { "epoch": 0.54, - "learning_rate": 2.295472150314811e-05, - "loss": 2.7928, + "learning_rate": 2.2954900558777575e-05, + "loss": 2.7797, "step": 163400 }, { "epoch": 0.54, - "learning_rate": 2.2938169925120662e-05, - "loss": 2.7929, + "learning_rate": 2.2938349090331294e-05, + "loss": 2.7632, "step": 163500 }, { "epoch": 0.54, - "learning_rate": 2.2921618347093214e-05, - "loss": 2.7951, + "learning_rate": 2.2921797621885017e-05, + "loss": 2.7667, "step": 163600 }, { "epoch": 0.54, - "learning_rate": 2.2905066769065762e-05, - "loss": 2.7851, + "learning_rate": 2.2905246153438732e-05, + "loss": 2.7814, "step": 163700 }, { "epoch": 0.54, - "learning_rate": 2.2888515191038314e-05, - "loss": 2.7703, + "learning_rate": 2.2888694684992455e-05, + "loss": 2.7647, "step": 163800 }, { "epoch": 0.54, - "learning_rate": 2.2871963613010865e-05, - "loss": 2.7771, + "learning_rate": 2.2872143216546174e-05, + "loss": 2.777, "step": 163900 }, { "epoch": 0.54, - "learning_rate": 2.2855412034983417e-05, - "loss": 2.7898, + "learning_rate": 2.2855591748099893e-05, + "loss": 2.7843, "step": 164000 }, { "epoch": 0.54, - "learning_rate": 2.283886045695597e-05, - "loss": 2.7842, + "learning_rate": 2.283904027965361e-05, + "loss": 2.7703, "step": 164100 }, { "epoch": 0.54, - "learning_rate": 2.282230887892852e-05, - "loss": 2.789, + "learning_rate": 2.282248881120733e-05, + "loss": 2.7741, "step": 164200 }, { "epoch": 0.54, - "learning_rate": 2.280575730090107e-05, - "loss": 2.7911, + "learning_rate": 2.280593734276105e-05, + "loss": 2.7625, "step": 164300 }, { "epoch": 0.54, - "learning_rate": 2.278920572287362e-05, - "loss": 2.797, + "learning_rate": 2.278938587431477e-05, + "loss": 2.7684, "step": 164400 }, { "epoch": 0.54, - "learning_rate": 2.277265414484617e-05, - "loss": 2.794, + "learning_rate": 2.277283440586849e-05, + "loss": 2.7745, "step": 164500 }, { "epoch": 0.54, - "learning_rate": 2.275610256681872e-05, - "loss": 2.7953, + "learning_rate": 2.275628293742221e-05, + "loss": 2.7705, "step": 164600 }, { "epoch": 0.55, - "learning_rate": 2.273955098879127e-05, - "loss": 2.797, + "learning_rate": 2.273973146897593e-05, + "loss": 2.7706, "step": 164700 }, { "epoch": 0.55, - "learning_rate": 2.2722999410763823e-05, - "loss": 2.7811, + "learning_rate": 2.2723180000529648e-05, + "loss": 2.7703, "step": 164800 }, { "epoch": 0.55, - "learning_rate": 2.2706447832736375e-05, - "loss": 2.7897, + "learning_rate": 2.2706628532083367e-05, + "loss": 2.7832, "step": 164900 }, { "epoch": 0.55, - "learning_rate": 2.2689896254708926e-05, - "loss": 2.7879, + "learning_rate": 2.2690077063637086e-05, + "loss": 2.77, "step": 165000 }, { "epoch": 0.55, - "learning_rate": 2.2673344676681478e-05, - "loss": 2.7974, + "learning_rate": 2.2673525595190805e-05, + "loss": 2.7693, "step": 165100 }, { "epoch": 0.55, - "learning_rate": 2.2656793098654026e-05, - "loss": 2.7971, + "learning_rate": 2.2656974126744527e-05, + "loss": 2.7677, "step": 165200 }, { "epoch": 0.55, - "learning_rate": 2.2640241520626578e-05, - "loss": 2.7935, + "learning_rate": 2.2640422658298246e-05, + "loss": 2.7743, "step": 165300 }, { "epoch": 0.55, - "learning_rate": 2.2623689942599126e-05, - "loss": 2.7799, + "learning_rate": 2.2623871189851965e-05, + "loss": 2.7795, "step": 165400 }, { "epoch": 0.55, - "learning_rate": 2.2607138364571678e-05, - "loss": 2.79, + "learning_rate": 2.2607319721405684e-05, + "loss": 2.7723, "step": 165500 }, { "epoch": 0.55, - "learning_rate": 2.259058678654423e-05, - "loss": 2.7869, + "learning_rate": 2.2590768252959403e-05, + "loss": 2.7777, "step": 165600 }, { "epoch": 0.55, - "learning_rate": 2.257403520851678e-05, - "loss": 2.8034, + "learning_rate": 2.2574216784513122e-05, + "loss": 2.7832, "step": 165700 }, { "epoch": 0.55, - "learning_rate": 2.2557483630489332e-05, - "loss": 2.7931, + "learning_rate": 2.255766531606684e-05, + "loss": 2.776, "step": 165800 }, { "epoch": 0.55, - "learning_rate": 2.2540932052461884e-05, - "loss": 2.784, + "learning_rate": 2.2541113847620563e-05, + "loss": 2.7851, "step": 165900 }, { "epoch": 0.55, - "learning_rate": 2.2524380474434436e-05, - "loss": 2.7941, + "learning_rate": 2.2524562379174282e-05, + "loss": 2.7789, "step": 166000 }, { "epoch": 0.55, - "learning_rate": 2.2507828896406984e-05, - "loss": 2.7934, + "learning_rate": 2.2508010910728e-05, + "loss": 2.7758, "step": 166100 }, { "epoch": 0.55, - "learning_rate": 2.2491277318379535e-05, - "loss": 2.7961, + "learning_rate": 2.249145944228172e-05, + "loss": 2.7738, "step": 166200 }, { "epoch": 0.55, - "learning_rate": 2.2474725740352087e-05, - "loss": 2.7929, + "learning_rate": 2.247490797383544e-05, + "loss": 2.7708, "step": 166300 }, { "epoch": 0.55, - "learning_rate": 2.245817416232464e-05, - "loss": 2.8006, + "learning_rate": 2.245835650538916e-05, + "loss": 2.7732, "step": 166400 }, { "epoch": 0.55, - "learning_rate": 2.2441622584297187e-05, - "loss": 2.8081, + "learning_rate": 2.2441805036942877e-05, + "loss": 2.7713, "step": 166500 }, { "epoch": 0.55, - "learning_rate": 2.242507100626974e-05, - "loss": 2.7819, + "learning_rate": 2.24252535684966e-05, + "loss": 2.7775, "step": 166600 }, { "epoch": 0.55, - "learning_rate": 2.240851942824229e-05, - "loss": 2.7894, + "learning_rate": 2.240870210005032e-05, + "loss": 2.7756, "step": 166700 }, { "epoch": 0.55, - "learning_rate": 2.239196785021484e-05, - "loss": 2.786, + "learning_rate": 2.2392150631604038e-05, + "loss": 2.79, "step": 166800 }, { "epoch": 0.55, - "learning_rate": 2.237541627218739e-05, - "loss": 2.7752, + "learning_rate": 2.2375599163157757e-05, + "loss": 2.7529, "step": 166900 }, { "epoch": 0.55, - "learning_rate": 2.235886469415994e-05, - "loss": 2.8035, + "learning_rate": 2.2359047694711476e-05, + "loss": 2.7694, "step": 167000 }, { "epoch": 0.55, - "learning_rate": 2.2342313116132493e-05, - "loss": 2.7816, + "learning_rate": 2.2342496226265195e-05, + "loss": 2.7619, "step": 167100 }, { "epoch": 0.55, - "learning_rate": 2.2325761538105045e-05, - "loss": 2.7997, + "learning_rate": 2.2325944757818914e-05, + "loss": 2.78, "step": 167200 }, { "epoch": 0.55, - "learning_rate": 2.2309209960077596e-05, - "loss": 2.7921, + "learning_rate": 2.2309393289372636e-05, + "loss": 2.7749, "step": 167300 }, { "epoch": 0.55, - "learning_rate": 2.2292658382050148e-05, - "loss": 2.787, + "learning_rate": 2.229284182092635e-05, + "loss": 2.7647, "step": 167400 }, { "epoch": 0.55, - "learning_rate": 2.22761068040227e-05, - "loss": 2.7891, + "learning_rate": 2.2276290352480074e-05, + "loss": 2.7698, "step": 167500 }, { "epoch": 0.55, - "learning_rate": 2.2259555225995248e-05, - "loss": 2.7737, + "learning_rate": 2.2259738884033793e-05, + "loss": 2.7779, "step": 167600 }, { "epoch": 0.56, - "learning_rate": 2.2243003647967796e-05, - "loss": 2.7893, + "learning_rate": 2.2243187415587512e-05, + "loss": 2.7892, "step": 167700 }, { "epoch": 0.56, - "learning_rate": 2.2226452069940347e-05, - "loss": 2.7935, + "learning_rate": 2.222663594714123e-05, + "loss": 2.7611, "step": 167800 }, { "epoch": 0.56, - "learning_rate": 2.22099004919129e-05, - "loss": 2.7897, + "learning_rate": 2.221008447869495e-05, + "loss": 2.7672, "step": 167900 }, { "epoch": 0.56, - "learning_rate": 2.219334891388545e-05, - "loss": 2.7915, + "learning_rate": 2.2193533010248672e-05, + "loss": 2.7739, "step": 168000 }, { "epoch": 0.56, - "learning_rate": 2.2176797335858002e-05, - "loss": 2.7884, + "learning_rate": 2.2176981541802388e-05, + "loss": 2.7772, "step": 168100 }, { "epoch": 0.56, - "learning_rate": 2.2160245757830554e-05, - "loss": 2.7954, + "learning_rate": 2.216043007335611e-05, + "loss": 2.779, "step": 168200 }, { "epoch": 0.56, - "learning_rate": 2.2143694179803105e-05, - "loss": 2.7859, + "learning_rate": 2.214387860490983e-05, + "loss": 2.7805, "step": 168300 }, { "epoch": 0.56, - "learning_rate": 2.2127142601775654e-05, - "loss": 2.7954, + "learning_rate": 2.2127327136463548e-05, + "loss": 2.7793, "step": 168400 }, { "epoch": 0.56, - "learning_rate": 2.2110591023748205e-05, - "loss": 2.7861, + "learning_rate": 2.2110775668017267e-05, + "loss": 2.7771, "step": 168500 }, { "epoch": 0.56, - "learning_rate": 2.2094039445720757e-05, - "loss": 2.8, + "learning_rate": 2.2094224199570986e-05, + "loss": 2.7875, "step": 168600 }, { "epoch": 0.56, - "learning_rate": 2.2077487867693305e-05, - "loss": 2.7845, + "learning_rate": 2.207767273112471e-05, + "loss": 2.7675, "step": 168700 }, { "epoch": 0.56, - "learning_rate": 2.2060936289665857e-05, - "loss": 2.7812, + "learning_rate": 2.2061121262678424e-05, + "loss": 2.7653, "step": 168800 }, { "epoch": 0.56, - "learning_rate": 2.2044384711638408e-05, - "loss": 2.7922, + "learning_rate": 2.2044569794232146e-05, + "loss": 2.7586, "step": 168900 }, { "epoch": 0.56, - "learning_rate": 2.202783313361096e-05, - "loss": 2.7846, + "learning_rate": 2.2028018325785865e-05, + "loss": 2.7756, "step": 169000 }, { "epoch": 0.56, - "learning_rate": 2.201128155558351e-05, - "loss": 2.7985, + "learning_rate": 2.2011466857339584e-05, + "loss": 2.7629, "step": 169100 }, { "epoch": 0.56, - "learning_rate": 2.199472997755606e-05, - "loss": 2.7941, + "learning_rate": 2.1994915388893303e-05, + "loss": 2.7659, "step": 169200 }, { "epoch": 0.56, - "learning_rate": 2.197817839952861e-05, - "loss": 2.7939, + "learning_rate": 2.1978363920447022e-05, + "loss": 2.7691, "step": 169300 }, { "epoch": 0.56, - "learning_rate": 2.1961626821501163e-05, - "loss": 2.802, + "learning_rate": 2.1961812452000745e-05, + "loss": 2.7651, "step": 169400 }, { "epoch": 0.56, - "learning_rate": 2.1945075243473714e-05, - "loss": 2.7877, + "learning_rate": 2.194526098355446e-05, + "loss": 2.7774, "step": 169500 }, { "epoch": 0.56, - "learning_rate": 2.1928523665446266e-05, - "loss": 2.7953, + "learning_rate": 2.1928709515108183e-05, + "loss": 2.7766, "step": 169600 }, { "epoch": 0.56, - "learning_rate": 2.1911972087418818e-05, - "loss": 2.801, + "learning_rate": 2.1912158046661902e-05, + "loss": 2.7809, "step": 169700 }, { "epoch": 0.56, - "learning_rate": 2.189542050939137e-05, - "loss": 2.7746, + "learning_rate": 2.189560657821562e-05, + "loss": 2.7745, "step": 169800 }, { "epoch": 0.56, - "learning_rate": 2.1878868931363917e-05, - "loss": 2.7826, + "learning_rate": 2.187905510976934e-05, + "loss": 2.7677, "step": 169900 }, { "epoch": 0.56, - "learning_rate": 2.1862317353336466e-05, - "loss": 2.7944, + "learning_rate": 2.186250364132306e-05, + "loss": 2.7652, "step": 170000 }, { "epoch": 0.56, - "learning_rate": 2.1845765775309017e-05, - "loss": 2.7928, + "learning_rate": 2.184595217287678e-05, + "loss": 2.762, "step": 170100 }, { "epoch": 0.56, - "learning_rate": 2.182921419728157e-05, - "loss": 2.7993, + "learning_rate": 2.1829400704430497e-05, + "loss": 2.7749, "step": 170200 }, { "epoch": 0.56, - "learning_rate": 2.181266261925412e-05, - "loss": 2.7939, + "learning_rate": 2.181284923598422e-05, + "loss": 2.7707, "step": 170300 }, { "epoch": 0.56, - "learning_rate": 2.1796111041226672e-05, - "loss": 2.7935, + "learning_rate": 2.1796297767537938e-05, + "loss": 2.7644, "step": 170400 }, { "epoch": 0.56, - "learning_rate": 2.1779559463199224e-05, - "loss": 2.7751, + "learning_rate": 2.1779746299091654e-05, + "loss": 2.7718, "step": 170500 }, { "epoch": 0.56, - "learning_rate": 2.1763007885171775e-05, - "loss": 2.7852, + "learning_rate": 2.1763194830645376e-05, + "loss": 2.77, "step": 170600 }, { "epoch": 0.57, - "learning_rate": 2.1746456307144323e-05, - "loss": 2.7914, + "learning_rate": 2.1746643362199095e-05, + "loss": 2.7747, "step": 170700 }, { "epoch": 0.57, - "learning_rate": 2.1729904729116875e-05, - "loss": 2.787, + "learning_rate": 2.1730091893752817e-05, + "loss": 2.7642, "step": 170800 }, { "epoch": 0.57, - "learning_rate": 2.1713353151089427e-05, - "loss": 2.7973, + "learning_rate": 2.1713540425306533e-05, + "loss": 2.7832, "step": 170900 }, { "epoch": 0.57, - "learning_rate": 2.1696801573061975e-05, - "loss": 2.7812, + "learning_rate": 2.1696988956860255e-05, + "loss": 2.78, "step": 171000 }, { "epoch": 0.57, - "learning_rate": 2.1680249995034526e-05, - "loss": 2.7817, + "learning_rate": 2.1680437488413974e-05, + "loss": 2.7665, "step": 171100 }, { "epoch": 0.57, - "learning_rate": 2.1663698417007078e-05, - "loss": 2.7801, + "learning_rate": 2.166388601996769e-05, + "loss": 2.761, "step": 171200 }, { "epoch": 0.57, - "learning_rate": 2.164714683897963e-05, - "loss": 2.7992, + "learning_rate": 2.1647334551521412e-05, + "loss": 2.7768, "step": 171300 }, { "epoch": 0.57, - "learning_rate": 2.163059526095218e-05, - "loss": 2.7928, + "learning_rate": 2.163078308307513e-05, + "loss": 2.7644, "step": 171400 }, { "epoch": 0.57, - "learning_rate": 2.161404368292473e-05, - "loss": 2.7861, + "learning_rate": 2.1614231614628854e-05, + "loss": 2.7685, "step": 171500 }, { "epoch": 0.57, - "learning_rate": 2.159749210489728e-05, - "loss": 2.78, + "learning_rate": 2.159768014618257e-05, + "loss": 2.7757, "step": 171600 }, { "epoch": 0.57, - "learning_rate": 2.1580940526869833e-05, - "loss": 2.7886, + "learning_rate": 2.1581128677736288e-05, + "loss": 2.7757, "step": 171700 }, { "epoch": 0.57, - "learning_rate": 2.1564388948842384e-05, - "loss": 2.7836, + "learning_rate": 2.156457720929001e-05, + "loss": 2.7659, "step": 171800 }, { "epoch": 0.57, - "learning_rate": 2.1547837370814936e-05, - "loss": 2.7991, + "learning_rate": 2.1548025740843726e-05, + "loss": 2.7876, "step": 171900 }, { "epoch": 0.57, - "learning_rate": 2.1531285792787488e-05, - "loss": 2.7929, + "learning_rate": 2.153147427239745e-05, + "loss": 2.7802, "step": 172000 }, { "epoch": 0.57, - "learning_rate": 2.1514734214760036e-05, - "loss": 2.7892, + "learning_rate": 2.1514922803951167e-05, + "loss": 2.7587, "step": 172100 }, { "epoch": 0.57, - "learning_rate": 2.1498182636732587e-05, - "loss": 2.7888, + "learning_rate": 2.149837133550489e-05, + "loss": 2.7759, "step": 172200 }, { "epoch": 0.57, - "learning_rate": 2.148163105870514e-05, - "loss": 2.7904, + "learning_rate": 2.1481819867058605e-05, + "loss": 2.7621, "step": 172300 }, { "epoch": 0.57, - "learning_rate": 2.1465079480677687e-05, - "loss": 2.7775, + "learning_rate": 2.1465268398612324e-05, + "loss": 2.7692, "step": 172400 }, { "epoch": 0.57, - "learning_rate": 2.144852790265024e-05, - "loss": 2.786, + "learning_rate": 2.1448716930166047e-05, + "loss": 2.7707, "step": 172500 }, { "epoch": 0.57, - "learning_rate": 2.143197632462279e-05, - "loss": 2.7758, + "learning_rate": 2.1432165461719762e-05, + "loss": 2.7617, "step": 172600 }, { "epoch": 0.57, - "learning_rate": 2.1415424746595342e-05, - "loss": 2.7914, + "learning_rate": 2.1415613993273485e-05, + "loss": 2.7636, "step": 172700 }, { "epoch": 0.57, - "learning_rate": 2.1398873168567894e-05, - "loss": 2.7844, + "learning_rate": 2.1399062524827204e-05, + "loss": 2.7678, "step": 172800 }, { "epoch": 0.57, - "learning_rate": 2.1382321590540445e-05, - "loss": 2.7856, + "learning_rate": 2.1382511056380926e-05, + "loss": 2.7697, "step": 172900 }, { "epoch": 0.57, - "learning_rate": 2.1365770012512993e-05, - "loss": 2.7733, + "learning_rate": 2.136595958793464e-05, + "loss": 2.746, "step": 173000 }, { "epoch": 0.57, - "learning_rate": 2.1349218434485545e-05, - "loss": 2.7896, + "learning_rate": 2.134940811948836e-05, + "loss": 2.7609, "step": 173100 }, { "epoch": 0.57, - "learning_rate": 2.1332666856458093e-05, - "loss": 2.7977, + "learning_rate": 2.1332856651042083e-05, + "loss": 2.7696, "step": 173200 }, { "epoch": 0.57, - "learning_rate": 2.1316115278430645e-05, - "loss": 2.7908, + "learning_rate": 2.13163051825958e-05, + "loss": 2.7779, "step": 173300 }, { "epoch": 0.57, - "learning_rate": 2.1299563700403196e-05, - "loss": 2.7907, + "learning_rate": 2.129975371414952e-05, + "loss": 2.7735, "step": 173400 }, { "epoch": 0.57, - "learning_rate": 2.1283012122375748e-05, - "loss": 2.7966, + "learning_rate": 2.128320224570324e-05, + "loss": 2.7653, "step": 173500 }, { "epoch": 0.57, - "learning_rate": 2.12664605443483e-05, - "loss": 2.7788, + "learning_rate": 2.126665077725696e-05, + "loss": 2.7689, "step": 173600 }, { - "epoch": 0.58, - "learning_rate": 2.124990896632085e-05, - "loss": 2.8073, + "epoch": 0.57, + "learning_rate": 2.1250099308810678e-05, + "loss": 2.7639, "step": 173700 }, { "epoch": 0.58, - "learning_rate": 2.1233357388293403e-05, - "loss": 2.7864, + "learning_rate": 2.1233547840364397e-05, + "loss": 2.7797, "step": 173800 }, { "epoch": 0.58, - "learning_rate": 2.121680581026595e-05, - "loss": 2.7897, + "learning_rate": 2.121699637191812e-05, + "loss": 2.7747, "step": 173900 }, { "epoch": 0.58, - "learning_rate": 2.1200254232238503e-05, - "loss": 2.7927, + "learning_rate": 2.1200444903471835e-05, + "loss": 2.7779, "step": 174000 }, { "epoch": 0.58, - "learning_rate": 2.1183702654211054e-05, - "loss": 2.7977, + "learning_rate": 2.1183893435025557e-05, + "loss": 2.7676, "step": 174100 }, { "epoch": 0.58, - "learning_rate": 2.1167151076183606e-05, - "loss": 2.7975, + "learning_rate": 2.1167341966579276e-05, + "loss": 2.7788, "step": 174200 }, { "epoch": 0.58, - "learning_rate": 2.1150599498156154e-05, - "loss": 2.7921, + "learning_rate": 2.1150790498132995e-05, + "loss": 2.7644, "step": 174300 }, { "epoch": 0.58, - "learning_rate": 2.1134047920128706e-05, - "loss": 2.7944, + "learning_rate": 2.1134239029686714e-05, + "loss": 2.7671, "step": 174400 }, { "epoch": 0.58, - "learning_rate": 2.1117496342101257e-05, - "loss": 2.7874, + "learning_rate": 2.1117687561240433e-05, + "loss": 2.7686, "step": 174500 }, { "epoch": 0.58, - "learning_rate": 2.110094476407381e-05, - "loss": 2.7813, + "learning_rate": 2.1101136092794156e-05, + "loss": 2.7638, "step": 174600 }, { "epoch": 0.58, - "learning_rate": 2.1084393186046357e-05, - "loss": 2.7868, + "learning_rate": 2.108458462434787e-05, + "loss": 2.774, "step": 174700 }, { "epoch": 0.58, - "learning_rate": 2.106784160801891e-05, - "loss": 2.7899, + "learning_rate": 2.1068033155901594e-05, + "loss": 2.7704, "step": 174800 }, { "epoch": 0.58, - "learning_rate": 2.105129002999146e-05, - "loss": 2.7966, + "learning_rate": 2.1051481687455313e-05, + "loss": 2.7678, "step": 174900 }, { "epoch": 0.58, - "learning_rate": 2.1034738451964012e-05, - "loss": 2.7899, + "learning_rate": 2.103493021900903e-05, + "loss": 2.765, "step": 175000 }, { "epoch": 0.58, - "learning_rate": 2.1018186873936563e-05, - "loss": 2.778, + "learning_rate": 2.101837875056275e-05, + "loss": 2.7626, "step": 175100 }, { "epoch": 0.58, - "learning_rate": 2.1001635295909115e-05, - "loss": 2.7951, + "learning_rate": 2.100182728211647e-05, + "loss": 2.7687, "step": 175200 }, { "epoch": 0.58, - "learning_rate": 2.0985083717881667e-05, - "loss": 2.779, + "learning_rate": 2.0985275813670192e-05, + "loss": 2.7849, "step": 175300 }, { "epoch": 0.58, - "learning_rate": 2.0968532139854215e-05, - "loss": 2.7977, + "learning_rate": 2.0968724345223907e-05, + "loss": 2.7829, "step": 175400 }, { "epoch": 0.58, - "learning_rate": 2.0951980561826763e-05, - "loss": 2.7838, + "learning_rate": 2.095217287677763e-05, + "loss": 2.7749, "step": 175500 }, { "epoch": 0.58, - "learning_rate": 2.0935428983799315e-05, - "loss": 2.7883, + "learning_rate": 2.093562140833135e-05, + "loss": 2.7706, "step": 175600 }, { "epoch": 0.58, - "learning_rate": 2.0918877405771866e-05, - "loss": 2.7894, + "learning_rate": 2.0919069939885068e-05, + "loss": 2.7783, "step": 175700 }, { "epoch": 0.58, - "learning_rate": 2.0902325827744418e-05, - "loss": 2.7983, + "learning_rate": 2.0902518471438787e-05, + "loss": 2.7617, "step": 175800 }, { "epoch": 0.58, - "learning_rate": 2.088577424971697e-05, - "loss": 2.7935, + "learning_rate": 2.0885967002992506e-05, + "loss": 2.7624, "step": 175900 }, { "epoch": 0.58, - "learning_rate": 2.086922267168952e-05, - "loss": 2.7906, + "learning_rate": 2.0869415534546228e-05, + "loss": 2.7728, "step": 176000 }, { "epoch": 0.58, - "learning_rate": 2.0852671093662073e-05, - "loss": 2.7899, + "learning_rate": 2.0852864066099944e-05, + "loss": 2.7636, "step": 176100 }, { "epoch": 0.58, - "learning_rate": 2.083611951563462e-05, - "loss": 2.7823, + "learning_rate": 2.0836312597653666e-05, + "loss": 2.7811, "step": 176200 }, { "epoch": 0.58, - "learning_rate": 2.0819567937607172e-05, - "loss": 2.7977, + "learning_rate": 2.0819761129207385e-05, + "loss": 2.7646, "step": 176300 }, { "epoch": 0.58, - "learning_rate": 2.0803016359579724e-05, - "loss": 2.7763, + "learning_rate": 2.0803209660761104e-05, + "loss": 2.7692, "step": 176400 }, { "epoch": 0.58, - "learning_rate": 2.0786464781552272e-05, - "loss": 2.7875, + "learning_rate": 2.0786658192314823e-05, + "loss": 2.7841, "step": 176500 }, { "epoch": 0.58, - "learning_rate": 2.0769913203524824e-05, - "loss": 2.7813, + "learning_rate": 2.0770106723868542e-05, + "loss": 2.7692, "step": 176600 }, { "epoch": 0.58, - "learning_rate": 2.0753361625497375e-05, - "loss": 2.7979, + "learning_rate": 2.0753555255422264e-05, + "loss": 2.7731, "step": 176700 }, { "epoch": 0.59, - "learning_rate": 2.0736810047469927e-05, - "loss": 2.7844, + "learning_rate": 2.073700378697598e-05, + "loss": 2.7535, "step": 176800 }, { "epoch": 0.59, - "learning_rate": 2.072025846944248e-05, - "loss": 2.7936, + "learning_rate": 2.0720452318529702e-05, + "loss": 2.7508, "step": 176900 }, { "epoch": 0.59, - "learning_rate": 2.0703706891415027e-05, - "loss": 2.7889, + "learning_rate": 2.070390085008342e-05, + "loss": 2.7703, "step": 177000 }, { "epoch": 0.59, - "learning_rate": 2.068715531338758e-05, - "loss": 2.7903, + "learning_rate": 2.068734938163714e-05, + "loss": 2.7787, "step": 177100 }, { "epoch": 0.59, - "learning_rate": 2.067060373536013e-05, - "loss": 2.7869, + "learning_rate": 2.067079791319086e-05, + "loss": 2.7648, "step": 177200 }, { "epoch": 0.59, - "learning_rate": 2.065405215733268e-05, - "loss": 2.7854, + "learning_rate": 2.0654246444744578e-05, + "loss": 2.7818, "step": 177300 }, { "epoch": 0.59, - "learning_rate": 2.0637500579305233e-05, - "loss": 2.7792, + "learning_rate": 2.0637694976298297e-05, + "loss": 2.761, "step": 177400 }, { "epoch": 0.59, - "learning_rate": 2.0620949001277785e-05, - "loss": 2.8066, + "learning_rate": 2.0621143507852016e-05, + "loss": 2.7701, "step": 177500 }, { "epoch": 0.59, - "learning_rate": 2.0604397423250333e-05, - "loss": 2.7967, + "learning_rate": 2.060459203940574e-05, + "loss": 2.7804, "step": 177600 }, { "epoch": 0.59, - "learning_rate": 2.0587845845222885e-05, - "loss": 2.7868, + "learning_rate": 2.0588040570959458e-05, + "loss": 2.7745, "step": 177700 }, { "epoch": 0.59, - "learning_rate": 2.0571294267195433e-05, - "loss": 2.7801, + "learning_rate": 2.0571489102513177e-05, + "loss": 2.7703, "step": 177800 }, { "epoch": 0.59, - "learning_rate": 2.0554742689167984e-05, - "loss": 2.7923, + "learning_rate": 2.0554937634066896e-05, + "loss": 2.7754, "step": 177900 }, { "epoch": 0.59, - "learning_rate": 2.0538191111140536e-05, - "loss": 2.7918, + "learning_rate": 2.0538386165620615e-05, + "loss": 2.779, "step": 178000 }, { "epoch": 0.59, - "learning_rate": 2.0521639533113088e-05, - "loss": 2.7949, + "learning_rate": 2.0521834697174334e-05, + "loss": 2.7749, "step": 178100 }, { "epoch": 0.59, - "learning_rate": 2.050508795508564e-05, - "loss": 2.7872, + "learning_rate": 2.0505283228728052e-05, + "loss": 2.7779, "step": 178200 }, { "epoch": 0.59, - "learning_rate": 2.048853637705819e-05, - "loss": 2.7918, + "learning_rate": 2.0488731760281775e-05, + "loss": 2.7831, "step": 178300 }, { "epoch": 0.59, - "learning_rate": 2.0471984799030742e-05, - "loss": 2.7784, + "learning_rate": 2.0472180291835494e-05, + "loss": 2.7718, "step": 178400 }, { "epoch": 0.59, - "learning_rate": 2.045543322100329e-05, - "loss": 2.7822, + "learning_rate": 2.0455628823389213e-05, + "loss": 2.7558, "step": 178500 }, { "epoch": 0.59, - "learning_rate": 2.0438881642975842e-05, - "loss": 2.7811, + "learning_rate": 2.0439077354942932e-05, + "loss": 2.7524, "step": 178600 }, { "epoch": 0.59, - "learning_rate": 2.0422330064948394e-05, - "loss": 2.776, + "learning_rate": 2.042252588649665e-05, + "loss": 2.7689, "step": 178700 }, { "epoch": 0.59, - "learning_rate": 2.0405778486920942e-05, - "loss": 2.7812, + "learning_rate": 2.040597441805037e-05, + "loss": 2.7726, "step": 178800 }, { "epoch": 0.59, - "learning_rate": 2.0389226908893494e-05, - "loss": 2.7953, + "learning_rate": 2.038942294960409e-05, + "loss": 2.7735, "step": 178900 }, { "epoch": 0.59, - "learning_rate": 2.0372675330866045e-05, - "loss": 2.7858, + "learning_rate": 2.037287148115781e-05, + "loss": 2.7695, "step": 179000 }, { "epoch": 0.59, - "learning_rate": 2.0356123752838597e-05, - "loss": 2.7789, + "learning_rate": 2.035632001271153e-05, + "loss": 2.7745, "step": 179100 }, { "epoch": 0.59, - "learning_rate": 2.033957217481115e-05, - "loss": 2.7935, + "learning_rate": 2.033976854426525e-05, + "loss": 2.7693, "step": 179200 }, { "epoch": 0.59, - "learning_rate": 2.0323020596783697e-05, - "loss": 2.788, + "learning_rate": 2.0323217075818968e-05, + "loss": 2.7759, "step": 179300 }, { "epoch": 0.59, - "learning_rate": 2.030646901875625e-05, - "loss": 2.7901, + "learning_rate": 2.0306665607372687e-05, + "loss": 2.7561, "step": 179400 }, { "epoch": 0.59, - "learning_rate": 2.02899174407288e-05, - "loss": 2.7831, + "learning_rate": 2.0290114138926406e-05, + "loss": 2.7643, "step": 179500 }, { "epoch": 0.59, - "learning_rate": 2.027336586270135e-05, - "loss": 2.8024, + "learning_rate": 2.0273562670480125e-05, + "loss": 2.7722, "step": 179600 }, { "epoch": 0.59, - "learning_rate": 2.0256814284673903e-05, - "loss": 2.7814, + "learning_rate": 2.0257011202033847e-05, + "loss": 2.7734, "step": 179700 }, { "epoch": 0.6, - "learning_rate": 2.0240262706646455e-05, - "loss": 2.7952, + "learning_rate": 2.0240459733587566e-05, + "loss": 2.764, "step": 179800 }, { "epoch": 0.6, - "learning_rate": 2.0223711128619003e-05, - "loss": 2.7988, + "learning_rate": 2.0223908265141285e-05, + "loss": 2.7772, "step": 179900 }, { "epoch": 0.6, - "learning_rate": 2.0207159550591555e-05, - "loss": 2.8031, + "learning_rate": 2.0207356796695004e-05, + "loss": 2.7599, "step": 180000 }, { "epoch": 0.6, - "learning_rate": 2.0190607972564106e-05, - "loss": 2.7809, + "learning_rate": 2.0190805328248723e-05, + "loss": 2.7759, "step": 180100 }, { "epoch": 0.6, - "learning_rate": 2.0174056394536654e-05, - "loss": 2.7775, + "learning_rate": 2.0174253859802442e-05, + "loss": 2.7576, "step": 180200 }, { "epoch": 0.6, - "learning_rate": 2.0157504816509206e-05, - "loss": 2.7924, + "learning_rate": 2.015770239135616e-05, + "loss": 2.7691, "step": 180300 }, { "epoch": 0.6, - "learning_rate": 2.0140953238481758e-05, - "loss": 2.7779, + "learning_rate": 2.0141150922909884e-05, + "loss": 2.7625, "step": 180400 }, { "epoch": 0.6, - "learning_rate": 2.012440166045431e-05, - "loss": 2.7922, + "learning_rate": 2.01245994544636e-05, + "loss": 2.7595, "step": 180500 }, { "epoch": 0.6, - "learning_rate": 2.010785008242686e-05, - "loss": 2.8033, + "learning_rate": 2.0108047986017318e-05, + "loss": 2.7678, "step": 180600 }, { "epoch": 0.6, - "learning_rate": 2.0091298504399412e-05, - "loss": 2.7846, + "learning_rate": 2.009149651757104e-05, + "loss": 2.7664, "step": 180700 }, { "epoch": 0.6, - "learning_rate": 2.0074746926371964e-05, - "loss": 2.7837, + "learning_rate": 2.007494504912476e-05, + "loss": 2.7717, "step": 180800 }, { "epoch": 0.6, - "learning_rate": 2.0058195348344512e-05, - "loss": 2.786, + "learning_rate": 2.005839358067848e-05, + "loss": 2.7672, "step": 180900 }, { "epoch": 0.6, - "learning_rate": 2.004164377031706e-05, - "loss": 2.7828, + "learning_rate": 2.0041842112232198e-05, + "loss": 2.7558, "step": 181000 }, { "epoch": 0.6, - "learning_rate": 2.0025092192289612e-05, - "loss": 2.7819, + "learning_rate": 2.002529064378592e-05, + "loss": 2.764, "step": 181100 }, { "epoch": 0.6, - "learning_rate": 2.0008540614262164e-05, - "loss": 2.7885, + "learning_rate": 2.0008739175339636e-05, + "loss": 2.787, "step": 181200 }, { "epoch": 0.6, - "learning_rate": 1.9991989036234715e-05, - "loss": 2.7835, + "learning_rate": 1.9992187706893355e-05, + "loss": 2.7576, "step": 181300 }, { "epoch": 0.6, - "learning_rate": 1.9975437458207267e-05, - "loss": 2.8044, + "learning_rate": 1.9975636238447077e-05, + "loss": 2.7626, "step": 181400 }, { "epoch": 0.6, - "learning_rate": 1.995888588017982e-05, - "loss": 2.7883, + "learning_rate": 1.9959084770000796e-05, + "loss": 2.7736, "step": 181500 }, { "epoch": 0.6, - "learning_rate": 1.994233430215237e-05, - "loss": 2.7993, + "learning_rate": 1.9942533301554515e-05, + "loss": 2.779, "step": 181600 }, { "epoch": 0.6, - "learning_rate": 1.9925782724124918e-05, - "loss": 2.7905, + "learning_rate": 1.9925981833108234e-05, + "loss": 2.7547, "step": 181700 }, { "epoch": 0.6, - "learning_rate": 1.990923114609747e-05, - "loss": 2.7928, + "learning_rate": 1.9909430364661956e-05, + "loss": 2.7641, "step": 181800 }, { "epoch": 0.6, - "learning_rate": 1.989267956807002e-05, - "loss": 2.7767, + "learning_rate": 1.9892878896215672e-05, + "loss": 2.7756, "step": 181900 }, { "epoch": 0.6, - "learning_rate": 1.9876127990042573e-05, - "loss": 2.797, + "learning_rate": 1.987632742776939e-05, + "loss": 2.77, "step": 182000 }, { "epoch": 0.6, - "learning_rate": 1.985957641201512e-05, - "loss": 2.7831, + "learning_rate": 1.9859775959323113e-05, + "loss": 2.7693, "step": 182100 }, { "epoch": 0.6, - "learning_rate": 1.9843024833987673e-05, - "loss": 2.7785, + "learning_rate": 1.9843224490876832e-05, + "loss": 2.7573, "step": 182200 }, { "epoch": 0.6, - "learning_rate": 1.9826473255960224e-05, - "loss": 2.7857, + "learning_rate": 1.982667302243055e-05, + "loss": 2.7708, "step": 182300 }, { "epoch": 0.6, - "learning_rate": 1.9809921677932776e-05, - "loss": 2.7957, + "learning_rate": 1.981012155398427e-05, + "loss": 2.7711, "step": 182400 }, { "epoch": 0.6, - "learning_rate": 1.9793370099905324e-05, - "loss": 2.7912, + "learning_rate": 1.979357008553799e-05, + "loss": 2.7591, "step": 182500 }, { "epoch": 0.6, - "learning_rate": 1.9776818521877876e-05, - "loss": 2.7786, + "learning_rate": 1.9777018617091708e-05, + "loss": 2.7848, "step": 182600 }, { "epoch": 0.6, - "learning_rate": 1.9760266943850427e-05, - "loss": 2.7924, + "learning_rate": 1.9760467148645427e-05, + "loss": 2.7717, "step": 182700 }, { "epoch": 0.61, - "learning_rate": 1.974371536582298e-05, - "loss": 2.7801, + "learning_rate": 1.974391568019915e-05, + "loss": 2.76, "step": 182800 }, { "epoch": 0.61, - "learning_rate": 1.972716378779553e-05, - "loss": 2.769, + "learning_rate": 1.972736421175287e-05, + "loss": 2.7876, "step": 182900 }, { "epoch": 0.61, - "learning_rate": 1.9710612209768082e-05, - "loss": 2.8033, + "learning_rate": 1.9710812743306587e-05, + "loss": 2.7658, "step": 183000 }, { "epoch": 0.61, - "learning_rate": 1.9694060631740634e-05, - "loss": 2.7863, + "learning_rate": 1.9694261274860306e-05, + "loss": 2.7631, "step": 183100 }, { "epoch": 0.61, - "learning_rate": 1.9677509053713182e-05, - "loss": 2.8017, + "learning_rate": 1.9677709806414025e-05, + "loss": 2.7673, "step": 183200 }, { "epoch": 0.61, - "learning_rate": 1.966095747568573e-05, - "loss": 2.7897, + "learning_rate": 1.9661158337967744e-05, + "loss": 2.7736, "step": 183300 }, { "epoch": 0.61, - "learning_rate": 1.9644405897658282e-05, - "loss": 2.7852, + "learning_rate": 1.9644606869521463e-05, + "loss": 2.7583, "step": 183400 }, { "epoch": 0.61, - "learning_rate": 1.9627854319630833e-05, - "loss": 2.7825, + "learning_rate": 1.9628055401075186e-05, + "loss": 2.7608, "step": 183500 }, { "epoch": 0.61, - "learning_rate": 1.9611302741603385e-05, - "loss": 2.7902, + "learning_rate": 1.96115039326289e-05, + "loss": 2.7583, "step": 183600 }, { "epoch": 0.61, - "learning_rate": 1.9594751163575937e-05, - "loss": 2.7879, + "learning_rate": 1.9594952464182624e-05, + "loss": 2.7671, "step": 183700 }, { "epoch": 0.61, - "learning_rate": 1.9578199585548488e-05, - "loss": 2.7986, + "learning_rate": 1.9578400995736343e-05, + "loss": 2.7809, "step": 183800 }, { "epoch": 0.61, - "learning_rate": 1.956164800752104e-05, - "loss": 2.8005, + "learning_rate": 1.956184952729006e-05, + "loss": 2.7612, "step": 183900 }, { "epoch": 0.61, - "learning_rate": 1.9545096429493588e-05, - "loss": 2.787, + "learning_rate": 1.954529805884378e-05, + "loss": 2.7756, "step": 184000 }, { "epoch": 0.61, - "learning_rate": 1.952854485146614e-05, - "loss": 2.7891, + "learning_rate": 1.95287465903975e-05, + "loss": 2.778, "step": 184100 }, { "epoch": 0.61, - "learning_rate": 1.951199327343869e-05, - "loss": 2.7923, + "learning_rate": 1.9512195121951222e-05, + "loss": 2.7684, "step": 184200 }, { "epoch": 0.61, - "learning_rate": 1.949544169541124e-05, - "loss": 2.7871, + "learning_rate": 1.9495643653504938e-05, + "loss": 2.7734, "step": 184300 }, { "epoch": 0.61, - "learning_rate": 1.947889011738379e-05, - "loss": 2.796, + "learning_rate": 1.947909218505866e-05, + "loss": 2.7585, "step": 184400 }, { "epoch": 0.61, - "learning_rate": 1.9462338539356343e-05, - "loss": 2.7827, + "learning_rate": 1.946254071661238e-05, + "loss": 2.7597, "step": 184500 }, { "epoch": 0.61, - "learning_rate": 1.9445786961328894e-05, - "loss": 2.8019, + "learning_rate": 1.9445989248166098e-05, + "loss": 2.7885, "step": 184600 }, { "epoch": 0.61, - "learning_rate": 1.9429235383301446e-05, - "loss": 2.7981, + "learning_rate": 1.9429437779719817e-05, + "loss": 2.7789, "step": 184700 }, { "epoch": 0.61, - "learning_rate": 1.9412683805273994e-05, - "loss": 2.7839, + "learning_rate": 1.9412886311273536e-05, + "loss": 2.7688, "step": 184800 }, { "epoch": 0.61, - "learning_rate": 1.9396132227246546e-05, - "loss": 2.802, + "learning_rate": 1.9396334842827258e-05, + "loss": 2.7744, "step": 184900 }, { "epoch": 0.61, - "learning_rate": 1.9379580649219097e-05, - "loss": 2.7866, + "learning_rate": 1.9379783374380974e-05, + "loss": 2.7818, "step": 185000 }, { "epoch": 0.61, - "learning_rate": 1.936302907119165e-05, - "loss": 2.7863, + "learning_rate": 1.9363231905934696e-05, + "loss": 2.7694, "step": 185100 }, { "epoch": 0.61, - "learning_rate": 1.93464774931642e-05, - "loss": 2.7929, + "learning_rate": 1.9346680437488415e-05, + "loss": 2.7829, "step": 185200 }, { "epoch": 0.61, - "learning_rate": 1.9329925915136752e-05, - "loss": 2.7913, + "learning_rate": 1.9330128969042134e-05, + "loss": 2.763, "step": 185300 }, { "epoch": 0.61, - "learning_rate": 1.93133743371093e-05, - "loss": 2.806, + "learning_rate": 1.9313577500595853e-05, + "loss": 2.7785, "step": 185400 }, { "epoch": 0.61, - "learning_rate": 1.9296822759081852e-05, - "loss": 2.7864, + "learning_rate": 1.9297026032149572e-05, + "loss": 2.7672, "step": 185500 }, { "epoch": 0.61, - "learning_rate": 1.92802711810544e-05, - "loss": 2.7772, + "learning_rate": 1.9280474563703294e-05, + "loss": 2.7602, "step": 185600 }, { "epoch": 0.61, - "learning_rate": 1.9263719603026952e-05, - "loss": 2.7821, + "learning_rate": 1.926392309525701e-05, + "loss": 2.7606, "step": 185700 }, { "epoch": 0.62, - "learning_rate": 1.9247168024999503e-05, - "loss": 2.8022, + "learning_rate": 1.9247371626810732e-05, + "loss": 2.7565, "step": 185800 }, { "epoch": 0.62, - "learning_rate": 1.9230616446972055e-05, - "loss": 2.784, + "learning_rate": 1.923082015836445e-05, + "loss": 2.7636, "step": 185900 }, { "epoch": 0.62, - "learning_rate": 1.9214064868944607e-05, - "loss": 2.7731, + "learning_rate": 1.921426868991817e-05, + "loss": 2.7625, "step": 186000 }, { "epoch": 0.62, - "learning_rate": 1.9197513290917158e-05, - "loss": 2.7891, + "learning_rate": 1.919771722147189e-05, + "loss": 2.7672, "step": 186100 }, { "epoch": 0.62, - "learning_rate": 1.918096171288971e-05, - "loss": 2.7874, + "learning_rate": 1.918116575302561e-05, + "loss": 2.7605, "step": 186200 }, { "epoch": 0.62, - "learning_rate": 1.9164410134862258e-05, - "loss": 2.795, + "learning_rate": 1.916461428457933e-05, + "loss": 2.7693, "step": 186300 }, { "epoch": 0.62, - "learning_rate": 1.914785855683481e-05, - "loss": 2.7826, + "learning_rate": 1.9148062816133046e-05, + "loss": 2.7771, "step": 186400 }, { "epoch": 0.62, - "learning_rate": 1.913130697880736e-05, - "loss": 2.788, + "learning_rate": 1.913151134768677e-05, + "loss": 2.752, "step": 186500 }, { "epoch": 0.62, - "learning_rate": 1.911475540077991e-05, - "loss": 2.7907, + "learning_rate": 1.9114959879240488e-05, + "loss": 2.7688, "step": 186600 }, { "epoch": 0.62, - "learning_rate": 1.909820382275246e-05, - "loss": 2.7846, + "learning_rate": 1.9098408410794207e-05, + "loss": 2.7591, "step": 186700 }, { "epoch": 0.62, - "learning_rate": 1.9081652244725013e-05, - "loss": 2.7934, + "learning_rate": 1.9081856942347926e-05, + "loss": 2.7863, "step": 186800 }, { "epoch": 0.62, - "learning_rate": 1.9065100666697564e-05, - "loss": 2.8002, + "learning_rate": 1.9065305473901645e-05, + "loss": 2.7794, "step": 186900 }, { "epoch": 0.62, - "learning_rate": 1.9048549088670116e-05, - "loss": 2.7893, + "learning_rate": 1.9048754005455367e-05, + "loss": 2.7691, "step": 187000 }, { "epoch": 0.62, - "learning_rate": 1.9031997510642667e-05, - "loss": 2.779, + "learning_rate": 1.9032202537009083e-05, + "loss": 2.7752, "step": 187100 }, { "epoch": 0.62, - "learning_rate": 1.9015445932615216e-05, - "loss": 2.7811, + "learning_rate": 1.9015651068562805e-05, + "loss": 2.7635, "step": 187200 }, { "epoch": 0.62, - "learning_rate": 1.8998894354587767e-05, - "loss": 2.787, + "learning_rate": 1.8999099600116524e-05, + "loss": 2.7686, "step": 187300 }, { "epoch": 0.62, - "learning_rate": 1.898234277656032e-05, - "loss": 2.8011, + "learning_rate": 1.8982548131670243e-05, + "loss": 2.7562, "step": 187400 }, { "epoch": 0.62, - "learning_rate": 1.896579119853287e-05, - "loss": 2.7856, + "learning_rate": 1.8965996663223962e-05, + "loss": 2.7718, "step": 187500 }, { "epoch": 0.62, - "learning_rate": 1.8949239620505422e-05, - "loss": 2.7933, + "learning_rate": 1.894944519477768e-05, + "loss": 2.7574, "step": 187600 }, { "epoch": 0.62, - "learning_rate": 1.893268804247797e-05, - "loss": 2.7819, + "learning_rate": 1.8932893726331403e-05, + "loss": 2.775, "step": 187700 }, { "epoch": 0.62, - "learning_rate": 1.8916136464450522e-05, - "loss": 2.7836, + "learning_rate": 1.891634225788512e-05, + "loss": 2.7597, "step": 187800 }, { "epoch": 0.62, - "learning_rate": 1.8899584886423073e-05, - "loss": 2.7787, + "learning_rate": 1.889979078943884e-05, + "loss": 2.785, "step": 187900 }, { "epoch": 0.62, - "learning_rate": 1.888303330839562e-05, - "loss": 2.7898, + "learning_rate": 1.888323932099256e-05, + "loss": 2.7544, "step": 188000 }, { "epoch": 0.62, - "learning_rate": 1.8866481730368173e-05, - "loss": 2.7964, + "learning_rate": 1.886668785254628e-05, + "loss": 2.7748, "step": 188100 }, { "epoch": 0.62, - "learning_rate": 1.8849930152340725e-05, - "loss": 2.7848, + "learning_rate": 1.8850136384099998e-05, + "loss": 2.7543, "step": 188200 }, { "epoch": 0.62, - "learning_rate": 1.8833378574313276e-05, - "loss": 2.7872, + "learning_rate": 1.8833584915653717e-05, + "loss": 2.769, "step": 188300 }, { "epoch": 0.62, - "learning_rate": 1.8816826996285828e-05, - "loss": 2.785, + "learning_rate": 1.881703344720744e-05, + "loss": 2.7602, "step": 188400 }, { "epoch": 0.62, - "learning_rate": 1.880027541825838e-05, - "loss": 2.7843, + "learning_rate": 1.8800481978761155e-05, + "loss": 2.7781, "step": 188500 }, { "epoch": 0.62, - "learning_rate": 1.878372384023093e-05, - "loss": 2.7806, + "learning_rate": 1.8783930510314877e-05, + "loss": 2.7662, "step": 188600 }, { "epoch": 0.62, - "learning_rate": 1.876717226220348e-05, - "loss": 2.7773, + "learning_rate": 1.8767379041868596e-05, + "loss": 2.7859, "step": 188700 }, { "epoch": 0.62, - "learning_rate": 1.8750620684176028e-05, - "loss": 2.7825, + "learning_rate": 1.8750827573422315e-05, + "loss": 2.7706, "step": 188800 }, { "epoch": 0.63, - "learning_rate": 1.873406910614858e-05, - "loss": 2.7737, + "learning_rate": 1.8734276104976034e-05, + "loss": 2.7667, "step": 188900 }, { "epoch": 0.63, - "learning_rate": 1.871751752812113e-05, - "loss": 2.7886, + "learning_rate": 1.8717724636529753e-05, + "loss": 2.7641, "step": 189000 }, { "epoch": 0.63, - "learning_rate": 1.8700965950093682e-05, - "loss": 2.7784, + "learning_rate": 1.8701173168083476e-05, + "loss": 2.7637, "step": 189100 }, { "epoch": 0.63, - "learning_rate": 1.8684414372066234e-05, - "loss": 2.778, + "learning_rate": 1.868462169963719e-05, + "loss": 2.7765, "step": 189200 }, { "epoch": 0.63, - "learning_rate": 1.8667862794038786e-05, - "loss": 2.7807, + "learning_rate": 1.8668070231190914e-05, + "loss": 2.7639, "step": 189300 }, { "epoch": 0.63, - "learning_rate": 1.8651311216011337e-05, - "loss": 2.7912, + "learning_rate": 1.8651518762744633e-05, + "loss": 2.7632, "step": 189400 }, { "epoch": 0.63, - "learning_rate": 1.8634759637983885e-05, - "loss": 2.7789, + "learning_rate": 1.863496729429835e-05, + "loss": 2.7781, "step": 189500 }, { "epoch": 0.63, - "learning_rate": 1.8618208059956437e-05, - "loss": 2.772, + "learning_rate": 1.861841582585207e-05, + "loss": 2.759, "step": 189600 }, { "epoch": 0.63, - "learning_rate": 1.860165648192899e-05, - "loss": 2.7711, + "learning_rate": 1.860186435740579e-05, + "loss": 2.7747, "step": 189700 }, { "epoch": 0.63, - "learning_rate": 1.858510490390154e-05, - "loss": 2.7909, + "learning_rate": 1.8585312888959512e-05, + "loss": 2.7788, "step": 189800 }, { "epoch": 0.63, - "learning_rate": 1.856855332587409e-05, - "loss": 2.7671, + "learning_rate": 1.8568761420513228e-05, + "loss": 2.7649, "step": 189900 }, { "epoch": 0.63, - "learning_rate": 1.855200174784664e-05, - "loss": 2.7917, + "learning_rate": 1.855220995206695e-05, + "loss": 2.7657, "step": 190000 }, { "epoch": 0.63, - "learning_rate": 1.853545016981919e-05, - "loss": 2.7881, + "learning_rate": 1.853565848362067e-05, + "loss": 2.7779, "step": 190100 }, { "epoch": 0.63, - "learning_rate": 1.8518898591791743e-05, - "loss": 2.7954, + "learning_rate": 1.8519107015174385e-05, + "loss": 2.7649, "step": 190200 }, { "epoch": 0.63, - "learning_rate": 1.850234701376429e-05, - "loss": 2.7849, + "learning_rate": 1.8502555546728107e-05, + "loss": 2.7537, "step": 190300 }, { "epoch": 0.63, - "learning_rate": 1.8485795435736843e-05, - "loss": 2.7672, + "learning_rate": 1.8486004078281826e-05, + "loss": 2.7542, "step": 190400 }, { "epoch": 0.63, - "learning_rate": 1.8469243857709395e-05, - "loss": 2.7858, + "learning_rate": 1.8469452609835545e-05, + "loss": 2.7635, "step": 190500 }, { "epoch": 0.63, - "learning_rate": 1.8452692279681946e-05, - "loss": 2.7806, + "learning_rate": 1.8452901141389264e-05, + "loss": 2.7576, "step": 190600 }, { "epoch": 0.63, - "learning_rate": 1.8436140701654498e-05, - "loss": 2.778, + "learning_rate": 1.8436349672942986e-05, + "loss": 2.7732, "step": 190700 }, { "epoch": 0.63, - "learning_rate": 1.841958912362705e-05, - "loss": 2.7877, + "learning_rate": 1.8419798204496705e-05, + "loss": 2.763, "step": 190800 }, { "epoch": 0.63, - "learning_rate": 1.84030375455996e-05, - "loss": 2.7767, + "learning_rate": 1.840324673605042e-05, + "loss": 2.7685, "step": 190900 }, { "epoch": 0.63, - "learning_rate": 1.838648596757215e-05, - "loss": 2.78, + "learning_rate": 1.8386695267604143e-05, + "loss": 2.7675, "step": 191000 }, { "epoch": 0.63, - "learning_rate": 1.8369934389544697e-05, - "loss": 2.7834, + "learning_rate": 1.8370143799157862e-05, + "loss": 2.757, "step": 191100 }, { "epoch": 0.63, - "learning_rate": 1.835338281151725e-05, - "loss": 2.7978, + "learning_rate": 1.835359233071158e-05, + "loss": 2.7541, "step": 191200 }, { "epoch": 0.63, - "learning_rate": 1.83368312334898e-05, - "loss": 2.7921, + "learning_rate": 1.83370408622653e-05, + "loss": 2.773, "step": 191300 }, { "epoch": 0.63, - "learning_rate": 1.8320279655462352e-05, - "loss": 2.7897, + "learning_rate": 1.832048939381902e-05, + "loss": 2.7706, "step": 191400 }, { "epoch": 0.63, - "learning_rate": 1.8303728077434904e-05, - "loss": 2.7675, + "learning_rate": 1.830393792537274e-05, + "loss": 2.7747, "step": 191500 }, { "epoch": 0.63, - "learning_rate": 1.8287176499407455e-05, - "loss": 2.7845, + "learning_rate": 1.8287386456926457e-05, + "loss": 2.7699, "step": 191600 }, { "epoch": 0.63, - "learning_rate": 1.8270624921380007e-05, - "loss": 2.787, + "learning_rate": 1.827083498848018e-05, + "loss": 2.7587, "step": 191700 }, { "epoch": 0.63, - "learning_rate": 1.8254073343352555e-05, - "loss": 2.7935, + "learning_rate": 1.82542835200339e-05, + "loss": 2.7776, "step": 191800 }, { "epoch": 0.64, - "learning_rate": 1.8237521765325107e-05, - "loss": 2.8011, + "learning_rate": 1.8237732051587617e-05, + "loss": 2.7496, "step": 191900 }, { "epoch": 0.64, - "learning_rate": 1.822097018729766e-05, - "loss": 2.7808, + "learning_rate": 1.8221180583141336e-05, + "loss": 2.7576, "step": 192000 }, { "epoch": 0.64, - "learning_rate": 1.8204418609270207e-05, - "loss": 2.7791, + "learning_rate": 1.8204629114695055e-05, + "loss": 2.7596, "step": 192100 }, { "epoch": 0.64, - "learning_rate": 1.8187867031242758e-05, - "loss": 2.7935, + "learning_rate": 1.8188077646248778e-05, + "loss": 2.7648, "step": 192200 }, { "epoch": 0.64, - "learning_rate": 1.817131545321531e-05, - "loss": 2.7797, + "learning_rate": 1.8171526177802493e-05, + "loss": 2.7717, "step": 192300 }, { "epoch": 0.64, - "learning_rate": 1.815476387518786e-05, - "loss": 2.7752, + "learning_rate": 1.8154974709356216e-05, + "loss": 2.7882, "step": 192400 }, { "epoch": 0.64, - "learning_rate": 1.8138212297160413e-05, - "loss": 2.7793, + "learning_rate": 1.8138423240909935e-05, + "loss": 2.7604, "step": 192500 }, { "epoch": 0.64, - "learning_rate": 1.812166071913296e-05, - "loss": 2.7816, + "learning_rate": 1.8121871772463654e-05, + "loss": 2.7539, "step": 192600 }, { "epoch": 0.64, - "learning_rate": 1.8105109141105513e-05, - "loss": 2.785, + "learning_rate": 1.8105320304017373e-05, + "loss": 2.7602, "step": 192700 }, { "epoch": 0.64, - "learning_rate": 1.8088557563078064e-05, - "loss": 2.7815, + "learning_rate": 1.8088768835571092e-05, + "loss": 2.7713, "step": 192800 }, { "epoch": 0.64, - "learning_rate": 1.8072005985050616e-05, - "loss": 2.7901, + "learning_rate": 1.8072217367124814e-05, + "loss": 2.7632, "step": 192900 }, { "epoch": 0.64, - "learning_rate": 1.8055454407023168e-05, - "loss": 2.7821, + "learning_rate": 1.805566589867853e-05, + "loss": 2.7697, "step": 193000 }, { "epoch": 0.64, - "learning_rate": 1.803890282899572e-05, - "loss": 2.7781, + "learning_rate": 1.8039114430232252e-05, + "loss": 2.7612, "step": 193100 }, { "epoch": 0.64, - "learning_rate": 1.8022351250968268e-05, - "loss": 2.7818, + "learning_rate": 1.802256296178597e-05, + "loss": 2.7671, "step": 193200 }, { "epoch": 0.64, - "learning_rate": 1.800579967294082e-05, - "loss": 2.7791, + "learning_rate": 1.800601149333969e-05, + "loss": 2.774, "step": 193300 }, { "epoch": 0.64, - "learning_rate": 1.798924809491337e-05, - "loss": 2.794, + "learning_rate": 1.798946002489341e-05, + "loss": 2.7677, "step": 193400 }, { "epoch": 0.64, - "learning_rate": 1.797269651688592e-05, - "loss": 2.7643, + "learning_rate": 1.7972908556447128e-05, + "loss": 2.7751, "step": 193500 }, { "epoch": 0.64, - "learning_rate": 1.795614493885847e-05, - "loss": 2.7786, + "learning_rate": 1.7956357088000847e-05, + "loss": 2.7664, "step": 193600 }, { "epoch": 0.64, - "learning_rate": 1.7939593360831022e-05, - "loss": 2.7815, + "learning_rate": 1.7939805619554566e-05, + "loss": 2.7738, "step": 193700 }, { "epoch": 0.64, - "learning_rate": 1.7923041782803574e-05, - "loss": 2.7852, + "learning_rate": 1.7923254151108288e-05, + "loss": 2.7726, "step": 193800 }, { "epoch": 0.64, - "learning_rate": 1.7906490204776125e-05, - "loss": 2.778, + "learning_rate": 1.7906702682662007e-05, + "loss": 2.7597, "step": 193900 }, { "epoch": 0.64, - "learning_rate": 1.7889938626748677e-05, - "loss": 2.7754, + "learning_rate": 1.7890151214215726e-05, + "loss": 2.7672, "step": 194000 }, { "epoch": 0.64, - "learning_rate": 1.787338704872123e-05, - "loss": 2.7887, + "learning_rate": 1.7873599745769445e-05, + "loss": 2.7716, "step": 194100 }, { "epoch": 0.64, - "learning_rate": 1.7856835470693777e-05, - "loss": 2.7839, + "learning_rate": 1.7857048277323164e-05, + "loss": 2.7638, "step": 194200 }, { "epoch": 0.64, - "learning_rate": 1.784028389266633e-05, - "loss": 2.7986, + "learning_rate": 1.7840496808876883e-05, + "loss": 2.7747, "step": 194300 }, { "epoch": 0.64, - "learning_rate": 1.7823732314638877e-05, - "loss": 2.7748, + "learning_rate": 1.7823945340430602e-05, + "loss": 2.7643, "step": 194400 }, { "epoch": 0.64, - "learning_rate": 1.7807180736611428e-05, - "loss": 2.7818, + "learning_rate": 1.7807393871984325e-05, + "loss": 2.7669, "step": 194500 }, { "epoch": 0.64, - "learning_rate": 1.779062915858398e-05, - "loss": 2.7834, + "learning_rate": 1.7790842403538044e-05, + "loss": 2.7663, "step": 194600 }, { "epoch": 0.64, - "learning_rate": 1.777407758055653e-05, - "loss": 2.7748, + "learning_rate": 1.7774290935091763e-05, + "loss": 2.7682, "step": 194700 }, { "epoch": 0.64, - "learning_rate": 1.7757526002529083e-05, - "loss": 2.7843, + "learning_rate": 1.775773946664548e-05, + "loss": 2.7708, "step": 194800 }, { "epoch": 0.65, - "learning_rate": 1.7740974424501635e-05, - "loss": 2.7985, + "learning_rate": 1.77411879981992e-05, + "loss": 2.7674, "step": 194900 }, { "epoch": 0.65, - "learning_rate": 1.7724422846474183e-05, - "loss": 2.7956, + "learning_rate": 1.772463652975292e-05, + "loss": 2.7655, "step": 195000 }, { "epoch": 0.65, - "learning_rate": 1.7707871268446734e-05, - "loss": 2.7751, + "learning_rate": 1.770808506130664e-05, + "loss": 2.7644, "step": 195100 }, { "epoch": 0.65, - "learning_rate": 1.7691319690419286e-05, - "loss": 2.7825, + "learning_rate": 1.769153359286036e-05, + "loss": 2.767, "step": 195200 }, { "epoch": 0.65, - "learning_rate": 1.7674768112391838e-05, - "loss": 2.7777, + "learning_rate": 1.767498212441408e-05, + "loss": 2.755, "step": 195300 }, { "epoch": 0.65, - "learning_rate": 1.765821653436439e-05, - "loss": 2.7835, + "learning_rate": 1.76584306559678e-05, + "loss": 2.7718, "step": 195400 }, { "epoch": 0.65, - "learning_rate": 1.7641664956336937e-05, - "loss": 2.7832, + "learning_rate": 1.7641879187521518e-05, + "loss": 2.7488, "step": 195500 }, { "epoch": 0.65, - "learning_rate": 1.762511337830949e-05, - "loss": 2.7964, + "learning_rate": 1.7625327719075237e-05, + "loss": 2.7556, "step": 195600 }, { "epoch": 0.65, - "learning_rate": 1.760856180028204e-05, - "loss": 2.781, + "learning_rate": 1.7608776250628956e-05, + "loss": 2.7695, "step": 195700 }, { "epoch": 0.65, - "learning_rate": 1.759201022225459e-05, - "loss": 2.7867, + "learning_rate": 1.7592224782182675e-05, + "loss": 2.7587, "step": 195800 }, { "epoch": 0.65, - "learning_rate": 1.757545864422714e-05, - "loss": 2.7831, + "learning_rate": 1.7575673313736397e-05, + "loss": 2.7693, "step": 195900 }, { "epoch": 0.65, - "learning_rate": 1.7558907066199692e-05, - "loss": 2.7588, + "learning_rate": 1.7559121845290116e-05, + "loss": 2.7675, "step": 196000 }, { "epoch": 0.65, - "learning_rate": 1.7542355488172244e-05, - "loss": 2.7939, + "learning_rate": 1.7542570376843835e-05, + "loss": 2.7771, "step": 196100 }, { "epoch": 0.65, - "learning_rate": 1.7525803910144795e-05, - "loss": 2.7844, + "learning_rate": 1.7526018908397554e-05, + "loss": 2.7595, "step": 196200 }, { "epoch": 0.65, - "learning_rate": 1.7509252332117347e-05, - "loss": 2.7959, + "learning_rate": 1.7509467439951273e-05, + "loss": 2.7705, "step": 196300 }, { "epoch": 0.65, - "learning_rate": 1.74927007540899e-05, - "loss": 2.782, + "learning_rate": 1.7492915971504992e-05, + "loss": 2.7699, "step": 196400 }, { "epoch": 0.65, - "learning_rate": 1.7476149176062447e-05, - "loss": 2.782, + "learning_rate": 1.747636450305871e-05, + "loss": 2.7563, "step": 196500 }, { "epoch": 0.65, - "learning_rate": 1.7459597598034995e-05, - "loss": 2.786, + "learning_rate": 1.7459813034612433e-05, + "loss": 2.7528, "step": 196600 }, { "epoch": 0.65, - "learning_rate": 1.7443046020007546e-05, - "loss": 2.7947, + "learning_rate": 1.7443261566166152e-05, + "loss": 2.7555, "step": 196700 }, { "epoch": 0.65, - "learning_rate": 1.7426494441980098e-05, - "loss": 2.7875, + "learning_rate": 1.742671009771987e-05, + "loss": 2.7614, "step": 196800 }, { "epoch": 0.65, - "learning_rate": 1.740994286395265e-05, - "loss": 2.774, + "learning_rate": 1.741015862927359e-05, + "loss": 2.7654, "step": 196900 }, { "epoch": 0.65, - "learning_rate": 1.73933912859252e-05, - "loss": 2.7776, + "learning_rate": 1.739360716082731e-05, + "loss": 2.7746, "step": 197000 }, { "epoch": 0.65, - "learning_rate": 1.7376839707897753e-05, - "loss": 2.789, + "learning_rate": 1.7377055692381028e-05, + "loss": 2.7823, "step": 197100 }, { "epoch": 0.65, - "learning_rate": 1.7360288129870304e-05, - "loss": 2.7963, + "learning_rate": 1.7360504223934747e-05, + "loss": 2.7636, "step": 197200 }, { "epoch": 0.65, - "learning_rate": 1.7343736551842853e-05, - "loss": 2.799, + "learning_rate": 1.734395275548847e-05, + "loss": 2.7673, "step": 197300 }, { "epoch": 0.65, - "learning_rate": 1.7327184973815404e-05, - "loss": 2.8063, + "learning_rate": 1.7327401287042185e-05, + "loss": 2.7687, "step": 197400 }, { "epoch": 0.65, - "learning_rate": 1.7310633395787956e-05, - "loss": 2.766, + "learning_rate": 1.7310849818595908e-05, + "loss": 2.767, "step": 197500 }, { "epoch": 0.65, - "learning_rate": 1.7294081817760507e-05, - "loss": 2.7777, + "learning_rate": 1.7294298350149627e-05, + "loss": 2.7665, "step": 197600 }, { "epoch": 0.65, - "learning_rate": 1.7277530239733056e-05, - "loss": 2.779, + "learning_rate": 1.7277746881703346e-05, + "loss": 2.766, "step": 197700 }, { "epoch": 0.65, - "learning_rate": 1.7260978661705607e-05, - "loss": 2.7926, + "learning_rate": 1.7261195413257065e-05, + "loss": 2.7693, "step": 197800 }, { "epoch": 0.66, - "learning_rate": 1.724442708367816e-05, - "loss": 2.7586, + "learning_rate": 1.7244643944810784e-05, + "loss": 2.755, "step": 197900 }, { "epoch": 0.66, - "learning_rate": 1.722787550565071e-05, - "loss": 2.7683, + "learning_rate": 1.7228092476364506e-05, + "loss": 2.7694, "step": 198000 }, { "epoch": 0.66, - "learning_rate": 1.721132392762326e-05, - "loss": 2.7979, + "learning_rate": 1.721154100791822e-05, + "loss": 2.7638, "step": 198100 }, { "epoch": 0.66, - "learning_rate": 1.719477234959581e-05, - "loss": 2.7865, + "learning_rate": 1.7194989539471944e-05, + "loss": 2.7543, "step": 198200 }, { "epoch": 0.66, - "learning_rate": 1.7178220771568362e-05, - "loss": 2.7916, + "learning_rate": 1.7178438071025663e-05, + "loss": 2.745, "step": 198300 }, { "epoch": 0.66, - "learning_rate": 1.7161669193540913e-05, - "loss": 2.771, + "learning_rate": 1.7161886602579382e-05, + "loss": 2.7536, "step": 198400 }, { "epoch": 0.66, - "learning_rate": 1.7145117615513465e-05, - "loss": 2.801, + "learning_rate": 1.71453351341331e-05, + "loss": 2.7689, "step": 198500 }, { "epoch": 0.66, - "learning_rate": 1.7128566037486017e-05, - "loss": 2.7844, + "learning_rate": 1.712878366568682e-05, + "loss": 2.7601, "step": 198600 }, { "epoch": 0.66, - "learning_rate": 1.7112014459458568e-05, - "loss": 2.7698, + "learning_rate": 1.7112232197240542e-05, + "loss": 2.746, "step": 198700 }, { "epoch": 0.66, - "learning_rate": 1.7095462881431116e-05, - "loss": 2.7815, + "learning_rate": 1.7095680728794258e-05, + "loss": 2.7534, "step": 198800 }, { "epoch": 0.66, - "learning_rate": 1.7078911303403665e-05, - "loss": 2.7991, + "learning_rate": 1.707912926034798e-05, + "loss": 2.7565, "step": 198900 }, { "epoch": 0.66, - "learning_rate": 1.7062359725376216e-05, - "loss": 2.7834, + "learning_rate": 1.70625777919017e-05, + "loss": 2.7682, "step": 199000 }, { "epoch": 0.66, - "learning_rate": 1.7045808147348768e-05, - "loss": 2.7745, + "learning_rate": 1.7046026323455418e-05, + "loss": 2.754, "step": 199100 }, { "epoch": 0.66, - "learning_rate": 1.702925656932132e-05, - "loss": 2.7823, + "learning_rate": 1.7029474855009137e-05, + "loss": 2.7548, "step": 199200 }, { "epoch": 0.66, - "learning_rate": 1.701270499129387e-05, - "loss": 2.7903, + "learning_rate": 1.7012923386562856e-05, + "loss": 2.7587, "step": 199300 }, { "epoch": 0.66, - "learning_rate": 1.6996153413266423e-05, - "loss": 2.7715, + "learning_rate": 1.699637191811658e-05, + "loss": 2.7688, "step": 199400 }, { "epoch": 0.66, - "learning_rate": 1.6979601835238974e-05, - "loss": 2.7763, + "learning_rate": 1.6979820449670294e-05, + "loss": 2.7688, "step": 199500 }, { "epoch": 0.66, - "learning_rate": 1.6963050257211522e-05, - "loss": 2.7808, + "learning_rate": 1.6963268981224016e-05, + "loss": 2.7574, "step": 199600 }, { "epoch": 0.66, - "learning_rate": 1.6946498679184074e-05, - "loss": 2.7848, + "learning_rate": 1.6946717512777735e-05, + "loss": 2.7669, "step": 199700 }, { "epoch": 0.66, - "learning_rate": 1.6929947101156626e-05, - "loss": 2.7665, + "learning_rate": 1.6930166044331454e-05, + "loss": 2.755, "step": 199800 }, { "epoch": 0.66, - "learning_rate": 1.6913395523129174e-05, - "loss": 2.7843, + "learning_rate": 1.6913614575885173e-05, + "loss": 2.7706, "step": 199900 }, { "epoch": 0.66, - "learning_rate": 1.6896843945101725e-05, - "loss": 2.8, + "learning_rate": 1.6897063107438892e-05, + "loss": 2.7628, "step": 200000 }, { "epoch": 0.66, - "learning_rate": 1.6880292367074277e-05, - "loss": 2.789, + "learning_rate": 1.6880511638992615e-05, + "loss": 2.7714, "step": 200100 }, { "epoch": 0.66, - "learning_rate": 1.686374078904683e-05, - "loss": 2.7978, + "learning_rate": 1.686396017054633e-05, + "loss": 2.7604, "step": 200200 }, { "epoch": 0.66, - "learning_rate": 1.684718921101938e-05, - "loss": 2.7828, + "learning_rate": 1.684740870210005e-05, + "loss": 2.7644, "step": 200300 }, { "epoch": 0.66, - "learning_rate": 1.683063763299193e-05, - "loss": 2.7703, + "learning_rate": 1.683085723365377e-05, + "loss": 2.7634, "step": 200400 }, { "epoch": 0.66, - "learning_rate": 1.681408605496448e-05, - "loss": 2.7743, + "learning_rate": 1.6814305765207487e-05, + "loss": 2.7519, "step": 200500 }, { "epoch": 0.66, - "learning_rate": 1.6797534476937032e-05, - "loss": 2.7809, + "learning_rate": 1.679775429676121e-05, + "loss": 2.7659, "step": 200600 }, { "epoch": 0.66, - "learning_rate": 1.6780982898909583e-05, - "loss": 2.7726, + "learning_rate": 1.678120282831493e-05, + "loss": 2.7687, "step": 200700 }, { "epoch": 0.66, - "learning_rate": 1.6764431320882135e-05, - "loss": 2.7812, + "learning_rate": 1.676465135986865e-05, + "loss": 2.769, "step": 200800 }, { "epoch": 0.67, - "learning_rate": 1.6747879742854687e-05, - "loss": 2.7812, + "learning_rate": 1.6748099891422367e-05, + "loss": 2.7653, "step": 200900 }, { "epoch": 0.67, - "learning_rate": 1.6731328164827235e-05, - "loss": 2.7754, + "learning_rate": 1.6731548422976086e-05, + "loss": 2.7697, "step": 201000 }, { "epoch": 0.67, - "learning_rate": 1.6714776586799786e-05, - "loss": 2.7848, + "learning_rate": 1.6714996954529808e-05, + "loss": 2.7651, "step": 201100 }, { "epoch": 0.67, - "learning_rate": 1.6698225008772338e-05, - "loss": 2.7822, + "learning_rate": 1.6698445486083523e-05, + "loss": 2.7784, "step": 201200 }, { "epoch": 0.67, - "learning_rate": 1.6681673430744886e-05, - "loss": 2.787, + "learning_rate": 1.6681894017637246e-05, + "loss": 2.7694, "step": 201300 }, { "epoch": 0.67, - "learning_rate": 1.6665121852717438e-05, - "loss": 2.783, + "learning_rate": 1.6665342549190965e-05, + "loss": 2.7672, "step": 201400 }, { "epoch": 0.67, - "learning_rate": 1.664857027468999e-05, - "loss": 2.7775, + "learning_rate": 1.6648791080744687e-05, + "loss": 2.7708, "step": 201500 }, { "epoch": 0.67, - "learning_rate": 1.663201869666254e-05, - "loss": 2.7904, + "learning_rate": 1.6632239612298403e-05, + "loss": 2.7602, "step": 201600 }, { "epoch": 0.67, - "learning_rate": 1.6615467118635093e-05, - "loss": 2.7758, + "learning_rate": 1.6615688143852122e-05, + "loss": 2.7647, "step": 201700 }, { "epoch": 0.67, - "learning_rate": 1.6598915540607644e-05, - "loss": 2.7869, + "learning_rate": 1.6599136675405844e-05, + "loss": 2.7553, "step": 201800 }, { "epoch": 0.67, - "learning_rate": 1.6582363962580196e-05, - "loss": 2.7888, + "learning_rate": 1.658258520695956e-05, + "loss": 2.7653, "step": 201900 }, { "epoch": 0.67, - "learning_rate": 1.6565812384552744e-05, - "loss": 2.8011, + "learning_rate": 1.6566033738513282e-05, + "loss": 2.7675, "step": 202000 }, { "epoch": 0.67, - "learning_rate": 1.6549260806525292e-05, - "loss": 2.7794, + "learning_rate": 1.6549482270067e-05, + "loss": 2.7668, "step": 202100 }, { "epoch": 0.67, - "learning_rate": 1.6532709228497844e-05, - "loss": 2.7842, + "learning_rate": 1.6532930801620723e-05, + "loss": 2.7561, "step": 202200 }, { "epoch": 0.67, - "learning_rate": 1.6516157650470395e-05, - "loss": 2.7827, + "learning_rate": 1.651637933317444e-05, + "loss": 2.7594, "step": 202300 }, { "epoch": 0.67, - "learning_rate": 1.6499606072442947e-05, - "loss": 2.7751, + "learning_rate": 1.6499827864728158e-05, + "loss": 2.7636, "step": 202400 }, { "epoch": 0.67, - "learning_rate": 1.64830544944155e-05, - "loss": 2.7624, + "learning_rate": 1.648327639628188e-05, + "loss": 2.7597, "step": 202500 }, { "epoch": 0.67, - "learning_rate": 1.646650291638805e-05, - "loss": 2.7823, + "learning_rate": 1.6466724927835596e-05, + "loss": 2.7781, "step": 202600 }, { "epoch": 0.67, - "learning_rate": 1.6449951338360602e-05, - "loss": 2.7813, + "learning_rate": 1.645017345938932e-05, + "loss": 2.762, "step": 202700 }, { "epoch": 0.67, - "learning_rate": 1.643339976033315e-05, - "loss": 2.7822, + "learning_rate": 1.6433621990943037e-05, + "loss": 2.7585, "step": 202800 }, { "epoch": 0.67, - "learning_rate": 1.64168481823057e-05, - "loss": 2.7737, + "learning_rate": 1.6417070522496756e-05, + "loss": 2.7733, "step": 202900 }, { "epoch": 0.67, - "learning_rate": 1.6400296604278253e-05, - "loss": 2.7906, + "learning_rate": 1.6400519054050475e-05, + "loss": 2.7604, "step": 203000 }, { "epoch": 0.67, - "learning_rate": 1.6383745026250805e-05, - "loss": 2.772, + "learning_rate": 1.6383967585604194e-05, + "loss": 2.7742, "step": 203100 }, { "epoch": 0.67, - "learning_rate": 1.6367193448223356e-05, - "loss": 2.7745, + "learning_rate": 1.6367416117157917e-05, + "loss": 2.756, "step": 203200 }, { "epoch": 0.67, - "learning_rate": 1.6350641870195905e-05, - "loss": 2.7748, + "learning_rate": 1.6350864648711632e-05, + "loss": 2.7693, "step": 203300 }, { "epoch": 0.67, - "learning_rate": 1.6334090292168456e-05, - "loss": 2.7753, + "learning_rate": 1.6334313180265355e-05, + "loss": 2.7572, "step": 203400 }, { "epoch": 0.67, - "learning_rate": 1.6317538714141008e-05, - "loss": 2.7828, + "learning_rate": 1.6317761711819074e-05, + "loss": 2.773, "step": 203500 }, { "epoch": 0.67, - "learning_rate": 1.6300987136113556e-05, - "loss": 2.7662, + "learning_rate": 1.6301210243372793e-05, + "loss": 2.754, "step": 203600 }, { "epoch": 0.67, - "learning_rate": 1.6284435558086108e-05, - "loss": 2.7789, + "learning_rate": 1.628465877492651e-05, + "loss": 2.7596, "step": 203700 }, { "epoch": 0.67, - "learning_rate": 1.626788398005866e-05, - "loss": 2.7856, + "learning_rate": 1.626810730648023e-05, + "loss": 2.7827, "step": 203800 }, { "epoch": 0.67, - "learning_rate": 1.625133240203121e-05, - "loss": 2.7961, + "learning_rate": 1.6251555838033953e-05, + "loss": 2.765, "step": 203900 }, { "epoch": 0.68, - "learning_rate": 1.6234780824003762e-05, - "loss": 2.7855, + "learning_rate": 1.623500436958767e-05, + "loss": 2.7653, "step": 204000 }, { "epoch": 0.68, - "learning_rate": 1.6218229245976314e-05, - "loss": 2.7763, + "learning_rate": 1.621845290114139e-05, + "loss": 2.7537, "step": 204100 }, { "epoch": 0.68, - "learning_rate": 1.6201677667948866e-05, - "loss": 2.7862, + "learning_rate": 1.620190143269511e-05, + "loss": 2.7681, "step": 204200 }, { "epoch": 0.68, - "learning_rate": 1.6185126089921414e-05, - "loss": 2.7744, + "learning_rate": 1.618534996424883e-05, + "loss": 2.764, "step": 204300 }, { "epoch": 0.68, - "learning_rate": 1.6168574511893962e-05, - "loss": 2.7719, + "learning_rate": 1.6168798495802548e-05, + "loss": 2.7661, "step": 204400 }, { "epoch": 0.68, - "learning_rate": 1.6152022933866514e-05, - "loss": 2.7838, + "learning_rate": 1.6152247027356267e-05, + "loss": 2.7472, "step": 204500 }, { "epoch": 0.68, - "learning_rate": 1.6135471355839065e-05, - "loss": 2.7956, + "learning_rate": 1.613569555890999e-05, + "loss": 2.7576, "step": 204600 }, { "epoch": 0.68, - "learning_rate": 1.6118919777811617e-05, - "loss": 2.7762, + "learning_rate": 1.6119144090463705e-05, + "loss": 2.7733, "step": 204700 }, { "epoch": 0.68, - "learning_rate": 1.610236819978417e-05, - "loss": 2.7889, + "learning_rate": 1.6102592622017427e-05, + "loss": 2.7607, "step": 204800 }, { "epoch": 0.68, - "learning_rate": 1.608581662175672e-05, - "loss": 2.7683, + "learning_rate": 1.6086041153571146e-05, + "loss": 2.7699, "step": 204900 }, { "epoch": 0.68, - "learning_rate": 1.606926504372927e-05, - "loss": 2.7729, + "learning_rate": 1.6069489685124865e-05, + "loss": 2.7631, "step": 205000 }, { "epoch": 0.68, - "learning_rate": 1.605271346570182e-05, - "loss": 2.7862, + "learning_rate": 1.6052938216678584e-05, + "loss": 2.7644, "step": 205100 }, { "epoch": 0.68, - "learning_rate": 1.603616188767437e-05, - "loss": 2.788, + "learning_rate": 1.6036386748232303e-05, + "loss": 2.7772, "step": 205200 }, { "epoch": 0.68, - "learning_rate": 1.6019610309646923e-05, - "loss": 2.7746, + "learning_rate": 1.6019835279786025e-05, + "loss": 2.7662, "step": 205300 }, { "epoch": 0.68, - "learning_rate": 1.6003058731619475e-05, - "loss": 2.7966, + "learning_rate": 1.600328381133974e-05, + "loss": 2.7611, "step": 205400 }, { "epoch": 0.68, - "learning_rate": 1.5986507153592023e-05, - "loss": 2.7762, + "learning_rate": 1.5986732342893463e-05, + "loss": 2.7568, "step": 205500 }, { "epoch": 0.68, - "learning_rate": 1.5969955575564574e-05, - "loss": 2.7726, + "learning_rate": 1.5970180874447182e-05, + "loss": 2.7605, "step": 205600 }, { "epoch": 0.68, - "learning_rate": 1.5953403997537126e-05, - "loss": 2.7854, + "learning_rate": 1.59536294060009e-05, + "loss": 2.7605, "step": 205700 }, { "epoch": 0.68, - "learning_rate": 1.5936852419509678e-05, - "loss": 2.7823, + "learning_rate": 1.593707793755462e-05, + "loss": 2.7673, "step": 205800 }, { "epoch": 0.68, - "learning_rate": 1.5920300841482226e-05, - "loss": 2.7777, + "learning_rate": 1.592052646910834e-05, + "loss": 2.7588, "step": 205900 }, { "epoch": 0.68, - "learning_rate": 1.5903749263454777e-05, - "loss": 2.766, + "learning_rate": 1.5903975000662062e-05, + "loss": 2.7709, "step": 206000 }, { "epoch": 0.68, - "learning_rate": 1.588719768542733e-05, - "loss": 2.787, + "learning_rate": 1.5887423532215777e-05, + "loss": 2.7593, "step": 206100 }, { "epoch": 0.68, - "learning_rate": 1.587064610739988e-05, - "loss": 2.779, + "learning_rate": 1.58708720637695e-05, + "loss": 2.7684, "step": 206200 }, { "epoch": 0.68, - "learning_rate": 1.5854094529372432e-05, - "loss": 2.7708, + "learning_rate": 1.585432059532322e-05, + "loss": 2.7634, "step": 206300 }, { "epoch": 0.68, - "learning_rate": 1.5837542951344984e-05, - "loss": 2.77, + "learning_rate": 1.5837769126876938e-05, + "loss": 2.7529, "step": 206400 }, { "epoch": 0.68, - "learning_rate": 1.5820991373317535e-05, - "loss": 2.7677, + "learning_rate": 1.5821217658430657e-05, + "loss": 2.7538, "step": 206500 }, { "epoch": 0.68, - "learning_rate": 1.5804439795290084e-05, - "loss": 2.7897, + "learning_rate": 1.5804666189984376e-05, + "loss": 2.7731, "step": 206600 }, { "epoch": 0.68, - "learning_rate": 1.5787888217262632e-05, - "loss": 2.7841, + "learning_rate": 1.5788114721538095e-05, + "loss": 2.776, "step": 206700 }, { "epoch": 0.68, - "learning_rate": 1.5771336639235183e-05, - "loss": 2.7656, + "learning_rate": 1.5771563253091814e-05, + "loss": 2.7787, "step": 206800 }, { "epoch": 0.68, - "learning_rate": 1.5754785061207735e-05, - "loss": 2.7803, + "learning_rate": 1.5755011784645536e-05, + "loss": 2.7656, "step": 206900 }, { "epoch": 0.69, - "learning_rate": 1.5738233483180287e-05, - "loss": 2.7791, + "learning_rate": 1.5738460316199255e-05, + "loss": 2.7516, "step": 207000 }, { "epoch": 0.69, - "learning_rate": 1.5721681905152838e-05, - "loss": 2.7858, + "learning_rate": 1.5721908847752974e-05, + "loss": 2.7672, "step": 207100 }, { "epoch": 0.69, - "learning_rate": 1.570513032712539e-05, - "loss": 2.7822, + "learning_rate": 1.5705357379306693e-05, + "loss": 2.7688, "step": 207200 }, { "epoch": 0.69, - "learning_rate": 1.568857874909794e-05, - "loss": 2.7759, + "learning_rate": 1.5688805910860412e-05, + "loss": 2.7646, "step": 207300 }, { "epoch": 0.69, - "learning_rate": 1.567202717107049e-05, - "loss": 2.7826, + "learning_rate": 1.567225444241413e-05, + "loss": 2.7679, "step": 207400 }, { "epoch": 0.69, - "learning_rate": 1.565547559304304e-05, - "loss": 2.7791, + "learning_rate": 1.565570297396785e-05, + "loss": 2.7549, "step": 207500 }, { "epoch": 0.69, - "learning_rate": 1.5638924015015593e-05, - "loss": 2.7693, + "learning_rate": 1.5639151505521572e-05, + "loss": 2.7493, "step": 207600 }, { "epoch": 0.69, - "learning_rate": 1.562237243698814e-05, - "loss": 2.7833, + "learning_rate": 1.562260003707529e-05, + "loss": 2.7651, "step": 207700 }, { "epoch": 0.69, - "learning_rate": 1.5605820858960693e-05, - "loss": 2.77, + "learning_rate": 1.560604856862901e-05, + "loss": 2.7626, "step": 207800 }, { "epoch": 0.69, - "learning_rate": 1.5589269280933244e-05, - "loss": 2.7854, + "learning_rate": 1.558949710018273e-05, + "loss": 2.7696, "step": 207900 }, { "epoch": 0.69, - "learning_rate": 1.5572717702905796e-05, - "loss": 2.7585, + "learning_rate": 1.5572945631736448e-05, + "loss": 2.7629, "step": 208000 }, { "epoch": 0.69, - "learning_rate": 1.5556166124878348e-05, - "loss": 2.788, + "learning_rate": 1.5556394163290167e-05, + "loss": 2.7609, "step": 208100 }, { "epoch": 0.69, - "learning_rate": 1.55396145468509e-05, - "loss": 2.7803, + "learning_rate": 1.5539842694843886e-05, + "loss": 2.7546, "step": 208200 }, { "epoch": 0.69, - "learning_rate": 1.5523062968823447e-05, - "loss": 2.7762, + "learning_rate": 1.552329122639761e-05, + "loss": 2.7777, "step": 208300 }, { "epoch": 0.69, - "learning_rate": 1.5506511390796e-05, - "loss": 2.7797, + "learning_rate": 1.5506739757951328e-05, + "loss": 2.7735, "step": 208400 }, { "epoch": 0.69, - "learning_rate": 1.548995981276855e-05, - "loss": 2.7897, + "learning_rate": 1.5490188289505046e-05, + "loss": 2.7711, "step": 208500 }, { "epoch": 0.69, - "learning_rate": 1.5473408234741102e-05, - "loss": 2.7685, + "learning_rate": 1.5473636821058765e-05, + "loss": 2.7568, "step": 208600 }, { "epoch": 0.69, - "learning_rate": 1.5456856656713654e-05, - "loss": 2.782, + "learning_rate": 1.5457085352612484e-05, + "loss": 2.7651, "step": 208700 }, { "epoch": 0.69, - "learning_rate": 1.5440305078686202e-05, - "loss": 2.7697, + "learning_rate": 1.5440533884166203e-05, + "loss": 2.7577, "step": 208800 }, { "epoch": 0.69, - "learning_rate": 1.5423753500658754e-05, - "loss": 2.7905, + "learning_rate": 1.5423982415719922e-05, + "loss": 2.7658, "step": 208900 }, { "epoch": 0.69, - "learning_rate": 1.5407201922631305e-05, - "loss": 2.7726, + "learning_rate": 1.5407430947273645e-05, + "loss": 2.763, "step": 209000 }, { "epoch": 0.69, - "learning_rate": 1.5390650344603853e-05, - "loss": 2.778, + "learning_rate": 1.5390879478827364e-05, + "loss": 2.7485, "step": 209100 }, { "epoch": 0.69, - "learning_rate": 1.5374098766576405e-05, - "loss": 2.7738, + "learning_rate": 1.5374328010381083e-05, + "loss": 2.7614, "step": 209200 }, { "epoch": 0.69, - "learning_rate": 1.5357547188548957e-05, - "loss": 2.7829, + "learning_rate": 1.5357776541934802e-05, + "loss": 2.7624, "step": 209300 }, { "epoch": 0.69, - "learning_rate": 1.5340995610521508e-05, - "loss": 2.7854, + "learning_rate": 1.534122507348852e-05, + "loss": 2.7601, "step": 209400 }, { "epoch": 0.69, - "learning_rate": 1.532444403249406e-05, - "loss": 2.7783, + "learning_rate": 1.532467360504224e-05, + "loss": 2.7649, "step": 209500 }, { "epoch": 0.69, - "learning_rate": 1.530789245446661e-05, - "loss": 2.7845, + "learning_rate": 1.530812213659596e-05, + "loss": 2.7732, "step": 209600 }, { "epoch": 0.69, - "learning_rate": 1.5291340876439163e-05, - "loss": 2.7727, + "learning_rate": 1.529157066814968e-05, + "loss": 2.7569, "step": 209700 }, { "epoch": 0.69, - "learning_rate": 1.527478929841171e-05, - "loss": 2.7626, + "learning_rate": 1.52750191997034e-05, + "loss": 2.7487, "step": 209800 }, { "epoch": 0.69, - "learning_rate": 1.5258237720384261e-05, - "loss": 2.7827, + "learning_rate": 1.5258467731257117e-05, + "loss": 2.7646, "step": 209900 }, { "epoch": 0.7, - "learning_rate": 1.5241686142356813e-05, - "loss": 2.7603, + "learning_rate": 1.5241916262810838e-05, + "loss": 2.7701, "step": 210000 }, { "epoch": 0.7, - "learning_rate": 1.5225134564329363e-05, - "loss": 2.7725, + "learning_rate": 1.5225364794364557e-05, + "loss": 2.7624, "step": 210100 }, { "epoch": 0.7, - "learning_rate": 1.5208582986301914e-05, - "loss": 2.7828, + "learning_rate": 1.5208813325918276e-05, + "loss": 2.7584, "step": 210200 }, { "epoch": 0.7, - "learning_rate": 1.5192031408274466e-05, - "loss": 2.7675, + "learning_rate": 1.5192261857471995e-05, + "loss": 2.7564, "step": 210300 }, { "epoch": 0.7, - "learning_rate": 1.5175479830247016e-05, - "loss": 2.7773, + "learning_rate": 1.5175710389025716e-05, + "loss": 2.75, "step": 210400 }, { "epoch": 0.7, - "learning_rate": 1.5158928252219567e-05, - "loss": 2.7935, + "learning_rate": 1.5159158920579433e-05, + "loss": 2.7519, "step": 210500 }, { "epoch": 0.7, - "learning_rate": 1.5142376674192119e-05, - "loss": 2.7717, + "learning_rate": 1.5142607452133154e-05, + "loss": 2.7688, "step": 210600 }, { "epoch": 0.7, - "learning_rate": 1.512582509616467e-05, - "loss": 2.7718, + "learning_rate": 1.5126055983686874e-05, + "loss": 2.7544, "step": 210700 }, { "epoch": 0.7, - "learning_rate": 1.510927351813722e-05, - "loss": 2.7842, + "learning_rate": 1.5109504515240593e-05, + "loss": 2.7614, "step": 210800 }, { "epoch": 0.7, - "learning_rate": 1.5092721940109772e-05, - "loss": 2.7783, + "learning_rate": 1.5092953046794312e-05, + "loss": 2.7605, "step": 210900 }, { "epoch": 0.7, - "learning_rate": 1.5076170362082324e-05, - "loss": 2.7753, + "learning_rate": 1.5076401578348031e-05, + "loss": 2.7745, "step": 211000 }, { "epoch": 0.7, - "learning_rate": 1.5059618784054872e-05, - "loss": 2.7936, + "learning_rate": 1.5059850109901752e-05, + "loss": 2.7695, "step": 211100 }, { "epoch": 0.7, - "learning_rate": 1.5043067206027422e-05, - "loss": 2.7798, + "learning_rate": 1.504329864145547e-05, + "loss": 2.7476, "step": 211200 }, { "epoch": 0.7, - "learning_rate": 1.5026515627999973e-05, - "loss": 2.7795, + "learning_rate": 1.502674717300919e-05, + "loss": 2.7688, "step": 211300 }, { "epoch": 0.7, - "learning_rate": 1.5009964049972525e-05, - "loss": 2.7827, + "learning_rate": 1.501019570456291e-05, + "loss": 2.7568, "step": 211400 }, { "epoch": 0.7, - "learning_rate": 1.4993412471945076e-05, - "loss": 2.7699, + "learning_rate": 1.499364423611663e-05, + "loss": 2.7646, "step": 211500 }, { "epoch": 0.7, - "learning_rate": 1.4976860893917626e-05, - "loss": 2.7736, + "learning_rate": 1.4977092767670347e-05, + "loss": 2.7504, "step": 211600 }, { "epoch": 0.7, - "learning_rate": 1.4960309315890178e-05, - "loss": 2.7838, + "learning_rate": 1.4960541299224067e-05, + "loss": 2.7674, "step": 211700 }, { "epoch": 0.7, - "learning_rate": 1.494375773786273e-05, - "loss": 2.7779, + "learning_rate": 1.4943989830777788e-05, + "loss": 2.7603, "step": 211800 }, { "epoch": 0.7, - "learning_rate": 1.492720615983528e-05, - "loss": 2.7788, + "learning_rate": 1.4927438362331505e-05, + "loss": 2.7682, "step": 211900 }, { "epoch": 0.7, - "learning_rate": 1.4910654581807831e-05, - "loss": 2.7742, + "learning_rate": 1.4910886893885226e-05, + "loss": 2.7744, "step": 212000 }, { "epoch": 0.7, - "learning_rate": 1.4894103003780383e-05, - "loss": 2.7802, + "learning_rate": 1.4894335425438947e-05, + "loss": 2.7598, "step": 212100 }, { "epoch": 0.7, - "learning_rate": 1.4877551425752931e-05, - "loss": 2.7821, + "learning_rate": 1.4877783956992666e-05, + "loss": 2.7579, "step": 212200 }, { "epoch": 0.7, - "learning_rate": 1.4860999847725483e-05, - "loss": 2.781, + "learning_rate": 1.4861232488546383e-05, + "loss": 2.751, "step": 212300 }, { "epoch": 0.7, - "learning_rate": 1.4844448269698032e-05, - "loss": 2.788, + "learning_rate": 1.4844681020100104e-05, + "loss": 2.7527, "step": 212400 }, { "epoch": 0.7, - "learning_rate": 1.4827896691670584e-05, - "loss": 2.7773, + "learning_rate": 1.4828129551653824e-05, + "loss": 2.7657, "step": 212500 }, { "epoch": 0.7, - "learning_rate": 1.4811345113643136e-05, - "loss": 2.7721, + "learning_rate": 1.4811578083207542e-05, + "loss": 2.7597, "step": 212600 }, { "epoch": 0.7, - "learning_rate": 1.4794793535615686e-05, - "loss": 2.7753, + "learning_rate": 1.4795026614761262e-05, + "loss": 2.7654, "step": 212700 }, { "epoch": 0.7, - "learning_rate": 1.4778241957588237e-05, - "loss": 2.774, + "learning_rate": 1.4778475146314983e-05, + "loss": 2.7741, "step": 212800 }, { "epoch": 0.7, - "learning_rate": 1.4761690379560789e-05, - "loss": 2.7763, + "learning_rate": 1.4761923677868702e-05, + "loss": 2.7525, "step": 212900 }, { "epoch": 0.71, - "learning_rate": 1.474513880153334e-05, - "loss": 2.7833, + "learning_rate": 1.474537220942242e-05, + "loss": 2.7613, "step": 213000 }, { "epoch": 0.71, - "learning_rate": 1.472858722350589e-05, - "loss": 2.7756, + "learning_rate": 1.472882074097614e-05, + "loss": 2.7507, "step": 213100 }, { "epoch": 0.71, - "learning_rate": 1.4712035645478442e-05, - "loss": 2.7613, + "learning_rate": 1.471226927252986e-05, + "loss": 2.7606, "step": 213200 }, { "epoch": 0.71, - "learning_rate": 1.469548406745099e-05, - "loss": 2.7937, + "learning_rate": 1.4695717804083578e-05, + "loss": 2.7705, "step": 213300 }, { "epoch": 0.71, - "learning_rate": 1.4678932489423542e-05, - "loss": 2.8018, + "learning_rate": 1.4679166335637299e-05, + "loss": 2.7665, "step": 213400 }, { "epoch": 0.71, - "learning_rate": 1.4662380911396093e-05, - "loss": 2.7708, + "learning_rate": 1.466261486719102e-05, + "loss": 2.7562, "step": 213500 }, { "epoch": 0.71, - "learning_rate": 1.4645829333368643e-05, - "loss": 2.7859, + "learning_rate": 1.4646063398744737e-05, + "loss": 2.7638, "step": 213600 }, { "epoch": 0.71, - "learning_rate": 1.4629277755341195e-05, - "loss": 2.7795, + "learning_rate": 1.4629511930298456e-05, + "loss": 2.7577, "step": 213700 }, { "epoch": 0.71, - "learning_rate": 1.4612726177313746e-05, - "loss": 2.7769, + "learning_rate": 1.4612960461852176e-05, + "loss": 2.7539, "step": 213800 }, { "epoch": 0.71, - "learning_rate": 1.4596174599286296e-05, - "loss": 2.7645, + "learning_rate": 1.4596408993405897e-05, + "loss": 2.7723, "step": 213900 }, { "epoch": 0.71, - "learning_rate": 1.4579623021258848e-05, - "loss": 2.7902, + "learning_rate": 1.4579857524959614e-05, + "loss": 2.7708, "step": 214000 }, { "epoch": 0.71, - "learning_rate": 1.45630714432314e-05, - "loss": 2.7726, + "learning_rate": 1.4563306056513335e-05, + "loss": 2.758, "step": 214100 }, { "epoch": 0.71, - "learning_rate": 1.4546519865203951e-05, - "loss": 2.7864, + "learning_rate": 1.4546754588067054e-05, + "loss": 2.7772, "step": 214200 }, { "epoch": 0.71, - "learning_rate": 1.4529968287176501e-05, - "loss": 2.791, + "learning_rate": 1.4530203119620773e-05, + "loss": 2.7653, "step": 214300 }, { "epoch": 0.71, - "learning_rate": 1.451341670914905e-05, - "loss": 2.7676, + "learning_rate": 1.4513651651174492e-05, + "loss": 2.7629, "step": 214400 }, { "epoch": 0.71, - "learning_rate": 1.44968651311216e-05, - "loss": 2.7801, + "learning_rate": 1.4497100182728213e-05, + "loss": 2.7615, "step": 214500 }, { "epoch": 0.71, - "learning_rate": 1.4480313553094152e-05, - "loss": 2.7868, + "learning_rate": 1.4480548714281933e-05, + "loss": 2.7692, "step": 214600 }, { "epoch": 0.71, - "learning_rate": 1.4463761975066702e-05, - "loss": 2.7857, + "learning_rate": 1.446399724583565e-05, + "loss": 2.754, "step": 214700 }, { "epoch": 0.71, - "learning_rate": 1.4447210397039254e-05, - "loss": 2.7808, + "learning_rate": 1.4447445777389371e-05, + "loss": 2.7641, "step": 214800 }, { "epoch": 0.71, - "learning_rate": 1.4430658819011805e-05, - "loss": 2.7754, + "learning_rate": 1.443089430894309e-05, + "loss": 2.7685, "step": 214900 }, { "epoch": 0.71, - "learning_rate": 1.4414107240984357e-05, - "loss": 2.7776, + "learning_rate": 1.4414342840496809e-05, + "loss": 2.7615, "step": 215000 }, { "epoch": 0.71, - "learning_rate": 1.4397555662956907e-05, - "loss": 2.7831, + "learning_rate": 1.4397791372050528e-05, + "loss": 2.7597, "step": 215100 }, { "epoch": 0.71, - "learning_rate": 1.4381004084929459e-05, - "loss": 2.7737, + "learning_rate": 1.4381239903604249e-05, + "loss": 2.7684, "step": 215200 }, { "epoch": 0.71, - "learning_rate": 1.436445250690201e-05, - "loss": 2.7739, + "learning_rate": 1.436468843515797e-05, + "loss": 2.7564, "step": 215300 }, { "epoch": 0.71, - "learning_rate": 1.434790092887456e-05, - "loss": 2.7742, + "learning_rate": 1.4348136966711687e-05, + "loss": 2.7552, "step": 215400 }, { "epoch": 0.71, - "learning_rate": 1.433134935084711e-05, - "loss": 2.779, + "learning_rate": 1.4331585498265407e-05, + "loss": 2.7693, "step": 215500 }, { "epoch": 0.71, - "learning_rate": 1.431479777281966e-05, - "loss": 2.7776, + "learning_rate": 1.4315034029819126e-05, + "loss": 2.7732, "step": 215600 }, { "epoch": 0.71, - "learning_rate": 1.4298246194792212e-05, - "loss": 2.7773, + "learning_rate": 1.4298482561372845e-05, + "loss": 2.7628, "step": 215700 }, { "epoch": 0.71, - "learning_rate": 1.4281694616764763e-05, - "loss": 2.7841, + "learning_rate": 1.4281931092926564e-05, + "loss": 2.7624, "step": 215800 }, { "epoch": 0.71, - "learning_rate": 1.4265143038737313e-05, - "loss": 2.7783, + "learning_rate": 1.4265379624480285e-05, + "loss": 2.7623, "step": 215900 }, { "epoch": 0.72, - "learning_rate": 1.4248591460709865e-05, - "loss": 2.7827, + "learning_rate": 1.4248828156034006e-05, + "loss": 2.7694, "step": 216000 }, { "epoch": 0.72, - "learning_rate": 1.4232039882682416e-05, - "loss": 2.7839, + "learning_rate": 1.4232276687587723e-05, + "loss": 2.7802, "step": 216100 }, { "epoch": 0.72, - "learning_rate": 1.4215488304654966e-05, - "loss": 2.7757, + "learning_rate": 1.4215725219141444e-05, + "loss": 2.7446, "step": 216200 }, { "epoch": 0.72, - "learning_rate": 1.4198936726627518e-05, - "loss": 2.7761, + "learning_rate": 1.4199173750695163e-05, + "loss": 2.7755, "step": 216300 }, { "epoch": 0.72, - "learning_rate": 1.418238514860007e-05, - "loss": 2.7862, + "learning_rate": 1.418262228224888e-05, + "loss": 2.7581, "step": 216400 }, { "epoch": 0.72, - "learning_rate": 1.4165833570572621e-05, - "loss": 2.7676, + "learning_rate": 1.41660708138026e-05, + "loss": 2.7618, "step": 216500 }, { "epoch": 0.72, - "learning_rate": 1.4149281992545169e-05, - "loss": 2.7792, + "learning_rate": 1.4149519345356321e-05, + "loss": 2.7691, "step": 216600 }, { "epoch": 0.72, - "learning_rate": 1.4132730414517719e-05, - "loss": 2.761, + "learning_rate": 1.4132967876910039e-05, + "loss": 2.7626, "step": 216700 }, { "epoch": 0.72, - "learning_rate": 1.411617883649027e-05, - "loss": 2.7882, + "learning_rate": 1.411641640846376e-05, + "loss": 2.7571, "step": 216800 }, { "epoch": 0.72, - "learning_rate": 1.4099627258462822e-05, - "loss": 2.774, + "learning_rate": 1.409986494001748e-05, + "loss": 2.7696, "step": 216900 }, { "epoch": 0.72, - "learning_rate": 1.4083075680435374e-05, - "loss": 2.7681, + "learning_rate": 1.4083313471571199e-05, + "loss": 2.7652, "step": 217000 }, { "epoch": 0.72, - "learning_rate": 1.4066524102407924e-05, - "loss": 2.7714, + "learning_rate": 1.4066762003124916e-05, + "loss": 2.7553, "step": 217100 }, { "epoch": 0.72, - "learning_rate": 1.4049972524380475e-05, - "loss": 2.7685, + "learning_rate": 1.4050210534678637e-05, + "loss": 2.7521, "step": 217200 }, { "epoch": 0.72, - "learning_rate": 1.4033420946353027e-05, - "loss": 2.7736, + "learning_rate": 1.4033659066232358e-05, + "loss": 2.7479, "step": 217300 }, { "epoch": 0.72, - "learning_rate": 1.4016869368325577e-05, - "loss": 2.767, + "learning_rate": 1.4017107597786075e-05, + "loss": 2.7609, "step": 217400 }, { "epoch": 0.72, - "learning_rate": 1.4000317790298128e-05, - "loss": 2.7712, + "learning_rate": 1.4000556129339796e-05, + "loss": 2.7603, "step": 217500 }, { "epoch": 0.72, - "learning_rate": 1.398376621227068e-05, - "loss": 2.7789, + "learning_rate": 1.3984004660893516e-05, + "loss": 2.7474, "step": 217600 }, { "epoch": 0.72, - "learning_rate": 1.3967214634243228e-05, - "loss": 2.7729, + "learning_rate": 1.3967453192447235e-05, + "loss": 2.7572, "step": 217700 }, { "epoch": 0.72, - "learning_rate": 1.395066305621578e-05, - "loss": 2.7706, + "learning_rate": 1.3950901724000953e-05, + "loss": 2.754, "step": 217800 }, { "epoch": 0.72, - "learning_rate": 1.393411147818833e-05, - "loss": 2.7913, + "learning_rate": 1.3934350255554673e-05, + "loss": 2.7588, "step": 217900 }, { "epoch": 0.72, - "learning_rate": 1.3917559900160881e-05, - "loss": 2.7899, + "learning_rate": 1.3917798787108394e-05, + "loss": 2.7542, "step": 218000 }, { "epoch": 0.72, - "learning_rate": 1.3901008322133433e-05, - "loss": 2.785, + "learning_rate": 1.3901247318662111e-05, + "loss": 2.7586, "step": 218100 }, { "epoch": 0.72, - "learning_rate": 1.3884456744105983e-05, - "loss": 2.7863, + "learning_rate": 1.3884695850215832e-05, + "loss": 2.7664, "step": 218200 }, { "epoch": 0.72, - "learning_rate": 1.3867905166078534e-05, - "loss": 2.7744, + "learning_rate": 1.3868144381769553e-05, + "loss": 2.7626, "step": 218300 }, { "epoch": 0.72, - "learning_rate": 1.3851353588051086e-05, - "loss": 2.7764, + "learning_rate": 1.3851592913323271e-05, + "loss": 2.7712, "step": 218400 }, { "epoch": 0.72, - "learning_rate": 1.3834802010023638e-05, - "loss": 2.765, + "learning_rate": 1.3835041444876989e-05, + "loss": 2.7606, "step": 218500 }, { "epoch": 0.72, - "learning_rate": 1.3818250431996188e-05, - "loss": 2.7826, + "learning_rate": 1.381848997643071e-05, + "loss": 2.7465, "step": 218600 }, { "epoch": 0.72, - "learning_rate": 1.380169885396874e-05, - "loss": 2.7837, + "learning_rate": 1.380193850798443e-05, + "loss": 2.7608, "step": 218700 }, { "epoch": 0.72, - "learning_rate": 1.3785147275941287e-05, - "loss": 2.7844, + "learning_rate": 1.3785387039538147e-05, + "loss": 2.7693, "step": 218800 }, { "epoch": 0.72, - "learning_rate": 1.3768595697913839e-05, - "loss": 2.7795, + "learning_rate": 1.3768835571091868e-05, + "loss": 2.7496, "step": 218900 }, { "epoch": 0.72, - "learning_rate": 1.3752044119886389e-05, - "loss": 2.776, + "learning_rate": 1.3752284102645587e-05, + "loss": 2.7704, "step": 219000 }, { "epoch": 0.73, - "learning_rate": 1.373549254185894e-05, - "loss": 2.7573, + "learning_rate": 1.3735732634199308e-05, + "loss": 2.7589, "step": 219100 }, { "epoch": 0.73, - "learning_rate": 1.3718940963831492e-05, - "loss": 2.7787, + "learning_rate": 1.3719181165753025e-05, + "loss": 2.7588, "step": 219200 }, { "epoch": 0.73, - "learning_rate": 1.3702389385804044e-05, - "loss": 2.7677, + "learning_rate": 1.3702629697306746e-05, + "loss": 2.7525, "step": 219300 }, { "epoch": 0.73, - "learning_rate": 1.3685837807776594e-05, - "loss": 2.765, + "learning_rate": 1.3686078228860466e-05, + "loss": 2.7568, "step": 219400 }, { "epoch": 0.73, - "learning_rate": 1.3669286229749145e-05, - "loss": 2.7833, + "learning_rate": 1.3669526760414184e-05, + "loss": 2.7687, "step": 219500 }, { "epoch": 0.73, - "learning_rate": 1.3652734651721697e-05, - "loss": 2.782, + "learning_rate": 1.3652975291967904e-05, + "loss": 2.7713, "step": 219600 }, { "epoch": 0.73, - "learning_rate": 1.3636183073694247e-05, - "loss": 2.7896, + "learning_rate": 1.3636423823521623e-05, + "loss": 2.7573, "step": 219700 }, { "epoch": 0.73, - "learning_rate": 1.3619631495666798e-05, - "loss": 2.7659, + "learning_rate": 1.3619872355075344e-05, + "loss": 2.7625, "step": 219800 }, { "epoch": 0.73, - "learning_rate": 1.360307991763935e-05, - "loss": 2.7828, + "learning_rate": 1.3603320886629061e-05, + "loss": 2.7552, "step": 219900 }, { "epoch": 0.73, - "learning_rate": 1.3586528339611898e-05, - "loss": 2.7867, + "learning_rate": 1.3586769418182782e-05, + "loss": 2.7736, "step": 220000 }, { "epoch": 0.73, - "learning_rate": 1.356997676158445e-05, - "loss": 2.7735, + "learning_rate": 1.3570217949736503e-05, + "loss": 2.7452, "step": 220100 }, { "epoch": 0.73, - "learning_rate": 1.3553425183557e-05, - "loss": 2.7849, + "learning_rate": 1.355366648129022e-05, + "loss": 2.7462, "step": 220200 }, { "epoch": 0.73, - "learning_rate": 1.3536873605529551e-05, - "loss": 2.7757, + "learning_rate": 1.353711501284394e-05, + "loss": 2.7562, "step": 220300 }, { "epoch": 0.73, - "learning_rate": 1.3520322027502103e-05, - "loss": 2.7678, + "learning_rate": 1.352056354439766e-05, + "loss": 2.7479, "step": 220400 }, { "epoch": 0.73, - "learning_rate": 1.3503770449474654e-05, - "loss": 2.79, + "learning_rate": 1.3504012075951377e-05, + "loss": 2.7564, "step": 220500 }, { "epoch": 0.73, - "learning_rate": 1.3487218871447204e-05, - "loss": 2.7703, + "learning_rate": 1.3487460607505098e-05, + "loss": 2.7735, "step": 220600 }, { "epoch": 0.73, - "learning_rate": 1.3470667293419756e-05, - "loss": 2.7859, + "learning_rate": 1.3470909139058818e-05, + "loss": 2.7683, "step": 220700 }, { "epoch": 0.73, - "learning_rate": 1.3454115715392308e-05, - "loss": 2.7765, + "learning_rate": 1.3454357670612539e-05, + "loss": 2.7601, "step": 220800 }, { "epoch": 0.73, - "learning_rate": 1.3437564137364857e-05, - "loss": 2.7944, + "learning_rate": 1.3437806202166256e-05, + "loss": 2.7441, "step": 220900 }, { "epoch": 0.73, - "learning_rate": 1.3421012559337409e-05, - "loss": 2.7816, + "learning_rate": 1.3421254733719977e-05, + "loss": 2.7692, "step": 221000 }, { "epoch": 0.73, - "learning_rate": 1.3404460981309957e-05, - "loss": 2.7688, + "learning_rate": 1.3404703265273696e-05, + "loss": 2.7497, "step": 221100 }, { "epoch": 0.73, - "learning_rate": 1.3387909403282509e-05, - "loss": 2.7732, + "learning_rate": 1.3388151796827413e-05, + "loss": 2.7671, "step": 221200 }, { "epoch": 0.73, - "learning_rate": 1.337135782525506e-05, - "loss": 2.7708, + "learning_rate": 1.3371600328381134e-05, + "loss": 2.7618, "step": 221300 }, { "epoch": 0.73, - "learning_rate": 1.335480624722761e-05, - "loss": 2.7726, + "learning_rate": 1.3355048859934855e-05, + "loss": 2.7591, "step": 221400 }, { "epoch": 0.73, - "learning_rate": 1.3338254669200162e-05, - "loss": 2.7667, + "learning_rate": 1.3338497391488575e-05, + "loss": 2.7558, "step": 221500 }, { "epoch": 0.73, - "learning_rate": 1.3321703091172714e-05, - "loss": 2.7835, + "learning_rate": 1.3321945923042292e-05, + "loss": 2.7706, "step": 221600 }, { "epoch": 0.73, - "learning_rate": 1.3305151513145263e-05, - "loss": 2.7829, + "learning_rate": 1.3305394454596013e-05, + "loss": 2.7579, "step": 221700 }, { "epoch": 0.73, - "learning_rate": 1.3288599935117815e-05, - "loss": 2.7843, + "learning_rate": 1.3288842986149732e-05, + "loss": 2.762, "step": 221800 }, { "epoch": 0.73, - "learning_rate": 1.3272048357090367e-05, - "loss": 2.7799, + "learning_rate": 1.327229151770345e-05, + "loss": 2.7587, "step": 221900 }, { "epoch": 0.73, - "learning_rate": 1.3255496779062918e-05, - "loss": 2.7738, + "learning_rate": 1.325574004925717e-05, + "loss": 2.7797, "step": 222000 }, { "epoch": 0.74, - "learning_rate": 1.3238945201035468e-05, - "loss": 2.7715, + "learning_rate": 1.323918858081089e-05, + "loss": 2.7493, "step": 222100 }, { "epoch": 0.74, - "learning_rate": 1.3222393623008016e-05, - "loss": 2.7695, + "learning_rate": 1.3222637112364611e-05, + "loss": 2.7677, "step": 222200 }, { "epoch": 0.74, - "learning_rate": 1.3205842044980568e-05, - "loss": 2.788, + "learning_rate": 1.3206085643918329e-05, + "loss": 2.758, "step": 222300 }, { "epoch": 0.74, - "learning_rate": 1.318929046695312e-05, - "loss": 2.7764, + "learning_rate": 1.318953417547205e-05, + "loss": 2.7514, "step": 222400 }, { "epoch": 0.74, - "learning_rate": 1.317273888892567e-05, - "loss": 2.7676, + "learning_rate": 1.3172982707025768e-05, + "loss": 2.7609, "step": 222500 }, { "epoch": 0.74, - "learning_rate": 1.3156187310898221e-05, - "loss": 2.7812, + "learning_rate": 1.3156431238579486e-05, + "loss": 2.7669, "step": 222600 }, { "epoch": 0.74, - "learning_rate": 1.3139635732870773e-05, - "loss": 2.7706, + "learning_rate": 1.3139879770133206e-05, + "loss": 2.7623, "step": 222700 }, { "epoch": 0.74, - "learning_rate": 1.3123084154843324e-05, - "loss": 2.7703, + "learning_rate": 1.3123328301686927e-05, + "loss": 2.7669, "step": 222800 }, { "epoch": 0.74, - "learning_rate": 1.3106532576815874e-05, - "loss": 2.782, + "learning_rate": 1.3106776833240648e-05, + "loss": 2.7461, "step": 222900 }, { "epoch": 0.74, - "learning_rate": 1.3089980998788426e-05, - "loss": 2.7787, + "learning_rate": 1.3090225364794365e-05, + "loss": 2.747, "step": 223000 }, { "epoch": 0.74, - "learning_rate": 1.3073429420760977e-05, - "loss": 2.7709, + "learning_rate": 1.3073673896348084e-05, + "loss": 2.7535, "step": 223100 }, { "epoch": 0.74, - "learning_rate": 1.3056877842733527e-05, - "loss": 2.7651, + "learning_rate": 1.3057122427901805e-05, + "loss": 2.7713, "step": 223200 }, { "epoch": 0.74, - "learning_rate": 1.3040326264706077e-05, - "loss": 2.7864, + "learning_rate": 1.3040570959455522e-05, + "loss": 2.7419, "step": 223300 }, { "epoch": 0.74, - "learning_rate": 1.3023774686678627e-05, - "loss": 2.7718, + "learning_rate": 1.3024019491009243e-05, + "loss": 2.7733, "step": 223400 }, { "epoch": 0.74, - "learning_rate": 1.3007223108651179e-05, - "loss": 2.778, + "learning_rate": 1.3007468022562963e-05, + "loss": 2.7672, "step": 223500 }, { "epoch": 0.74, - "learning_rate": 1.299067153062373e-05, - "loss": 2.7754, + "learning_rate": 1.299091655411668e-05, + "loss": 2.7738, "step": 223600 }, { "epoch": 0.74, - "learning_rate": 1.297411995259628e-05, - "loss": 2.7697, + "learning_rate": 1.2974365085670401e-05, + "loss": 2.7636, "step": 223700 }, { "epoch": 0.74, - "learning_rate": 1.2957568374568832e-05, - "loss": 2.7835, + "learning_rate": 1.295781361722412e-05, + "loss": 2.765, "step": 223800 }, { "epoch": 0.74, - "learning_rate": 1.2941016796541383e-05, - "loss": 2.7811, + "learning_rate": 1.2941262148777841e-05, + "loss": 2.7526, "step": 223900 }, { "epoch": 0.74, - "learning_rate": 1.2924465218513935e-05, - "loss": 2.7657, + "learning_rate": 1.2924710680331558e-05, + "loss": 2.7703, "step": 224000 }, { "epoch": 0.74, - "learning_rate": 1.2907913640486485e-05, - "loss": 2.7919, + "learning_rate": 1.2908159211885279e-05, + "loss": 2.7737, "step": 224100 }, { "epoch": 0.74, - "learning_rate": 1.2891362062459037e-05, - "loss": 2.7828, + "learning_rate": 1.2891607743439e-05, + "loss": 2.7686, "step": 224200 }, { "epoch": 0.74, - "learning_rate": 1.2874810484431588e-05, - "loss": 2.7745, + "learning_rate": 1.2875056274992717e-05, + "loss": 2.7574, "step": 224300 }, { "epoch": 0.74, - "learning_rate": 1.2858258906404136e-05, - "loss": 2.7838, + "learning_rate": 1.2858504806546438e-05, + "loss": 2.7592, "step": 224400 }, { "epoch": 0.74, - "learning_rate": 1.2841707328376686e-05, - "loss": 2.7829, + "learning_rate": 1.2841953338100157e-05, + "loss": 2.7584, "step": 224500 }, { "epoch": 0.74, - "learning_rate": 1.2825155750349238e-05, - "loss": 2.7785, + "learning_rate": 1.2825401869653877e-05, + "loss": 2.761, "step": 224600 }, { "epoch": 0.74, - "learning_rate": 1.280860417232179e-05, - "loss": 2.7747, + "learning_rate": 1.2808850401207594e-05, + "loss": 2.7542, "step": 224700 }, { "epoch": 0.74, - "learning_rate": 1.2792052594294341e-05, - "loss": 2.7817, + "learning_rate": 1.2792298932761315e-05, + "loss": 2.7585, "step": 224800 }, { "epoch": 0.74, - "learning_rate": 1.2775501016266891e-05, - "loss": 2.7767, + "learning_rate": 1.2775747464315036e-05, + "loss": 2.7566, "step": 224900 }, { "epoch": 0.74, - "learning_rate": 1.2758949438239443e-05, - "loss": 2.7668, + "learning_rate": 1.2759195995868753e-05, + "loss": 2.7727, "step": 225000 }, { "epoch": 0.75, - "learning_rate": 1.2742397860211994e-05, - "loss": 2.774, + "learning_rate": 1.2742644527422474e-05, + "loss": 2.7518, "step": 225100 }, { "epoch": 0.75, - "learning_rate": 1.2725846282184544e-05, - "loss": 2.783, + "learning_rate": 1.2726093058976193e-05, + "loss": 2.754, "step": 225200 }, { "epoch": 0.75, - "learning_rate": 1.2709294704157096e-05, - "loss": 2.779, + "learning_rate": 1.2709541590529913e-05, + "loss": 2.7637, "step": 225300 }, { "epoch": 0.75, - "learning_rate": 1.2692743126129647e-05, - "loss": 2.7765, + "learning_rate": 1.269299012208363e-05, + "loss": 2.7561, "step": 225400 }, { "epoch": 0.75, - "learning_rate": 1.2676191548102195e-05, - "loss": 2.772, + "learning_rate": 1.2676438653637351e-05, + "loss": 2.7536, "step": 225500 }, { "epoch": 0.75, - "learning_rate": 1.2659639970074747e-05, - "loss": 2.7808, + "learning_rate": 1.2659887185191072e-05, + "loss": 2.7477, "step": 225600 }, { "epoch": 0.75, - "learning_rate": 1.2643088392047297e-05, - "loss": 2.7823, + "learning_rate": 1.264333571674479e-05, + "loss": 2.7465, "step": 225700 }, { "epoch": 0.75, - "learning_rate": 1.2626536814019849e-05, - "loss": 2.7773, + "learning_rate": 1.262678424829851e-05, + "loss": 2.77, "step": 225800 }, { "epoch": 0.75, - "learning_rate": 1.26099852359924e-05, - "loss": 2.765, + "learning_rate": 1.2610232779852229e-05, + "loss": 2.754, "step": 225900 }, { "epoch": 0.75, - "learning_rate": 1.259343365796495e-05, - "loss": 2.7644, + "learning_rate": 1.259368131140595e-05, + "loss": 2.7335, "step": 226000 }, { "epoch": 0.75, - "learning_rate": 1.2576882079937502e-05, - "loss": 2.7682, + "learning_rate": 1.2577129842959667e-05, + "loss": 2.754, "step": 226100 }, { "epoch": 0.75, - "learning_rate": 1.2560330501910053e-05, - "loss": 2.771, + "learning_rate": 1.2560578374513388e-05, + "loss": 2.7408, "step": 226200 }, { "epoch": 0.75, - "learning_rate": 1.2543778923882605e-05, - "loss": 2.7807, + "learning_rate": 1.2544026906067108e-05, + "loss": 2.7549, "step": 226300 }, { "epoch": 0.75, - "learning_rate": 1.2527227345855155e-05, - "loss": 2.7717, + "learning_rate": 1.2527475437620826e-05, + "loss": 2.7623, "step": 226400 }, { "epoch": 0.75, - "learning_rate": 1.2510675767827706e-05, - "loss": 2.7698, + "learning_rate": 1.2510923969174546e-05, + "loss": 2.7483, "step": 226500 }, { "epoch": 0.75, - "learning_rate": 1.2494124189800256e-05, - "loss": 2.7649, + "learning_rate": 1.2494372500728265e-05, + "loss": 2.7609, "step": 226600 }, { "epoch": 0.75, - "learning_rate": 1.2477572611772808e-05, - "loss": 2.7751, + "learning_rate": 1.2477821032281984e-05, + "loss": 2.7531, "step": 226700 }, { "epoch": 0.75, - "learning_rate": 1.2461021033745358e-05, - "loss": 2.7969, + "learning_rate": 1.2461269563835703e-05, + "loss": 2.7631, "step": 226800 }, { "epoch": 0.75, - "learning_rate": 1.2444469455717908e-05, - "loss": 2.7919, + "learning_rate": 1.2444718095389424e-05, + "loss": 2.7389, "step": 226900 }, { "epoch": 0.75, - "learning_rate": 1.242791787769046e-05, - "loss": 2.7713, + "learning_rate": 1.2428166626943143e-05, + "loss": 2.7543, "step": 227000 }, { "epoch": 0.75, - "learning_rate": 1.2411366299663011e-05, - "loss": 2.7709, + "learning_rate": 1.2411615158496864e-05, + "loss": 2.758, "step": 227100 }, { "epoch": 0.75, - "learning_rate": 1.239481472163556e-05, - "loss": 2.7906, + "learning_rate": 1.2395063690050583e-05, + "loss": 2.7611, "step": 227200 }, { "epoch": 0.75, - "learning_rate": 1.2378263143608112e-05, - "loss": 2.7685, + "learning_rate": 1.2378512221604302e-05, + "loss": 2.7658, "step": 227300 }, { "epoch": 0.75, - "learning_rate": 1.2361711565580664e-05, - "loss": 2.7867, + "learning_rate": 1.236196075315802e-05, + "loss": 2.7604, "step": 227400 }, { "epoch": 0.75, - "learning_rate": 1.2345159987553214e-05, - "loss": 2.7695, + "learning_rate": 1.234540928471174e-05, + "loss": 2.7575, "step": 227500 }, { "epoch": 0.75, - "learning_rate": 1.2328608409525764e-05, - "loss": 2.7857, + "learning_rate": 1.232885781626546e-05, + "loss": 2.7522, "step": 227600 }, { "epoch": 0.75, - "learning_rate": 1.2312056831498315e-05, - "loss": 2.7779, + "learning_rate": 1.231230634781918e-05, + "loss": 2.7691, "step": 227700 }, { "epoch": 0.75, - "learning_rate": 1.2295505253470867e-05, - "loss": 2.7753, + "learning_rate": 1.22957548793729e-05, + "loss": 2.7591, "step": 227800 }, { "epoch": 0.75, - "learning_rate": 1.2278953675443419e-05, - "loss": 2.7652, + "learning_rate": 1.2279203410926617e-05, + "loss": 2.765, "step": 227900 }, { "epoch": 0.75, - "learning_rate": 1.2262402097415967e-05, - "loss": 2.7779, + "learning_rate": 1.2262651942480336e-05, + "loss": 2.7542, "step": 228000 }, { "epoch": 0.76, - "learning_rate": 1.2245850519388518e-05, - "loss": 2.7801, + "learning_rate": 1.2246100474034057e-05, + "loss": 2.7567, "step": 228100 }, { "epoch": 0.76, - "learning_rate": 1.222929894136107e-05, - "loss": 2.7713, + "learning_rate": 1.2229549005587776e-05, + "loss": 2.7569, "step": 228200 }, { "epoch": 0.76, - "learning_rate": 1.2212747363333622e-05, - "loss": 2.7899, + "learning_rate": 1.2212997537141496e-05, + "loss": 2.7528, "step": 228300 }, { "epoch": 0.76, - "learning_rate": 1.2196195785306172e-05, - "loss": 2.7841, + "learning_rate": 1.2196446068695215e-05, + "loss": 2.7642, "step": 228400 }, { "epoch": 0.76, - "learning_rate": 1.2179644207278723e-05, - "loss": 2.7639, + "learning_rate": 1.2179894600248934e-05, + "loss": 2.7623, "step": 228500 }, { "epoch": 0.76, - "learning_rate": 1.2163092629251273e-05, - "loss": 2.7855, + "learning_rate": 1.2163343131802653e-05, + "loss": 2.7576, "step": 228600 }, { "epoch": 0.76, - "learning_rate": 1.2146541051223825e-05, - "loss": 2.7811, + "learning_rate": 1.2146791663356372e-05, + "loss": 2.7557, "step": 228700 }, { "epoch": 0.76, - "learning_rate": 1.2129989473196375e-05, - "loss": 2.7811, + "learning_rate": 1.2130240194910093e-05, + "loss": 2.7578, "step": 228800 }, { "epoch": 0.76, - "learning_rate": 1.2113437895168926e-05, - "loss": 2.7775, + "learning_rate": 1.2113688726463812e-05, + "loss": 2.7517, "step": 228900 }, { "epoch": 0.76, - "learning_rate": 1.2096886317141478e-05, - "loss": 2.7763, + "learning_rate": 1.2097137258017533e-05, + "loss": 2.7623, "step": 229000 }, { "epoch": 0.76, - "learning_rate": 1.2080334739114028e-05, - "loss": 2.7743, + "learning_rate": 1.2080585789571252e-05, + "loss": 2.7628, "step": 229100 }, { "epoch": 0.76, - "learning_rate": 1.2063783161086578e-05, - "loss": 2.7704, + "learning_rate": 1.206403432112497e-05, + "loss": 2.7588, "step": 229200 }, { "epoch": 0.76, - "learning_rate": 1.204723158305913e-05, - "loss": 2.7849, + "learning_rate": 1.204748285267869e-05, + "loss": 2.763, "step": 229300 }, { "epoch": 0.76, - "learning_rate": 1.203068000503168e-05, - "loss": 2.7874, + "learning_rate": 1.2030931384232409e-05, + "loss": 2.7596, "step": 229400 }, { "epoch": 0.76, - "learning_rate": 1.201412842700423e-05, - "loss": 2.7715, + "learning_rate": 1.201437991578613e-05, + "loss": 2.7613, "step": 229500 }, { "epoch": 0.76, - "learning_rate": 1.1997576848976782e-05, - "loss": 2.7735, + "learning_rate": 1.1997828447339848e-05, + "loss": 2.754, "step": 229600 }, { "epoch": 0.76, - "learning_rate": 1.1981025270949332e-05, - "loss": 2.7658, + "learning_rate": 1.1981276978893569e-05, + "loss": 2.7586, "step": 229700 }, { "epoch": 0.76, - "learning_rate": 1.1964473692921884e-05, - "loss": 2.7789, + "learning_rate": 1.1964725510447288e-05, + "loss": 2.7566, "step": 229800 }, { "epoch": 0.76, - "learning_rate": 1.1947922114894434e-05, - "loss": 2.7732, + "learning_rate": 1.1948174042001007e-05, + "loss": 2.7488, "step": 229900 }, { "epoch": 0.76, - "learning_rate": 1.1931370536866985e-05, - "loss": 2.7672, + "learning_rate": 1.1931622573554726e-05, + "loss": 2.7631, "step": 230000 }, { "epoch": 0.76, - "learning_rate": 1.1914818958839537e-05, - "loss": 2.7619, + "learning_rate": 1.1915071105108445e-05, + "loss": 2.7658, "step": 230100 }, { "epoch": 0.76, - "learning_rate": 1.1898267380812089e-05, - "loss": 2.7835, + "learning_rate": 1.1898519636662166e-05, + "loss": 2.755, "step": 230200 }, { "epoch": 0.76, - "learning_rate": 1.1881715802784638e-05, - "loss": 2.7663, + "learning_rate": 1.1881968168215885e-05, + "loss": 2.7556, "step": 230300 }, { "epoch": 0.76, - "learning_rate": 1.1865164224757188e-05, - "loss": 2.7645, + "learning_rate": 1.1865416699769605e-05, + "loss": 2.7565, "step": 230400 }, { "epoch": 0.76, - "learning_rate": 1.184861264672974e-05, - "loss": 2.7891, + "learning_rate": 1.1848865231323324e-05, + "loss": 2.7496, "step": 230500 }, { "epoch": 0.76, - "learning_rate": 1.1832061068702292e-05, - "loss": 2.7799, + "learning_rate": 1.1832313762877043e-05, + "loss": 2.7626, "step": 230600 }, { "epoch": 0.76, - "learning_rate": 1.1815509490674841e-05, - "loss": 2.7811, + "learning_rate": 1.1815762294430762e-05, + "loss": 2.7598, "step": 230700 }, { "epoch": 0.76, - "learning_rate": 1.1798957912647391e-05, - "loss": 2.7809, + "learning_rate": 1.1799210825984481e-05, + "loss": 2.7589, "step": 230800 }, { "epoch": 0.76, - "learning_rate": 1.1782406334619943e-05, - "loss": 2.7808, + "learning_rate": 1.1782659357538202e-05, + "loss": 2.751, "step": 230900 }, { "epoch": 0.76, - "learning_rate": 1.1765854756592495e-05, - "loss": 2.7797, + "learning_rate": 1.1766107889091921e-05, + "loss": 2.7669, "step": 231000 }, { "epoch": 0.77, - "learning_rate": 1.1749303178565044e-05, - "loss": 2.7936, + "learning_rate": 1.174955642064564e-05, + "loss": 2.7537, "step": 231100 }, { "epoch": 0.77, - "learning_rate": 1.1732751600537596e-05, - "loss": 2.7512, + "learning_rate": 1.173300495219936e-05, + "loss": 2.7612, "step": 231200 }, { "epoch": 0.77, - "learning_rate": 1.1716200022510148e-05, - "loss": 2.7703, + "learning_rate": 1.171645348375308e-05, + "loss": 2.7601, "step": 231300 }, { "epoch": 0.77, - "learning_rate": 1.1699648444482698e-05, - "loss": 2.7656, + "learning_rate": 1.1699902015306799e-05, + "loss": 2.7486, "step": 231400 }, { "epoch": 0.77, - "learning_rate": 1.1683096866455247e-05, - "loss": 2.7751, + "learning_rate": 1.1683350546860517e-05, + "loss": 2.7521, "step": 231500 }, { "epoch": 0.77, - "learning_rate": 1.1666545288427799e-05, - "loss": 2.7674, + "learning_rate": 1.1666799078414238e-05, + "loss": 2.7573, "step": 231600 }, { "epoch": 0.77, - "learning_rate": 1.164999371040035e-05, - "loss": 2.7884, + "learning_rate": 1.1650247609967957e-05, + "loss": 2.7527, "step": 231700 }, { "epoch": 0.77, - "learning_rate": 1.1633442132372902e-05, - "loss": 2.7763, + "learning_rate": 1.1633696141521676e-05, + "loss": 2.7562, "step": 231800 }, { "epoch": 0.77, - "learning_rate": 1.161689055434545e-05, - "loss": 2.7829, + "learning_rate": 1.1617144673075397e-05, + "loss": 2.7593, "step": 231900 }, { "epoch": 0.77, - "learning_rate": 1.1600338976318002e-05, - "loss": 2.777, + "learning_rate": 1.1600593204629114e-05, + "loss": 2.7537, "step": 232000 }, { "epoch": 0.77, - "learning_rate": 1.1583787398290554e-05, - "loss": 2.7672, + "learning_rate": 1.1584041736182835e-05, + "loss": 2.7641, "step": 232100 }, { "epoch": 0.77, - "learning_rate": 1.1567235820263105e-05, - "loss": 2.7757, + "learning_rate": 1.1567490267736554e-05, + "loss": 2.7569, "step": 232200 }, { "epoch": 0.77, - "learning_rate": 1.1550684242235655e-05, - "loss": 2.7595, + "learning_rate": 1.1550938799290273e-05, + "loss": 2.7614, "step": 232300 }, { "epoch": 0.77, - "learning_rate": 1.1534132664208207e-05, - "loss": 2.7848, + "learning_rate": 1.1534387330843993e-05, + "loss": 2.7532, "step": 232400 }, { "epoch": 0.77, - "learning_rate": 1.1517581086180757e-05, - "loss": 2.7708, + "learning_rate": 1.1517835862397712e-05, + "loss": 2.7478, "step": 232500 }, { "epoch": 0.77, - "learning_rate": 1.1501029508153308e-05, - "loss": 2.7843, + "learning_rate": 1.1501284393951433e-05, + "loss": 2.7649, "step": 232600 }, { "epoch": 0.77, - "learning_rate": 1.1484477930125858e-05, - "loss": 2.7719, + "learning_rate": 1.148473292550515e-05, + "loss": 2.7545, "step": 232700 }, { "epoch": 0.77, - "learning_rate": 1.146792635209841e-05, - "loss": 2.7573, + "learning_rate": 1.1468181457058871e-05, + "loss": 2.7577, "step": 232800 }, { "epoch": 0.77, - "learning_rate": 1.1451374774070961e-05, - "loss": 2.768, + "learning_rate": 1.145162998861259e-05, + "loss": 2.7467, "step": 232900 }, { "epoch": 0.77, - "learning_rate": 1.1434823196043511e-05, - "loss": 2.7725, + "learning_rate": 1.1435078520166309e-05, + "loss": 2.7622, "step": 233000 }, { "epoch": 0.77, - "learning_rate": 1.1418271618016061e-05, - "loss": 2.7836, + "learning_rate": 1.141852705172003e-05, + "loss": 2.7779, "step": 233100 }, { "epoch": 0.77, - "learning_rate": 1.1401720039988613e-05, - "loss": 2.7838, + "learning_rate": 1.1401975583273749e-05, + "loss": 2.7469, "step": 233200 }, { "epoch": 0.77, - "learning_rate": 1.1385168461961164e-05, - "loss": 2.7725, + "learning_rate": 1.1385424114827468e-05, + "loss": 2.7561, "step": 233300 }, { "epoch": 0.77, - "learning_rate": 1.1368616883933714e-05, - "loss": 2.7801, + "learning_rate": 1.1368872646381187e-05, + "loss": 2.7556, "step": 233400 }, { "epoch": 0.77, - "learning_rate": 1.1352065305906266e-05, - "loss": 2.775, + "learning_rate": 1.1352321177934907e-05, + "loss": 2.7612, "step": 233500 }, { "epoch": 0.77, - "learning_rate": 1.1335513727878816e-05, - "loss": 2.7912, + "learning_rate": 1.1335769709488626e-05, + "loss": 2.7486, "step": 233600 }, { "epoch": 0.77, - "learning_rate": 1.1318962149851367e-05, - "loss": 2.768, + "learning_rate": 1.1319218241042345e-05, + "loss": 2.7379, "step": 233700 }, { "epoch": 0.77, - "learning_rate": 1.1302410571823917e-05, - "loss": 2.7755, + "learning_rate": 1.1302666772596066e-05, + "loss": 2.7548, "step": 233800 }, { "epoch": 0.77, - "learning_rate": 1.1285858993796469e-05, - "loss": 2.7656, + "learning_rate": 1.1286115304149785e-05, + "loss": 2.7648, "step": 233900 }, { "epoch": 0.77, - "learning_rate": 1.126930741576902e-05, - "loss": 2.7597, + "learning_rate": 1.1269563835703504e-05, + "loss": 2.7504, "step": 234000 }, { "epoch": 0.77, - "learning_rate": 1.1252755837741572e-05, - "loss": 2.7706, + "learning_rate": 1.1253012367257223e-05, + "loss": 2.7582, "step": 234100 }, { "epoch": 0.78, - "learning_rate": 1.1236204259714122e-05, - "loss": 2.777, + "learning_rate": 1.1236460898810942e-05, + "loss": 2.7601, "step": 234200 }, { "epoch": 0.78, - "learning_rate": 1.1219652681686672e-05, - "loss": 2.7779, + "learning_rate": 1.1219909430364663e-05, + "loss": 2.7572, "step": 234300 }, { "epoch": 0.78, - "learning_rate": 1.1203101103659224e-05, - "loss": 2.7704, + "learning_rate": 1.1203357961918382e-05, + "loss": 2.7595, "step": 234400 }, { "epoch": 0.78, - "learning_rate": 1.1186549525631775e-05, - "loss": 2.7757, + "learning_rate": 1.1186806493472102e-05, + "loss": 2.7622, "step": 234500 }, { "epoch": 0.78, - "learning_rate": 1.1169997947604325e-05, - "loss": 2.7882, + "learning_rate": 1.1170255025025821e-05, + "loss": 2.7495, "step": 234600 }, { "epoch": 0.78, - "learning_rate": 1.1153446369576875e-05, - "loss": 2.7777, + "learning_rate": 1.115370355657954e-05, + "loss": 2.7545, "step": 234700 }, { "epoch": 0.78, - "learning_rate": 1.1136894791549427e-05, - "loss": 2.7668, + "learning_rate": 1.113715208813326e-05, + "loss": 2.7541, "step": 234800 }, { "epoch": 0.78, - "learning_rate": 1.1120343213521978e-05, - "loss": 2.7771, + "learning_rate": 1.1120600619686978e-05, + "loss": 2.7475, "step": 234900 }, { "epoch": 0.78, - "learning_rate": 1.1103791635494528e-05, - "loss": 2.7815, + "learning_rate": 1.1104049151240699e-05, + "loss": 2.7687, "step": 235000 }, { "epoch": 0.78, - "learning_rate": 1.108724005746708e-05, - "loss": 2.7717, + "learning_rate": 1.1087497682794418e-05, + "loss": 2.7556, "step": 235100 }, { "epoch": 0.78, - "learning_rate": 1.1070688479439631e-05, - "loss": 2.7744, + "learning_rate": 1.1070946214348138e-05, + "loss": 2.7571, "step": 235200 }, { "epoch": 0.78, - "learning_rate": 1.1054136901412181e-05, - "loss": 2.7715, + "learning_rate": 1.1054394745901857e-05, + "loss": 2.7557, "step": 235300 }, { "epoch": 0.78, - "learning_rate": 1.1037585323384731e-05, - "loss": 2.7729, + "learning_rate": 1.1037843277455576e-05, + "loss": 2.7581, "step": 235400 }, { "epoch": 0.78, - "learning_rate": 1.1021033745357283e-05, - "loss": 2.779, + "learning_rate": 1.1021291809009295e-05, + "loss": 2.7619, "step": 235500 }, { "epoch": 0.78, - "learning_rate": 1.1004482167329834e-05, - "loss": 2.7752, + "learning_rate": 1.1004740340563014e-05, + "loss": 2.7568, "step": 235600 }, { "epoch": 0.78, - "learning_rate": 1.0987930589302386e-05, - "loss": 2.7864, + "learning_rate": 1.0988188872116735e-05, + "loss": 2.7632, "step": 235700 }, { "epoch": 0.78, - "learning_rate": 1.0971379011274934e-05, - "loss": 2.77, + "learning_rate": 1.0971637403670454e-05, + "loss": 2.7567, "step": 235800 }, { "epoch": 0.78, - "learning_rate": 1.0954827433247486e-05, - "loss": 2.77, + "learning_rate": 1.0955085935224175e-05, + "loss": 2.7542, "step": 235900 }, { "epoch": 0.78, - "learning_rate": 1.0938275855220037e-05, - "loss": 2.7795, + "learning_rate": 1.0938534466777894e-05, + "loss": 2.7441, "step": 236000 }, { "epoch": 0.78, - "learning_rate": 1.0921724277192589e-05, - "loss": 2.7865, + "learning_rate": 1.0921982998331613e-05, + "loss": 2.7627, "step": 236100 }, { "epoch": 0.78, - "learning_rate": 1.0905172699165139e-05, - "loss": 2.7799, + "learning_rate": 1.0905431529885332e-05, + "loss": 2.7559, "step": 236200 }, { "epoch": 0.78, - "learning_rate": 1.088862112113769e-05, - "loss": 2.7632, + "learning_rate": 1.088888006143905e-05, + "loss": 2.7558, "step": 236300 }, { "epoch": 0.78, - "learning_rate": 1.087206954311024e-05, - "loss": 2.7746, + "learning_rate": 1.0872328592992771e-05, + "loss": 2.7548, "step": 236400 }, { "epoch": 0.78, - "learning_rate": 1.0855517965082792e-05, - "loss": 2.774, + "learning_rate": 1.085577712454649e-05, + "loss": 2.7463, "step": 236500 }, { "epoch": 0.78, - "learning_rate": 1.0838966387055342e-05, - "loss": 2.7748, + "learning_rate": 1.0839225656100211e-05, + "loss": 2.7541, "step": 236600 }, { "epoch": 0.78, - "learning_rate": 1.0822414809027893e-05, - "loss": 2.7667, + "learning_rate": 1.082267418765393e-05, + "loss": 2.7527, "step": 236700 }, { "epoch": 0.78, - "learning_rate": 1.0805863231000445e-05, - "loss": 2.7768, + "learning_rate": 1.0806122719207647e-05, + "loss": 2.7564, "step": 236800 }, { "epoch": 0.78, - "learning_rate": 1.0789311652972995e-05, - "loss": 2.7743, + "learning_rate": 1.0789571250761368e-05, + "loss": 2.7602, "step": 236900 }, { "epoch": 0.78, - "learning_rate": 1.0772760074945545e-05, - "loss": 2.7706, + "learning_rate": 1.0773019782315087e-05, + "loss": 2.7584, "step": 237000 }, { "epoch": 0.78, - "learning_rate": 1.0756208496918096e-05, - "loss": 2.7704, + "learning_rate": 1.0756468313868808e-05, + "loss": 2.7595, "step": 237100 }, { "epoch": 0.79, - "learning_rate": 1.0739656918890648e-05, - "loss": 2.7823, + "learning_rate": 1.0739916845422527e-05, + "loss": 2.7478, "step": 237200 }, { "epoch": 0.79, - "learning_rate": 1.0723105340863198e-05, - "loss": 2.7721, + "learning_rate": 1.0723365376976246e-05, + "loss": 2.7501, "step": 237300 }, { "epoch": 0.79, - "learning_rate": 1.070655376283575e-05, - "loss": 2.7644, + "learning_rate": 1.0706813908529966e-05, + "loss": 2.7441, "step": 237400 }, { "epoch": 0.79, - "learning_rate": 1.06900021848083e-05, - "loss": 2.7668, + "learning_rate": 1.0690262440083684e-05, + "loss": 2.7687, "step": 237500 }, { "epoch": 0.79, - "learning_rate": 1.0673450606780851e-05, - "loss": 2.7836, + "learning_rate": 1.0673710971637404e-05, + "loss": 2.7644, "step": 237600 }, { "epoch": 0.79, - "learning_rate": 1.0656899028753403e-05, - "loss": 2.7539, + "learning_rate": 1.0657159503191123e-05, + "loss": 2.7472, "step": 237700 }, { "epoch": 0.79, - "learning_rate": 1.0640347450725953e-05, - "loss": 2.7699, + "learning_rate": 1.0640608034744844e-05, + "loss": 2.7465, "step": 237800 }, { "epoch": 0.79, - "learning_rate": 1.0623795872698504e-05, - "loss": 2.7743, + "learning_rate": 1.0624056566298563e-05, + "loss": 2.7558, "step": 237900 }, { "epoch": 0.79, - "learning_rate": 1.0607244294671056e-05, - "loss": 2.7742, + "learning_rate": 1.0607505097852282e-05, + "loss": 2.7526, "step": 238000 }, { "epoch": 0.79, - "learning_rate": 1.0590692716643606e-05, - "loss": 2.7647, + "learning_rate": 1.0590953629406e-05, + "loss": 2.7455, "step": 238100 }, { "epoch": 0.79, - "learning_rate": 1.0574141138616156e-05, - "loss": 2.7881, + "learning_rate": 1.057440216095972e-05, + "loss": 2.7548, "step": 238200 }, { "epoch": 0.79, - "learning_rate": 1.0557589560588707e-05, - "loss": 2.7587, + "learning_rate": 1.055785069251344e-05, + "loss": 2.7483, "step": 238300 }, { "epoch": 0.79, - "learning_rate": 1.0541037982561259e-05, - "loss": 2.7577, + "learning_rate": 1.054129922406716e-05, + "loss": 2.7454, "step": 238400 }, { "epoch": 0.79, - "learning_rate": 1.0524486404533809e-05, - "loss": 2.7665, + "learning_rate": 1.052474775562088e-05, + "loss": 2.7615, "step": 238500 }, { "epoch": 0.79, - "learning_rate": 1.0507934826506359e-05, - "loss": 2.769, + "learning_rate": 1.0508196287174599e-05, + "loss": 2.765, "step": 238600 }, { "epoch": 0.79, - "learning_rate": 1.049138324847891e-05, - "loss": 2.767, + "learning_rate": 1.0491644818728318e-05, + "loss": 2.7463, "step": 238700 }, { "epoch": 0.79, - "learning_rate": 1.0474831670451462e-05, - "loss": 2.7701, + "learning_rate": 1.0475093350282037e-05, + "loss": 2.7658, "step": 238800 }, { "epoch": 0.79, - "learning_rate": 1.0458280092424012e-05, - "loss": 2.774, + "learning_rate": 1.0458541881835756e-05, + "loss": 2.7769, "step": 238900 }, { "epoch": 0.79, - "learning_rate": 1.0441728514396563e-05, - "loss": 2.7849, + "learning_rate": 1.0441990413389477e-05, + "loss": 2.7475, "step": 239000 }, { "epoch": 0.79, - "learning_rate": 1.0425176936369115e-05, - "loss": 2.7741, + "learning_rate": 1.0425438944943196e-05, + "loss": 2.7513, "step": 239100 }, { "epoch": 0.79, - "learning_rate": 1.0408625358341665e-05, - "loss": 2.768, + "learning_rate": 1.0408887476496915e-05, + "loss": 2.752, "step": 239200 }, { "epoch": 0.79, - "learning_rate": 1.0392073780314215e-05, - "loss": 2.7848, + "learning_rate": 1.0392336008050635e-05, + "loss": 2.7589, "step": 239300 }, { "epoch": 0.79, - "learning_rate": 1.0375522202286766e-05, - "loss": 2.7722, + "learning_rate": 1.0375784539604354e-05, + "loss": 2.7562, "step": 239400 }, { "epoch": 0.79, - "learning_rate": 1.0358970624259318e-05, - "loss": 2.7782, + "learning_rate": 1.0359233071158073e-05, + "loss": 2.7609, "step": 239500 }, { "epoch": 0.79, - "learning_rate": 1.034241904623187e-05, - "loss": 2.7746, + "learning_rate": 1.0342681602711792e-05, + "loss": 2.7548, "step": 239600 }, { "epoch": 0.79, - "learning_rate": 1.0325867468204418e-05, - "loss": 2.7652, + "learning_rate": 1.0326130134265513e-05, + "loss": 2.7456, "step": 239700 }, { "epoch": 0.79, - "learning_rate": 1.030931589017697e-05, - "loss": 2.7736, + "learning_rate": 1.0309578665819232e-05, + "loss": 2.7518, "step": 239800 }, { "epoch": 0.79, - "learning_rate": 1.0292764312149521e-05, - "loss": 2.7632, + "learning_rate": 1.0293027197372951e-05, + "loss": 2.759, "step": 239900 }, { "epoch": 0.79, - "learning_rate": 1.0276212734122072e-05, - "loss": 2.7599, + "learning_rate": 1.0276475728926672e-05, + "loss": 2.7448, "step": 240000 }, { "epoch": 0.79, - "learning_rate": 1.0259661156094622e-05, - "loss": 2.7729, + "learning_rate": 1.025992426048039e-05, + "loss": 2.7573, "step": 240100 }, { "epoch": 0.8, - "learning_rate": 1.0243109578067174e-05, - "loss": 2.7692, + "learning_rate": 1.024337279203411e-05, + "loss": 2.7477, "step": 240200 }, { "epoch": 0.8, - "learning_rate": 1.0226558000039724e-05, - "loss": 2.7622, + "learning_rate": 1.0226821323587829e-05, + "loss": 2.7491, "step": 240300 }, { "epoch": 0.8, - "learning_rate": 1.0210006422012275e-05, - "loss": 2.7777, + "learning_rate": 1.021026985514155e-05, + "loss": 2.7671, "step": 240400 }, { "epoch": 0.8, - "learning_rate": 1.0193454843984825e-05, - "loss": 2.7695, + "learning_rate": 1.0193718386695268e-05, + "loss": 2.7504, "step": 240500 }, { "epoch": 0.8, - "learning_rate": 1.0176903265957377e-05, - "loss": 2.7788, + "learning_rate": 1.0177166918248987e-05, + "loss": 2.7602, "step": 240600 }, { "epoch": 0.8, - "learning_rate": 1.0160351687929929e-05, - "loss": 2.7727, + "learning_rate": 1.0160615449802708e-05, + "loss": 2.7566, "step": 240700 }, { "epoch": 0.8, - "learning_rate": 1.0143800109902479e-05, - "loss": 2.7712, + "learning_rate": 1.0144063981356427e-05, + "loss": 2.7492, "step": 240800 }, { "epoch": 0.8, - "learning_rate": 1.0127248531875028e-05, - "loss": 2.7676, + "learning_rate": 1.0127512512910146e-05, + "loss": 2.7732, "step": 240900 }, { "epoch": 0.8, - "learning_rate": 1.011069695384758e-05, - "loss": 2.7716, + "learning_rate": 1.0110961044463865e-05, + "loss": 2.7491, "step": 241000 }, { "epoch": 0.8, - "learning_rate": 1.0094145375820132e-05, - "loss": 2.7742, + "learning_rate": 1.0094409576017584e-05, + "loss": 2.7615, "step": 241100 }, { "epoch": 0.8, - "learning_rate": 1.0077593797792683e-05, - "loss": 2.7731, + "learning_rate": 1.0077858107571305e-05, + "loss": 2.7608, "step": 241200 }, { "epoch": 0.8, - "learning_rate": 1.0061042219765233e-05, - "loss": 2.7734, + "learning_rate": 1.0061306639125024e-05, + "loss": 2.752, "step": 241300 }, { "epoch": 0.8, - "learning_rate": 1.0044490641737783e-05, - "loss": 2.7736, + "learning_rate": 1.0044755170678744e-05, + "loss": 2.7352, "step": 241400 }, { "epoch": 0.8, - "learning_rate": 1.0027939063710335e-05, - "loss": 2.7665, + "learning_rate": 1.0028203702232463e-05, + "loss": 2.7488, "step": 241500 }, { "epoch": 0.8, - "learning_rate": 1.0011387485682886e-05, - "loss": 2.7749, + "learning_rate": 1.0011652233786182e-05, + "loss": 2.7588, "step": 241600 }, { "epoch": 0.8, - "learning_rate": 9.994835907655436e-06, - "loss": 2.7757, + "learning_rate": 9.995100765339901e-06, + "loss": 2.7578, "step": 241700 }, { "epoch": 0.8, - "learning_rate": 9.978284329627988e-06, - "loss": 2.7706, + "learning_rate": 9.97854929689362e-06, + "loss": 2.7627, "step": 241800 }, { "epoch": 0.8, - "learning_rate": 9.96173275160054e-06, - "loss": 2.7598, + "learning_rate": 9.96199782844734e-06, + "loss": 2.7565, "step": 241900 }, { "epoch": 0.8, - "learning_rate": 9.94518117357309e-06, - "loss": 2.7707, + "learning_rate": 9.94544636000106e-06, + "loss": 2.7434, "step": 242000 }, { "epoch": 0.8, - "learning_rate": 9.928629595545639e-06, - "loss": 2.7636, + "learning_rate": 9.92889489155478e-06, + "loss": 2.7456, "step": 242100 }, { "epoch": 0.8, - "learning_rate": 9.91207801751819e-06, - "loss": 2.7739, + "learning_rate": 9.912343423108498e-06, + "loss": 2.7598, "step": 242200 }, { "epoch": 0.8, - "learning_rate": 9.895526439490742e-06, - "loss": 2.7646, + "learning_rate": 9.895791954662217e-06, + "loss": 2.7496, "step": 242300 }, { "epoch": 0.8, - "learning_rate": 9.878974861463292e-06, - "loss": 2.7642, + "learning_rate": 9.879240486215937e-06, + "loss": 2.7579, "step": 242400 }, { "epoch": 0.8, - "learning_rate": 9.862423283435842e-06, - "loss": 2.7718, + "learning_rate": 9.862689017769656e-06, + "loss": 2.7486, "step": 242500 }, { "epoch": 0.8, - "learning_rate": 9.845871705408394e-06, - "loss": 2.7773, + "learning_rate": 9.846137549323377e-06, + "loss": 2.7576, "step": 242600 }, { "epoch": 0.8, - "learning_rate": 9.829320127380945e-06, - "loss": 2.772, + "learning_rate": 9.829586080877096e-06, + "loss": 2.7585, "step": 242700 }, { "epoch": 0.8, - "learning_rate": 9.812768549353495e-06, - "loss": 2.777, + "learning_rate": 9.813034612430817e-06, + "loss": 2.7527, "step": 242800 }, { "epoch": 0.8, - "learning_rate": 9.796216971326047e-06, - "loss": 2.778, + "learning_rate": 9.796483143984534e-06, + "loss": 2.7563, "step": 242900 }, { "epoch": 0.8, - "learning_rate": 9.779665393298598e-06, - "loss": 2.7682, + "learning_rate": 9.779931675538253e-06, + "loss": 2.7464, "step": 243000 }, { "epoch": 0.8, - "learning_rate": 9.763113815271148e-06, - "loss": 2.7699, + "learning_rate": 9.763380207091974e-06, + "loss": 2.7391, "step": 243100 }, { "epoch": 0.81, - "learning_rate": 9.746562237243698e-06, - "loss": 2.7716, + "learning_rate": 9.746828738645693e-06, + "loss": 2.7617, "step": 243200 }, { "epoch": 0.81, - "learning_rate": 9.73001065921625e-06, - "loss": 2.764, + "learning_rate": 9.730277270199413e-06, + "loss": 2.7574, "step": 243300 }, { "epoch": 0.81, - "learning_rate": 9.713459081188801e-06, - "loss": 2.7613, + "learning_rate": 9.713725801753132e-06, + "loss": 2.758, "step": 243400 }, { "epoch": 0.81, - "learning_rate": 9.696907503161353e-06, - "loss": 2.7611, + "learning_rate": 9.697174333306851e-06, + "loss": 2.758, "step": 243500 }, { "epoch": 0.81, - "learning_rate": 9.680355925133901e-06, - "loss": 2.7699, + "learning_rate": 9.68062286486057e-06, + "loss": 2.7697, "step": 243600 }, { "epoch": 0.81, - "learning_rate": 9.663804347106453e-06, - "loss": 2.7678, + "learning_rate": 9.66407139641429e-06, + "loss": 2.7418, "step": 243700 }, { "epoch": 0.81, - "learning_rate": 9.647252769079004e-06, - "loss": 2.7683, + "learning_rate": 9.64751992796801e-06, + "loss": 2.7591, "step": 243800 }, { "epoch": 0.81, - "learning_rate": 9.630701191051556e-06, - "loss": 2.7796, + "learning_rate": 9.630968459521729e-06, + "loss": 2.7602, "step": 243900 }, { "epoch": 0.81, - "learning_rate": 9.614149613024106e-06, - "loss": 2.7683, + "learning_rate": 9.61441699107545e-06, + "loss": 2.7543, "step": 244000 }, { "epoch": 0.81, - "learning_rate": 9.597598034996658e-06, - "loss": 2.7754, + "learning_rate": 9.597865522629169e-06, + "loss": 2.7484, "step": 244100 }, { "epoch": 0.81, - "learning_rate": 9.581046456969208e-06, - "loss": 2.7735, + "learning_rate": 9.581314054182888e-06, + "loss": 2.7543, "step": 244200 }, { "epoch": 0.81, - "learning_rate": 9.564494878941759e-06, - "loss": 2.76, + "learning_rate": 9.564762585736607e-06, + "loss": 2.7647, "step": 244300 }, { "epoch": 0.81, - "learning_rate": 9.547943300914309e-06, - "loss": 2.7785, + "learning_rate": 9.548211117290326e-06, + "loss": 2.7472, "step": 244400 }, { "epoch": 0.81, - "learning_rate": 9.53139172288686e-06, - "loss": 2.7514, + "learning_rate": 9.531659648844046e-06, + "loss": 2.7552, "step": 244500 }, { "epoch": 0.81, - "learning_rate": 9.514840144859412e-06, - "loss": 2.7772, + "learning_rate": 9.515108180397765e-06, + "loss": 2.7469, "step": 244600 }, { "epoch": 0.81, - "learning_rate": 9.498288566831962e-06, - "loss": 2.7686, + "learning_rate": 9.498556711951486e-06, + "loss": 2.7635, "step": 244700 }, { "epoch": 0.81, - "learning_rate": 9.481736988804512e-06, - "loss": 2.7667, + "learning_rate": 9.482005243505205e-06, + "loss": 2.764, "step": 244800 }, { "epoch": 0.81, - "learning_rate": 9.465185410777064e-06, - "loss": 2.7799, + "learning_rate": 9.465453775058924e-06, + "loss": 2.7551, "step": 244900 }, { "epoch": 0.81, - "learning_rate": 9.448633832749615e-06, - "loss": 2.7817, + "learning_rate": 9.448902306612643e-06, + "loss": 2.7641, "step": 245000 }, { "epoch": 0.81, - "learning_rate": 9.432082254722167e-06, - "loss": 2.7622, + "learning_rate": 9.432350838166362e-06, + "loss": 2.7573, "step": 245100 }, { "epoch": 0.81, - "learning_rate": 9.415530676694717e-06, - "loss": 2.7542, + "learning_rate": 9.415799369720082e-06, + "loss": 2.7588, "step": 245200 }, { "epoch": 0.81, - "learning_rate": 9.398979098667267e-06, - "loss": 2.7681, + "learning_rate": 9.399247901273801e-06, + "loss": 2.7564, "step": 245300 }, { "epoch": 0.81, - "learning_rate": 9.382427520639818e-06, - "loss": 2.7886, + "learning_rate": 9.382696432827522e-06, + "loss": 2.7435, "step": 245400 }, { "epoch": 0.81, - "learning_rate": 9.36587594261237e-06, - "loss": 2.7677, + "learning_rate": 9.366144964381241e-06, + "loss": 2.7468, "step": 245500 }, { "epoch": 0.81, - "learning_rate": 9.34932436458492e-06, - "loss": 2.7759, + "learning_rate": 9.34959349593496e-06, + "loss": 2.7513, "step": 245600 }, { "epoch": 0.81, - "learning_rate": 9.332772786557471e-06, - "loss": 2.7708, + "learning_rate": 9.333042027488679e-06, + "loss": 2.7653, "step": 245700 }, { "epoch": 0.81, - "learning_rate": 9.316221208530021e-06, - "loss": 2.7786, + "learning_rate": 9.316490559042398e-06, + "loss": 2.774, "step": 245800 }, { "epoch": 0.81, - "learning_rate": 9.299669630502573e-06, - "loss": 2.78, + "learning_rate": 9.299939090596119e-06, + "loss": 2.7381, "step": 245900 }, { "epoch": 0.81, - "learning_rate": 9.283118052475123e-06, - "loss": 2.775, + "learning_rate": 9.283387622149838e-06, + "loss": 2.7602, "step": 246000 }, { "epoch": 0.81, - "learning_rate": 9.266566474447674e-06, - "loss": 2.7716, + "learning_rate": 9.266836153703557e-06, + "loss": 2.7598, "step": 246100 }, { "epoch": 0.81, - "learning_rate": 9.250014896420226e-06, - "loss": 2.7837, + "learning_rate": 9.250284685257277e-06, + "loss": 2.7557, "step": 246200 }, { "epoch": 0.82, - "learning_rate": 9.233463318392776e-06, - "loss": 2.7798, + "learning_rate": 9.233733216810996e-06, + "loss": 2.7556, "step": 246300 }, { "epoch": 0.82, - "learning_rate": 9.216911740365326e-06, - "loss": 2.7652, + "learning_rate": 9.217181748364715e-06, + "loss": 2.7567, "step": 246400 }, { "epoch": 0.82, - "learning_rate": 9.200360162337877e-06, - "loss": 2.7571, + "learning_rate": 9.200630279918434e-06, + "loss": 2.7707, "step": 246500 }, { "epoch": 0.82, - "learning_rate": 9.183808584310429e-06, - "loss": 2.7657, + "learning_rate": 9.184078811472155e-06, + "loss": 2.753, "step": 246600 }, { "epoch": 0.82, - "learning_rate": 9.167257006282979e-06, - "loss": 2.7653, + "learning_rate": 9.167527343025874e-06, + "loss": 2.7478, "step": 246700 }, { "epoch": 0.82, - "learning_rate": 9.15070542825553e-06, - "loss": 2.7713, + "learning_rate": 9.150975874579593e-06, + "loss": 2.7509, "step": 246800 }, { "epoch": 0.82, - "learning_rate": 9.134153850228082e-06, - "loss": 2.7716, + "learning_rate": 9.134424406133314e-06, + "loss": 2.7545, "step": 246900 }, { "epoch": 0.82, - "learning_rate": 9.117602272200632e-06, - "loss": 2.7716, + "learning_rate": 9.117872937687031e-06, + "loss": 2.7644, "step": 247000 }, { "epoch": 0.82, - "learning_rate": 9.101050694173182e-06, - "loss": 2.7649, + "learning_rate": 9.101321469240752e-06, + "loss": 2.757, "step": 247100 }, { "epoch": 0.82, - "learning_rate": 9.084499116145733e-06, - "loss": 2.7717, + "learning_rate": 9.08477000079447e-06, + "loss": 2.76, "step": 247200 }, { "epoch": 0.82, - "learning_rate": 9.067947538118285e-06, - "loss": 2.771, + "learning_rate": 9.06821853234819e-06, + "loss": 2.7464, "step": 247300 }, { "epoch": 0.82, - "learning_rate": 9.051395960090837e-06, - "loss": 2.7733, + "learning_rate": 9.05166706390191e-06, + "loss": 2.7534, "step": 247400 }, { "epoch": 0.82, - "learning_rate": 9.034844382063385e-06, - "loss": 2.7483, + "learning_rate": 9.03511559545563e-06, + "loss": 2.7672, "step": 247500 }, { "epoch": 0.82, - "learning_rate": 9.018292804035936e-06, - "loss": 2.7739, + "learning_rate": 9.018564127009348e-06, + "loss": 2.7423, "step": 247600 }, { "epoch": 0.82, - "learning_rate": 9.001741226008488e-06, - "loss": 2.7631, + "learning_rate": 9.002012658563067e-06, + "loss": 2.753, "step": 247700 }, { "epoch": 0.82, - "learning_rate": 8.98518964798104e-06, - "loss": 2.7699, + "learning_rate": 8.985461190116788e-06, + "loss": 2.7415, "step": 247800 }, { "epoch": 0.82, - "learning_rate": 8.96863806995359e-06, - "loss": 2.7713, + "learning_rate": 8.968909721670507e-06, + "loss": 2.749, "step": 247900 }, { "epoch": 0.82, - "learning_rate": 8.952086491926141e-06, - "loss": 2.7621, + "learning_rate": 8.952358253224226e-06, + "loss": 2.7373, "step": 248000 }, { "epoch": 0.82, - "learning_rate": 8.935534913898691e-06, - "loss": 2.7698, + "learning_rate": 8.935806784777947e-06, + "loss": 2.75, "step": 248100 }, { "epoch": 0.82, - "learning_rate": 8.918983335871243e-06, - "loss": 2.7497, + "learning_rate": 8.919255316331665e-06, + "loss": 2.7545, "step": 248200 }, { "epoch": 0.82, - "learning_rate": 8.902431757843793e-06, - "loss": 2.7679, + "learning_rate": 8.902703847885384e-06, + "loss": 2.7428, "step": 248300 }, { "epoch": 0.82, - "learning_rate": 8.885880179816344e-06, - "loss": 2.7717, + "learning_rate": 8.886152379439103e-06, + "loss": 2.7619, "step": 248400 }, { "epoch": 0.82, - "learning_rate": 8.869328601788896e-06, - "loss": 2.7655, + "learning_rate": 8.869600910992824e-06, + "loss": 2.7557, "step": 248500 }, { "epoch": 0.82, - "learning_rate": 8.852777023761446e-06, - "loss": 2.7793, + "learning_rate": 8.853049442546543e-06, + "loss": 2.7511, "step": 248600 }, { "epoch": 0.82, - "learning_rate": 8.836225445733996e-06, - "loss": 2.7746, + "learning_rate": 8.836497974100262e-06, + "loss": 2.7657, "step": 248700 }, { "epoch": 0.82, - "learning_rate": 8.819673867706547e-06, - "loss": 2.7594, + "learning_rate": 8.819946505653983e-06, + "loss": 2.7629, "step": 248800 }, { "epoch": 0.82, - "learning_rate": 8.803122289679099e-06, - "loss": 2.7616, + "learning_rate": 8.803395037207702e-06, + "loss": 2.7567, "step": 248900 }, { "epoch": 0.82, - "learning_rate": 8.78657071165165e-06, - "loss": 2.7787, + "learning_rate": 8.78684356876142e-06, + "loss": 2.7626, "step": 249000 }, { "epoch": 0.82, - "learning_rate": 8.7700191336242e-06, - "loss": 2.7784, + "learning_rate": 8.77029210031514e-06, + "loss": 2.7587, "step": 249100 }, { "epoch": 0.82, - "learning_rate": 8.75346755559675e-06, - "loss": 2.7709, + "learning_rate": 8.753740631868859e-06, + "loss": 2.7437, "step": 249200 }, { "epoch": 0.83, - "learning_rate": 8.736915977569302e-06, - "loss": 2.7634, + "learning_rate": 8.73718916342258e-06, + "loss": 2.7416, "step": 249300 }, { "epoch": 0.83, - "learning_rate": 8.720364399541853e-06, - "loss": 2.7635, + "learning_rate": 8.720637694976298e-06, + "loss": 2.7517, "step": 249400 }, { "epoch": 0.83, - "learning_rate": 8.703812821514403e-06, - "loss": 2.7798, + "learning_rate": 8.704086226530019e-06, + "loss": 2.7495, "step": 249500 }, { "epoch": 0.83, - "learning_rate": 8.687261243486955e-06, - "loss": 2.7722, + "learning_rate": 8.687534758083738e-06, + "loss": 2.7541, "step": 249600 }, { "epoch": 0.83, - "learning_rate": 8.670709665459505e-06, - "loss": 2.773, + "learning_rate": 8.670983289637457e-06, + "loss": 2.7381, "step": 249700 }, { "epoch": 0.83, - "learning_rate": 8.654158087432056e-06, - "loss": 2.7844, + "learning_rate": 8.654431821191176e-06, + "loss": 2.7364, "step": 249800 }, { "epoch": 0.83, - "learning_rate": 8.637606509404606e-06, - "loss": 2.7683, + "learning_rate": 8.637880352744895e-06, + "loss": 2.7609, "step": 249900 }, { "epoch": 0.83, - "learning_rate": 8.621054931377158e-06, - "loss": 2.783, + "learning_rate": 8.621328884298616e-06, + "loss": 2.7481, "step": 250000 }, { "epoch": 0.83, - "learning_rate": 8.60450335334971e-06, - "loss": 2.7504, + "learning_rate": 8.604777415852335e-06, + "loss": 2.7681, "step": 250100 }, { "epoch": 0.83, - "learning_rate": 8.58795177532226e-06, - "loss": 2.7681, + "learning_rate": 8.588225947406055e-06, + "loss": 2.7547, "step": 250200 }, { "epoch": 0.83, - "learning_rate": 8.57140019729481e-06, - "loss": 2.7513, + "learning_rate": 8.571674478959774e-06, + "loss": 2.7478, "step": 250300 }, { "epoch": 0.83, - "learning_rate": 8.554848619267361e-06, - "loss": 2.7642, + "learning_rate": 8.555123010513493e-06, + "loss": 2.7537, "step": 250400 }, { "epoch": 0.83, - "learning_rate": 8.538297041239913e-06, - "loss": 2.781, + "learning_rate": 8.538571542067212e-06, + "loss": 2.7402, "step": 250500 }, { "epoch": 0.83, - "learning_rate": 8.521745463212462e-06, - "loss": 2.7735, + "learning_rate": 8.522020073620931e-06, + "loss": 2.749, "step": 250600 }, { "epoch": 0.83, - "learning_rate": 8.505193885185014e-06, - "loss": 2.77, + "learning_rate": 8.505468605174652e-06, + "loss": 2.7618, "step": 250700 }, { "epoch": 0.83, - "learning_rate": 8.488642307157566e-06, - "loss": 2.7528, + "learning_rate": 8.488917136728371e-06, + "loss": 2.7421, "step": 250800 }, { "epoch": 0.83, - "learning_rate": 8.472090729130116e-06, - "loss": 2.7725, + "learning_rate": 8.472365668282092e-06, + "loss": 2.7597, "step": 250900 }, { "epoch": 0.83, - "learning_rate": 8.455539151102665e-06, - "loss": 2.7716, + "learning_rate": 8.45581419983581e-06, + "loss": 2.7598, "step": 251000 }, { "epoch": 0.83, - "learning_rate": 8.438987573075217e-06, - "loss": 2.7669, + "learning_rate": 8.439262731389528e-06, + "loss": 2.7549, "step": 251100 }, { "epoch": 0.83, - "learning_rate": 8.422435995047769e-06, - "loss": 2.7667, + "learning_rate": 8.422711262943249e-06, + "loss": 2.7564, "step": 251200 }, { "epoch": 0.83, - "learning_rate": 8.40588441702032e-06, - "loss": 2.7791, + "learning_rate": 8.406159794496967e-06, + "loss": 2.7566, "step": 251300 }, { "epoch": 0.83, - "learning_rate": 8.38933283899287e-06, - "loss": 2.7687, + "learning_rate": 8.389608326050688e-06, + "loss": 2.7518, "step": 251400 }, { "epoch": 0.83, - "learning_rate": 8.37278126096542e-06, - "loss": 2.7772, + "learning_rate": 8.373056857604407e-06, + "loss": 2.7604, "step": 251500 }, { "epoch": 0.83, - "learning_rate": 8.356229682937972e-06, - "loss": 2.7709, + "learning_rate": 8.356505389158128e-06, + "loss": 2.7532, "step": 251600 }, { "epoch": 0.83, - "learning_rate": 8.339678104910523e-06, - "loss": 2.7825, + "learning_rate": 8.339953920711847e-06, + "loss": 2.7611, "step": 251700 }, { "epoch": 0.83, - "learning_rate": 8.323126526883073e-06, - "loss": 2.7673, + "learning_rate": 8.323402452265564e-06, + "loss": 2.7478, "step": 251800 }, { "epoch": 0.83, - "learning_rate": 8.306574948855625e-06, - "loss": 2.7696, + "learning_rate": 8.306850983819285e-06, + "loss": 2.7529, "step": 251900 }, { "epoch": 0.83, - "learning_rate": 8.290023370828175e-06, - "loss": 2.7569, + "learning_rate": 8.290299515373004e-06, + "loss": 2.7382, "step": 252000 }, { "epoch": 0.83, - "learning_rate": 8.273471792800726e-06, - "loss": 2.7712, + "learning_rate": 8.273748046926724e-06, + "loss": 2.7683, "step": 252100 }, { "epoch": 0.83, - "learning_rate": 8.256920214773276e-06, - "loss": 2.7655, + "learning_rate": 8.257196578480443e-06, + "loss": 2.7738, "step": 252200 }, { "epoch": 0.84, - "learning_rate": 8.240368636745828e-06, - "loss": 2.7739, + "learning_rate": 8.240645110034162e-06, + "loss": 2.7573, "step": 252300 }, { "epoch": 0.84, - "learning_rate": 8.22381705871838e-06, - "loss": 2.7635, + "learning_rate": 8.224093641587881e-06, + "loss": 2.7543, "step": 252400 }, { "epoch": 0.84, - "learning_rate": 8.20726548069093e-06, - "loss": 2.7692, + "learning_rate": 8.2075421731416e-06, + "loss": 2.7482, "step": 252500 }, { "epoch": 0.84, - "learning_rate": 8.19071390266348e-06, - "loss": 2.7616, + "learning_rate": 8.190990704695321e-06, + "loss": 2.7458, "step": 252600 }, { "epoch": 0.84, - "learning_rate": 8.17416232463603e-06, - "loss": 2.7583, + "learning_rate": 8.17443923624904e-06, + "loss": 2.764, "step": 252700 }, { "epoch": 0.84, - "learning_rate": 8.157610746608582e-06, - "loss": 2.7871, + "learning_rate": 8.15788776780276e-06, + "loss": 2.7482, "step": 252800 }, { "epoch": 0.84, - "learning_rate": 8.141059168581134e-06, - "loss": 2.7719, + "learning_rate": 8.14133629935648e-06, + "loss": 2.7414, "step": 252900 }, { "epoch": 0.84, - "learning_rate": 8.124507590553684e-06, - "loss": 2.766, + "learning_rate": 8.124784830910199e-06, + "loss": 2.7455, "step": 253000 }, { "epoch": 0.84, - "learning_rate": 8.107956012526234e-06, - "loss": 2.7839, + "learning_rate": 8.108233362463918e-06, + "loss": 2.7433, "step": 253100 }, { "epoch": 0.84, - "learning_rate": 8.091404434498785e-06, - "loss": 2.7757, + "learning_rate": 8.091681894017637e-06, + "loss": 2.7544, "step": 253200 }, { "epoch": 0.84, - "learning_rate": 8.074852856471337e-06, - "loss": 2.7773, + "learning_rate": 8.075130425571357e-06, + "loss": 2.752, "step": 253300 }, { "epoch": 0.84, - "learning_rate": 8.058301278443887e-06, - "loss": 2.7651, + "learning_rate": 8.058578957125076e-06, + "loss": 2.7592, "step": 253400 }, { "epoch": 0.84, - "learning_rate": 8.041749700416439e-06, - "loss": 2.7656, + "learning_rate": 8.042027488678797e-06, + "loss": 2.7584, "step": 253500 }, { "epoch": 0.84, - "learning_rate": 8.025198122388988e-06, - "loss": 2.7645, + "learning_rate": 8.025476020232516e-06, + "loss": 2.7561, "step": 253600 }, { "epoch": 0.84, - "learning_rate": 8.00864654436154e-06, - "loss": 2.7687, + "learning_rate": 8.008924551786235e-06, + "loss": 2.7424, "step": 253700 }, { "epoch": 0.84, - "learning_rate": 7.99209496633409e-06, - "loss": 2.7715, + "learning_rate": 7.992373083339954e-06, + "loss": 2.7556, "step": 253800 }, { "epoch": 0.84, - "learning_rate": 7.975543388306642e-06, - "loss": 2.7576, + "learning_rate": 7.975821614893673e-06, + "loss": 2.7459, "step": 253900 }, { "epoch": 0.84, - "learning_rate": 7.958991810279193e-06, - "loss": 2.7905, + "learning_rate": 7.959270146447394e-06, + "loss": 2.7412, "step": 254000 }, { "epoch": 0.84, - "learning_rate": 7.942440232251743e-06, - "loss": 2.7582, + "learning_rate": 7.942718678001113e-06, + "loss": 2.7453, "step": 254100 }, { "epoch": 0.84, - "learning_rate": 7.925888654224293e-06, - "loss": 2.7566, + "learning_rate": 7.926167209554832e-06, + "loss": 2.767, "step": 254200 }, { "epoch": 0.84, - "learning_rate": 7.909337076196845e-06, - "loss": 2.7845, + "learning_rate": 7.909615741108552e-06, + "loss": 2.7336, "step": 254300 }, { "epoch": 0.84, - "learning_rate": 7.892785498169396e-06, - "loss": 2.7775, + "learning_rate": 7.893064272662271e-06, + "loss": 2.7463, "step": 254400 }, { "epoch": 0.84, - "learning_rate": 7.876233920141946e-06, - "loss": 2.7544, + "learning_rate": 7.87651280421599e-06, + "loss": 2.7542, "step": 254500 }, { "epoch": 0.84, - "learning_rate": 7.859682342114498e-06, - "loss": 2.753, + "learning_rate": 7.85996133576971e-06, + "loss": 2.7599, "step": 254600 }, { "epoch": 0.84, - "learning_rate": 7.84313076408705e-06, - "loss": 2.7713, + "learning_rate": 7.84340986732343e-06, + "loss": 2.7506, "step": 254700 }, { "epoch": 0.84, - "learning_rate": 7.8265791860596e-06, - "loss": 2.7805, + "learning_rate": 7.826858398877149e-06, + "loss": 2.7518, "step": 254800 }, { "epoch": 0.84, - "learning_rate": 7.81002760803215e-06, - "loss": 2.7738, + "learning_rate": 7.810306930430868e-06, + "loss": 2.7397, "step": 254900 }, { "epoch": 0.84, - "learning_rate": 7.7934760300047e-06, - "loss": 2.7666, + "learning_rate": 7.793755461984588e-06, + "loss": 2.7508, "step": 255000 }, { "epoch": 0.84, - "learning_rate": 7.776924451977252e-06, - "loss": 2.768, + "learning_rate": 7.777203993538307e-06, + "loss": 2.7566, "step": 255100 }, { "epoch": 0.84, - "learning_rate": 7.760372873949804e-06, - "loss": 2.7697, + "learning_rate": 7.760652525092026e-06, + "loss": 2.7464, "step": 255200 }, { "epoch": 0.85, - "learning_rate": 7.743821295922354e-06, - "loss": 2.7588, + "learning_rate": 7.744101056645745e-06, + "loss": 2.7414, "step": 255300 }, { "epoch": 0.85, - "learning_rate": 7.727269717894904e-06, - "loss": 2.7738, + "learning_rate": 7.727549588199466e-06, + "loss": 2.7552, "step": 255400 }, { "epoch": 0.85, - "learning_rate": 7.710718139867455e-06, - "loss": 2.7695, + "learning_rate": 7.710998119753185e-06, + "loss": 2.743, "step": 255500 }, { "epoch": 0.85, - "learning_rate": 7.694166561840007e-06, - "loss": 2.7625, + "learning_rate": 7.694446651306904e-06, + "loss": 2.7563, "step": 255600 }, { "epoch": 0.85, - "learning_rate": 7.677614983812557e-06, - "loss": 2.7545, + "learning_rate": 7.677895182860625e-06, + "loss": 2.7409, "step": 255700 }, { "epoch": 0.85, - "learning_rate": 7.661063405785108e-06, - "loss": 2.7722, + "learning_rate": 7.661343714414344e-06, + "loss": 2.7585, "step": 255800 }, { "epoch": 0.85, - "learning_rate": 7.644511827757658e-06, - "loss": 2.7493, + "learning_rate": 7.644792245968063e-06, + "loss": 2.7638, "step": 255900 }, { "epoch": 0.85, - "learning_rate": 7.627960249730209e-06, - "loss": 2.7633, + "learning_rate": 7.628240777521782e-06, + "loss": 2.7583, "step": 256000 }, { "epoch": 0.85, - "learning_rate": 7.611408671702761e-06, - "loss": 2.7659, + "learning_rate": 7.611689309075501e-06, + "loss": 2.739, "step": 256100 }, { "epoch": 0.85, - "learning_rate": 7.5948570936753114e-06, - "loss": 2.7667, + "learning_rate": 7.595137840629221e-06, + "loss": 2.7411, "step": 256200 }, { "epoch": 0.85, - "learning_rate": 7.578305515647863e-06, - "loss": 2.7713, + "learning_rate": 7.57858637218294e-06, + "loss": 2.7533, "step": 256300 }, { "epoch": 0.85, - "learning_rate": 7.561753937620412e-06, - "loss": 2.7596, + "learning_rate": 7.56203490373666e-06, + "loss": 2.7521, "step": 256400 }, { "epoch": 0.85, - "learning_rate": 7.545202359592964e-06, - "loss": 2.7615, + "learning_rate": 7.545483435290379e-06, + "loss": 2.7548, "step": 256500 }, { "epoch": 0.85, - "learning_rate": 7.5286507815655144e-06, - "loss": 2.7719, + "learning_rate": 7.528931966844099e-06, + "loss": 2.7567, "step": 256600 }, { "epoch": 0.85, - "learning_rate": 7.512099203538066e-06, - "loss": 2.7652, + "learning_rate": 7.512380498397818e-06, + "loss": 2.7472, "step": 256700 }, { "epoch": 0.85, - "learning_rate": 7.495547625510617e-06, - "loss": 2.7641, + "learning_rate": 7.495829029951537e-06, + "loss": 2.734, "step": 256800 }, { "epoch": 0.85, - "learning_rate": 7.478996047483168e-06, - "loss": 2.7652, + "learning_rate": 7.479277561505258e-06, + "loss": 2.7375, "step": 256900 }, { "epoch": 0.85, - "learning_rate": 7.4624444694557175e-06, - "loss": 2.7606, + "learning_rate": 7.462726093058977e-06, + "loss": 2.7464, "step": 257000 }, { "epoch": 0.85, - "learning_rate": 7.445892891428269e-06, - "loss": 2.7678, + "learning_rate": 7.4461746246126964e-06, + "loss": 2.7558, "step": 257100 }, { "epoch": 0.85, - "learning_rate": 7.42934131340082e-06, - "loss": 2.7761, + "learning_rate": 7.429623156166415e-06, + "loss": 2.7455, "step": 257200 }, { "epoch": 0.85, - "learning_rate": 7.412789735373371e-06, - "loss": 2.7665, + "learning_rate": 7.413071687720134e-06, + "loss": 2.7457, "step": 257300 }, { "epoch": 0.85, - "learning_rate": 7.396238157345922e-06, - "loss": 2.7682, + "learning_rate": 7.396520219273854e-06, + "loss": 2.752, "step": 257400 }, { "epoch": 0.85, - "learning_rate": 7.379686579318472e-06, - "loss": 2.7521, + "learning_rate": 7.379968750827573e-06, + "loss": 2.7553, "step": 257500 }, { "epoch": 0.85, - "learning_rate": 7.363135001291023e-06, - "loss": 2.7655, + "learning_rate": 7.363417282381294e-06, + "loss": 2.77, "step": 257600 }, { "epoch": 0.85, - "learning_rate": 7.346583423263574e-06, - "loss": 2.7678, + "learning_rate": 7.346865813935012e-06, + "loss": 2.7628, "step": 257700 }, { "epoch": 0.85, - "learning_rate": 7.330031845236125e-06, - "loss": 2.7691, + "learning_rate": 7.330314345488733e-06, + "loss": 2.7503, "step": 257800 }, { "epoch": 0.85, - "learning_rate": 7.313480267208677e-06, - "loss": 2.7559, + "learning_rate": 7.313762877042452e-06, + "loss": 2.7369, "step": 257900 }, { "epoch": 0.85, - "learning_rate": 7.2969286891812275e-06, - "loss": 2.7462, + "learning_rate": 7.297211408596171e-06, + "loss": 2.7516, "step": 258000 }, { "epoch": 0.85, - "learning_rate": 7.2803771111537774e-06, - "loss": 2.7766, + "learning_rate": 7.2806599401498905e-06, + "loss": 2.7439, "step": 258100 }, { "epoch": 0.85, - "learning_rate": 7.263825533126328e-06, - "loss": 2.765, + "learning_rate": 7.2641084717036095e-06, + "loss": 2.7648, "step": 258200 }, { "epoch": 0.86, - "learning_rate": 7.24727395509888e-06, - "loss": 2.7699, + "learning_rate": 7.24755700325733e-06, + "loss": 2.7483, "step": 258300 }, { "epoch": 0.86, - "learning_rate": 7.2307223770714305e-06, - "loss": 2.7704, + "learning_rate": 7.231005534811048e-06, + "loss": 2.7534, "step": 258400 }, { "epoch": 0.86, - "learning_rate": 7.214170799043981e-06, - "loss": 2.7552, + "learning_rate": 7.214454066364769e-06, + "loss": 2.7499, "step": 258500 }, { "epoch": 0.86, - "learning_rate": 7.197619221016533e-06, - "loss": 2.7449, + "learning_rate": 7.197902597918488e-06, + "loss": 2.7586, "step": 258600 }, { "epoch": 0.86, - "learning_rate": 7.181067642989083e-06, - "loss": 2.7649, + "learning_rate": 7.181351129472207e-06, + "loss": 2.7542, "step": 258700 }, { "epoch": 0.86, - "learning_rate": 7.1645160649616336e-06, - "loss": 2.778, + "learning_rate": 7.164799661025927e-06, + "loss": 2.7603, "step": 258800 }, { "epoch": 0.86, - "learning_rate": 7.147964486934184e-06, - "loss": 2.7721, + "learning_rate": 7.148248192579646e-06, + "loss": 2.7507, "step": 258900 }, { "epoch": 0.86, - "learning_rate": 7.131412908906736e-06, - "loss": 2.7679, + "learning_rate": 7.1316967241333656e-06, + "loss": 2.7567, "step": 259000 }, { "epoch": 0.86, - "learning_rate": 7.114861330879287e-06, - "loss": 2.7653, + "learning_rate": 7.1151452556870846e-06, + "loss": 2.7538, "step": 259100 }, { "epoch": 0.86, - "learning_rate": 7.0983097528518366e-06, - "loss": 2.7743, + "learning_rate": 7.0985937872408035e-06, + "loss": 2.7438, "step": 259200 }, { "epoch": 0.86, - "learning_rate": 7.081758174824388e-06, - "loss": 2.7797, + "learning_rate": 7.082042318794524e-06, + "loss": 2.7572, "step": 259300 }, { "epoch": 0.86, - "learning_rate": 7.065206596796939e-06, - "loss": 2.7731, + "learning_rate": 7.065490850348243e-06, + "loss": 2.7405, "step": 259400 }, { "epoch": 0.86, - "learning_rate": 7.04865501876949e-06, - "loss": 2.7821, + "learning_rate": 7.048939381901963e-06, + "loss": 2.7389, "step": 259500 }, { "epoch": 0.86, - "learning_rate": 7.032103440742041e-06, - "loss": 2.7588, + "learning_rate": 7.032387913455682e-06, + "loss": 2.7333, "step": 259600 }, { "epoch": 0.86, - "learning_rate": 7.015551862714592e-06, - "loss": 2.7716, + "learning_rate": 7.015836445009402e-06, + "loss": 2.7451, "step": 259700 }, { "epoch": 0.86, - "learning_rate": 6.999000284687142e-06, - "loss": 2.7732, + "learning_rate": 6.999284976563121e-06, + "loss": 2.7516, "step": 259800 }, { "epoch": 0.86, - "learning_rate": 6.982448706659693e-06, - "loss": 2.7597, + "learning_rate": 6.98273350811684e-06, + "loss": 2.7503, "step": 259900 }, { "epoch": 0.86, - "learning_rate": 6.965897128632244e-06, - "loss": 2.7779, + "learning_rate": 6.9661820396705605e-06, + "loss": 2.746, "step": 260000 }, { "epoch": 0.86, - "learning_rate": 6.949345550604795e-06, - "loss": 2.7778, + "learning_rate": 6.949630571224279e-06, + "loss": 2.7416, "step": 260100 }, { "epoch": 0.86, - "learning_rate": 6.932793972577347e-06, - "loss": 2.7737, + "learning_rate": 6.933079102777999e-06, + "loss": 2.7475, "step": 260200 }, { "epoch": 0.86, - "learning_rate": 6.916242394549896e-06, - "loss": 2.7704, + "learning_rate": 6.916527634331718e-06, + "loss": 2.7491, "step": 260300 }, { "epoch": 0.86, - "learning_rate": 6.899690816522447e-06, - "loss": 2.7703, + "learning_rate": 6.899976165885437e-06, + "loss": 2.7493, "step": 260400 }, { "epoch": 0.86, - "learning_rate": 6.883139238494998e-06, - "loss": 2.7716, + "learning_rate": 6.883424697439157e-06, + "loss": 2.7503, "step": 260500 }, { "epoch": 0.86, - "learning_rate": 6.86658766046755e-06, - "loss": 2.7532, + "learning_rate": 6.866873228992876e-06, + "loss": 2.7372, "step": 260600 }, { "epoch": 0.86, - "learning_rate": 6.8500360824401e-06, - "loss": 2.7803, + "learning_rate": 6.850321760546597e-06, + "loss": 2.7666, "step": 260700 }, { "epoch": 0.86, - "learning_rate": 6.833484504412652e-06, - "loss": 2.7742, + "learning_rate": 6.833770292100315e-06, + "loss": 2.7661, "step": 260800 }, { "epoch": 0.86, - "learning_rate": 6.816932926385201e-06, - "loss": 2.7769, + "learning_rate": 6.8172188236540356e-06, + "loss": 2.7542, "step": 260900 }, { "epoch": 0.86, - "learning_rate": 6.800381348357753e-06, - "loss": 2.7803, + "learning_rate": 6.8006673552077545e-06, + "loss": 2.7459, "step": 261000 }, { "epoch": 0.86, - "learning_rate": 6.783829770330303e-06, - "loss": 2.756, + "learning_rate": 6.7841158867614735e-06, + "loss": 2.7432, "step": 261100 }, { "epoch": 0.86, - "learning_rate": 6.767278192302855e-06, - "loss": 2.7768, + "learning_rate": 6.767564418315193e-06, + "loss": 2.7501, "step": 261200 }, { "epoch": 0.86, - "learning_rate": 6.750726614275406e-06, - "loss": 2.7702, + "learning_rate": 6.751012949868912e-06, + "loss": 2.7415, "step": 261300 }, { "epoch": 0.87, - "learning_rate": 6.734175036247956e-06, - "loss": 2.7564, + "learning_rate": 6.734461481422632e-06, + "loss": 2.754, "step": 261400 }, { "epoch": 0.87, - "learning_rate": 6.7176234582205064e-06, - "loss": 2.77, + "learning_rate": 6.717910012976351e-06, + "loss": 2.7459, "step": 261500 }, { "epoch": 0.87, - "learning_rate": 6.701071880193058e-06, - "loss": 2.7672, + "learning_rate": 6.701358544530072e-06, + "loss": 2.7676, "step": 261600 }, { "epoch": 0.87, - "learning_rate": 6.684520302165609e-06, - "loss": 2.7723, + "learning_rate": 6.684807076083791e-06, + "loss": 2.7591, "step": 261700 }, { "epoch": 0.87, - "learning_rate": 6.66796872413816e-06, - "loss": 2.7716, + "learning_rate": 6.66825560763751e-06, + "loss": 2.7401, "step": 261800 }, { "epoch": 0.87, - "learning_rate": 6.651417146110711e-06, - "loss": 2.7565, + "learning_rate": 6.65170413919123e-06, + "loss": 2.7533, "step": 261900 }, { "epoch": 0.87, - "learning_rate": 6.634865568083261e-06, - "loss": 2.7654, + "learning_rate": 6.635152670744949e-06, + "loss": 2.7438, "step": 262000 }, { "epoch": 0.87, - "learning_rate": 6.618313990055812e-06, - "loss": 2.7622, + "learning_rate": 6.6186012022986684e-06, + "loss": 2.746, "step": 262100 }, { "epoch": 0.87, - "learning_rate": 6.601762412028363e-06, - "loss": 2.7793, + "learning_rate": 6.602049733852387e-06, + "loss": 2.7529, "step": 262200 }, { "epoch": 0.87, - "learning_rate": 6.585210834000914e-06, - "loss": 2.763, + "learning_rate": 6.585498265406106e-06, + "loss": 2.7414, "step": 262300 }, { "epoch": 0.87, - "learning_rate": 6.568659255973465e-06, - "loss": 2.7734, + "learning_rate": 6.568946796959827e-06, + "loss": 2.7319, "step": 262400 }, { "epoch": 0.87, - "learning_rate": 6.5521076779460165e-06, - "loss": 2.7566, + "learning_rate": 6.552395328513545e-06, + "loss": 2.7405, "step": 262500 }, { "epoch": 0.87, - "learning_rate": 6.535556099918566e-06, - "loss": 2.7749, + "learning_rate": 6.535843860067266e-06, + "loss": 2.7606, "step": 262600 }, { "epoch": 0.87, - "learning_rate": 6.519004521891117e-06, - "loss": 2.7646, + "learning_rate": 6.519292391620985e-06, + "loss": 2.7472, "step": 262700 }, { "epoch": 0.87, - "learning_rate": 6.502452943863668e-06, - "loss": 2.7524, + "learning_rate": 6.502740923174705e-06, + "loss": 2.7333, "step": 262800 }, { "epoch": 0.87, - "learning_rate": 6.4859013658362195e-06, - "loss": 2.7691, + "learning_rate": 6.486189454728424e-06, + "loss": 2.7405, "step": 262900 }, { "epoch": 0.87, - "learning_rate": 6.46934978780877e-06, - "loss": 2.7642, + "learning_rate": 6.469637986282143e-06, + "loss": 2.7524, "step": 263000 }, { "epoch": 0.87, - "learning_rate": 6.45279820978132e-06, - "loss": 2.772, + "learning_rate": 6.453086517835863e-06, + "loss": 2.741, "step": 263100 }, { "epoch": 0.87, - "learning_rate": 6.436246631753872e-06, - "loss": 2.7662, + "learning_rate": 6.4365350493895815e-06, + "loss": 2.7545, "step": 263200 }, { "epoch": 0.87, - "learning_rate": 6.4196950537264225e-06, - "loss": 2.7615, + "learning_rate": 6.419983580943302e-06, + "loss": 2.7353, "step": 263300 }, { "epoch": 0.87, - "learning_rate": 6.403143475698973e-06, - "loss": 2.7704, + "learning_rate": 6.403432112497021e-06, + "loss": 2.7545, "step": 263400 }, { "epoch": 0.87, - "learning_rate": 6.386591897671525e-06, - "loss": 2.7625, + "learning_rate": 6.386880644050741e-06, + "loss": 2.7504, "step": 263500 }, { "epoch": 0.87, - "learning_rate": 6.370040319644076e-06, - "loss": 2.7684, + "learning_rate": 6.37032917560446e-06, + "loss": 2.7419, "step": 263600 }, { "epoch": 0.87, - "learning_rate": 6.3534887416166255e-06, - "loss": 2.7538, + "learning_rate": 6.353777707158179e-06, + "loss": 2.7564, "step": 263700 }, { "epoch": 0.87, - "learning_rate": 6.336937163589176e-06, - "loss": 2.7771, + "learning_rate": 6.337226238711899e-06, + "loss": 2.7474, "step": 263800 }, { "epoch": 0.87, - "learning_rate": 6.320385585561728e-06, - "loss": 2.7626, + "learning_rate": 6.320674770265618e-06, + "loss": 2.758, "step": 263900 }, { "epoch": 0.87, - "learning_rate": 6.303834007534279e-06, - "loss": 2.7598, + "learning_rate": 6.304123301819338e-06, + "loss": 2.7502, "step": 264000 }, { "epoch": 0.87, - "learning_rate": 6.28728242950683e-06, - "loss": 2.768, + "learning_rate": 6.287571833373057e-06, + "loss": 2.733, "step": 264100 }, { "epoch": 0.87, - "learning_rate": 6.270730851479379e-06, - "loss": 2.7856, + "learning_rate": 6.271020364926776e-06, + "loss": 2.7444, "step": 264200 }, { "epoch": 0.87, - "learning_rate": 6.254179273451931e-06, - "loss": 2.77, + "learning_rate": 6.254468896480496e-06, + "loss": 2.7605, "step": 264300 }, { "epoch": 0.88, - "learning_rate": 6.237627695424482e-06, - "loss": 2.7441, + "learning_rate": 6.237917428034216e-06, + "loss": 2.7537, "step": 264400 }, { "epoch": 0.88, - "learning_rate": 6.221076117397033e-06, - "loss": 2.7742, + "learning_rate": 6.221365959587935e-06, + "loss": 2.7518, "step": 264500 }, { "epoch": 0.88, - "learning_rate": 6.204524539369583e-06, - "loss": 2.7799, + "learning_rate": 6.204814491141654e-06, + "loss": 2.7624, "step": 264600 }, { "epoch": 0.88, - "learning_rate": 6.187972961342135e-06, - "loss": 2.7872, + "learning_rate": 6.188263022695374e-06, + "loss": 2.7612, "step": 264700 }, { "epoch": 0.88, - "learning_rate": 6.1714213833146855e-06, - "loss": 2.7708, + "learning_rate": 6.171711554249094e-06, + "loss": 2.7534, "step": 264800 }, { "epoch": 0.88, - "learning_rate": 6.154869805287236e-06, - "loss": 2.7656, + "learning_rate": 6.155160085802813e-06, + "loss": 2.7624, "step": 264900 }, { "epoch": 0.88, - "learning_rate": 6.138318227259787e-06, - "loss": 2.7648, + "learning_rate": 6.1386086173565325e-06, + "loss": 2.7515, "step": 265000 }, { "epoch": 0.88, - "learning_rate": 6.121766649232339e-06, - "loss": 2.7708, + "learning_rate": 6.1220571489102515e-06, + "loss": 2.7534, "step": 265100 }, { "epoch": 0.88, - "learning_rate": 6.1052150712048885e-06, - "loss": 2.7572, + "learning_rate": 6.1055056804639704e-06, + "loss": 2.7669, "step": 265200 }, { "epoch": 0.88, - "learning_rate": 6.08866349317744e-06, - "loss": 2.7684, + "learning_rate": 6.08895421201769e-06, + "loss": 2.7546, "step": 265300 }, { "epoch": 0.88, - "learning_rate": 6.072111915149991e-06, - "loss": 2.7669, + "learning_rate": 6.07240274357141e-06, + "loss": 2.7538, "step": 265400 }, { "epoch": 0.88, - "learning_rate": 6.055560337122542e-06, - "loss": 2.7669, + "learning_rate": 6.05585127512513e-06, + "loss": 2.7525, "step": 265500 }, { "epoch": 0.88, - "learning_rate": 6.039008759095092e-06, - "loss": 2.7536, + "learning_rate": 6.039299806678849e-06, + "loss": 2.7458, "step": 265600 }, { "epoch": 0.88, - "learning_rate": 6.022457181067644e-06, - "loss": 2.7697, + "learning_rate": 6.022748338232569e-06, + "loss": 2.7465, "step": 265700 }, { "epoch": 0.88, - "learning_rate": 6.005905603040194e-06, - "loss": 2.7625, + "learning_rate": 6.006196869786288e-06, + "loss": 2.7563, "step": 265800 }, { "epoch": 0.88, - "learning_rate": 5.9893540250127455e-06, - "loss": 2.7662, + "learning_rate": 5.989645401340007e-06, + "loss": 2.744, "step": 265900 }, { "epoch": 0.88, - "learning_rate": 5.972802446985295e-06, - "loss": 2.7573, + "learning_rate": 5.9730939328937265e-06, + "loss": 2.7377, "step": 266000 }, { "epoch": 0.88, - "learning_rate": 5.956250868957847e-06, - "loss": 2.7646, + "learning_rate": 5.956542464447446e-06, + "loss": 2.7466, "step": 266100 }, { "epoch": 0.88, - "learning_rate": 5.939699290930398e-06, - "loss": 2.7738, + "learning_rate": 5.939990996001165e-06, + "loss": 2.7559, "step": 266200 }, { "epoch": 0.88, - "learning_rate": 5.9231477129029485e-06, - "loss": 2.7738, + "learning_rate": 5.923439527554885e-06, + "loss": 2.7584, "step": 266300 }, { "epoch": 0.88, - "learning_rate": 5.906596134875499e-06, - "loss": 2.7708, + "learning_rate": 5.906888059108604e-06, + "loss": 2.7524, "step": 266400 }, { "epoch": 0.88, - "learning_rate": 5.89004455684805e-06, - "loss": 2.7694, + "learning_rate": 5.890336590662324e-06, + "loss": 2.7572, "step": 266500 }, { "epoch": 0.88, - "learning_rate": 5.873492978820601e-06, - "loss": 2.7756, + "learning_rate": 5.873785122216043e-06, + "loss": 2.7473, "step": 266600 }, { "epoch": 0.88, - "learning_rate": 5.856941400793152e-06, - "loss": 2.7584, + "learning_rate": 5.857233653769763e-06, + "loss": 2.7638, "step": 266700 }, { "epoch": 0.88, - "learning_rate": 5.840389822765703e-06, - "loss": 2.7743, + "learning_rate": 5.840682185323483e-06, + "loss": 2.7577, "step": 266800 }, { "epoch": 0.88, - "learning_rate": 5.823838244738254e-06, - "loss": 2.7803, + "learning_rate": 5.824130716877202e-06, + "loss": 2.7519, "step": 266900 }, { "epoch": 0.88, - "learning_rate": 5.807286666710805e-06, - "loss": 2.7708, + "learning_rate": 5.807579248430921e-06, + "loss": 2.7518, "step": 267000 }, { "epoch": 0.88, - "learning_rate": 5.790735088683355e-06, - "loss": 2.7586, + "learning_rate": 5.79102777998464e-06, + "loss": 2.7483, "step": 267100 }, { "epoch": 0.88, - "learning_rate": 5.774183510655906e-06, - "loss": 2.7636, + "learning_rate": 5.77447631153836e-06, + "loss": 2.7452, "step": 267200 }, { "epoch": 0.88, - "learning_rate": 5.757631932628457e-06, - "loss": 2.7692, + "learning_rate": 5.757924843092079e-06, + "loss": 2.7326, "step": 267300 }, { "epoch": 0.89, - "learning_rate": 5.741080354601008e-06, - "loss": 2.7634, + "learning_rate": 5.741373374645799e-06, + "loss": 2.7618, "step": 267400 }, { "epoch": 0.89, - "learning_rate": 5.724528776573558e-06, - "loss": 2.7604, + "learning_rate": 5.724821906199519e-06, + "loss": 2.7353, "step": 267500 }, { "epoch": 0.89, - "learning_rate": 5.70797719854611e-06, - "loss": 2.7605, + "learning_rate": 5.708270437753237e-06, + "loss": 2.7488, "step": 267600 }, { "epoch": 0.89, - "learning_rate": 5.69142562051866e-06, - "loss": 2.7626, + "learning_rate": 5.691718969306957e-06, + "loss": 2.7483, "step": 267700 }, { "epoch": 0.89, - "learning_rate": 5.6748740424912115e-06, - "loss": 2.7691, + "learning_rate": 5.675167500860677e-06, + "loss": 2.7467, "step": 267800 }, { "epoch": 0.89, - "learning_rate": 5.658322464463762e-06, - "loss": 2.7748, + "learning_rate": 5.658616032414396e-06, + "loss": 2.7538, "step": 267900 }, { "epoch": 0.89, - "learning_rate": 5.641770886436313e-06, - "loss": 2.7718, + "learning_rate": 5.6420645639681155e-06, + "loss": 2.7592, "step": 268000 }, { "epoch": 0.89, - "learning_rate": 5.625219308408864e-06, - "loss": 2.7604, + "learning_rate": 5.625513095521835e-06, + "loss": 2.7456, "step": 268100 }, { "epoch": 0.89, - "learning_rate": 5.608667730381415e-06, - "loss": 2.7638, + "learning_rate": 5.608961627075555e-06, + "loss": 2.7734, "step": 268200 }, { "epoch": 0.89, - "learning_rate": 5.592116152353965e-06, - "loss": 2.7702, + "learning_rate": 5.592410158629273e-06, + "loss": 2.7488, "step": 268300 }, { "epoch": 0.89, - "learning_rate": 5.575564574326517e-06, - "loss": 2.765, + "learning_rate": 5.575858690182993e-06, + "loss": 2.7549, "step": 268400 }, { "epoch": 0.89, - "learning_rate": 5.559012996299067e-06, - "loss": 2.7857, + "learning_rate": 5.559307221736713e-06, + "loss": 2.748, "step": 268500 }, { "epoch": 0.89, - "learning_rate": 5.542461418271618e-06, - "loss": 2.772, + "learning_rate": 5.542755753290432e-06, + "loss": 2.7621, "step": 268600 }, { "epoch": 0.89, - "learning_rate": 5.525909840244169e-06, - "loss": 2.7643, + "learning_rate": 5.526204284844152e-06, + "loss": 2.7469, "step": 268700 }, { "epoch": 0.89, - "learning_rate": 5.50935826221672e-06, - "loss": 2.7735, + "learning_rate": 5.509652816397872e-06, + "loss": 2.7381, "step": 268800 }, { "epoch": 0.89, - "learning_rate": 5.492806684189271e-06, - "loss": 2.7666, + "learning_rate": 5.493101347951591e-06, + "loss": 2.7526, "step": 268900 }, { "epoch": 0.89, - "learning_rate": 5.476255106161822e-06, - "loss": 2.7689, + "learning_rate": 5.4765498795053096e-06, + "loss": 2.759, "step": 269000 }, { "epoch": 0.89, - "learning_rate": 5.459703528134372e-06, - "loss": 2.7513, + "learning_rate": 5.459998411059029e-06, + "loss": 2.7419, "step": 269100 }, { "epoch": 0.89, - "learning_rate": 5.443151950106924e-06, - "loss": 2.7576, + "learning_rate": 5.443446942612749e-06, + "loss": 2.7591, "step": 269200 }, { "epoch": 0.89, - "learning_rate": 5.4266003720794745e-06, - "loss": 2.7721, + "learning_rate": 5.426895474166468e-06, + "loss": 2.7486, "step": 269300 }, { "epoch": 0.89, - "learning_rate": 5.410048794052025e-06, - "loss": 2.7635, + "learning_rate": 5.410344005720188e-06, + "loss": 2.7511, "step": 269400 }, { "epoch": 0.89, - "learning_rate": 5.393497216024576e-06, - "loss": 2.7593, + "learning_rate": 5.393792537273907e-06, + "loss": 2.7438, "step": 269500 }, { "epoch": 0.89, - "learning_rate": 5.376945637997128e-06, - "loss": 2.7706, + "learning_rate": 5.377241068827627e-06, + "loss": 2.7584, "step": 269600 }, { "epoch": 0.89, - "learning_rate": 5.3603940599696775e-06, - "loss": 2.7671, + "learning_rate": 5.360689600381346e-06, + "loss": 2.7333, "step": 269700 }, { "epoch": 0.89, - "learning_rate": 5.343842481942229e-06, - "loss": 2.7708, + "learning_rate": 5.344138131935066e-06, + "loss": 2.7365, "step": 269800 }, { "epoch": 0.89, - "learning_rate": 5.327290903914779e-06, - "loss": 2.7464, + "learning_rate": 5.3275866634887855e-06, + "loss": 2.7407, "step": 269900 }, { "epoch": 0.89, - "learning_rate": 5.310739325887331e-06, - "loss": 2.7503, + "learning_rate": 5.3110351950425045e-06, + "loss": 2.7517, "step": 270000 }, { "epoch": 0.89, - "learning_rate": 5.294187747859881e-06, - "loss": 2.7724, + "learning_rate": 5.2944837265962235e-06, + "loss": 2.7426, "step": 270100 }, { "epoch": 0.89, - "learning_rate": 5.277636169832432e-06, - "loss": 2.7719, + "learning_rate": 5.277932258149943e-06, + "loss": 2.7449, "step": 270200 }, { "epoch": 0.89, - "learning_rate": 5.261084591804983e-06, - "loss": 2.761, + "learning_rate": 5.261380789703662e-06, + "loss": 2.7402, "step": 270300 }, { "epoch": 0.9, - "learning_rate": 5.2445330137775345e-06, - "loss": 2.7856, + "learning_rate": 5.244829321257382e-06, + "loss": 2.7582, "step": 270400 }, { "epoch": 0.9, - "learning_rate": 5.227981435750084e-06, - "loss": 2.7648, + "learning_rate": 5.228277852811102e-06, + "loss": 2.7453, "step": 270500 }, { "epoch": 0.9, - "learning_rate": 5.211429857722636e-06, - "loss": 2.7564, + "learning_rate": 5.211726384364822e-06, + "loss": 2.7585, "step": 270600 }, { "epoch": 0.9, - "learning_rate": 5.194878279695187e-06, - "loss": 2.7682, + "learning_rate": 5.195174915918541e-06, + "loss": 2.7502, "step": 270700 }, { "epoch": 0.9, - "learning_rate": 5.1783267016677375e-06, - "loss": 2.7659, + "learning_rate": 5.17862344747226e-06, + "loss": 2.7567, "step": 270800 }, { "epoch": 0.9, - "learning_rate": 5.161775123640288e-06, - "loss": 2.7711, + "learning_rate": 5.1620719790259795e-06, + "loss": 2.7506, "step": 270900 }, { "epoch": 0.9, - "learning_rate": 5.145223545612839e-06, - "loss": 2.745, + "learning_rate": 5.1455205105796985e-06, + "loss": 2.7401, "step": 271000 }, { "epoch": 0.9, - "learning_rate": 5.12867196758539e-06, - "loss": 2.7714, + "learning_rate": 5.128969042133418e-06, + "loss": 2.7438, "step": 271100 }, { "epoch": 0.9, - "learning_rate": 5.1121203895579405e-06, - "loss": 2.7759, + "learning_rate": 5.112417573687138e-06, + "loss": 2.7304, "step": 271200 }, { "epoch": 0.9, - "learning_rate": 5.095568811530491e-06, - "loss": 2.7694, + "learning_rate": 5.095866105240857e-06, + "loss": 2.7508, "step": 271300 }, { "epoch": 0.9, - "learning_rate": 5.079017233503042e-06, - "loss": 2.7687, + "learning_rate": 5.079314636794576e-06, + "loss": 2.7388, "step": 271400 }, { "epoch": 0.9, - "learning_rate": 5.062465655475594e-06, - "loss": 2.7697, + "learning_rate": 5.062763168348296e-06, + "loss": 2.7519, "step": 271500 }, { "epoch": 0.9, - "learning_rate": 5.045914077448144e-06, - "loss": 2.754, + "learning_rate": 5.046211699902016e-06, + "loss": 2.731, "step": 271600 }, { "epoch": 0.9, - "learning_rate": 5.029362499420695e-06, - "loss": 2.7589, + "learning_rate": 5.029660231455735e-06, + "loss": 2.76, "step": 271700 }, { "epoch": 0.9, - "learning_rate": 5.012810921393246e-06, - "loss": 2.7676, + "learning_rate": 5.013108763009455e-06, + "loss": 2.7508, "step": 271800 }, { "epoch": 0.9, - "learning_rate": 4.996259343365797e-06, - "loss": 2.7601, + "learning_rate": 4.9965572945631745e-06, + "loss": 2.7589, "step": 271900 }, { "epoch": 0.9, - "learning_rate": 4.979707765338347e-06, - "loss": 2.7634, + "learning_rate": 4.9800058261168934e-06, + "loss": 2.7485, "step": 272000 }, { "epoch": 0.9, - "learning_rate": 4.963156187310899e-06, - "loss": 2.7662, + "learning_rate": 4.963454357670612e-06, + "loss": 2.743, "step": 272100 }, { "epoch": 0.9, - "learning_rate": 4.946604609283449e-06, - "loss": 2.7749, + "learning_rate": 4.946902889224332e-06, + "loss": 2.7607, "step": 272200 }, { "epoch": 0.9, - "learning_rate": 4.9300530312560005e-06, - "loss": 2.7557, + "learning_rate": 4.930351420778052e-06, + "loss": 2.742, "step": 272300 }, { "epoch": 0.9, - "learning_rate": 4.91350145322855e-06, - "loss": 2.7594, + "learning_rate": 4.913799952331771e-06, + "loss": 2.7419, "step": 272400 }, { "epoch": 0.9, - "learning_rate": 4.896949875201102e-06, - "loss": 2.7659, + "learning_rate": 4.897248483885491e-06, + "loss": 2.7496, "step": 272500 }, { "epoch": 0.9, - "learning_rate": 4.880398297173653e-06, - "loss": 2.7749, + "learning_rate": 4.88069701543921e-06, + "loss": 2.7502, "step": 272600 }, { "epoch": 0.9, - "learning_rate": 4.8638467191462035e-06, - "loss": 2.7658, + "learning_rate": 4.864145546992929e-06, + "loss": 2.7372, "step": 272700 }, { "epoch": 0.9, - "learning_rate": 4.847295141118754e-06, - "loss": 2.7487, + "learning_rate": 4.847594078546649e-06, + "loss": 2.7502, "step": 272800 }, { "epoch": 0.9, - "learning_rate": 4.830743563091306e-06, - "loss": 2.7657, + "learning_rate": 4.8310426101003685e-06, + "loss": 2.7352, "step": 272900 }, { "epoch": 0.9, - "learning_rate": 4.814191985063856e-06, - "loss": 2.7698, + "learning_rate": 4.8144911416540875e-06, + "loss": 2.7495, "step": 273000 }, { "epoch": 0.9, - "learning_rate": 4.797640407036407e-06, - "loss": 2.7643, + "learning_rate": 4.797939673207807e-06, + "loss": 2.7424, "step": 273100 }, { "epoch": 0.9, - "learning_rate": 4.781088829008958e-06, - "loss": 2.7781, + "learning_rate": 4.781388204761527e-06, + "loss": 2.7468, "step": 273200 }, { "epoch": 0.9, - "learning_rate": 4.764537250981509e-06, - "loss": 2.7734, + "learning_rate": 4.764836736315246e-06, + "loss": 2.7409, "step": 273300 }, { "epoch": 0.91, - "learning_rate": 4.74798567295406e-06, - "loss": 2.7563, + "learning_rate": 4.748285267868965e-06, + "loss": 2.7514, "step": 273400 }, { "epoch": 0.91, - "learning_rate": 4.73143409492661e-06, - "loss": 2.769, + "learning_rate": 4.731733799422685e-06, + "loss": 2.7444, "step": 273500 }, { "epoch": 0.91, - "learning_rate": 4.714882516899161e-06, - "loss": 2.759, + "learning_rate": 4.715182330976405e-06, + "loss": 2.7648, "step": 273600 }, { "epoch": 0.91, - "learning_rate": 4.698330938871713e-06, - "loss": 2.7749, + "learning_rate": 4.698630862530124e-06, + "loss": 2.7497, "step": 273700 }, { "epoch": 0.91, - "learning_rate": 4.681779360844263e-06, - "loss": 2.7714, + "learning_rate": 4.682079394083844e-06, + "loss": 2.7593, "step": 273800 }, { "epoch": 0.91, - "learning_rate": 4.665227782816814e-06, - "loss": 2.7649, + "learning_rate": 4.6655279256375626e-06, + "loss": 2.7445, "step": 273900 }, { "epoch": 0.91, - "learning_rate": 4.648676204789365e-06, - "loss": 2.7629, + "learning_rate": 4.648976457191282e-06, + "loss": 2.7566, "step": 274000 }, { "epoch": 0.91, - "learning_rate": 4.632124626761916e-06, - "loss": 2.7762, + "learning_rate": 4.632424988745001e-06, + "loss": 2.7489, "step": 274100 }, { "epoch": 0.91, - "learning_rate": 4.6155730487344665e-06, - "loss": 2.7652, + "learning_rate": 4.615873520298721e-06, + "loss": 2.7413, "step": 274200 }, { "epoch": 0.91, - "learning_rate": 4.599021470707018e-06, - "loss": 2.7589, + "learning_rate": 4.599322051852441e-06, + "loss": 2.7411, "step": 274300 }, { "epoch": 0.91, - "learning_rate": 4.582469892679568e-06, - "loss": 2.7504, + "learning_rate": 4.58277058340616e-06, + "loss": 2.7686, "step": 274400 }, { "epoch": 0.91, - "learning_rate": 4.56591831465212e-06, - "loss": 2.7477, + "learning_rate": 4.566219114959879e-06, + "loss": 2.7492, "step": 274500 }, { "epoch": 0.91, - "learning_rate": 4.54936673662467e-06, - "loss": 2.7657, + "learning_rate": 4.549667646513599e-06, + "loss": 2.7524, "step": 274600 }, { "epoch": 0.91, - "learning_rate": 4.532815158597221e-06, - "loss": 2.7675, + "learning_rate": 4.533116178067319e-06, + "loss": 2.759, "step": 274700 }, { "epoch": 0.91, - "learning_rate": 4.516263580569772e-06, - "loss": 2.7696, + "learning_rate": 4.516564709621038e-06, + "loss": 2.7587, "step": 274800 }, { "epoch": 0.91, - "learning_rate": 4.499712002542323e-06, - "loss": 2.787, + "learning_rate": 4.5000132411747575e-06, + "loss": 2.7479, "step": 274900 }, { "epoch": 0.91, - "learning_rate": 4.483160424514873e-06, - "loss": 2.7675, + "learning_rate": 4.483461772728477e-06, + "loss": 2.7479, "step": 275000 }, { "epoch": 0.91, - "learning_rate": 4.466608846487424e-06, - "loss": 2.7719, + "learning_rate": 4.4669103042821954e-06, + "loss": 2.7593, "step": 275100 }, { "epoch": 0.91, - "learning_rate": 4.450057268459975e-06, - "loss": 2.7602, + "learning_rate": 4.450358835835915e-06, + "loss": 2.7551, "step": 275200 }, { "epoch": 0.91, - "learning_rate": 4.4335056904325265e-06, - "loss": 2.7737, + "learning_rate": 4.433807367389635e-06, + "loss": 2.7317, "step": 275300 }, { "epoch": 0.91, - "learning_rate": 4.416954112405077e-06, - "loss": 2.7703, + "learning_rate": 4.417255898943354e-06, + "loss": 2.7403, "step": 275400 }, { "epoch": 0.91, - "learning_rate": 4.400402534377628e-06, - "loss": 2.7746, + "learning_rate": 4.400704430497074e-06, + "loss": 2.7439, "step": 275500 }, { "epoch": 0.91, - "learning_rate": 4.383850956350179e-06, - "loss": 2.7715, + "learning_rate": 4.384152962050794e-06, + "loss": 2.7422, "step": 275600 }, { "epoch": 0.91, - "learning_rate": 4.3672993783227295e-06, - "loss": 2.7488, + "learning_rate": 4.367601493604513e-06, + "loss": 2.74, "step": 275700 }, { "epoch": 0.91, - "learning_rate": 4.35074780029528e-06, - "loss": 2.771, + "learning_rate": 4.351050025158232e-06, + "loss": 2.7425, "step": 275800 }, { "epoch": 0.91, - "learning_rate": 4.334196222267831e-06, - "loss": 2.771, + "learning_rate": 4.3344985567119515e-06, + "loss": 2.7427, "step": 275900 }, { "epoch": 0.91, - "learning_rate": 4.3176446442403826e-06, - "loss": 2.774, + "learning_rate": 4.317947088265671e-06, + "loss": 2.7584, "step": 276000 }, { "epoch": 0.91, - "learning_rate": 4.3010930662129325e-06, - "loss": 2.755, + "learning_rate": 4.30139561981939e-06, + "loss": 2.7433, "step": 276100 }, { "epoch": 0.91, - "learning_rate": 4.284541488185484e-06, - "loss": 2.766, + "learning_rate": 4.28484415137311e-06, + "loss": 2.7418, "step": 276200 }, { "epoch": 0.91, - "learning_rate": 4.267989910158034e-06, - "loss": 2.7588, + "learning_rate": 4.26829268292683e-06, + "loss": 2.7536, "step": 276300 }, { "epoch": 0.91, - "learning_rate": 4.251438332130586e-06, - "loss": 2.7596, + "learning_rate": 4.251741214480549e-06, + "loss": 2.7477, "step": 276400 }, { "epoch": 0.92, - "learning_rate": 4.234886754103136e-06, - "loss": 2.7643, + "learning_rate": 4.235189746034268e-06, + "loss": 2.7344, "step": 276500 }, { "epoch": 0.92, - "learning_rate": 4.218335176075687e-06, - "loss": 2.7635, + "learning_rate": 4.218638277587988e-06, + "loss": 2.7535, "step": 276600 }, { "epoch": 0.92, - "learning_rate": 4.201783598048238e-06, - "loss": 2.77, + "learning_rate": 4.202086809141708e-06, + "loss": 2.7492, "step": 276700 }, { "epoch": 0.92, - "learning_rate": 4.1852320200207894e-06, - "loss": 2.7722, + "learning_rate": 4.185535340695427e-06, + "loss": 2.7449, "step": 276800 }, { "epoch": 0.92, - "learning_rate": 4.168680441993339e-06, - "loss": 2.7592, + "learning_rate": 4.1689838722491464e-06, + "loss": 2.7552, "step": 276900 }, { "epoch": 0.92, - "learning_rate": 4.152128863965891e-06, - "loss": 2.7431, + "learning_rate": 4.1524324038028654e-06, + "loss": 2.7392, "step": 277000 }, { "epoch": 0.92, - "learning_rate": 4.135577285938442e-06, - "loss": 2.7806, + "learning_rate": 4.135880935356585e-06, + "loss": 2.7482, "step": 277100 }, { "epoch": 0.92, - "learning_rate": 4.1190257079109925e-06, - "loss": 2.7542, + "learning_rate": 4.119329466910304e-06, + "loss": 2.7502, "step": 277200 }, { "epoch": 0.92, - "learning_rate": 4.102474129883543e-06, - "loss": 2.7594, + "learning_rate": 4.102777998464024e-06, + "loss": 2.7499, "step": 277300 }, { "epoch": 0.92, - "learning_rate": 4.085922551856094e-06, - "loss": 2.7528, + "learning_rate": 4.086226530017744e-06, + "loss": 2.7431, "step": 277400 }, { "epoch": 0.92, - "learning_rate": 4.069370973828645e-06, - "loss": 2.7548, + "learning_rate": 4.069675061571463e-06, + "loss": 2.734, "step": 277500 }, { "epoch": 0.92, - "learning_rate": 4.052819395801196e-06, - "loss": 2.7584, + "learning_rate": 4.053123593125182e-06, + "loss": 2.7456, "step": 277600 }, { "epoch": 0.92, - "learning_rate": 4.036267817773746e-06, - "loss": 2.7587, + "learning_rate": 4.036572124678902e-06, + "loss": 2.7669, "step": 277700 }, { "epoch": 0.92, - "learning_rate": 4.019716239746298e-06, - "loss": 2.7597, + "learning_rate": 4.020020656232621e-06, + "loss": 2.7498, "step": 277800 }, { "epoch": 0.92, - "learning_rate": 4.0031646617188486e-06, - "loss": 2.764, + "learning_rate": 4.0034691877863405e-06, + "loss": 2.7376, "step": 277900 }, { "epoch": 0.92, - "learning_rate": 3.986613083691399e-06, - "loss": 2.7787, + "learning_rate": 3.98691771934006e-06, + "loss": 2.7455, "step": 278000 }, { "epoch": 0.92, - "learning_rate": 3.97006150566395e-06, - "loss": 2.7605, + "learning_rate": 3.970366250893779e-06, + "loss": 2.751, "step": 278100 }, { "epoch": 0.92, - "learning_rate": 3.953509927636502e-06, - "loss": 2.7677, + "learning_rate": 3.953814782447498e-06, + "loss": 2.7451, "step": 278200 }, { "epoch": 0.92, - "learning_rate": 3.936958349609052e-06, - "loss": 2.7641, + "learning_rate": 3.937263314001218e-06, + "loss": 2.7316, "step": 278300 }, { "epoch": 0.92, - "learning_rate": 3.920406771581603e-06, - "loss": 2.7538, + "learning_rate": 3.920711845554938e-06, + "loss": 2.7511, "step": 278400 }, { "epoch": 0.92, - "learning_rate": 3.903855193554154e-06, - "loss": 2.758, + "learning_rate": 3.904160377108657e-06, + "loss": 2.7521, "step": 278500 }, { "epoch": 0.92, - "learning_rate": 3.887303615526705e-06, - "loss": 2.751, + "learning_rate": 3.887608908662377e-06, + "loss": 2.7418, "step": 278600 }, { "epoch": 0.92, - "learning_rate": 3.8707520374992554e-06, - "loss": 2.7733, + "learning_rate": 3.871057440216097e-06, + "loss": 2.7696, "step": 278700 }, { "epoch": 0.92, - "learning_rate": 3.854200459471806e-06, - "loss": 2.7657, + "learning_rate": 3.854505971769816e-06, + "loss": 2.7411, "step": 278800 }, { "epoch": 0.92, - "learning_rate": 3.837648881444357e-06, - "loss": 2.7584, + "learning_rate": 3.8379545033235346e-06, + "loss": 2.7362, "step": 278900 }, { "epoch": 0.92, - "learning_rate": 3.8210973034169086e-06, - "loss": 2.7495, + "learning_rate": 3.821403034877254e-06, + "loss": 2.7447, "step": 279000 }, { "epoch": 0.92, - "learning_rate": 3.8045457253894585e-06, - "loss": 2.7687, + "learning_rate": 3.804851566430974e-06, + "loss": 2.7331, "step": 279100 }, { "epoch": 0.92, - "learning_rate": 3.7879941473620096e-06, - "loss": 2.7691, + "learning_rate": 3.7883000979846936e-06, + "loss": 2.7555, "step": 279200 }, { "epoch": 0.92, - "learning_rate": 3.771442569334561e-06, - "loss": 2.7734, + "learning_rate": 3.771748629538413e-06, + "loss": 2.7549, "step": 279300 }, { "epoch": 0.92, - "learning_rate": 3.754890991307111e-06, - "loss": 2.7624, + "learning_rate": 3.7551971610921324e-06, + "loss": 2.7387, "step": 279400 }, { "epoch": 0.93, - "learning_rate": 3.7383394132796623e-06, - "loss": 2.7785, + "learning_rate": 3.7386456926458514e-06, + "loss": 2.7558, "step": 279500 }, { "epoch": 0.93, - "learning_rate": 3.7217878352522135e-06, - "loss": 2.7674, + "learning_rate": 3.722094224199571e-06, + "loss": 2.7497, "step": 279600 }, { "epoch": 0.93, - "learning_rate": 3.705236257224764e-06, - "loss": 2.7511, + "learning_rate": 3.7055427557532907e-06, + "loss": 2.7438, "step": 279700 }, { "epoch": 0.93, - "learning_rate": 3.688684679197315e-06, - "loss": 2.7612, + "learning_rate": 3.68899128730701e-06, + "loss": 2.7423, "step": 279800 }, { "epoch": 0.93, - "learning_rate": 3.6721331011698653e-06, - "loss": 2.7648, + "learning_rate": 3.6724398188607295e-06, + "loss": 2.7559, "step": 279900 }, { "epoch": 0.93, - "learning_rate": 3.6555815231424165e-06, - "loss": 2.7686, + "learning_rate": 3.6558883504144493e-06, + "loss": 2.7352, "step": 280000 }, { "epoch": 0.93, - "learning_rate": 3.6390299451149677e-06, - "loss": 2.7511, + "learning_rate": 3.6393368819681683e-06, + "loss": 2.7382, "step": 280100 }, { "epoch": 0.93, - "learning_rate": 3.622478367087518e-06, - "loss": 2.7586, + "learning_rate": 3.6227854135218877e-06, + "loss": 2.7566, "step": 280200 }, { "epoch": 0.93, - "learning_rate": 3.605926789060069e-06, - "loss": 2.7722, + "learning_rate": 3.606233945075607e-06, + "loss": 2.7594, "step": 280300 }, { "epoch": 0.93, - "learning_rate": 3.5893752110326204e-06, - "loss": 2.7653, + "learning_rate": 3.589682476629327e-06, + "loss": 2.7479, "step": 280400 }, { "epoch": 0.93, - "learning_rate": 3.5728236330051707e-06, - "loss": 2.7532, + "learning_rate": 3.5731310081830463e-06, + "loss": 2.7465, "step": 280500 }, { "epoch": 0.93, - "learning_rate": 3.556272054977722e-06, - "loss": 2.7684, + "learning_rate": 3.5565795397367657e-06, + "loss": 2.7462, "step": 280600 }, { "epoch": 0.93, - "learning_rate": 3.539720476950273e-06, - "loss": 2.7791, + "learning_rate": 3.5400280712904847e-06, + "loss": 2.7472, "step": 280700 }, { "epoch": 0.93, - "learning_rate": 3.5231688989228234e-06, - "loss": 2.7682, + "learning_rate": 3.523476602844204e-06, + "loss": 2.7328, "step": 280800 }, { "epoch": 0.93, - "learning_rate": 3.5066173208953746e-06, - "loss": 2.7519, + "learning_rate": 3.506925134397924e-06, + "loss": 2.7364, "step": 280900 }, { "epoch": 0.93, - "learning_rate": 3.4900657428679253e-06, - "loss": 2.7832, + "learning_rate": 3.4903736659516434e-06, + "loss": 2.7636, "step": 281000 }, { "epoch": 0.93, - "learning_rate": 3.473514164840476e-06, - "loss": 2.7668, + "learning_rate": 3.4738221975053628e-06, + "loss": 2.7424, "step": 281100 }, { "epoch": 0.93, - "learning_rate": 3.4569625868130272e-06, - "loss": 2.7601, + "learning_rate": 3.4572707290590826e-06, + "loss": 2.7461, "step": 281200 }, { "epoch": 0.93, - "learning_rate": 3.4404110087855776e-06, - "loss": 2.7603, + "learning_rate": 3.440719260612802e-06, + "loss": 2.7489, "step": 281300 }, { "epoch": 0.93, - "learning_rate": 3.4238594307581287e-06, - "loss": 2.7665, + "learning_rate": 3.424167792166521e-06, + "loss": 2.7425, "step": 281400 }, { "epoch": 0.93, - "learning_rate": 3.4073078527306795e-06, - "loss": 2.7529, + "learning_rate": 3.4076163237202404e-06, + "loss": 2.7326, "step": 281500 }, { "epoch": 0.93, - "learning_rate": 3.3907562747032303e-06, - "loss": 2.7746, + "learning_rate": 3.3910648552739602e-06, + "loss": 2.753, "step": 281600 }, { "epoch": 0.93, - "learning_rate": 3.374204696675781e-06, - "loss": 2.7524, + "learning_rate": 3.3745133868276796e-06, + "loss": 2.7561, "step": 281700 }, { "epoch": 0.93, - "learning_rate": 3.357653118648332e-06, - "loss": 2.7713, + "learning_rate": 3.357961918381399e-06, + "loss": 2.7518, "step": 281800 }, { "epoch": 0.93, - "learning_rate": 3.341101540620883e-06, - "loss": 2.7595, + "learning_rate": 3.341410449935119e-06, + "loss": 2.7343, "step": 281900 }, { "epoch": 0.93, - "learning_rate": 3.3245499625934337e-06, - "loss": 2.7553, + "learning_rate": 3.3248589814888374e-06, + "loss": 2.7507, "step": 282000 }, { "epoch": 0.93, - "learning_rate": 3.307998384565985e-06, - "loss": 2.7636, + "learning_rate": 3.3083075130425573e-06, + "loss": 2.7425, "step": 282100 }, { "epoch": 0.93, - "learning_rate": 3.291446806538535e-06, - "loss": 2.7534, + "learning_rate": 3.2917560445962767e-06, + "loss": 2.7406, "step": 282200 }, { "epoch": 0.93, - "learning_rate": 3.2748952285110864e-06, - "loss": 2.7597, + "learning_rate": 3.275204576149996e-06, + "loss": 2.7563, "step": 282300 }, { "epoch": 0.93, - "learning_rate": 3.2583436504836375e-06, - "loss": 2.7756, + "learning_rate": 3.258653107703716e-06, + "loss": 2.7447, "step": 282400 }, { "epoch": 0.94, - "learning_rate": 3.241792072456188e-06, - "loss": 2.7706, + "learning_rate": 3.2421016392574353e-06, + "loss": 2.7492, "step": 282500 }, { "epoch": 0.94, - "learning_rate": 3.225240494428739e-06, - "loss": 2.763, + "learning_rate": 3.2255501708111543e-06, + "loss": 2.7582, "step": 282600 }, { "epoch": 0.94, - "learning_rate": 3.2086889164012894e-06, - "loss": 2.757, + "learning_rate": 3.2089987023648737e-06, + "loss": 2.74, "step": 282700 }, { "epoch": 0.94, - "learning_rate": 3.1921373383738406e-06, - "loss": 2.7645, + "learning_rate": 3.1924472339185935e-06, + "loss": 2.7476, "step": 282800 }, { "epoch": 0.94, - "learning_rate": 3.1755857603463917e-06, - "loss": 2.765, + "learning_rate": 3.175895765472313e-06, + "loss": 2.7407, "step": 282900 }, { "epoch": 0.94, - "learning_rate": 3.159034182318942e-06, - "loss": 2.7456, + "learning_rate": 3.1593442970260323e-06, + "loss": 2.7461, "step": 283000 }, { "epoch": 0.94, - "learning_rate": 3.1424826042914932e-06, - "loss": 2.7523, + "learning_rate": 3.142792828579752e-06, + "loss": 2.7401, "step": 283100 }, { "epoch": 0.94, - "learning_rate": 3.1259310262640444e-06, - "loss": 2.7531, + "learning_rate": 3.1262413601334707e-06, + "loss": 2.7581, "step": 283200 }, { "epoch": 0.94, - "learning_rate": 3.109379448236595e-06, - "loss": 2.7682, + "learning_rate": 3.1096898916871905e-06, + "loss": 2.7486, "step": 283300 }, { "epoch": 0.94, - "learning_rate": 3.092827870209146e-06, - "loss": 2.7644, + "learning_rate": 3.09313842324091e-06, + "loss": 2.7606, "step": 283400 }, { "epoch": 0.94, - "learning_rate": 3.0762762921816967e-06, - "loss": 2.7639, + "learning_rate": 3.0765869547946294e-06, + "loss": 2.7364, "step": 283500 }, { "epoch": 0.94, - "learning_rate": 3.0597247141542474e-06, - "loss": 2.755, + "learning_rate": 3.060035486348349e-06, + "loss": 2.7462, "step": 283600 }, { "epoch": 0.94, - "learning_rate": 3.0431731361267986e-06, - "loss": 2.7569, + "learning_rate": 3.043484017902068e-06, + "loss": 2.7555, "step": 283700 }, { "epoch": 0.94, - "learning_rate": 3.0266215580993494e-06, - "loss": 2.7662, + "learning_rate": 3.026932549455788e-06, + "loss": 2.7372, "step": 283800 }, { "epoch": 0.94, - "learning_rate": 3.0100699800719e-06, - "loss": 2.7598, + "learning_rate": 3.0103810810095074e-06, + "loss": 2.7559, "step": 283900 }, { "epoch": 0.94, - "learning_rate": 2.9935184020444513e-06, - "loss": 2.7594, + "learning_rate": 2.993829612563227e-06, + "loss": 2.7416, "step": 284000 }, { "epoch": 0.94, - "learning_rate": 2.976966824017002e-06, - "loss": 2.7604, + "learning_rate": 2.9772781441169462e-06, + "loss": 2.7477, "step": 284100 }, { "epoch": 0.94, - "learning_rate": 2.960415245989553e-06, - "loss": 2.7594, + "learning_rate": 2.9607266756706656e-06, + "loss": 2.7414, "step": 284200 }, { "epoch": 0.94, - "learning_rate": 2.9438636679621036e-06, - "loss": 2.77, + "learning_rate": 2.944175207224385e-06, + "loss": 2.7488, "step": 284300 }, { "epoch": 0.94, - "learning_rate": 2.9273120899346547e-06, - "loss": 2.7569, + "learning_rate": 2.9276237387781044e-06, + "loss": 2.7358, "step": 284400 }, { "epoch": 0.94, - "learning_rate": 2.9107605119072055e-06, - "loss": 2.747, + "learning_rate": 2.911072270331824e-06, + "loss": 2.7428, "step": 284500 }, { "epoch": 0.94, - "learning_rate": 2.8942089338797562e-06, - "loss": 2.7644, + "learning_rate": 2.8945208018855432e-06, + "loss": 2.7465, "step": 284600 }, { "epoch": 0.94, - "learning_rate": 2.8776573558523074e-06, - "loss": 2.755, + "learning_rate": 2.8779693334392627e-06, + "loss": 2.7357, "step": 284700 }, { "epoch": 0.94, - "learning_rate": 2.861105777824858e-06, - "loss": 2.7556, + "learning_rate": 2.8614178649929825e-06, + "loss": 2.7398, "step": 284800 }, { "epoch": 0.94, - "learning_rate": 2.844554199797409e-06, - "loss": 2.7634, + "learning_rate": 2.844866396546702e-06, + "loss": 2.7407, "step": 284900 }, { "epoch": 0.94, - "learning_rate": 2.8280026217699597e-06, - "loss": 2.7685, + "learning_rate": 2.8283149281004213e-06, + "loss": 2.7411, "step": 285000 }, { "epoch": 0.94, - "learning_rate": 2.811451043742511e-06, - "loss": 2.7717, + "learning_rate": 2.8117634596541407e-06, + "loss": 2.7504, "step": 285100 }, { "epoch": 0.94, - "learning_rate": 2.7948994657150616e-06, - "loss": 2.7484, + "learning_rate": 2.79521199120786e-06, + "loss": 2.7491, "step": 285200 }, { "epoch": 0.94, - "learning_rate": 2.7783478876876124e-06, - "loss": 2.7598, + "learning_rate": 2.7786605227615795e-06, + "loss": 2.7464, "step": 285300 }, { "epoch": 0.94, - "learning_rate": 2.761796309660163e-06, - "loss": 2.7702, + "learning_rate": 2.762109054315299e-06, + "loss": 2.744, "step": 285400 }, { "epoch": 0.95, - "learning_rate": 2.7452447316327143e-06, - "loss": 2.7638, + "learning_rate": 2.7455575858690187e-06, + "loss": 2.7457, "step": 285500 }, { "epoch": 0.95, - "learning_rate": 2.728693153605265e-06, - "loss": 2.7581, + "learning_rate": 2.7290061174227377e-06, + "loss": 2.7404, "step": 285600 }, { "epoch": 0.95, - "learning_rate": 2.7121415755778158e-06, - "loss": 2.759, + "learning_rate": 2.712454648976457e-06, + "loss": 2.7522, "step": 285700 }, { "epoch": 0.95, - "learning_rate": 2.6955899975503665e-06, - "loss": 2.7583, + "learning_rate": 2.695903180530177e-06, + "loss": 2.756, "step": 285800 }, { "epoch": 0.95, - "learning_rate": 2.6790384195229173e-06, - "loss": 2.7643, + "learning_rate": 2.679351712083896e-06, + "loss": 2.7467, "step": 285900 }, { "epoch": 0.95, - "learning_rate": 2.662486841495468e-06, - "loss": 2.7443, + "learning_rate": 2.6628002436376158e-06, + "loss": 2.7523, "step": 286000 }, { "epoch": 0.95, - "learning_rate": 2.645935263468019e-06, - "loss": 2.7604, + "learning_rate": 2.646248775191335e-06, + "loss": 2.7469, "step": 286100 }, { "epoch": 0.95, - "learning_rate": 2.62938368544057e-06, - "loss": 2.7428, + "learning_rate": 2.6296973067450546e-06, + "loss": 2.74, "step": 286200 }, { "epoch": 0.95, - "learning_rate": 2.6128321074131207e-06, - "loss": 2.76, + "learning_rate": 2.613145838298774e-06, + "loss": 2.7507, "step": 286300 }, { "epoch": 0.95, - "learning_rate": 2.5962805293856715e-06, - "loss": 2.775, + "learning_rate": 2.5965943698524934e-06, + "loss": 2.75, "step": 286400 }, { "epoch": 0.95, - "learning_rate": 2.5797289513582227e-06, - "loss": 2.7718, + "learning_rate": 2.580042901406213e-06, + "loss": 2.7437, "step": 286500 }, { "epoch": 0.95, - "learning_rate": 2.5631773733307734e-06, - "loss": 2.7489, + "learning_rate": 2.5634914329599322e-06, + "loss": 2.7476, "step": 286600 }, { "epoch": 0.95, - "learning_rate": 2.546625795303324e-06, - "loss": 2.7576, + "learning_rate": 2.546939964513652e-06, + "loss": 2.7286, "step": 286700 }, { "epoch": 0.95, - "learning_rate": 2.530074217275875e-06, - "loss": 2.7615, + "learning_rate": 2.530388496067371e-06, + "loss": 2.7356, "step": 286800 }, { "epoch": 0.95, - "learning_rate": 2.513522639248426e-06, - "loss": 2.7633, + "learning_rate": 2.5138370276210904e-06, + "loss": 2.7454, "step": 286900 }, { "epoch": 0.95, - "learning_rate": 2.496971061220977e-06, - "loss": 2.7699, + "learning_rate": 2.4972855591748103e-06, + "loss": 2.7489, "step": 287000 }, { "epoch": 0.95, - "learning_rate": 2.4804194831935276e-06, - "loss": 2.764, + "learning_rate": 2.4807340907285292e-06, + "loss": 2.7597, "step": 287100 }, { "epoch": 0.95, - "learning_rate": 2.4638679051660788e-06, - "loss": 2.7634, + "learning_rate": 2.464182622282249e-06, + "loss": 2.7469, "step": 287200 }, { "epoch": 0.95, - "learning_rate": 2.4473163271386295e-06, - "loss": 2.7695, + "learning_rate": 2.4476311538359685e-06, + "loss": 2.7373, "step": 287300 }, { "epoch": 0.95, - "learning_rate": 2.4307647491111803e-06, - "loss": 2.753, + "learning_rate": 2.431079685389688e-06, + "loss": 2.7459, "step": 287400 }, { "epoch": 0.95, - "learning_rate": 2.414213171083731e-06, - "loss": 2.7665, + "learning_rate": 2.4145282169434073e-06, + "loss": 2.7457, "step": 287500 }, { "epoch": 0.95, - "learning_rate": 2.3976615930562822e-06, - "loss": 2.7699, + "learning_rate": 2.3979767484971267e-06, + "loss": 2.7496, "step": 287600 }, { "epoch": 0.95, - "learning_rate": 2.381110015028833e-06, - "loss": 2.7685, + "learning_rate": 2.3814252800508465e-06, + "loss": 2.7484, "step": 287700 }, { "epoch": 0.95, - "learning_rate": 2.3645584370013837e-06, - "loss": 2.7666, + "learning_rate": 2.3648738116045655e-06, + "loss": 2.7411, "step": 287800 }, { "epoch": 0.95, - "learning_rate": 2.348006858973935e-06, - "loss": 2.7665, + "learning_rate": 2.3483223431582853e-06, + "loss": 2.7451, "step": 287900 }, { "epoch": 0.95, - "learning_rate": 2.3314552809464857e-06, - "loss": 2.7586, + "learning_rate": 2.3317708747120047e-06, + "loss": 2.7447, "step": 288000 }, { "epoch": 0.95, - "learning_rate": 2.3149037029190364e-06, - "loss": 2.7492, + "learning_rate": 2.3152194062657237e-06, + "loss": 2.7529, "step": 288100 }, { "epoch": 0.95, - "learning_rate": 2.298352124891587e-06, - "loss": 2.7686, + "learning_rate": 2.2986679378194436e-06, + "loss": 2.7578, "step": 288200 }, { "epoch": 0.95, - "learning_rate": 2.2818005468641383e-06, - "loss": 2.7525, + "learning_rate": 2.282116469373163e-06, + "loss": 2.7456, "step": 288300 }, { "epoch": 0.95, - "learning_rate": 2.265248968836689e-06, - "loss": 2.7564, + "learning_rate": 2.2655650009268824e-06, + "loss": 2.7471, "step": 288400 }, { "epoch": 0.96, - "learning_rate": 2.24869739080924e-06, - "loss": 2.7599, + "learning_rate": 2.2490135324806018e-06, + "loss": 2.7395, "step": 288500 }, { "epoch": 0.96, - "learning_rate": 2.2321458127817906e-06, - "loss": 2.7499, + "learning_rate": 2.232462064034321e-06, + "loss": 2.7413, "step": 288600 }, { "epoch": 0.96, - "learning_rate": 2.2155942347543418e-06, - "loss": 2.7649, + "learning_rate": 2.2159105955880406e-06, + "loss": 2.7405, "step": 288700 }, { "epoch": 0.96, - "learning_rate": 2.1990426567268925e-06, - "loss": 2.7458, + "learning_rate": 2.19935912714176e-06, + "loss": 2.7453, "step": 288800 }, { "epoch": 0.96, - "learning_rate": 2.1824910786994433e-06, - "loss": 2.7644, + "learning_rate": 2.18280765869548e-06, + "loss": 2.7467, "step": 288900 }, { "epoch": 0.96, - "learning_rate": 2.1659395006719945e-06, - "loss": 2.7678, + "learning_rate": 2.166256190249199e-06, + "loss": 2.7482, "step": 289000 }, { "epoch": 0.96, - "learning_rate": 2.149387922644545e-06, - "loss": 2.7589, + "learning_rate": 2.1497047218029186e-06, + "loss": 2.7424, "step": 289100 }, { "epoch": 0.96, - "learning_rate": 2.132836344617096e-06, - "loss": 2.7483, + "learning_rate": 2.133153253356638e-06, + "loss": 2.7557, "step": 289200 }, { "epoch": 0.96, - "learning_rate": 2.1162847665896467e-06, - "loss": 2.7625, + "learning_rate": 2.116601784910357e-06, + "loss": 2.7541, "step": 289300 }, { "epoch": 0.96, - "learning_rate": 2.099733188562198e-06, - "loss": 2.7619, + "learning_rate": 2.100050316464077e-06, + "loss": 2.7388, "step": 289400 }, { "epoch": 0.96, - "learning_rate": 2.0831816105347486e-06, - "loss": 2.7583, + "learning_rate": 2.0834988480177963e-06, + "loss": 2.7535, "step": 289500 }, { "epoch": 0.96, - "learning_rate": 2.0666300325072994e-06, - "loss": 2.7717, + "learning_rate": 2.0669473795715157e-06, + "loss": 2.7298, "step": 289600 }, { "epoch": 0.96, - "learning_rate": 2.05007845447985e-06, - "loss": 2.75, + "learning_rate": 2.050395911125235e-06, + "loss": 2.745, "step": 289700 }, { "epoch": 0.96, - "learning_rate": 2.0335268764524013e-06, - "loss": 2.7632, + "learning_rate": 2.0338444426789545e-06, + "loss": 2.7455, "step": 289800 }, { "epoch": 0.96, - "learning_rate": 2.016975298424952e-06, - "loss": 2.7626, + "learning_rate": 2.0172929742326743e-06, + "loss": 2.738, "step": 289900 }, { "epoch": 0.96, - "learning_rate": 2.000423720397503e-06, - "loss": 2.7623, + "learning_rate": 2.0007415057863933e-06, + "loss": 2.7626, "step": 290000 }, { "epoch": 0.96, - "learning_rate": 1.9838721423700536e-06, - "loss": 2.755, + "learning_rate": 1.984190037340113e-06, + "loss": 2.7373, "step": 290100 }, { "epoch": 0.96, - "learning_rate": 1.9673205643426043e-06, - "loss": 2.7518, + "learning_rate": 1.9676385688938325e-06, + "loss": 2.7464, "step": 290200 }, { "epoch": 0.96, - "learning_rate": 1.950768986315155e-06, - "loss": 2.7627, + "learning_rate": 1.9510871004475515e-06, + "loss": 2.7452, "step": 290300 }, { "epoch": 0.96, - "learning_rate": 1.9342174082877063e-06, - "loss": 2.7697, + "learning_rate": 1.9345356320012713e-06, + "loss": 2.7437, "step": 290400 }, { "epoch": 0.96, - "learning_rate": 1.917665830260257e-06, - "loss": 2.7543, + "learning_rate": 1.9179841635549907e-06, + "loss": 2.7349, "step": 290500 }, { "epoch": 0.96, - "learning_rate": 1.901114252232808e-06, - "loss": 2.7607, + "learning_rate": 1.90143269510871e-06, + "loss": 2.7581, "step": 290600 }, { "epoch": 0.96, - "learning_rate": 1.8845626742053587e-06, - "loss": 2.7785, + "learning_rate": 1.8848812266624296e-06, + "loss": 2.7641, "step": 290700 }, { "epoch": 0.96, - "learning_rate": 1.8680110961779097e-06, - "loss": 2.7622, + "learning_rate": 1.8683297582161492e-06, + "loss": 2.7371, "step": 290800 }, { "epoch": 0.96, - "learning_rate": 1.8514595181504607e-06, - "loss": 2.7556, + "learning_rate": 1.8517782897698684e-06, + "loss": 2.7527, "step": 290900 }, { "epoch": 0.96, - "learning_rate": 1.8349079401230114e-06, - "loss": 2.7532, + "learning_rate": 1.835226821323588e-06, + "loss": 2.7542, "step": 291000 }, { "epoch": 0.96, - "learning_rate": 1.8183563620955624e-06, - "loss": 2.7485, + "learning_rate": 1.8186753528773074e-06, + "loss": 2.7375, "step": 291100 }, { "epoch": 0.96, - "learning_rate": 1.8018047840681131e-06, - "loss": 2.7499, + "learning_rate": 1.8021238844310266e-06, + "loss": 2.745, "step": 291200 }, { "epoch": 0.96, - "learning_rate": 1.7852532060406639e-06, - "loss": 2.7686, + "learning_rate": 1.7855724159847462e-06, + "loss": 2.744, "step": 291300 }, { "epoch": 0.96, - "learning_rate": 1.7687016280132146e-06, - "loss": 2.7707, + "learning_rate": 1.7690209475384658e-06, + "loss": 2.7416, "step": 291400 }, { "epoch": 0.96, - "learning_rate": 1.7521500499857658e-06, - "loss": 2.7797, + "learning_rate": 1.752469479092185e-06, + "loss": 2.7422, "step": 291500 }, { "epoch": 0.97, - "learning_rate": 1.7355984719583166e-06, - "loss": 2.7593, + "learning_rate": 1.7359180106459046e-06, + "loss": 2.7445, "step": 291600 }, { "epoch": 0.97, - "learning_rate": 1.7190468939308673e-06, - "loss": 2.7539, + "learning_rate": 1.719366542199624e-06, + "loss": 2.7469, "step": 291700 }, { "epoch": 0.97, - "learning_rate": 1.7024953159034185e-06, - "loss": 2.7644, + "learning_rate": 1.7028150737533432e-06, + "loss": 2.7418, "step": 291800 }, { "epoch": 0.97, - "learning_rate": 1.6859437378759693e-06, - "loss": 2.7752, + "learning_rate": 1.6862636053070629e-06, + "loss": 2.7358, "step": 291900 }, { "epoch": 0.97, - "learning_rate": 1.66939215984852e-06, - "loss": 2.7482, + "learning_rate": 1.6697121368607825e-06, + "loss": 2.758, "step": 292000 }, { "epoch": 0.97, - "learning_rate": 1.6528405818210708e-06, - "loss": 2.757, + "learning_rate": 1.6531606684145017e-06, + "loss": 2.74, "step": 292100 }, { "epoch": 0.97, - "learning_rate": 1.636289003793622e-06, - "loss": 2.7621, + "learning_rate": 1.6366091999682213e-06, + "loss": 2.7506, "step": 292200 }, { "epoch": 0.97, - "learning_rate": 1.6197374257661727e-06, - "loss": 2.7622, + "learning_rate": 1.6200577315219407e-06, + "loss": 2.7442, "step": 292300 }, { "epoch": 0.97, - "learning_rate": 1.6031858477387234e-06, - "loss": 2.7672, + "learning_rate": 1.6035062630756603e-06, + "loss": 2.7545, "step": 292400 }, { "epoch": 0.97, - "learning_rate": 1.5866342697112742e-06, - "loss": 2.764, + "learning_rate": 1.5869547946293795e-06, + "loss": 2.7404, "step": 292500 }, { "epoch": 0.97, - "learning_rate": 1.5700826916838254e-06, - "loss": 2.7668, + "learning_rate": 1.5704033261830991e-06, + "loss": 2.7335, "step": 292600 }, { "epoch": 0.97, - "learning_rate": 1.5535311136563761e-06, - "loss": 2.7534, + "learning_rate": 1.5538518577368185e-06, + "loss": 2.7331, "step": 292700 }, { "epoch": 0.97, - "learning_rate": 1.536979535628927e-06, - "loss": 2.767, + "learning_rate": 1.537300389290538e-06, + "loss": 2.7512, "step": 292800 }, { "epoch": 0.97, - "learning_rate": 1.5204279576014778e-06, - "loss": 2.7643, + "learning_rate": 1.5207489208442573e-06, + "loss": 2.7392, "step": 292900 }, { "epoch": 0.97, - "learning_rate": 1.5038763795740286e-06, - "loss": 2.757, + "learning_rate": 1.5041974523979767e-06, + "loss": 2.7439, "step": 293000 }, { "epoch": 0.97, - "learning_rate": 1.4873248015465796e-06, - "loss": 2.7616, + "learning_rate": 1.4876459839516964e-06, + "loss": 2.7444, "step": 293100 }, { "epoch": 0.97, - "learning_rate": 1.4707732235191303e-06, - "loss": 2.775, + "learning_rate": 1.4710945155054158e-06, + "loss": 2.7449, "step": 293200 }, { "epoch": 0.97, - "learning_rate": 1.454221645491681e-06, - "loss": 2.7669, + "learning_rate": 1.4545430470591352e-06, + "loss": 2.7497, "step": 293300 }, { "epoch": 0.97, - "learning_rate": 1.437670067464232e-06, - "loss": 2.767, + "learning_rate": 1.4379915786128546e-06, + "loss": 2.7465, "step": 293400 }, { "epoch": 0.97, - "learning_rate": 1.421118489436783e-06, - "loss": 2.7733, + "learning_rate": 1.421440110166574e-06, + "loss": 2.7511, "step": 293500 }, { "epoch": 0.97, - "learning_rate": 1.4045669114093338e-06, - "loss": 2.7495, + "learning_rate": 1.4048886417202934e-06, + "loss": 2.7475, "step": 293600 }, { "epoch": 0.97, - "learning_rate": 1.3880153333818847e-06, - "loss": 2.7554, + "learning_rate": 1.388337173274013e-06, + "loss": 2.7375, "step": 293700 }, { "epoch": 0.97, - "learning_rate": 1.3714637553544355e-06, - "loss": 2.7616, + "learning_rate": 1.3717857048277324e-06, + "loss": 2.7464, "step": 293800 }, { "epoch": 0.97, - "learning_rate": 1.3549121773269864e-06, - "loss": 2.7534, + "learning_rate": 1.3552342363814518e-06, + "loss": 2.7384, "step": 293900 }, { "epoch": 0.97, - "learning_rate": 1.3383605992995372e-06, - "loss": 2.7447, + "learning_rate": 1.3386827679351712e-06, + "loss": 2.7525, "step": 294000 }, { "epoch": 0.97, - "learning_rate": 1.3218090212720882e-06, - "loss": 2.7555, + "learning_rate": 1.3221312994888906e-06, + "loss": 2.743, "step": 294100 }, { "epoch": 0.97, - "learning_rate": 1.3052574432446391e-06, - "loss": 2.7642, + "learning_rate": 1.30557983104261e-06, + "loss": 2.7583, "step": 294200 }, { "epoch": 0.97, - "learning_rate": 1.2887058652171899e-06, - "loss": 2.7557, + "learning_rate": 1.2890283625963297e-06, + "loss": 2.7431, "step": 294300 }, { "epoch": 0.97, - "learning_rate": 1.2721542871897408e-06, - "loss": 2.7666, + "learning_rate": 1.272476894150049e-06, + "loss": 2.7308, "step": 294400 }, { "epoch": 0.97, - "learning_rate": 1.2556027091622916e-06, - "loss": 2.7571, + "learning_rate": 1.2559254257037687e-06, + "loss": 2.733, "step": 294500 }, { "epoch": 0.98, - "learning_rate": 1.2390511311348426e-06, - "loss": 2.7461, + "learning_rate": 1.2393739572574879e-06, + "loss": 2.7525, "step": 294600 }, { "epoch": 0.98, - "learning_rate": 1.2224995531073933e-06, - "loss": 2.7574, + "learning_rate": 1.2228224888112073e-06, + "loss": 2.7322, "step": 294700 }, { "epoch": 0.98, - "learning_rate": 1.2059479750799443e-06, - "loss": 2.7596, + "learning_rate": 1.206271020364927e-06, + "loss": 2.7517, "step": 294800 }, { "epoch": 0.98, - "learning_rate": 1.189396397052495e-06, - "loss": 2.7662, + "learning_rate": 1.1897195519186463e-06, + "loss": 2.7369, "step": 294900 }, { "epoch": 0.98, - "learning_rate": 1.172844819025046e-06, - "loss": 2.7579, + "learning_rate": 1.1731680834723657e-06, + "loss": 2.7407, "step": 295000 }, { "epoch": 0.98, - "learning_rate": 1.1562932409975967e-06, - "loss": 2.7448, + "learning_rate": 1.1566166150260851e-06, + "loss": 2.7378, "step": 295100 }, { "epoch": 0.98, - "learning_rate": 1.1397416629701475e-06, - "loss": 2.7772, + "learning_rate": 1.1400651465798045e-06, + "loss": 2.7395, "step": 295200 }, { "epoch": 0.98, - "learning_rate": 1.1231900849426985e-06, - "loss": 2.7506, + "learning_rate": 1.123513678133524e-06, + "loss": 2.7352, "step": 295300 }, { "epoch": 0.98, - "learning_rate": 1.1066385069152492e-06, - "loss": 2.7724, + "learning_rate": 1.1069622096872435e-06, + "loss": 2.7434, "step": 295400 }, { "epoch": 0.98, - "learning_rate": 1.0900869288878002e-06, - "loss": 2.7708, + "learning_rate": 1.090410741240963e-06, + "loss": 2.7483, "step": 295500 }, { "epoch": 0.98, - "learning_rate": 1.073535350860351e-06, - "loss": 2.7595, + "learning_rate": 1.0738592727946826e-06, + "loss": 2.7582, "step": 295600 }, { "epoch": 0.98, - "learning_rate": 1.056983772832902e-06, - "loss": 2.7468, + "learning_rate": 1.0573078043484018e-06, + "loss": 2.7475, "step": 295700 }, { "epoch": 0.98, - "learning_rate": 1.0404321948054529e-06, - "loss": 2.7579, + "learning_rate": 1.0407563359021212e-06, + "loss": 2.7477, "step": 295800 }, { "epoch": 0.98, - "learning_rate": 1.0238806167780036e-06, - "loss": 2.7605, + "learning_rate": 1.0242048674558408e-06, + "loss": 2.7503, "step": 295900 }, { "epoch": 0.98, - "learning_rate": 1.0073290387505546e-06, - "loss": 2.7573, + "learning_rate": 1.0076533990095602e-06, + "loss": 2.7432, "step": 296000 }, { "epoch": 0.98, - "learning_rate": 9.907774607231053e-07, - "loss": 2.7577, + "learning_rate": 9.911019305632796e-07, + "loss": 2.7523, "step": 296100 }, { "epoch": 0.98, - "learning_rate": 9.742258826956563e-07, - "loss": 2.7602, + "learning_rate": 9.745504621169992e-07, + "loss": 2.7503, "step": 296200 }, { "epoch": 0.98, - "learning_rate": 9.57674304668207e-07, - "loss": 2.7655, + "learning_rate": 9.579989936707184e-07, + "loss": 2.7481, "step": 296300 }, { "epoch": 0.98, - "learning_rate": 9.41122726640758e-07, - "loss": 2.7562, + "learning_rate": 9.414475252244379e-07, + "loss": 2.7505, "step": 296400 }, { "epoch": 0.98, - "learning_rate": 9.245711486133088e-07, - "loss": 2.7671, + "learning_rate": 9.248960567781574e-07, + "loss": 2.7301, "step": 296500 }, { "epoch": 0.98, - "learning_rate": 9.080195705858597e-07, - "loss": 2.7582, + "learning_rate": 9.083445883318768e-07, + "loss": 2.7475, "step": 296600 }, { "epoch": 0.98, - "learning_rate": 8.914679925584106e-07, - "loss": 2.7398, + "learning_rate": 8.917931198855962e-07, + "loss": 2.743, "step": 296700 }, { "epoch": 0.98, - "learning_rate": 8.749164145309613e-07, - "loss": 2.7573, + "learning_rate": 8.752416514393158e-07, + "loss": 2.7464, "step": 296800 }, { "epoch": 0.98, - "learning_rate": 8.583648365035123e-07, - "loss": 2.7617, + "learning_rate": 8.586901829930352e-07, + "loss": 2.7416, "step": 296900 }, { "epoch": 0.98, - "learning_rate": 8.418132584760631e-07, - "loss": 2.7601, + "learning_rate": 8.421387145467547e-07, + "loss": 2.7406, "step": 297000 }, { "epoch": 0.98, - "learning_rate": 8.25261680448614e-07, - "loss": 2.778, + "learning_rate": 8.255872461004741e-07, + "loss": 2.7322, "step": 297100 }, { "epoch": 0.98, - "learning_rate": 8.087101024211648e-07, - "loss": 2.7619, + "learning_rate": 8.090357776541935e-07, + "loss": 2.7264, "step": 297200 }, { "epoch": 0.98, - "learning_rate": 7.921585243937158e-07, - "loss": 2.7577, + "learning_rate": 7.92484309207913e-07, + "loss": 2.7506, "step": 297300 }, { "epoch": 0.98, - "learning_rate": 7.756069463662666e-07, - "loss": 2.7464, + "learning_rate": 7.759328407616324e-07, + "loss": 2.7487, "step": 297400 }, { "epoch": 0.98, - "learning_rate": 7.590553683388175e-07, - "loss": 2.7474, + "learning_rate": 7.593813723153519e-07, + "loss": 2.7492, "step": 297500 }, { "epoch": 0.99, - "learning_rate": 7.425037903113683e-07, - "loss": 2.7609, + "learning_rate": 7.428299038690712e-07, + "loss": 2.7277, "step": 297600 }, { "epoch": 0.99, - "learning_rate": 7.259522122839192e-07, - "loss": 2.7664, + "learning_rate": 7.262784354227907e-07, + "loss": 2.7638, "step": 297700 }, { "epoch": 0.99, - "learning_rate": 7.0940063425647e-07, - "loss": 2.7487, + "learning_rate": 7.097269669765102e-07, + "loss": 2.742, "step": 297800 }, { "epoch": 0.99, - "learning_rate": 6.928490562290209e-07, - "loss": 2.7596, + "learning_rate": 6.931754985302296e-07, + "loss": 2.7582, "step": 297900 }, { "epoch": 0.99, - "learning_rate": 6.762974782015718e-07, - "loss": 2.7481, + "learning_rate": 6.76624030083949e-07, + "loss": 2.7386, "step": 298000 }, { "epoch": 0.99, - "learning_rate": 6.597459001741226e-07, - "loss": 2.7671, + "learning_rate": 6.600725616376686e-07, + "loss": 2.7488, "step": 298100 }, { "epoch": 0.99, - "learning_rate": 6.431943221466735e-07, - "loss": 2.7591, + "learning_rate": 6.43521093191388e-07, + "loss": 2.75, "step": 298200 }, { "epoch": 0.99, - "learning_rate": 6.266427441192243e-07, - "loss": 2.7642, + "learning_rate": 6.269696247451074e-07, + "loss": 2.7419, "step": 298300 }, { "epoch": 0.99, - "learning_rate": 6.100911660917752e-07, - "loss": 2.7509, + "learning_rate": 6.104181562988269e-07, + "loss": 2.738, "step": 298400 }, { "epoch": 0.99, - "learning_rate": 5.935395880643261e-07, - "loss": 2.7657, + "learning_rate": 5.938666878525463e-07, + "loss": 2.7521, "step": 298500 }, { "epoch": 0.99, - "learning_rate": 5.76988010036877e-07, - "loss": 2.7588, + "learning_rate": 5.773152194062658e-07, + "loss": 2.7501, "step": 298600 }, { "epoch": 0.99, - "learning_rate": 5.604364320094279e-07, - "loss": 2.7548, + "learning_rate": 5.607637509599852e-07, + "loss": 2.7357, "step": 298700 }, { "epoch": 0.99, - "learning_rate": 5.438848539819786e-07, - "loss": 2.7734, + "learning_rate": 5.442122825137046e-07, + "loss": 2.748, "step": 298800 }, { "epoch": 0.99, - "learning_rate": 5.273332759545295e-07, - "loss": 2.7631, + "learning_rate": 5.276608140674241e-07, + "loss": 2.7403, "step": 298900 }, { "epoch": 0.99, - "learning_rate": 5.107816979270804e-07, - "loss": 2.7631, + "learning_rate": 5.111093456211435e-07, + "loss": 2.7486, "step": 299000 }, { "epoch": 0.99, - "learning_rate": 4.942301198996312e-07, - "loss": 2.7572, + "learning_rate": 4.945578771748629e-07, + "loss": 2.7265, "step": 299100 }, { "epoch": 0.99, - "learning_rate": 4.776785418721821e-07, - "loss": 2.7583, + "learning_rate": 4.780064087285825e-07, + "loss": 2.7547, "step": 299200 }, { "epoch": 0.99, - "learning_rate": 4.6112696384473293e-07, - "loss": 2.7644, + "learning_rate": 4.614549402823019e-07, + "loss": 2.7536, "step": 299300 }, { "epoch": 0.99, - "learning_rate": 4.445753858172839e-07, - "loss": 2.7586, + "learning_rate": 4.4490347183602127e-07, + "loss": 2.728, "step": 299400 }, { "epoch": 0.99, - "learning_rate": 4.280238077898347e-07, - "loss": 2.7583, + "learning_rate": 4.283520033897408e-07, + "loss": 2.733, "step": 299500 }, { "epoch": 0.99, - "learning_rate": 4.1147222976238556e-07, - "loss": 2.746, + "learning_rate": 4.1180053494346024e-07, + "loss": 2.7592, "step": 299600 }, { "epoch": 0.99, - "learning_rate": 3.949206517349364e-07, - "loss": 2.761, + "learning_rate": 3.952490664971797e-07, + "loss": 2.7456, "step": 299700 }, { "epoch": 0.99, - "learning_rate": 3.783690737074873e-07, - "loss": 2.7645, + "learning_rate": 3.7869759805089905e-07, + "loss": 2.7258, "step": 299800 }, { "epoch": 0.99, - "learning_rate": 3.6181749568003814e-07, - "loss": 2.7573, + "learning_rate": 3.6214612960461856e-07, + "loss": 2.7382, "step": 299900 }, { "epoch": 0.99, - "learning_rate": 3.4526591765258905e-07, - "loss": 2.7519, + "learning_rate": 3.4559466115833797e-07, + "loss": 2.7404, "step": 300000 }, { "epoch": 0.99, - "learning_rate": 3.2871433962513986e-07, - "loss": 2.761, + "learning_rate": 3.290431927120574e-07, + "loss": 2.7367, "step": 300100 }, { "epoch": 0.99, - "learning_rate": 3.121627615976907e-07, - "loss": 2.7798, + "learning_rate": 3.124917242657769e-07, + "loss": 2.7528, "step": 300200 }, { "epoch": 0.99, - "learning_rate": 2.9561118357024157e-07, - "loss": 2.7589, + "learning_rate": 2.9594025581949634e-07, + "loss": 2.745, "step": 300300 }, { "epoch": 0.99, - "learning_rate": 2.790596055427925e-07, - "loss": 2.7619, + "learning_rate": 2.7938878737321575e-07, + "loss": 2.7525, "step": 300400 }, { "epoch": 0.99, - "learning_rate": 2.6250802751534334e-07, - "loss": 2.7644, + "learning_rate": 2.628373189269352e-07, + "loss": 2.7259, "step": 300500 }, { "epoch": 1.0, - "learning_rate": 2.459564494878942e-07, - "loss": 2.765, + "learning_rate": 2.4628585048065467e-07, + "loss": 2.7397, "step": 300600 }, { "epoch": 1.0, - "learning_rate": 2.2940487146044504e-07, - "loss": 2.7732, + "learning_rate": 2.2973438203437407e-07, + "loss": 2.7356, "step": 300700 }, { "epoch": 1.0, - "learning_rate": 2.1285329343299592e-07, - "loss": 2.7586, + "learning_rate": 2.1318291358809356e-07, + "loss": 2.7418, "step": 300800 }, { "epoch": 1.0, - "learning_rate": 1.9630171540554678e-07, - "loss": 2.7565, + "learning_rate": 1.96631445141813e-07, + "loss": 2.744, "step": 300900 }, { "epoch": 1.0, - "learning_rate": 1.7975013737809764e-07, - "loss": 2.7629, + "learning_rate": 1.8007997669553242e-07, + "loss": 2.7339, "step": 301000 }, { "epoch": 1.0, - "learning_rate": 1.631985593506485e-07, - "loss": 2.7454, + "learning_rate": 1.6352850824925188e-07, + "loss": 2.7423, "step": 301100 }, { "epoch": 1.0, - "learning_rate": 1.4664698132319936e-07, - "loss": 2.7648, + "learning_rate": 1.4697703980297131e-07, + "loss": 2.7457, "step": 301200 }, { "epoch": 1.0, - "learning_rate": 1.3009540329575022e-07, - "loss": 2.757, + "learning_rate": 1.3042557135669077e-07, + "loss": 2.743, "step": 301300 }, { "epoch": 1.0, - "learning_rate": 1.1354382526830108e-07, - "loss": 2.7507, + "learning_rate": 1.1387410291041022e-07, + "loss": 2.7527, "step": 301400 }, { "epoch": 1.0, - "learning_rate": 9.699224724085195e-08, - "loss": 2.7675, + "learning_rate": 9.732263446412967e-08, + "loss": 2.7325, "step": 301500 }, { "epoch": 1.0, - "learning_rate": 8.044066921340281e-08, - "loss": 2.7443, + "learning_rate": 8.07711660178491e-08, + "loss": 2.7482, "step": 301600 }, { "epoch": 1.0, - "learning_rate": 6.388909118595368e-08, - "loss": 2.7635, + "learning_rate": 6.421969757156856e-08, + "loss": 2.7457, "step": 301700 }, { "epoch": 1.0, - "learning_rate": 4.733751315850453e-08, - "loss": 2.7542, + "learning_rate": 4.7668229125288e-08, + "loss": 2.7431, "step": 301800 }, { "epoch": 1.0, - "learning_rate": 3.07859351310554e-08, - "loss": 2.7685, + "learning_rate": 3.111676067900744e-08, + "loss": 2.7425, "step": 301900 }, { "epoch": 1.0, - "learning_rate": 1.423435710360626e-08, - "loss": 2.7444, + "learning_rate": 1.456529223272689e-08, + "loss": 2.7382, "step": 302000 } ], - "max_steps": 302086, + "max_steps": 302088, "num_train_epochs": 1, "total_flos": 8.184418510307328e+18, "trial_name": null,