diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,2245 +1,5953 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 2.9985838559579205, - "global_step": 1851, + "epoch": 4.0, + "global_step": 4944, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.01, - "learning_rate": 1.0526315789473684e-05, - "loss": 11.5, + "epoch": 0.0, + "learning_rate": 2.0161290322580645e-07, + "loss": 1.7344, "step": 5 }, { - "epoch": 0.02, - "learning_rate": 2.105263157894737e-05, - "loss": 6.7393, + "epoch": 0.01, + "learning_rate": 4.032258064516129e-07, + "loss": 1.6586, "step": 10 }, { - "epoch": 0.02, - "learning_rate": 3.157894736842106e-05, - "loss": 3.7191, + "epoch": 0.01, + "learning_rate": 6.048387096774194e-07, + "loss": 1.6465, "step": 15 }, { - "epoch": 0.03, - "learning_rate": 3.999997059313686e-05, - "loss": 3.1877, + "epoch": 0.02, + "learning_rate": 8.064516129032258e-07, + "loss": 1.5832, "step": 20 }, { - "epoch": 0.04, - "learning_rate": 3.999894136200706e-05, - "loss": 2.9055, + "epoch": 0.02, + "learning_rate": 1.0080645161290323e-06, + "loss": 1.5574, "step": 25 }, { - "epoch": 0.05, - "learning_rate": 3.9996441874195635e-05, - "loss": 2.9141, + "epoch": 0.02, + "learning_rate": 1.2096774193548388e-06, + "loss": 1.4381, "step": 30 }, { - "epoch": 0.06, - "learning_rate": 3.999247231345674e-05, - "loss": 2.7834, + "epoch": 0.03, + "learning_rate": 1.4112903225806455e-06, + "loss": 1.5215, "step": 35 }, { - "epoch": 0.06, - "learning_rate": 3.998703297161948e-05, - "loss": 2.7901, + "epoch": 0.03, + "learning_rate": 1.6129032258064516e-06, + "loss": 1.5566, "step": 40 }, { - "epoch": 0.07, - "learning_rate": 3.9980124248566466e-05, - "loss": 2.8676, + "epoch": 0.04, + "learning_rate": 1.8145161290322583e-06, + "loss": 1.5641, "step": 45 }, { - "epoch": 0.08, - "learning_rate": 3.9971746652204386e-05, - "loss": 2.6787, + "epoch": 0.04, + "learning_rate": 2.0161290322580646e-06, + "loss": 1.4707, "step": 50 }, { - "epoch": 0.09, - "learning_rate": 3.996190079842669e-05, - "loss": 2.704, + "epoch": 0.04, + "learning_rate": 2.217741935483871e-06, + "loss": 1.4484, "step": 55 }, { - "epoch": 0.1, - "learning_rate": 3.995058741106831e-05, - "loss": 2.6286, + "epoch": 0.05, + "learning_rate": 2.4193548387096776e-06, + "loss": 1.4656, "step": 60 }, { - "epoch": 0.11, - "learning_rate": 3.993780732185244e-05, - "loss": 2.6983, + "epoch": 0.05, + "learning_rate": 2.620967741935484e-06, + "loss": 1.3938, "step": 65 }, { - "epoch": 0.11, - "learning_rate": 3.992356147032939e-05, - "loss": 2.6522, + "epoch": 0.06, + "learning_rate": 2.822580645161291e-06, + "loss": 1.4152, "step": 70 }, { - "epoch": 0.12, - "learning_rate": 3.9907850903807514e-05, - "loss": 2.6293, + "epoch": 0.06, + "learning_rate": 3.024193548387097e-06, + "loss": 1.4182, "step": 75 }, { - "epoch": 0.13, - "learning_rate": 3.989067677727622e-05, - "loss": 2.5399, + "epoch": 0.06, + "learning_rate": 3.225806451612903e-06, + "loss": 1.5051, "step": 80 }, { - "epoch": 0.14, - "learning_rate": 3.987204035332105e-05, - "loss": 2.6687, + "epoch": 0.07, + "learning_rate": 3.4274193548387097e-06, + "loss": 1.4918, "step": 85 }, { - "epoch": 0.15, - "learning_rate": 3.985194300203087e-05, - "loss": 2.6291, + "epoch": 0.07, + "learning_rate": 3.6290322580645166e-06, + "loss": 1.4738, "step": 90 }, { - "epoch": 0.15, - "learning_rate": 3.983038620089714e-05, - "loss": 2.6533, + "epoch": 0.08, + "learning_rate": 3.830645161290323e-06, + "loss": 1.4035, "step": 95 }, { - "epoch": 0.16, - "learning_rate": 3.980737153470528e-05, - "loss": 2.5923, + "epoch": 0.08, + "learning_rate": 4.032258064516129e-06, + "loss": 1.4367, "step": 100 }, { - "epoch": 0.17, - "learning_rate": 3.97829006954182e-05, - "loss": 2.5941, + "epoch": 0.08, + "learning_rate": 4.233870967741936e-06, + "loss": 1.4076, "step": 105 }, { - "epoch": 0.18, - "learning_rate": 3.9756975482051855e-05, - "loss": 2.6612, + "epoch": 0.09, + "learning_rate": 4.435483870967742e-06, + "loss": 1.4902, "step": 110 }, { - "epoch": 0.19, - "learning_rate": 3.972959780054306e-05, - "loss": 2.6058, + "epoch": 0.09, + "learning_rate": 4.637096774193548e-06, + "loss": 1.3578, "step": 115 }, { - "epoch": 0.19, - "learning_rate": 3.9700769663609304e-05, - "loss": 2.5226, + "epoch": 0.1, + "learning_rate": 4.838709677419355e-06, + "loss": 1.4467, "step": 120 }, { - "epoch": 0.2, - "learning_rate": 3.967049319060081e-05, - "loss": 2.5573, + "epoch": 0.1, + "learning_rate": 5.040322580645161e-06, + "loss": 1.4766, "step": 125 }, { - "epoch": 0.21, - "learning_rate": 3.963877060734473e-05, - "loss": 2.603, + "epoch": 0.11, + "learning_rate": 5.241935483870968e-06, + "loss": 1.4828, "step": 130 }, { - "epoch": 0.22, - "learning_rate": 3.9605604245981515e-05, - "loss": 2.6506, + "epoch": 0.11, + "learning_rate": 5.443548387096774e-06, + "loss": 1.4258, "step": 135 }, { - "epoch": 0.23, - "learning_rate": 3.9570996544793445e-05, - "loss": 2.631, + "epoch": 0.11, + "learning_rate": 5.645161290322582e-06, + "loss": 1.4602, "step": 140 }, { - "epoch": 0.23, - "learning_rate": 3.9534950048025396e-05, - "loss": 2.5668, + "epoch": 0.12, + "learning_rate": 5.846774193548388e-06, + "loss": 1.4902, "step": 145 }, { - "epoch": 0.24, - "learning_rate": 3.9497467405697756e-05, - "loss": 2.6354, + "epoch": 0.12, + "learning_rate": 6.048387096774194e-06, + "loss": 1.3729, "step": 150 }, { - "epoch": 0.25, - "learning_rate": 3.9458551373411664e-05, - "loss": 2.5427, + "epoch": 0.13, + "learning_rate": 6.25e-06, + "loss": 1.4902, "step": 155 }, { - "epoch": 0.26, - "learning_rate": 3.941820481214637e-05, - "loss": 2.5586, + "epoch": 0.13, + "learning_rate": 6.451612903225806e-06, + "loss": 1.435, "step": 160 }, { - "epoch": 0.27, - "learning_rate": 3.937643068804896e-05, - "loss": 2.5577, + "epoch": 0.13, + "learning_rate": 6.653225806451613e-06, + "loss": 1.4096, "step": 165 }, { - "epoch": 0.28, - "learning_rate": 3.933323207221624e-05, - "loss": 2.5664, + "epoch": 0.14, + "learning_rate": 6.854838709677419e-06, + "loss": 1.4508, "step": 170 }, { - "epoch": 0.28, - "learning_rate": 3.9288612140468984e-05, - "loss": 2.5396, + "epoch": 0.14, + "learning_rate": 7.056451612903227e-06, + "loss": 1.4266, "step": 175 }, { - "epoch": 0.29, - "learning_rate": 3.924257417311846e-05, - "loss": 2.5558, + "epoch": 0.15, + "learning_rate": 7.258064516129033e-06, + "loss": 1.4648, "step": 180 }, { - "epoch": 0.3, - "learning_rate": 3.919512155472529e-05, - "loss": 2.5306, + "epoch": 0.15, + "learning_rate": 7.459677419354839e-06, + "loss": 1.3996, "step": 185 }, { - "epoch": 0.31, - "learning_rate": 3.9146257773850585e-05, - "loss": 2.4945, + "epoch": 0.15, + "learning_rate": 7.661290322580646e-06, + "loss": 1.4152, "step": 190 }, { - "epoch": 0.32, - "learning_rate": 3.9095986422799506e-05, - "loss": 2.6086, + "epoch": 0.16, + "learning_rate": 7.862903225806451e-06, + "loss": 1.4648, "step": 195 }, { - "epoch": 0.32, - "learning_rate": 3.904431119735718e-05, - "loss": 2.4973, + "epoch": 0.16, + "learning_rate": 8.064516129032258e-06, + "loss": 1.3128, "step": 200 }, { - "epoch": 0.33, - "learning_rate": 3.899123589651695e-05, - "loss": 2.5872, + "epoch": 0.17, + "learning_rate": 8.266129032258065e-06, + "loss": 1.4242, "step": 205 }, { - "epoch": 0.34, - "learning_rate": 3.893676442220114e-05, - "loss": 2.5216, + "epoch": 0.17, + "learning_rate": 8.467741935483872e-06, + "loss": 1.4648, "step": 210 }, { - "epoch": 0.35, - "learning_rate": 3.888090077897418e-05, - "loss": 2.5367, + "epoch": 0.17, + "learning_rate": 8.669354838709677e-06, + "loss": 1.4527, "step": 215 }, { - "epoch": 0.36, - "learning_rate": 3.882364907374819e-05, - "loss": 2.5495, + "epoch": 0.18, + "learning_rate": 8.870967741935484e-06, + "loss": 1.4, "step": 220 }, { - "epoch": 0.36, - "learning_rate": 3.8765013515481065e-05, - "loss": 2.6037, + "epoch": 0.18, + "learning_rate": 9.072580645161291e-06, + "loss": 1.4984, "step": 225 }, { - "epoch": 0.37, - "learning_rate": 3.870499841486705e-05, - "loss": 2.5012, + "epoch": 0.19, + "learning_rate": 9.274193548387097e-06, + "loss": 1.4297, "step": 230 }, { - "epoch": 0.38, - "learning_rate": 3.864360818401982e-05, - "loss": 2.4773, + "epoch": 0.19, + "learning_rate": 9.475806451612905e-06, + "loss": 1.4645, "step": 235 }, { - "epoch": 0.39, - "learning_rate": 3.8580847336148105e-05, - "loss": 2.52, + "epoch": 0.19, + "learning_rate": 9.67741935483871e-06, + "loss": 1.4107, "step": 240 }, { - "epoch": 0.4, - "learning_rate": 3.851672048522395e-05, - "loss": 2.4718, + "epoch": 0.2, + "learning_rate": 9.879032258064517e-06, + "loss": 1.4328, "step": 245 }, { - "epoch": 0.4, - "learning_rate": 3.845123234564343e-05, - "loss": 2.5251, + "epoch": 0.2, + "learning_rate": 9.999995524479982e-06, + "loss": 1.4279, "step": 250 }, { - "epoch": 0.41, - "learning_rate": 3.838438773188014e-05, - "loss": 2.5547, + "epoch": 0.21, + "learning_rate": 9.999945174971776e-06, + "loss": 1.4738, "step": 255 }, { - "epoch": 0.42, - "learning_rate": 3.831619155813119e-05, - "loss": 2.475, + "epoch": 0.21, + "learning_rate": 9.999838882120566e-06, + "loss": 1.3898, "step": 260 }, { - "epoch": 0.43, - "learning_rate": 3.8246648837955965e-05, - "loss": 2.4957, + "epoch": 0.21, + "learning_rate": 9.999676647115646e-06, + "loss": 1.3346, "step": 265 }, { - "epoch": 0.44, - "learning_rate": 3.817576468390753e-05, - "loss": 2.5425, + "epoch": 0.22, + "learning_rate": 9.999458471772225e-06, + "loss": 1.5164, "step": 270 }, { - "epoch": 0.45, - "learning_rate": 3.810354430715678e-05, - "loss": 2.5495, + "epoch": 0.22, + "learning_rate": 9.999184358531422e-06, + "loss": 1.4148, "step": 275 }, { - "epoch": 0.45, - "learning_rate": 3.802999301710932e-05, - "loss": 2.5375, + "epoch": 0.23, + "learning_rate": 9.998854310460233e-06, + "loss": 1.4273, "step": 280 }, { - "epoch": 0.46, - "learning_rate": 3.795511622101516e-05, - "loss": 2.5151, + "epoch": 0.23, + "learning_rate": 9.998468331251499e-06, + "loss": 1.4592, "step": 285 }, { - "epoch": 0.47, - "learning_rate": 3.787891942357115e-05, - "loss": 2.4864, + "epoch": 0.23, + "learning_rate": 9.998026425223858e-06, + "loss": 1.4504, "step": 290 }, { - "epoch": 0.48, - "learning_rate": 3.780140822651633e-05, - "loss": 2.5396, + "epoch": 0.24, + "learning_rate": 9.997528597321704e-06, + "loss": 1.448, "step": 295 }, { - "epoch": 0.49, - "learning_rate": 3.77225883282201e-05, - "loss": 2.5585, + "epoch": 0.24, + "learning_rate": 9.996974853115132e-06, + "loss": 1.4523, "step": 300 }, { - "epoch": 0.49, - "learning_rate": 3.764246552326328e-05, - "loss": 2.4947, + "epoch": 0.25, + "learning_rate": 9.996365198799868e-06, + "loss": 1.5063, "step": 305 }, { - "epoch": 0.5, - "learning_rate": 3.756104570201213e-05, - "loss": 2.5004, + "epoch": 0.25, + "learning_rate": 9.99569964119721e-06, + "loss": 1.4051, "step": 310 }, { - "epoch": 0.51, - "learning_rate": 3.747833485018529e-05, - "loss": 2.4729, + "epoch": 0.25, + "learning_rate": 9.99497818775394e-06, + "loss": 1.4312, "step": 315 }, { - "epoch": 0.52, - "learning_rate": 3.739433904841375e-05, - "loss": 2.4764, + "epoch": 0.26, + "learning_rate": 9.994200846542251e-06, + "loss": 1.4758, "step": 320 }, { - "epoch": 0.53, - "learning_rate": 3.7309064471793794e-05, - "loss": 2.5369, + "epoch": 0.26, + "learning_rate": 9.993367626259652e-06, + "loss": 1.468, "step": 325 }, { - "epoch": 0.53, - "learning_rate": 3.7222517389433085e-05, - "loss": 2.4836, + "epoch": 0.27, + "learning_rate": 9.99247853622887e-06, + "loss": 1.527, "step": 330 }, { - "epoch": 0.54, - "learning_rate": 3.7134704163989705e-05, - "loss": 2.4558, + "epoch": 0.27, + "learning_rate": 9.991533586397751e-06, + "loss": 1.4969, "step": 335 }, { - "epoch": 0.55, - "learning_rate": 3.7045631251204434e-05, - "loss": 2.5562, + "epoch": 0.28, + "learning_rate": 9.990532787339137e-06, + "loss": 1.4996, "step": 340 }, { - "epoch": 0.56, - "learning_rate": 3.6955305199426164e-05, - "loss": 2.4603, + "epoch": 0.28, + "learning_rate": 9.98947615025076e-06, + "loss": 1.5262, "step": 345 }, { - "epoch": 0.57, - "learning_rate": 3.6863732649130426e-05, - "loss": 2.4176, + "epoch": 0.28, + "learning_rate": 9.988363686955116e-06, + "loss": 1.5992, "step": 350 }, { - "epoch": 0.57, - "learning_rate": 3.677092033243128e-05, - "loss": 2.4991, + "epoch": 0.29, + "learning_rate": 9.987195409899322e-06, + "loss": 1.4711, "step": 355 }, { - "epoch": 0.58, - "learning_rate": 3.667687507258631e-05, - "loss": 2.4846, + "epoch": 0.29, + "learning_rate": 9.985971332154985e-06, + "loss": 1.45, "step": 360 }, { - "epoch": 0.59, - "learning_rate": 3.658160378349508e-05, - "loss": 2.5998, + "epoch": 0.3, + "learning_rate": 9.984691467418057e-06, + "loss": 1.4863, "step": 365 }, { - "epoch": 0.6, - "learning_rate": 3.648511346919079e-05, - "loss": 2.5061, + "epoch": 0.3, + "learning_rate": 9.983355830008678e-06, + "loss": 1.5219, "step": 370 }, { - "epoch": 0.61, - "learning_rate": 3.638741122332539e-05, - "loss": 2.4538, + "epoch": 0.3, + "learning_rate": 9.981964434871015e-06, + "loss": 1.5977, "step": 375 }, { - "epoch": 0.62, - "learning_rate": 3.628850422864807e-05, - "loss": 2.498, + "epoch": 0.31, + "learning_rate": 9.980517297573097e-06, + "loss": 1.4539, "step": 380 }, { - "epoch": 0.62, - "learning_rate": 3.618839975647718e-05, - "loss": 2.4867, + "epoch": 0.31, + "learning_rate": 9.979014434306642e-06, + "loss": 1.3713, "step": 385 }, { - "epoch": 0.63, - "learning_rate": 3.608710516616575e-05, - "loss": 2.5426, + "epoch": 0.32, + "learning_rate": 9.977455861886874e-06, + "loss": 1.4434, "step": 390 }, { - "epoch": 0.64, - "learning_rate": 3.598462790456035e-05, - "loss": 2.4842, + "epoch": 0.32, + "learning_rate": 9.975841597752334e-06, + "loss": 1.5469, "step": 395 }, { - "epoch": 0.65, - "learning_rate": 3.588097550545368e-05, - "loss": 2.4274, + "epoch": 0.32, + "learning_rate": 9.974171659964688e-06, + "loss": 1.4531, "step": 400 }, { - "epoch": 0.66, - "learning_rate": 3.5776155589030725e-05, - "loss": 2.5354, + "epoch": 0.33, + "learning_rate": 9.972446067208519e-06, + "loss": 1.4828, "step": 405 }, { - "epoch": 0.66, - "learning_rate": 3.5670175861308496e-05, - "loss": 2.455, + "epoch": 0.33, + "learning_rate": 9.970664838791126e-06, + "loss": 1.4512, "step": 410 }, { - "epoch": 0.67, - "learning_rate": 3.556304411356954e-05, - "loss": 2.5039, + "epoch": 0.34, + "learning_rate": 9.9688279946423e-06, + "loss": 1.4076, "step": 415 }, { - "epoch": 0.68, - "learning_rate": 3.545476822178915e-05, - "loss": 2.518, + "epoch": 0.34, + "learning_rate": 9.966935555314107e-06, + "loss": 1.4969, "step": 420 }, { - "epoch": 0.69, - "learning_rate": 3.5345356146056326e-05, - "loss": 2.4718, + "epoch": 0.34, + "learning_rate": 9.96498754198066e-06, + "loss": 1.498, "step": 425 }, { - "epoch": 0.7, - "learning_rate": 3.52348159299886e-05, - "loss": 2.4318, + "epoch": 0.35, + "learning_rate": 9.962983976437868e-06, + "loss": 1.4393, "step": 430 }, { - "epoch": 0.7, - "learning_rate": 3.512315570014071e-05, - "loss": 2.5146, + "epoch": 0.35, + "learning_rate": 9.96092488110321e-06, + "loss": 1.5219, "step": 435 }, { - "epoch": 0.71, - "learning_rate": 3.50103836654071e-05, - "loss": 2.4801, + "epoch": 0.36, + "learning_rate": 9.958810279015474e-06, + "loss": 1.4484, "step": 440 }, { - "epoch": 0.72, - "learning_rate": 3.489650811641849e-05, - "loss": 2.4429, + "epoch": 0.36, + "learning_rate": 9.956640193834501e-06, + "loss": 1.4805, "step": 445 }, { - "epoch": 0.73, - "learning_rate": 3.478153742493235e-05, - "loss": 2.4654, + "epoch": 0.36, + "learning_rate": 9.954414649840922e-06, + "loss": 1.4832, "step": 450 }, { - "epoch": 0.74, - "learning_rate": 3.4665480043217444e-05, - "loss": 2.4846, + "epoch": 0.37, + "learning_rate": 9.952133671935885e-06, + "loss": 1.3789, "step": 455 }, { - "epoch": 0.74, - "learning_rate": 3.454834450343245e-05, - "loss": 2.4371, + "epoch": 0.37, + "learning_rate": 9.949797285640771e-06, + "loss": 1.4773, "step": 460 }, { - "epoch": 0.75, - "learning_rate": 3.443013941699868e-05, - "loss": 2.4071, + "epoch": 0.38, + "learning_rate": 9.947405517096927e-06, + "loss": 1.5246, "step": 465 }, { - "epoch": 0.76, - "learning_rate": 3.431087347396702e-05, - "loss": 2.3886, + "epoch": 0.38, + "learning_rate": 9.944958393065343e-06, + "loss": 1.5094, "step": 470 }, { - "epoch": 0.77, - "learning_rate": 3.419055544237906e-05, - "loss": 2.4539, + "epoch": 0.38, + "learning_rate": 9.942455940926384e-06, + "loss": 1.4253, "step": 475 }, { - "epoch": 0.78, - "learning_rate": 3.40691941676225e-05, - "loss": 2.4406, + "epoch": 0.39, + "learning_rate": 9.939898188679465e-06, + "loss": 1.4396, "step": 480 }, { - "epoch": 0.78, - "learning_rate": 3.394679857178086e-05, - "loss": 2.3687, + "epoch": 0.39, + "learning_rate": 9.93728516494274e-06, + "loss": 1.3863, "step": 485 }, { - "epoch": 0.79, - "learning_rate": 3.382337765297756e-05, - "loss": 2.4244, + "epoch": 0.4, + "learning_rate": 9.934616898952787e-06, + "loss": 1.4965, "step": 490 }, { - "epoch": 0.8, - "learning_rate": 3.3698940484714394e-05, - "loss": 2.4477, + "epoch": 0.4, + "learning_rate": 9.931893420564277e-06, + "loss": 1.5633, "step": 495 }, { - "epoch": 0.81, - "learning_rate": 3.35734962152045e-05, - "loss": 2.5432, + "epoch": 0.4, + "learning_rate": 9.929114760249642e-06, + "loss": 1.3689, "step": 500 }, { - "epoch": 0.82, - "learning_rate": 3.34470540666998e-05, - "loss": 2.4633, + "epoch": 0.41, + "learning_rate": 9.926280949098732e-06, + "loss": 1.4434, "step": 505 }, { - "epoch": 0.83, - "learning_rate": 3.331962333481302e-05, - "loss": 2.4302, + "epoch": 0.41, + "learning_rate": 9.923392018818467e-06, + "loss": 1.4758, "step": 510 }, { - "epoch": 0.83, - "learning_rate": 3.319121338783428e-05, - "loss": 2.4528, + "epoch": 0.42, + "learning_rate": 9.92044800173249e-06, + "loss": 1.4133, "step": 515 }, { - "epoch": 0.84, - "learning_rate": 3.3061833666042416e-05, - "loss": 2.3741, + "epoch": 0.42, + "learning_rate": 9.917448930780786e-06, + "loss": 1.451, "step": 520 }, { - "epoch": 0.85, - "learning_rate": 3.29314936810109e-05, - "loss": 2.4638, + "epoch": 0.42, + "learning_rate": 9.91439483951934e-06, + "loss": 1.5117, "step": 525 }, { - "epoch": 0.86, - "learning_rate": 3.280020301490863e-05, - "loss": 2.4204, + "epoch": 0.43, + "learning_rate": 9.91128576211974e-06, + "loss": 1.4949, "step": 530 }, { - "epoch": 0.87, - "learning_rate": 3.2667971319795473e-05, - "loss": 2.436, + "epoch": 0.43, + "learning_rate": 9.908121733368803e-06, + "loss": 1.3984, "step": 535 }, { - "epoch": 0.87, - "learning_rate": 3.253480831691264e-05, - "loss": 2.4194, + "epoch": 0.44, + "learning_rate": 9.904902788668187e-06, + "loss": 1.6094, "step": 540 }, { - "epoch": 0.88, - "learning_rate": 3.240072379596806e-05, - "loss": 2.3565, + "epoch": 0.44, + "learning_rate": 9.901628964033993e-06, + "loss": 1.452, "step": 545 }, { - "epoch": 0.89, - "learning_rate": 3.226572761441666e-05, - "loss": 2.4421, + "epoch": 0.44, + "learning_rate": 9.89830029609636e-06, + "loss": 1.4293, "step": 550 }, { - "epoch": 0.9, - "learning_rate": 3.2129829696735636e-05, - "loss": 2.4169, + "epoch": 0.45, + "learning_rate": 9.894916822099062e-06, + "loss": 1.4957, "step": 555 }, { - "epoch": 0.91, - "learning_rate": 3.1993040033694916e-05, - "loss": 2.4425, + "epoch": 0.45, + "learning_rate": 9.89147857989908e-06, + "loss": 1.507, "step": 560 }, { - "epoch": 0.91, - "learning_rate": 3.1855368681622584e-05, - "loss": 2.399, + "epoch": 0.46, + "learning_rate": 9.887985607966188e-06, + "loss": 1.4379, "step": 565 }, { - "epoch": 0.92, - "learning_rate": 3.171682576166565e-05, - "loss": 2.3747, + "epoch": 0.46, + "learning_rate": 9.884437945382523e-06, + "loss": 1.4812, "step": 570 }, { - "epoch": 0.93, - "learning_rate": 3.1577421459045905e-05, - "loss": 2.437, + "epoch": 0.47, + "learning_rate": 9.880835631842141e-06, + "loss": 1.4477, "step": 575 }, { - "epoch": 0.94, - "learning_rate": 3.143716602231122e-05, - "loss": 2.4131, + "epoch": 0.47, + "learning_rate": 9.877178707650573e-06, + "loss": 1.4484, "step": 580 }, { - "epoch": 0.95, - "learning_rate": 3.129606976258201e-05, - "loss": 2.4329, + "epoch": 0.47, + "learning_rate": 9.873467213724384e-06, + "loss": 1.4897, "step": 585 }, { - "epoch": 0.95, - "learning_rate": 3.115414305279327e-05, - "loss": 2.4521, + "epoch": 0.48, + "learning_rate": 9.869701191590703e-06, + "loss": 1.4641, "step": 590 }, { - "epoch": 0.96, - "learning_rate": 3.101139632693197e-05, - "loss": 2.3317, + "epoch": 0.48, + "learning_rate": 9.865880683386766e-06, + "loss": 1.4809, "step": 595 }, { - "epoch": 0.97, - "learning_rate": 3.086784007926996e-05, - "loss": 2.4119, + "epoch": 0.49, + "learning_rate": 9.862005731859443e-06, + "loss": 1.4742, "step": 600 }, { - "epoch": 0.98, - "learning_rate": 3.072348486359247e-05, - "loss": 2.4315, + "epoch": 0.49, + "learning_rate": 9.858076380364757e-06, + "loss": 1.4902, "step": 605 }, { - "epoch": 0.99, - "learning_rate": 3.0578341292422286e-05, - "loss": 2.4687, + "epoch": 0.49, + "learning_rate": 9.854092672867399e-06, + "loss": 1.5898, "step": 610 }, { - "epoch": 1.0, - "learning_rate": 3.043242003623947e-05, - "loss": 2.4703, + "epoch": 0.5, + "learning_rate": 9.850054653940247e-06, + "loss": 1.4984, "step": 615 }, { - "epoch": 1.0, - "learning_rate": 3.0285731822696954e-05, - "loss": 2.5997, + "epoch": 0.5, + "learning_rate": 9.845962368763847e-06, + "loss": 1.4898, "step": 620 }, { - "epoch": 1.01, - "learning_rate": 3.0138287435831855e-05, - "loss": 2.0103, + "epoch": 0.51, + "learning_rate": 9.841815863125923e-06, + "loss": 1.5082, "step": 625 }, { - "epoch": 1.02, - "learning_rate": 2.9990097715272694e-05, - "loss": 1.9907, + "epoch": 0.51, + "learning_rate": 9.837615183420866e-06, + "loss": 1.4992, "step": 630 }, { - "epoch": 1.03, - "learning_rate": 2.9841173555442463e-05, - "loss": 1.9292, + "epoch": 0.51, + "learning_rate": 9.8333603766492e-06, + "loss": 1.3744, "step": 635 }, { - "epoch": 1.04, - "learning_rate": 2.9691525904757745e-05, - "loss": 1.9898, + "epoch": 0.52, + "learning_rate": 9.829051490417074e-06, + "loss": 1.527, "step": 640 }, { - "epoch": 1.05, - "learning_rate": 2.954116576482378e-05, - "loss": 1.9234, + "epoch": 0.52, + "learning_rate": 9.824688572935713e-06, + "loss": 1.5008, "step": 645 }, { - "epoch": 1.05, - "learning_rate": 2.9390104189625702e-05, - "loss": 1.8726, + "epoch": 0.53, + "learning_rate": 9.820271673020891e-06, + "loss": 1.4721, "step": 650 }, { - "epoch": 1.06, - "learning_rate": 2.923835228471587e-05, - "loss": 1.9208, + "epoch": 0.53, + "learning_rate": 9.81580084009238e-06, + "loss": 1.5555, "step": 655 }, { - "epoch": 1.07, - "learning_rate": 2.90859212063974e-05, - "loss": 1.9407, + "epoch": 0.53, + "learning_rate": 9.811276124173395e-06, + "loss": 1.5285, "step": 660 }, { - "epoch": 1.08, - "learning_rate": 2.8932822160904038e-05, - "loss": 1.9377, + "epoch": 0.54, + "learning_rate": 9.806697575890043e-06, + "loss": 1.4777, "step": 665 }, { - "epoch": 1.09, - "learning_rate": 2.877906640357628e-05, - "loss": 1.9665, + "epoch": 0.54, + "learning_rate": 9.802065246470738e-06, + "loss": 1.4322, "step": 670 }, { - "epoch": 1.09, - "learning_rate": 2.862466523803393e-05, - "loss": 1.9723, + "epoch": 0.55, + "learning_rate": 9.797379187745652e-06, + "loss": 1.5197, "step": 675 }, { - "epoch": 1.1, - "learning_rate": 2.846963001534507e-05, - "loss": 1.9876, + "epoch": 0.55, + "learning_rate": 9.792639452146116e-06, + "loss": 1.4703, "step": 680 }, { - "epoch": 1.11, - "learning_rate": 2.8313972133191615e-05, - "loss": 1.9405, + "epoch": 0.55, + "learning_rate": 9.787846092704043e-06, + "loss": 1.4539, "step": 685 }, { - "epoch": 1.12, - "learning_rate": 2.8157703035031353e-05, - "loss": 1.9848, + "epoch": 0.56, + "learning_rate": 9.782999163051332e-06, + "loss": 1.4326, "step": 690 }, { - "epoch": 1.13, - "learning_rate": 2.8000834209256665e-05, - "loss": 1.9328, + "epoch": 0.56, + "learning_rate": 9.778098717419266e-06, + "loss": 1.3992, "step": 695 }, { - "epoch": 1.13, - "learning_rate": 2.7843377188349962e-05, - "loss": 1.9343, + "epoch": 0.57, + "learning_rate": 9.773144810637908e-06, + "loss": 1.457, "step": 700 }, { - "epoch": 1.14, - "learning_rate": 2.768534354803581e-05, - "loss": 1.9245, + "epoch": 0.57, + "learning_rate": 9.768137498135489e-06, + "loss": 1.477, "step": 705 }, { - "epoch": 1.15, - "learning_rate": 2.752674490642996e-05, - "loss": 1.9526, + "epoch": 0.57, + "learning_rate": 9.763076835937782e-06, + "loss": 1.4455, "step": 710 }, { - "epoch": 1.16, - "learning_rate": 2.7367592923185207e-05, - "loss": 1.93, + "epoch": 0.58, + "learning_rate": 9.75796288066748e-06, + "loss": 1.442, "step": 715 }, { - "epoch": 1.17, - "learning_rate": 2.720789929863421e-05, - "loss": 1.9263, + "epoch": 0.58, + "learning_rate": 9.752795689543563e-06, + "loss": 1.5156, "step": 720 }, { - "epoch": 1.17, - "learning_rate": 2.7047675772929328e-05, - "loss": 1.9432, + "epoch": 0.59, + "learning_rate": 9.747575320380652e-06, + "loss": 1.5129, "step": 725 }, { - "epoch": 1.18, - "learning_rate": 2.6886934125179504e-05, - "loss": 1.9481, + "epoch": 0.59, + "learning_rate": 9.742301831588368e-06, + "loss": 1.3873, "step": 730 }, { - "epoch": 1.19, - "learning_rate": 2.672568617258432e-05, - "loss": 1.909, + "epoch": 0.59, + "learning_rate": 9.736975282170678e-06, + "loss": 1.448, "step": 735 }, { - "epoch": 1.2, - "learning_rate": 2.6563943769565258e-05, - "loss": 1.9386, + "epoch": 0.6, + "learning_rate": 9.731595731725232e-06, + "loss": 1.4695, "step": 740 }, { - "epoch": 1.21, - "learning_rate": 2.6401718806894144e-05, - "loss": 1.9362, + "epoch": 0.6, + "learning_rate": 9.726163240442695e-06, + "loss": 1.4898, "step": 745 }, { - "epoch": 1.22, - "learning_rate": 2.6239023210819027e-05, - "loss": 1.9494, + "epoch": 0.61, + "learning_rate": 9.720677869106077e-06, + "loss": 1.4619, "step": 750 }, { - "epoch": 1.22, - "learning_rate": 2.6075868942187366e-05, - "loss": 1.9576, + "epoch": 0.61, + "learning_rate": 9.715139679090057e-06, + "loss": 1.5293, "step": 755 }, { - "epoch": 1.23, - "learning_rate": 2.5912267995566746e-05, - "loss": 1.937, + "epoch": 0.61, + "learning_rate": 9.709548732360286e-06, + "loss": 1.4535, "step": 760 }, { - "epoch": 1.24, - "learning_rate": 2.5748232398363044e-05, - "loss": 1.9889, + "epoch": 0.62, + "learning_rate": 9.703905091472698e-06, + "loss": 1.3992, "step": 765 }, { - "epoch": 1.25, - "learning_rate": 2.5583774209936218e-05, - "loss": 1.9285, + "epoch": 0.62, + "learning_rate": 9.698208819572815e-06, + "loss": 1.5254, "step": 770 }, { - "epoch": 1.26, - "learning_rate": 2.5418905520713767e-05, - "loss": 1.895, + "epoch": 0.63, + "learning_rate": 9.692459980395034e-06, + "loss": 1.351, "step": 775 }, { - "epoch": 1.26, - "learning_rate": 2.525363845130185e-05, - "loss": 1.9826, + "epoch": 0.63, + "learning_rate": 9.686658638261916e-06, + "loss": 1.3639, "step": 780 }, { - "epoch": 1.27, - "learning_rate": 2.5087985151594235e-05, - "loss": 1.9869, + "epoch": 0.64, + "learning_rate": 9.680804858083468e-06, + "loss": 1.3813, "step": 785 }, { - "epoch": 1.28, - "learning_rate": 2.4921957799879076e-05, - "loss": 1.9325, + "epoch": 0.64, + "learning_rate": 9.674898705356413e-06, + "loss": 1.4016, "step": 790 }, { - "epoch": 1.29, - "learning_rate": 2.4755568601943615e-05, - "loss": 1.9479, + "epoch": 0.64, + "learning_rate": 9.668940246163464e-06, + "loss": 1.4611, "step": 795 }, { - "epoch": 1.3, - "learning_rate": 2.4588829790176837e-05, - "loss": 1.9616, + "epoch": 0.65, + "learning_rate": 9.662929547172575e-06, + "loss": 1.4902, "step": 800 }, { - "epoch": 1.3, - "learning_rate": 2.4421753622670178e-05, - "loss": 1.9706, + "epoch": 0.65, + "learning_rate": 9.656866675636204e-06, + "loss": 1.5445, "step": 805 }, { - "epoch": 1.31, - "learning_rate": 2.425435238231638e-05, - "loss": 1.9675, + "epoch": 0.66, + "learning_rate": 9.650751699390554e-06, + "loss": 1.5363, "step": 810 }, { - "epoch": 1.32, - "learning_rate": 2.4086638375906484e-05, - "loss": 1.9684, + "epoch": 0.66, + "learning_rate": 9.64458468685482e-06, + "loss": 1.4508, "step": 815 }, { - "epoch": 1.33, - "learning_rate": 2.3918623933225043e-05, - "loss": 1.9388, + "epoch": 0.66, + "learning_rate": 9.638365707030415e-06, + "loss": 1.4367, "step": 820 }, { - "epoch": 1.34, - "learning_rate": 2.375032140614372e-05, - "loss": 1.9326, + "epoch": 0.67, + "learning_rate": 9.632094829500206e-06, + "loss": 1.4594, "step": 825 }, { - "epoch": 1.34, - "learning_rate": 2.3581743167713187e-05, - "loss": 1.9521, + "epoch": 0.67, + "learning_rate": 9.62577212442774e-06, + "loss": 1.498, "step": 830 }, { - "epoch": 1.35, - "learning_rate": 2.3412901611253524e-05, - "loss": 1.9704, + "epoch": 0.68, + "learning_rate": 9.619397662556434e-06, + "loss": 1.4816, "step": 835 }, { - "epoch": 1.36, - "learning_rate": 2.3243809149443077e-05, - "loss": 1.89, + "epoch": 0.68, + "learning_rate": 9.61297151520882e-06, + "loss": 1.4754, "step": 840 }, { - "epoch": 1.37, - "learning_rate": 2.3074478213405937e-05, - "loss": 1.9438, + "epoch": 0.68, + "learning_rate": 9.606493754285712e-06, + "loss": 1.4805, "step": 845 }, { - "epoch": 1.38, - "learning_rate": 2.2904921251798052e-05, - "loss": 1.9682, + "epoch": 0.69, + "learning_rate": 9.599964452265434e-06, + "loss": 1.5539, "step": 850 }, { - "epoch": 1.39, - "learning_rate": 2.2735150729892013e-05, - "loss": 2.008, + "epoch": 0.69, + "learning_rate": 9.593383682202974e-06, + "loss": 1.4996, "step": 855 }, { - "epoch": 1.39, - "learning_rate": 2.2565179128660667e-05, - "loss": 1.9247, + "epoch": 0.7, + "learning_rate": 9.586751517729203e-06, + "loss": 1.484, "step": 860 }, { - "epoch": 1.4, - "learning_rate": 2.2395018943859558e-05, - "loss": 1.9377, + "epoch": 0.7, + "learning_rate": 9.580068033050019e-06, + "loss": 1.4781, "step": 865 }, { - "epoch": 1.41, - "learning_rate": 2.222468268510828e-05, - "loss": 1.9396, + "epoch": 0.7, + "learning_rate": 9.57333330294554e-06, + "loss": 1.5156, "step": 870 }, { - "epoch": 1.42, - "learning_rate": 2.2054182874970808e-05, - "loss": 1.9848, + "epoch": 0.71, + "learning_rate": 9.566547402769255e-06, + "loss": 1.4734, "step": 875 }, { - "epoch": 1.43, - "learning_rate": 2.188353204803486e-05, - "loss": 1.9382, + "epoch": 0.71, + "learning_rate": 9.559710408447185e-06, + "loss": 1.5398, "step": 880 }, { - "epoch": 1.43, - "learning_rate": 2.1712742749990444e-05, - "loss": 1.9431, + "epoch": 0.72, + "learning_rate": 9.55282239647703e-06, + "loss": 1.5621, "step": 885 }, { - "epoch": 1.44, - "learning_rate": 2.154182753670749e-05, - "loss": 1.9833, + "epoch": 0.72, + "learning_rate": 9.545883443927325e-06, + "loss": 1.4793, "step": 890 }, { - "epoch": 1.45, - "learning_rate": 2.1370798973312813e-05, - "loss": 1.9338, + "epoch": 0.72, + "learning_rate": 9.538893628436554e-06, + "loss": 1.3898, "step": 895 }, { - "epoch": 1.46, - "learning_rate": 2.1199669633266353e-05, - "loss": 1.9543, + "epoch": 0.73, + "learning_rate": 9.531853028212308e-06, + "loss": 1.4719, "step": 900 }, { - "epoch": 1.47, - "learning_rate": 2.102845209743682e-05, - "loss": 1.9455, + "epoch": 0.73, + "learning_rate": 9.52476172203039e-06, + "loss": 1.5215, "step": 905 }, { - "epoch": 1.47, - "learning_rate": 2.085715895317679e-05, - "loss": 1.9533, + "epoch": 0.74, + "learning_rate": 9.51761978923395e-06, + "loss": 1.4264, "step": 910 }, { - "epoch": 1.48, - "learning_rate": 2.0685802793397317e-05, - "loss": 2.0128, + "epoch": 0.74, + "learning_rate": 9.51042730973258e-06, + "loss": 1.502, "step": 915 }, { - "epoch": 1.49, - "learning_rate": 2.051439621564216e-05, - "loss": 1.9471, + "epoch": 0.74, + "learning_rate": 9.503184364001432e-06, + "loss": 1.4287, "step": 920 }, { - "epoch": 1.5, - "learning_rate": 2.0342951821161648e-05, - "loss": 1.9474, + "epoch": 0.75, + "learning_rate": 9.495891033080315e-06, + "loss": 1.4693, "step": 925 }, { - "epoch": 1.51, - "learning_rate": 2.017148221398625e-05, - "loss": 1.9946, + "epoch": 0.75, + "learning_rate": 9.488547398572787e-06, + "loss": 1.5344, "step": 930 }, { - "epoch": 1.51, - "learning_rate": 2e-05, - "loss": 1.913, + "epoch": 0.76, + "learning_rate": 9.48115354264524e-06, + "loss": 1.3703, "step": 935 }, { - "epoch": 1.52, - "learning_rate": 1.9828517786013752e-05, - "loss": 1.981, + "epoch": 0.76, + "learning_rate": 9.473709548025987e-06, + "loss": 1.6094, "step": 940 }, { - "epoch": 1.53, - "learning_rate": 1.965704817883836e-05, - "loss": 1.9809, + "epoch": 0.76, + "learning_rate": 9.466215498004328e-06, + "loss": 1.4773, "step": 945 }, { - "epoch": 1.54, - "learning_rate": 1.948560378435784e-05, - "loss": 1.9793, + "epoch": 0.77, + "learning_rate": 9.458671476429624e-06, + "loss": 1.4602, "step": 950 }, { - "epoch": 1.55, - "learning_rate": 1.9314197206602693e-05, - "loss": 1.9207, + "epoch": 0.77, + "learning_rate": 9.45107756771036e-06, + "loss": 1.4798, "step": 955 }, { - "epoch": 1.56, - "learning_rate": 1.914284104682322e-05, - "loss": 1.8926, + "epoch": 0.78, + "learning_rate": 9.443433856813197e-06, + "loss": 1.516, "step": 960 }, { - "epoch": 1.56, - "learning_rate": 1.897154790256319e-05, - "loss": 2.0005, + "epoch": 0.78, + "learning_rate": 9.435740429262016e-06, + "loss": 1.4941, "step": 965 }, { - "epoch": 1.57, - "learning_rate": 1.8800330366733654e-05, - "loss": 1.9432, + "epoch": 0.78, + "learning_rate": 9.427997371136976e-06, + "loss": 1.402, "step": 970 }, { - "epoch": 1.58, - "learning_rate": 1.862920102668719e-05, - "loss": 1.8667, + "epoch": 0.79, + "learning_rate": 9.420204769073538e-06, + "loss": 1.5305, "step": 975 }, { - "epoch": 1.59, - "learning_rate": 1.8458172463292516e-05, - "loss": 1.9405, + "epoch": 0.79, + "learning_rate": 9.4123627102615e-06, + "loss": 1.4604, "step": 980 }, { - "epoch": 1.6, - "learning_rate": 1.828725725000956e-05, - "loss": 1.9617, + "epoch": 0.8, + "learning_rate": 9.404471282444019e-06, + "loss": 1.5129, "step": 985 }, { - "epoch": 1.6, - "learning_rate": 1.8116467951965145e-05, - "loss": 1.9447, + "epoch": 0.8, + "learning_rate": 9.396530573916636e-06, + "loss": 1.5453, "step": 990 }, { - "epoch": 1.61, - "learning_rate": 1.79458171250292e-05, - "loss": 1.9093, + "epoch": 0.81, + "learning_rate": 9.38854067352628e-06, + "loss": 1.4834, "step": 995 }, { - "epoch": 1.62, - "learning_rate": 1.7775317314891724e-05, - "loss": 1.9051, + "epoch": 0.81, + "learning_rate": 9.38050167067028e-06, + "loss": 1.4387, "step": 1000 }, { - "epoch": 1.63, - "learning_rate": 1.7604981056140446e-05, - "loss": 1.916, + "epoch": 0.81, + "learning_rate": 9.372413655295362e-06, + "loss": 1.4383, "step": 1005 }, { - "epoch": 1.64, - "learning_rate": 1.7434820871339336e-05, - "loss": 1.8569, + "epoch": 0.82, + "learning_rate": 9.364276717896639e-06, + "loss": 1.602, "step": 1010 }, { - "epoch": 1.64, - "learning_rate": 1.7264849270107994e-05, - "loss": 1.9163, + "epoch": 0.82, + "learning_rate": 9.356090949516608e-06, + "loss": 1.4193, "step": 1015 }, { - "epoch": 1.65, - "learning_rate": 1.709507874820195e-05, - "loss": 1.9342, + "epoch": 0.83, + "learning_rate": 9.347856441744122e-06, + "loss": 1.5031, "step": 1020 }, { - "epoch": 1.66, - "learning_rate": 1.6925521786594067e-05, - "loss": 1.8947, + "epoch": 0.83, + "learning_rate": 9.339573286713369e-06, + "loss": 1.4828, "step": 1025 }, { - "epoch": 1.67, - "learning_rate": 1.675619085055693e-05, - "loss": 1.9396, + "epoch": 0.83, + "learning_rate": 9.331241577102841e-06, + "loss": 1.5191, "step": 1030 }, { - "epoch": 1.68, - "learning_rate": 1.6587098388746486e-05, - "loss": 1.9416, + "epoch": 0.84, + "learning_rate": 9.322861406134302e-06, + "loss": 1.4305, "step": 1035 }, { - "epoch": 1.68, - "learning_rate": 1.6418256832286816e-05, - "loss": 1.9382, + "epoch": 0.84, + "learning_rate": 9.314432867571732e-06, + "loss": 1.4625, "step": 1040 }, { - "epoch": 1.69, - "learning_rate": 1.6249678593856288e-05, - "loss": 1.9747, + "epoch": 0.85, + "learning_rate": 9.30595605572029e-06, + "loss": 1.5246, "step": 1045 }, { - "epoch": 1.7, - "learning_rate": 1.6081376066774964e-05, - "loss": 1.8799, + "epoch": 0.85, + "learning_rate": 9.297431065425257e-06, + "loss": 1.5227, "step": 1050 }, { - "epoch": 1.71, - "learning_rate": 1.591336162409352e-05, - "loss": 1.8957, + "epoch": 0.85, + "learning_rate": 9.28885799207097e-06, + "loss": 1.5367, "step": 1055 }, { - "epoch": 1.72, - "learning_rate": 1.5745647617683627e-05, - "loss": 1.8921, + "epoch": 0.86, + "learning_rate": 9.280236931579754e-06, + "loss": 1.4879, "step": 1060 }, { - "epoch": 1.73, - "learning_rate": 1.557824637732983e-05, - "loss": 1.9406, + "epoch": 0.86, + "learning_rate": 9.271567980410859e-06, + "loss": 1.5137, "step": 1065 }, { - "epoch": 1.73, - "learning_rate": 1.5411170209823177e-05, - "loss": 1.9282, + "epoch": 0.87, + "learning_rate": 9.26285123555937e-06, + "loss": 1.4449, "step": 1070 }, { - "epoch": 1.74, - "learning_rate": 1.5244431398056392e-05, - "loss": 1.8621, + "epoch": 0.87, + "learning_rate": 9.254086794555121e-06, + "loss": 1.4602, "step": 1075 }, { - "epoch": 1.75, - "learning_rate": 1.5078042200120933e-05, - "loss": 1.9375, + "epoch": 0.87, + "learning_rate": 9.245274755461621e-06, + "loss": 1.4187, "step": 1080 }, { - "epoch": 1.76, - "learning_rate": 1.4912014848405771e-05, - "loss": 1.8779, + "epoch": 0.88, + "learning_rate": 9.23641521687493e-06, + "loss": 1.5391, "step": 1085 }, { - "epoch": 1.77, - "learning_rate": 1.4746361548698151e-05, - "loss": 1.9353, + "epoch": 0.88, + "learning_rate": 9.227508277922579e-06, + "loss": 1.3988, "step": 1090 }, { - "epoch": 1.77, - "learning_rate": 1.4581094479286234e-05, - "loss": 1.9255, + "epoch": 0.89, + "learning_rate": 9.218554038262448e-06, + "loss": 1.5984, "step": 1095 }, { - "epoch": 1.78, - "learning_rate": 1.4416225790063784e-05, - "loss": 1.9163, + "epoch": 0.89, + "learning_rate": 9.209552598081657e-06, + "loss": 1.5109, "step": 1100 }, { - "epoch": 1.79, - "learning_rate": 1.4251767601636965e-05, - "loss": 1.9314, + "epoch": 0.89, + "learning_rate": 9.200504058095439e-06, + "loss": 1.5418, "step": 1105 }, { - "epoch": 1.8, - "learning_rate": 1.4087732004433258e-05, - "loss": 1.8751, + "epoch": 0.9, + "learning_rate": 9.191408519546022e-06, + "loss": 1.4275, "step": 1110 }, { - "epoch": 1.81, - "learning_rate": 1.3924131057812642e-05, - "loss": 1.8934, + "epoch": 0.9, + "learning_rate": 9.182266084201486e-06, + "loss": 1.4074, "step": 1115 }, { - "epoch": 1.81, - "learning_rate": 1.376097678918098e-05, - "loss": 1.9148, + "epoch": 0.91, + "learning_rate": 9.173076854354634e-06, + "loss": 1.5016, "step": 1120 }, { - "epoch": 1.82, - "learning_rate": 1.3598281193105858e-05, - "loss": 1.8754, + "epoch": 0.91, + "learning_rate": 9.16384093282184e-06, + "loss": 1.5188, "step": 1125 }, { - "epoch": 1.83, - "learning_rate": 1.3436056230434747e-05, - "loss": 1.9183, + "epoch": 0.91, + "learning_rate": 9.154558422941901e-06, + "loss": 1.4738, "step": 1130 }, { - "epoch": 1.84, - "learning_rate": 1.3274313827415678e-05, - "loss": 1.9236, + "epoch": 0.92, + "learning_rate": 9.145229428574886e-06, + "loss": 1.4049, "step": 1135 }, { - "epoch": 1.85, - "learning_rate": 1.3113065874820506e-05, - "loss": 1.889, + "epoch": 0.92, + "learning_rate": 9.135854054100961e-06, + "loss": 1.4363, "step": 1140 }, { - "epoch": 1.85, - "learning_rate": 1.295232422707068e-05, - "loss": 1.8898, + "epoch": 0.93, + "learning_rate": 9.126432404419239e-06, + "loss": 1.5211, "step": 1145 }, { - "epoch": 1.86, - "learning_rate": 1.2792100701365794e-05, - "loss": 1.8991, + "epoch": 0.93, + "learning_rate": 9.11696458494659e-06, + "loss": 1.457, "step": 1150 }, { - "epoch": 1.87, - "learning_rate": 1.2632407076814794e-05, - "loss": 1.9559, + "epoch": 0.93, + "learning_rate": 9.107450701616469e-06, + "loss": 1.4543, "step": 1155 }, { - "epoch": 1.88, - "learning_rate": 1.2473255093570039e-05, - "loss": 1.9048, + "epoch": 0.94, + "learning_rate": 9.097890860877732e-06, + "loss": 1.534, "step": 1160 }, { - "epoch": 1.89, - "learning_rate": 1.2314656451964196e-05, - "loss": 1.859, + "epoch": 0.94, + "learning_rate": 9.088285169693442e-06, + "loss": 1.5254, "step": 1165 }, { - "epoch": 1.9, - "learning_rate": 1.2156622811650043e-05, - "loss": 1.8825, + "epoch": 0.95, + "learning_rate": 9.078633735539673e-06, + "loss": 1.5371, "step": 1170 }, { - "epoch": 1.9, - "learning_rate": 1.1999165790743338e-05, - "loss": 1.9094, + "epoch": 0.95, + "learning_rate": 9.068936666404307e-06, + "loss": 1.475, "step": 1175 }, { - "epoch": 1.91, - "learning_rate": 1.1842296964968652e-05, - "loss": 1.937, + "epoch": 0.95, + "learning_rate": 9.059194070785823e-06, + "loss": 1.457, "step": 1180 }, { - "epoch": 1.92, - "learning_rate": 1.1686027866808394e-05, - "loss": 1.8838, + "epoch": 0.96, + "learning_rate": 9.049406057692097e-06, + "loss": 1.4891, "step": 1185 }, { - "epoch": 1.93, - "learning_rate": 1.1530369984654936e-05, - "loss": 1.9023, + "epoch": 0.96, + "learning_rate": 9.03957273663916e-06, + "loss": 1.5648, "step": 1190 }, { - "epoch": 1.94, - "learning_rate": 1.1375334761966074e-05, - "loss": 1.9099, + "epoch": 0.97, + "learning_rate": 9.02969421764999e-06, + "loss": 1.5855, "step": 1195 }, { - "epoch": 1.94, - "learning_rate": 1.122093359642372e-05, - "loss": 1.9058, + "epoch": 0.97, + "learning_rate": 9.019770611253272e-06, + "loss": 1.4734, "step": 1200 }, { - "epoch": 1.95, - "learning_rate": 1.1067177839095957e-05, - "loss": 1.9359, + "epoch": 0.97, + "learning_rate": 9.009802028482169e-06, + "loss": 1.4867, "step": 1205 }, { - "epoch": 1.96, - "learning_rate": 1.0914078793602601e-05, - "loss": 1.8897, + "epoch": 0.98, + "learning_rate": 8.999788580873074e-06, + "loss": 1.5094, "step": 1210 }, { - "epoch": 1.97, - "learning_rate": 1.0761647715284139e-05, - "loss": 1.9341, + "epoch": 0.98, + "learning_rate": 8.989730380464362e-06, + "loss": 1.4965, "step": 1215 }, { - "epoch": 1.98, - "learning_rate": 1.0609895810374304e-05, - "loss": 1.876, + "epoch": 0.99, + "learning_rate": 8.979627539795136e-06, + "loss": 1.4887, "step": 1220 }, { - "epoch": 1.98, - "learning_rate": 1.0458834235176225e-05, - "loss": 1.8287, + "epoch": 0.99, + "learning_rate": 8.969480171903973e-06, + "loss": 1.4398, "step": 1225 }, { - "epoch": 1.99, - "learning_rate": 1.0308474095242267e-05, - "loss": 1.8523, + "epoch": 1.0, + "learning_rate": 8.959288390327656e-06, + "loss": 1.4301, "step": 1230 }, { - "epoch": 2.0, - "learning_rate": 1.0128983382202781e-05, - "loss": 2.0887, + "epoch": 1.0, + "learning_rate": 8.949052309099897e-06, + "loss": 1.5309, "step": 1235 }, { - "epoch": 2.01, - "learning_rate": 9.980205236069665e-06, - "loss": 1.4855, + "epoch": 1.0, + "learning_rate": 8.938772042750078e-06, + "loss": 1.3054, "step": 1240 }, { - "epoch": 2.02, - "learning_rate": 9.832163712437392e-06, - "loss": 1.4915, + "epoch": 1.01, + "learning_rate": 8.928447706301951e-06, + "loss": 1.2152, "step": 1245 }, { - "epoch": 2.03, - "learning_rate": 9.684869694834003e-06, - "loss": 1.4679, + "epoch": 1.01, + "learning_rate": 8.91807941527236e-06, + "loss": 1.2238, "step": 1250 }, { - "epoch": 2.03, - "learning_rate": 9.538334011833363e-06, - "loss": 1.4298, + "epoch": 1.02, + "learning_rate": 8.907667285669955e-06, + "loss": 1.1881, "step": 1255 }, { - "epoch": 2.04, - "learning_rate": 9.392567436259034e-06, - "loss": 1.4018, + "epoch": 1.02, + "learning_rate": 8.897211433993873e-06, + "loss": 1.234, "step": 1260 }, { - "epoch": 2.05, - "learning_rate": 9.247580684392345e-06, - "loss": 1.4642, + "epoch": 1.02, + "learning_rate": 8.886711977232463e-06, + "loss": 1.1724, "step": 1265 }, { - "epoch": 2.06, - "learning_rate": 9.10338441518453e-06, - "loss": 1.4434, + "epoch": 1.03, + "learning_rate": 8.87616903286195e-06, + "loss": 1.3313, "step": 1270 }, { - "epoch": 2.07, - "learning_rate": 8.959989229473125e-06, - "loss": 1.4574, + "epoch": 1.03, + "learning_rate": 8.865582718845142e-06, + "loss": 1.2277, "step": 1275 }, { - "epoch": 2.07, - "learning_rate": 8.817405669202619e-06, - "loss": 1.4256, + "epoch": 1.04, + "learning_rate": 8.854953153630097e-06, + "loss": 1.2145, "step": 1280 }, { - "epoch": 2.08, - "learning_rate": 8.675644216649478e-06, - "loss": 1.4539, + "epoch": 1.04, + "learning_rate": 8.844280456148799e-06, + "loss": 1.243, "step": 1285 }, { - "epoch": 2.09, - "learning_rate": 8.534715293651492e-06, - "loss": 1.5016, + "epoch": 1.04, + "learning_rate": 8.833564745815835e-06, + "loss": 1.1816, "step": 1290 }, { - "epoch": 2.1, - "learning_rate": 8.39462926084159e-06, - "loss": 1.4738, + "epoch": 1.05, + "learning_rate": 8.82280614252705e-06, + "loss": 1.1965, "step": 1295 }, { - "epoch": 2.11, - "learning_rate": 8.255396416886194e-06, - "loss": 1.4265, + "epoch": 1.05, + "learning_rate": 8.81200476665821e-06, + "loss": 1.2035, "step": 1300 }, { - "epoch": 2.11, - "learning_rate": 8.117026997728079e-06, - "loss": 1.4235, + "epoch": 1.06, + "learning_rate": 8.801160739063657e-06, + "loss": 1.2477, "step": 1305 }, { - "epoch": 2.12, - "learning_rate": 7.979531175833828e-06, - "loss": 1.5084, + "epoch": 1.06, + "learning_rate": 8.790274181074951e-06, + "loss": 1.1686, "step": 1310 }, { - "epoch": 2.13, - "learning_rate": 7.842919059446046e-06, - "loss": 1.4426, + "epoch": 1.06, + "learning_rate": 8.779345214499517e-06, + "loss": 1.1877, "step": 1315 }, { - "epoch": 2.14, - "learning_rate": 7.707200691840173e-06, - "loss": 1.4797, + "epoch": 1.07, + "learning_rate": 8.768373961619283e-06, + "loss": 1.2209, "step": 1320 }, { - "epoch": 2.15, - "learning_rate": 7.572386050586196e-06, - "loss": 1.4309, + "epoch": 1.07, + "learning_rate": 8.757360545189308e-06, + "loss": 1.2066, "step": 1325 }, { - "epoch": 2.16, - "learning_rate": 7.438485046815078e-06, - "loss": 1.4505, + "epoch": 1.08, + "learning_rate": 8.746305088436406e-06, + "loss": 1.2484, "step": 1330 }, { - "epoch": 2.16, - "learning_rate": 7.305507524490145e-06, - "loss": 1.4734, + "epoch": 1.08, + "learning_rate": 8.735207715057779e-06, + "loss": 1.2068, "step": 1335 }, { - "epoch": 2.17, - "learning_rate": 7.1734632596834106e-06, - "loss": 1.397, + "epoch": 1.08, + "learning_rate": 8.724068549219618e-06, + "loss": 1.1803, "step": 1340 }, { - "epoch": 2.18, - "learning_rate": 7.042361959856825e-06, - "loss": 1.4341, + "epoch": 1.09, + "learning_rate": 8.712887715555728e-06, + "loss": 1.234, "step": 1345 }, { - "epoch": 2.19, - "learning_rate": 6.912213263148673e-06, - "loss": 1.4599, + "epoch": 1.09, + "learning_rate": 8.701665339166122e-06, + "loss": 1.2441, "step": 1350 }, { - "epoch": 2.2, - "learning_rate": 6.783026737664942e-06, - "loss": 1.4466, + "epoch": 1.1, + "learning_rate": 8.690401545615626e-06, + "loss": 1.2082, "step": 1355 }, { - "epoch": 2.2, - "learning_rate": 6.654811880775973e-06, - "loss": 1.4435, + "epoch": 1.1, + "learning_rate": 8.679096460932477e-06, + "loss": 1.2176, "step": 1360 }, { - "epoch": 2.21, - "learning_rate": 6.527578118418187e-06, - "loss": 1.4597, + "epoch": 1.1, + "learning_rate": 8.667750211606906e-06, + "loss": 1.2516, "step": 1365 }, { - "epoch": 2.22, - "learning_rate": 6.401334804401171e-06, - "loss": 1.4217, + "epoch": 1.11, + "learning_rate": 8.65636292458973e-06, + "loss": 1.1766, "step": 1370 }, { - "epoch": 2.23, - "learning_rate": 6.276091219719984e-06, - "loss": 1.4477, + "epoch": 1.11, + "learning_rate": 8.644934727290927e-06, + "loss": 1.2277, "step": 1375 }, { - "epoch": 2.24, - "learning_rate": 6.151856571872854e-06, - "loss": 1.4716, + "epoch": 1.12, + "learning_rate": 8.63346574757821e-06, + "loss": 1.2773, "step": 1380 }, { - "epoch": 2.24, - "learning_rate": 6.028639994184277e-06, - "loss": 1.4398, + "epoch": 1.12, + "learning_rate": 8.621956113775601e-06, + "loss": 1.2162, "step": 1385 }, { - "epoch": 2.25, - "learning_rate": 5.906450545133564e-06, - "loss": 1.4442, + "epoch": 1.12, + "learning_rate": 8.610405954661988e-06, + "loss": 1.2551, "step": 1390 }, { - "epoch": 2.26, - "learning_rate": 5.785297207688905e-06, - "loss": 1.4506, + "epoch": 1.13, + "learning_rate": 8.598815399469694e-06, + "loss": 1.2625, "step": 1395 }, { - "epoch": 2.27, - "learning_rate": 5.665188888646935e-06, - "loss": 1.4123, + "epoch": 1.13, + "learning_rate": 8.587184577883018e-06, + "loss": 1.2465, "step": 1400 }, { - "epoch": 2.28, - "learning_rate": 5.546134417977984e-06, - "loss": 1.456, + "epoch": 1.14, + "learning_rate": 8.5755136200368e-06, + "loss": 1.2008, "step": 1405 }, { - "epoch": 2.28, - "learning_rate": 5.428142548176876e-06, - "loss": 1.4274, + "epoch": 1.14, + "learning_rate": 8.563802656514946e-06, + "loss": 1.1623, "step": 1410 }, { - "epoch": 2.29, - "learning_rate": 5.311221953619514e-06, - "loss": 1.4062, + "epoch": 1.14, + "learning_rate": 8.552051818348986e-06, + "loss": 1.1625, "step": 1415 }, { - "epoch": 2.3, - "learning_rate": 5.195381229925156e-06, - "loss": 1.427, + "epoch": 1.15, + "learning_rate": 8.540261237016597e-06, + "loss": 1.1723, "step": 1420 }, { - "epoch": 2.31, - "learning_rate": 5.080628893324475e-06, - "loss": 1.4783, + "epoch": 1.15, + "learning_rate": 8.528431044440127e-06, + "loss": 1.268, "step": 1425 }, { - "epoch": 2.32, - "learning_rate": 4.9669733800334955e-06, - "loss": 1.4356, + "epoch": 1.16, + "learning_rate": 8.516561372985137e-06, + "loss": 1.2488, "step": 1430 }, { - "epoch": 2.33, - "learning_rate": 4.854423045633392e-06, - "loss": 1.4809, + "epoch": 1.16, + "learning_rate": 8.504652355458901e-06, + "loss": 1.298, "step": 1435 }, { - "epoch": 2.33, - "learning_rate": 4.742986164456196e-06, - "loss": 1.4079, + "epoch": 1.17, + "learning_rate": 8.492704125108933e-06, + "loss": 1.2168, "step": 1440 }, { - "epoch": 2.34, - "learning_rate": 4.632670928976501e-06, - "loss": 1.4884, + "epoch": 1.17, + "learning_rate": 8.480716815621486e-06, + "loss": 1.2166, "step": 1445 }, { - "epoch": 2.35, - "learning_rate": 4.523485449209195e-06, - "loss": 1.4499, + "epoch": 1.17, + "learning_rate": 8.468690561120064e-06, + "loss": 1.201, "step": 1450 }, { - "epoch": 2.36, - "learning_rate": 4.415437752113223e-06, - "loss": 1.4065, + "epoch": 1.18, + "learning_rate": 8.456625496163921e-06, + "loss": 1.2266, "step": 1455 }, { - "epoch": 2.37, - "learning_rate": 4.308535781001457e-06, - "loss": 1.4888, + "epoch": 1.18, + "learning_rate": 8.444521755746547e-06, + "loss": 1.1812, "step": 1460 }, { - "epoch": 2.37, - "learning_rate": 4.202787394956769e-06, - "loss": 1.4707, + "epoch": 1.19, + "learning_rate": 8.43237947529417e-06, + "loss": 1.1762, "step": 1465 }, { - "epoch": 2.38, - "learning_rate": 4.0982003682542146e-06, - "loss": 1.4426, + "epoch": 1.19, + "learning_rate": 8.420198790664232e-06, + "loss": 1.2473, "step": 1470 }, { - "epoch": 2.39, - "learning_rate": 3.994782389789535e-06, - "loss": 1.3991, + "epoch": 1.19, + "learning_rate": 8.407979838143869e-06, + "loss": 1.1887, "step": 1475 }, { - "epoch": 2.4, - "learning_rate": 3.892541062513853e-06, - "loss": 1.4187, + "epoch": 1.2, + "learning_rate": 8.395722754448392e-06, + "loss": 1.2277, "step": 1480 }, { - "epoch": 2.41, - "learning_rate": 3.7914839028747507e-06, - "loss": 1.4248, + "epoch": 1.2, + "learning_rate": 8.38342767671975e-06, + "loss": 1.2418, "step": 1485 }, { - "epoch": 2.41, - "learning_rate": 3.691618340263701e-06, - "loss": 1.447, + "epoch": 1.21, + "learning_rate": 8.371094742525006e-06, + "loss": 1.2081, "step": 1490 }, { - "epoch": 2.42, - "learning_rate": 3.5929517164698436e-06, - "loss": 1.4394, + "epoch": 1.21, + "learning_rate": 8.358724089854784e-06, + "loss": 1.3461, "step": 1495 }, { - "epoch": 2.43, - "learning_rate": 3.495491285140282e-06, - "loss": 1.4359, + "epoch": 1.21, + "learning_rate": 8.346315857121732e-06, + "loss": 1.1977, "step": 1500 }, { - "epoch": 2.44, - "learning_rate": 3.399244211246779e-06, - "loss": 1.4752, + "epoch": 1.22, + "learning_rate": 8.33387018315898e-06, + "loss": 1.2336, "step": 1505 }, { - "epoch": 2.45, - "learning_rate": 3.304217570559052e-06, - "loss": 1.4508, + "epoch": 1.22, + "learning_rate": 8.321387207218578e-06, + "loss": 1.249, "step": 1510 }, { - "epoch": 2.45, - "learning_rate": 3.2104183491245466e-06, - "loss": 1.4718, + "epoch": 1.23, + "learning_rate": 8.308867068969933e-06, + "loss": 1.2188, "step": 1515 }, { - "epoch": 2.46, - "learning_rate": 3.117853442754879e-06, - "loss": 1.4514, + "epoch": 1.23, + "learning_rate": 8.296309908498264e-06, + "loss": 1.1823, "step": 1520 }, { - "epoch": 2.47, - "learning_rate": 3.026529656518864e-06, - "loss": 1.399, + "epoch": 1.23, + "learning_rate": 8.283715866303016e-06, + "loss": 1.2462, "step": 1525 }, { - "epoch": 2.48, - "learning_rate": 2.936453704242215e-06, - "loss": 1.4136, + "epoch": 1.24, + "learning_rate": 8.271085083296295e-06, + "loss": 1.218, "step": 1530 }, { - "epoch": 2.49, - "learning_rate": 2.8476322080139862e-06, - "loss": 1.4474, + "epoch": 1.24, + "learning_rate": 8.258417700801301e-06, + "loss": 1.249, "step": 1535 }, { - "epoch": 2.5, - "learning_rate": 2.760071697699729e-06, - "loss": 1.4542, + "epoch": 1.25, + "learning_rate": 8.245713860550734e-06, + "loss": 1.2629, "step": 1540 }, { - "epoch": 2.5, - "learning_rate": 2.673778610461448e-06, - "loss": 1.4176, + "epoch": 1.25, + "learning_rate": 8.232973704685208e-06, + "loss": 1.2605, "step": 1545 }, { - "epoch": 2.51, - "learning_rate": 2.588759290284337e-06, - "loss": 1.4471, + "epoch": 1.25, + "learning_rate": 8.220197375751667e-06, + "loss": 1.2232, "step": 1550 }, { - "epoch": 2.52, - "learning_rate": 2.505019987510426e-06, - "loss": 1.4217, + "epoch": 1.26, + "learning_rate": 8.207385016701792e-06, + "loss": 1.2242, "step": 1555 }, { - "epoch": 2.53, - "learning_rate": 2.4225668583790474e-06, - "loss": 1.4194, + "epoch": 1.26, + "learning_rate": 8.194536770890392e-06, + "loss": 1.1824, "step": 1560 }, { - "epoch": 2.54, - "learning_rate": 2.3414059645742504e-06, - "loss": 1.3959, + "epoch": 1.27, + "learning_rate": 8.181652782073808e-06, + "loss": 1.275, "step": 1565 }, { - "epoch": 2.54, - "learning_rate": 2.261543272779192e-06, - "loss": 1.4689, + "epoch": 1.27, + "learning_rate": 8.168733194408302e-06, + "loss": 1.2164, "step": 1570 }, { - "epoch": 2.55, - "learning_rate": 2.1829846542374565e-06, - "loss": 1.4568, + "epoch": 1.27, + "learning_rate": 8.155778152448443e-06, + "loss": 1.2207, "step": 1575 }, { - "epoch": 2.56, - "learning_rate": 2.105735884321436e-06, - "loss": 1.451, + "epoch": 1.28, + "learning_rate": 8.142787801145495e-06, + "loss": 1.2266, "step": 1580 }, { - "epoch": 2.57, - "learning_rate": 2.029802642107734e-06, - "loss": 1.4418, + "epoch": 1.28, + "learning_rate": 8.129762285845784e-06, + "loss": 1.1971, "step": 1585 }, { - "epoch": 2.58, - "learning_rate": 1.9551905099596813e-06, - "loss": 1.4619, + "epoch": 1.29, + "learning_rate": 8.116701752289084e-06, + "loss": 1.2107, "step": 1590 }, { - "epoch": 2.58, - "learning_rate": 1.8819049731169059e-06, - "loss": 1.4182, + "epoch": 1.29, + "learning_rate": 8.103606346606978e-06, + "loss": 1.1904, "step": 1595 }, { - "epoch": 2.59, - "learning_rate": 1.809951419292104e-06, - "loss": 1.4095, + "epoch": 1.29, + "learning_rate": 8.090476215321226e-06, + "loss": 1.2895, "step": 1600 }, { - "epoch": 2.6, - "learning_rate": 1.7393351382749424e-06, - "loss": 1.4397, + "epoch": 1.3, + "learning_rate": 8.07731150534213e-06, + "loss": 1.235, "step": 1605 }, { - "epoch": 2.61, - "learning_rate": 1.6700613215431549e-06, - "loss": 1.4747, + "epoch": 1.3, + "learning_rate": 8.064112363966877e-06, + "loss": 1.2238, "step": 1610 }, { - "epoch": 2.62, - "learning_rate": 1.6021350618809184e-06, - "loss": 1.4356, + "epoch": 1.31, + "learning_rate": 8.050878938877908e-06, + "loss": 1.2535, "step": 1615 }, { - "epoch": 2.62, - "learning_rate": 1.5355613530044089e-06, - "loss": 1.4381, + "epoch": 1.31, + "learning_rate": 8.037611378141257e-06, + "loss": 1.2336, "step": 1620 }, { - "epoch": 2.63, - "learning_rate": 1.470345089194709e-06, - "loss": 1.4444, + "epoch": 1.31, + "learning_rate": 8.024309830204888e-06, + "loss": 1.2285, "step": 1625 }, { - "epoch": 2.64, - "learning_rate": 1.4064910649379803e-06, - "loss": 1.469, + "epoch": 1.32, + "learning_rate": 8.010974443897046e-06, + "loss": 1.25, "step": 1630 }, { - "epoch": 2.65, - "learning_rate": 1.3440039745729894e-06, - "loss": 1.4427, + "epoch": 1.32, + "learning_rate": 7.997605368424585e-06, + "loss": 1.2492, "step": 1635 }, { - "epoch": 2.66, - "learning_rate": 1.2828884119460105e-06, - "loss": 1.3941, + "epoch": 1.33, + "learning_rate": 7.9842027533713e-06, + "loss": 1.1783, "step": 1640 }, { - "epoch": 2.67, - "learning_rate": 1.2231488700730742e-06, - "loss": 1.4452, + "epoch": 1.33, + "learning_rate": 7.970766748696254e-06, + "loss": 1.171, "step": 1645 }, { - "epoch": 2.67, - "learning_rate": 1.1647897408096886e-06, - "loss": 1.4236, + "epoch": 1.33, + "learning_rate": 7.9572975047321e-06, + "loss": 1.2336, "step": 1650 }, { - "epoch": 2.68, - "learning_rate": 1.107815314527929e-06, - "loss": 1.4538, + "epoch": 1.34, + "learning_rate": 7.943795172183394e-06, + "loss": 1.204, "step": 1655 }, { - "epoch": 2.69, - "learning_rate": 1.0522297798010594e-06, - "loss": 1.4112, + "epoch": 1.34, + "learning_rate": 7.93025990212492e-06, + "loss": 1.2342, "step": 1660 }, { - "epoch": 2.7, - "learning_rate": 9.980372230955693e-07, - "loss": 1.4808, + "epoch": 1.35, + "learning_rate": 7.916691845999986e-06, + "loss": 1.1936, "step": 1665 }, { - "epoch": 2.71, - "learning_rate": 9.452416284707743e-07, - "loss": 1.4509, + "epoch": 1.35, + "learning_rate": 7.903091155618747e-06, + "loss": 1.1992, "step": 1670 }, { - "epoch": 2.71, - "learning_rate": 8.938468772859132e-07, - "loss": 1.4414, + "epoch": 1.36, + "learning_rate": 7.889457983156484e-06, + "loss": 1.1707, "step": 1675 }, { - "epoch": 2.72, - "learning_rate": 8.438567479147975e-07, - "loss": 1.4203, + "epoch": 1.36, + "learning_rate": 7.875792481151916e-06, + "loss": 1.1835, "step": 1680 }, { - "epoch": 2.73, - "learning_rate": 7.952749154680405e-07, - "loss": 1.4294, + "epoch": 1.36, + "learning_rate": 7.862094802505498e-06, + "loss": 1.2359, "step": 1685 }, { - "epoch": 2.74, - "learning_rate": 7.481049515228811e-07, - "loss": 1.4136, + "epoch": 1.37, + "learning_rate": 7.848365100477695e-06, + "loss": 1.2754, "step": 1690 }, { - "epoch": 2.75, - "learning_rate": 7.023503238606122e-07, - "loss": 1.4316, + "epoch": 1.37, + "learning_rate": 7.834603528687277e-06, + "loss": 1.2664, "step": 1695 }, { - "epoch": 2.75, - "learning_rate": 6.580143962116281e-07, - "loss": 1.4645, + "epoch": 1.38, + "learning_rate": 7.8208102411096e-06, + "loss": 1.2535, "step": 1700 }, { - "epoch": 2.76, - "learning_rate": 6.151004280081574e-07, - "loss": 1.4692, + "epoch": 1.38, + "learning_rate": 7.806985392074877e-06, + "loss": 1.2158, "step": 1705 }, { - "epoch": 2.77, - "learning_rate": 5.736115741446146e-07, - "loss": 1.4408, + "epoch": 1.38, + "learning_rate": 7.793129136266464e-06, + "loss": 1.2504, "step": 1710 }, { - "epoch": 2.78, - "learning_rate": 5.335508847456794e-07, - "loss": 1.4552, + "epoch": 1.39, + "learning_rate": 7.779241628719108e-06, + "loss": 1.1505, "step": 1715 }, { - "epoch": 2.79, - "learning_rate": 4.949213049420576e-07, - "loss": 1.4657, + "epoch": 1.39, + "learning_rate": 7.765323024817237e-06, + "loss": 1.2695, "step": 1720 }, { - "epoch": 2.79, - "learning_rate": 4.577256746539638e-07, - "loss": 1.4189, + "epoch": 1.4, + "learning_rate": 7.751373480293205e-06, + "loss": 1.2059, "step": 1725 }, { - "epoch": 2.8, - "learning_rate": 4.2196672838233257e-07, - "loss": 1.4573, + "epoch": 1.4, + "learning_rate": 7.737393151225555e-06, + "loss": 1.2547, "step": 1730 }, { - "epoch": 2.81, - "learning_rate": 3.876470950078037e-07, - "loss": 1.4382, + "epoch": 1.4, + "learning_rate": 7.723382194037266e-06, + "loss": 1.2127, "step": 1735 }, { - "epoch": 2.82, - "learning_rate": 3.5476929759743927e-07, - "loss": 1.4272, + "epoch": 1.41, + "learning_rate": 7.709340765494017e-06, + "loss": 1.1734, "step": 1740 }, { - "epoch": 2.83, - "learning_rate": 3.233357532192494e-07, - "loss": 1.4866, + "epoch": 1.41, + "learning_rate": 7.695269022702425e-06, + "loss": 1.2037, "step": 1745 }, { - "epoch": 2.84, - "learning_rate": 2.933487727644813e-07, - "loss": 1.4132, + "epoch": 1.42, + "learning_rate": 7.681167123108277e-06, + "loss": 1.2891, "step": 1750 }, { - "epoch": 2.84, - "learning_rate": 2.648105607777507e-07, - "loss": 1.4498, + "epoch": 1.42, + "learning_rate": 7.667035224494787e-06, + "loss": 1.2285, "step": 1755 }, { - "epoch": 2.85, - "learning_rate": 2.3772321529494712e-07, - "loss": 1.4505, + "epoch": 1.42, + "learning_rate": 7.65287348498082e-06, + "loss": 1.217, "step": 1760 }, { - "epoch": 2.86, - "learning_rate": 2.1208872768901713e-07, - "loss": 1.4338, + "epoch": 1.43, + "learning_rate": 7.63868206301912e-06, + "loss": 1.1856, "step": 1765 }, { - "epoch": 2.87, - "learning_rate": 1.8790898252354583e-07, - "loss": 1.4299, + "epoch": 1.43, + "learning_rate": 7.62446111739455e-06, + "loss": 1.2613, "step": 1770 }, { - "epoch": 2.88, - "learning_rate": 1.6518575741421904e-07, - "loss": 1.4378, + "epoch": 1.44, + "learning_rate": 7.6102108072223e-06, + "loss": 1.1617, "step": 1775 }, { - "epoch": 2.88, - "learning_rate": 1.4392072289814319e-07, - "loss": 1.4323, + "epoch": 1.44, + "learning_rate": 7.595931291946116e-06, + "loss": 1.2006, "step": 1780 }, { - "epoch": 2.89, - "learning_rate": 1.241154423110169e-07, - "loss": 1.4144, + "epoch": 1.44, + "learning_rate": 7.581622731336515e-06, + "loss": 1.2543, "step": 1785 }, { - "epoch": 2.9, - "learning_rate": 1.0577137167221863e-07, - "loss": 1.4343, + "epoch": 1.45, + "learning_rate": 7.567285285488994e-06, + "loss": 1.2498, "step": 1790 }, { - "epoch": 2.91, - "learning_rate": 8.88898595777543e-08, - "loss": 1.4625, + "epoch": 1.45, + "learning_rate": 7.552919114822246e-06, + "loss": 1.2484, "step": 1795 }, { - "epoch": 2.92, - "learning_rate": 7.347214710111239e-08, - "loss": 1.3614, + "epoch": 1.46, + "learning_rate": 7.5385243800763505e-06, + "loss": 1.2543, "step": 1800 }, { - "epoch": 2.92, - "learning_rate": 5.951936770202782e-08, - "loss": 1.4099, + "epoch": 1.46, + "learning_rate": 7.524101242310993e-06, + "loss": 1.2621, "step": 1805 }, { - "epoch": 2.93, - "learning_rate": 4.7032547143155417e-08, - "loss": 1.4601, + "epoch": 1.46, + "learning_rate": 7.509649862903652e-06, + "loss": 1.2176, "step": 1810 }, { - "epoch": 2.94, - "learning_rate": 3.60126034146524e-08, - "loss": 1.4231, + "epoch": 1.47, + "learning_rate": 7.495170403547797e-06, + "loss": 1.2189, "step": 1815 }, { - "epoch": 2.95, - "learning_rate": 2.6460346666696835e-08, - "loss": 1.4549, + "epoch": 1.47, + "learning_rate": 7.480663026251073e-06, + "loss": 1.2503, "step": 1820 }, { - "epoch": 2.96, - "learning_rate": 1.8376479149926353e-08, - "loss": 1.4122, + "epoch": 1.48, + "learning_rate": 7.466127893333498e-06, + "loss": 1.2186, "step": 1825 }, { - "epoch": 2.96, - "learning_rate": 1.176159516380837e-08, - "loss": 1.3961, + "epoch": 1.48, + "learning_rate": 7.451565167425642e-06, + "loss": 1.2805, "step": 1830 }, { - "epoch": 2.97, - "learning_rate": 6.616181012955025e-09, - "loss": 1.4652, + "epoch": 1.48, + "learning_rate": 7.436975011466805e-06, + "loss": 1.2347, "step": 1835 }, { - "epoch": 2.98, - "learning_rate": 2.9406149713628874e-09, - "loss": 1.425, + "epoch": 1.49, + "learning_rate": 7.422357588703195e-06, + "loss": 1.266, "step": 1840 }, { - "epoch": 2.99, - "learning_rate": 7.351672546129785e-10, - "loss": 1.4916, + "epoch": 1.49, + "learning_rate": 7.407713062686107e-06, + "loss": 1.2496, "step": 1845 }, { - "epoch": 3.0, - "learning_rate": 0.0, - "loss": 1.3883, + "epoch": 1.5, + "learning_rate": 7.393041597270085e-06, + "loss": 1.2902, "step": 1850 }, { - "epoch": 3.0, - "step": 1851, - "total_flos": 1.8340911973547377e+18, - "train_loss": 2.0081952639620475, - "train_runtime": 43810.4873, - "train_samples_per_second": 5.415, - "train_steps_per_second": 0.042 + "epoch": 1.5, + "learning_rate": 7.378343356611093e-06, + "loss": 1.2367, + "step": 1855 + }, + { + "epoch": 1.5, + "learning_rate": 7.363618505164678e-06, + "loss": 1.274, + "step": 1860 + }, + { + "epoch": 1.51, + "learning_rate": 7.348867207684132e-06, + "loss": 1.2242, + "step": 1865 + }, + { + "epoch": 1.51, + "learning_rate": 7.334089629218639e-06, + "loss": 1.2844, + "step": 1870 + }, + { + "epoch": 1.52, + "learning_rate": 7.319285935111444e-06, + "loss": 1.2672, + "step": 1875 + }, + { + "epoch": 1.52, + "learning_rate": 7.304456290997991e-06, + "loss": 1.1542, + "step": 1880 + }, + { + "epoch": 1.53, + "learning_rate": 7.289600862804069e-06, + "loss": 1.15, + "step": 1885 + }, + { + "epoch": 1.53, + "learning_rate": 7.274719816743967e-06, + "loss": 1.2385, + "step": 1890 + }, + { + "epoch": 1.53, + "learning_rate": 7.259813319318601e-06, + "loss": 1.2348, + "step": 1895 + }, + { + "epoch": 1.54, + "learning_rate": 7.244881537313664e-06, + "loss": 1.2578, + "step": 1900 + }, + { + "epoch": 1.54, + "learning_rate": 7.229924637797742e-06, + "loss": 1.2191, + "step": 1905 + }, + { + "epoch": 1.55, + "learning_rate": 7.214942788120466e-06, + "loss": 1.251, + "step": 1910 + }, + { + "epoch": 1.55, + "learning_rate": 7.1999361559106225e-06, + "loss": 1.2031, + "step": 1915 + }, + { + "epoch": 1.55, + "learning_rate": 7.184904909074293e-06, + "loss": 1.2766, + "step": 1920 + }, + { + "epoch": 1.56, + "learning_rate": 7.169849215792955e-06, + "loss": 1.2299, + "step": 1925 + }, + { + "epoch": 1.56, + "learning_rate": 7.15476924452162e-06, + "loss": 1.2355, + "step": 1930 + }, + { + "epoch": 1.57, + "learning_rate": 7.139665163986938e-06, + "loss": 1.2336, + "step": 1935 + }, + { + "epoch": 1.57, + "learning_rate": 7.124537143185317e-06, + "loss": 1.3566, + "step": 1940 + }, + { + "epoch": 1.57, + "learning_rate": 7.109385351381022e-06, + "loss": 1.1423, + "step": 1945 + }, + { + "epoch": 1.58, + "learning_rate": 7.09420995810429e-06, + "loss": 1.2576, + "step": 1950 + }, + { + "epoch": 1.58, + "learning_rate": 7.079011133149427e-06, + "loss": 1.2563, + "step": 1955 + }, + { + "epoch": 1.59, + "learning_rate": 7.0637890465729165e-06, + "loss": 1.2695, + "step": 1960 + }, + { + "epoch": 1.59, + "learning_rate": 7.048543868691506e-06, + "loss": 1.1986, + "step": 1965 + }, + { + "epoch": 1.59, + "learning_rate": 7.033275770080309e-06, + "loss": 1.25, + "step": 1970 + }, + { + "epoch": 1.6, + "learning_rate": 7.017984921570895e-06, + "loss": 1.2025, + "step": 1975 + }, + { + "epoch": 1.6, + "learning_rate": 7.002671494249376e-06, + "loss": 1.2465, + "step": 1980 + }, + { + "epoch": 1.61, + "learning_rate": 6.987335659454493e-06, + "loss": 1.2336, + "step": 1985 + }, + { + "epoch": 1.61, + "learning_rate": 6.971977588775703e-06, + "loss": 1.2436, + "step": 1990 + }, + { + "epoch": 1.61, + "learning_rate": 6.956597454051253e-06, + "loss": 1.2429, + "step": 1995 + }, + { + "epoch": 1.62, + "learning_rate": 6.941195427366259e-06, + "loss": 1.2574, + "step": 2000 + }, + { + "epoch": 1.62, + "learning_rate": 6.925771681050784e-06, + "loss": 1.2465, + "step": 2005 + }, + { + "epoch": 1.63, + "learning_rate": 6.910326387677906e-06, + "loss": 1.2805, + "step": 2010 + }, + { + "epoch": 1.63, + "learning_rate": 6.89485972006179e-06, + "loss": 1.2664, + "step": 2015 + }, + { + "epoch": 1.63, + "learning_rate": 6.879371851255747e-06, + "loss": 1.1826, + "step": 2020 + }, + { + "epoch": 1.64, + "learning_rate": 6.863862954550315e-06, + "loss": 1.2441, + "step": 2025 + }, + { + "epoch": 1.64, + "learning_rate": 6.8483332034713006e-06, + "loss": 1.191, + "step": 2030 + }, + { + "epoch": 1.65, + "learning_rate": 6.832782771777846e-06, + "loss": 1.2574, + "step": 2035 + }, + { + "epoch": 1.65, + "learning_rate": 6.817211833460484e-06, + "loss": 1.2865, + "step": 2040 + }, + { + "epoch": 1.65, + "learning_rate": 6.801620562739197e-06, + "loss": 1.2504, + "step": 2045 + }, + { + "epoch": 1.66, + "learning_rate": 6.7860091340614575e-06, + "loss": 1.2084, + "step": 2050 + }, + { + "epoch": 1.66, + "learning_rate": 6.770377722100284e-06, + "loss": 1.2609, + "step": 2055 + }, + { + "epoch": 1.67, + "learning_rate": 6.75472650175228e-06, + "loss": 1.2723, + "step": 2060 + }, + { + "epoch": 1.67, + "learning_rate": 6.739055648135685e-06, + "loss": 1.1243, + "step": 2065 + }, + { + "epoch": 1.67, + "learning_rate": 6.723365336588409e-06, + "loss": 1.2529, + "step": 2070 + }, + { + "epoch": 1.68, + "learning_rate": 6.707655742666074e-06, + "loss": 1.3047, + "step": 2075 + }, + { + "epoch": 1.68, + "learning_rate": 6.691927042140044e-06, + "loss": 1.257, + "step": 2080 + }, + { + "epoch": 1.69, + "learning_rate": 6.6761794109954714e-06, + "loss": 1.2086, + "step": 2085 + }, + { + "epoch": 1.69, + "learning_rate": 6.660413025429312e-06, + "loss": 1.2711, + "step": 2090 + }, + { + "epoch": 1.69, + "learning_rate": 6.644628061848363e-06, + "loss": 1.1157, + "step": 2095 + }, + { + "epoch": 1.7, + "learning_rate": 6.628824696867286e-06, + "loss": 1.2309, + "step": 2100 + }, + { + "epoch": 1.7, + "learning_rate": 6.613003107306637e-06, + "loss": 1.2363, + "step": 2105 + }, + { + "epoch": 1.71, + "learning_rate": 6.597163470190877e-06, + "loss": 1.207, + "step": 2110 + }, + { + "epoch": 1.71, + "learning_rate": 6.5813059627464e-06, + "loss": 1.2641, + "step": 2115 + }, + { + "epoch": 1.72, + "learning_rate": 6.565430762399546e-06, + "loss": 1.252, + "step": 2120 + }, + { + "epoch": 1.72, + "learning_rate": 6.549538046774621e-06, + "loss": 1.2586, + "step": 2125 + }, + { + "epoch": 1.72, + "learning_rate": 6.533627993691901e-06, + "loss": 1.3012, + "step": 2130 + }, + { + "epoch": 1.73, + "learning_rate": 6.517700781165649e-06, + "loss": 1.1842, + "step": 2135 + }, + { + "epoch": 1.73, + "learning_rate": 6.501756587402124e-06, + "loss": 1.2016, + "step": 2140 + }, + { + "epoch": 1.74, + "learning_rate": 6.485795590797579e-06, + "loss": 1.2988, + "step": 2145 + }, + { + "epoch": 1.74, + "learning_rate": 6.469817969936277e-06, + "loss": 1.2547, + "step": 2150 + }, + { + "epoch": 1.74, + "learning_rate": 6.453823903588481e-06, + "loss": 1.2309, + "step": 2155 + }, + { + "epoch": 1.75, + "learning_rate": 6.437813570708463e-06, + "loss": 1.2855, + "step": 2160 + }, + { + "epoch": 1.75, + "learning_rate": 6.421787150432493e-06, + "loss": 1.1488, + "step": 2165 + }, + { + "epoch": 1.76, + "learning_rate": 6.405744822076845e-06, + "loss": 1.2115, + "step": 2170 + }, + { + "epoch": 1.76, + "learning_rate": 6.389686765135782e-06, + "loss": 1.2336, + "step": 2175 + }, + { + "epoch": 1.76, + "learning_rate": 6.3736131592795525e-06, + "loss": 1.2746, + "step": 2180 + }, + { + "epoch": 1.77, + "learning_rate": 6.357524184352375e-06, + "loss": 1.201, + "step": 2185 + }, + { + "epoch": 1.77, + "learning_rate": 6.341420020370435e-06, + "loss": 1.2703, + "step": 2190 + }, + { + "epoch": 1.78, + "learning_rate": 6.325300847519859e-06, + "loss": 1.2441, + "step": 2195 + }, + { + "epoch": 1.78, + "learning_rate": 6.309166846154713e-06, + "loss": 1.2684, + "step": 2200 + }, + { + "epoch": 1.78, + "learning_rate": 6.293018196794964e-06, + "loss": 1.2449, + "step": 2205 + }, + { + "epoch": 1.79, + "learning_rate": 6.276855080124483e-06, + "loss": 1.268, + "step": 2210 + }, + { + "epoch": 1.79, + "learning_rate": 6.260677676989008e-06, + "loss": 1.2906, + "step": 2215 + }, + { + "epoch": 1.8, + "learning_rate": 6.24448616839413e-06, + "loss": 1.307, + "step": 2220 + }, + { + "epoch": 1.8, + "learning_rate": 6.228280735503254e-06, + "loss": 1.2699, + "step": 2225 + }, + { + "epoch": 1.8, + "learning_rate": 6.212061559635588e-06, + "loss": 1.24, + "step": 2230 + }, + { + "epoch": 1.81, + "learning_rate": 6.195828822264107e-06, + "loss": 1.2605, + "step": 2235 + }, + { + "epoch": 1.81, + "learning_rate": 6.179582705013519e-06, + "loss": 1.3457, + "step": 2240 + }, + { + "epoch": 1.82, + "learning_rate": 6.163323389658242e-06, + "loss": 1.2301, + "step": 2245 + }, + { + "epoch": 1.82, + "learning_rate": 6.147051058120359e-06, + "loss": 1.257, + "step": 2250 + }, + { + "epoch": 1.82, + "learning_rate": 6.130765892467595e-06, + "loss": 1.1584, + "step": 2255 + }, + { + "epoch": 1.83, + "learning_rate": 6.114468074911265e-06, + "loss": 1.2537, + "step": 2260 + }, + { + "epoch": 1.83, + "learning_rate": 6.098157787804252e-06, + "loss": 1.2559, + "step": 2265 + }, + { + "epoch": 1.84, + "learning_rate": 6.081835213638951e-06, + "loss": 1.2307, + "step": 2270 + }, + { + "epoch": 1.84, + "learning_rate": 6.0655005350452414e-06, + "loss": 1.1664, + "step": 2275 + }, + { + "epoch": 1.84, + "learning_rate": 6.049153934788429e-06, + "loss": 1.2146, + "step": 2280 + }, + { + "epoch": 1.85, + "learning_rate": 6.032795595767214e-06, + "loss": 1.2498, + "step": 2285 + }, + { + "epoch": 1.85, + "learning_rate": 6.016425701011637e-06, + "loss": 1.2379, + "step": 2290 + }, + { + "epoch": 1.86, + "learning_rate": 6.000044433681034e-06, + "loss": 1.2584, + "step": 2295 + }, + { + "epoch": 1.86, + "learning_rate": 5.9836519770619865e-06, + "loss": 1.2805, + "step": 2300 + }, + { + "epoch": 1.86, + "learning_rate": 5.967248514566271e-06, + "loss": 1.2348, + "step": 2305 + }, + { + "epoch": 1.87, + "learning_rate": 5.9508342297288035e-06, + "loss": 1.2572, + "step": 2310 + }, + { + "epoch": 1.87, + "learning_rate": 5.934409306205593e-06, + "loss": 1.2018, + "step": 2315 + }, + { + "epoch": 1.88, + "learning_rate": 5.917973927771678e-06, + "loss": 1.2641, + "step": 2320 + }, + { + "epoch": 1.88, + "learning_rate": 5.901528278319083e-06, + "loss": 1.2293, + "step": 2325 + }, + { + "epoch": 1.89, + "learning_rate": 5.885072541854742e-06, + "loss": 1.3113, + "step": 2330 + }, + { + "epoch": 1.89, + "learning_rate": 5.868606902498457e-06, + "loss": 1.26, + "step": 2335 + }, + { + "epoch": 1.89, + "learning_rate": 5.852131544480831e-06, + "loss": 1.2092, + "step": 2340 + }, + { + "epoch": 1.9, + "learning_rate": 5.835646652141208e-06, + "loss": 1.2535, + "step": 2345 + }, + { + "epoch": 1.9, + "learning_rate": 5.8191524099256035e-06, + "loss": 1.2535, + "step": 2350 + }, + { + "epoch": 1.91, + "learning_rate": 5.802649002384655e-06, + "loss": 1.2629, + "step": 2355 + }, + { + "epoch": 1.91, + "learning_rate": 5.786136614171542e-06, + "loss": 1.233, + "step": 2360 + }, + { + "epoch": 1.91, + "learning_rate": 5.769615430039931e-06, + "loss": 1.2375, + "step": 2365 + }, + { + "epoch": 1.92, + "learning_rate": 5.753085634841903e-06, + "loss": 1.2312, + "step": 2370 + }, + { + "epoch": 1.92, + "learning_rate": 5.736547413525888e-06, + "loss": 1.1715, + "step": 2375 + }, + { + "epoch": 1.93, + "learning_rate": 5.72000095113459e-06, + "loss": 1.2695, + "step": 2380 + }, + { + "epoch": 1.93, + "learning_rate": 5.703446432802924e-06, + "loss": 1.1672, + "step": 2385 + }, + { + "epoch": 1.93, + "learning_rate": 5.686884043755942e-06, + "loss": 1.2637, + "step": 2390 + }, + { + "epoch": 1.94, + "learning_rate": 5.6703139693067554e-06, + "loss": 1.1591, + "step": 2395 + }, + { + "epoch": 1.94, + "learning_rate": 5.653736394854471e-06, + "loss": 1.2343, + "step": 2400 + }, + { + "epoch": 1.95, + "learning_rate": 5.637151505882109e-06, + "loss": 1.2172, + "step": 2405 + }, + { + "epoch": 1.95, + "learning_rate": 5.620559487954531e-06, + "loss": 1.2121, + "step": 2410 + }, + { + "epoch": 1.95, + "learning_rate": 5.603960526716361e-06, + "loss": 1.178, + "step": 2415 + }, + { + "epoch": 1.96, + "learning_rate": 5.587354807889913e-06, + "loss": 1.2256, + "step": 2420 + }, + { + "epoch": 1.96, + "learning_rate": 5.570742517273109e-06, + "loss": 1.241, + "step": 2425 + }, + { + "epoch": 1.97, + "learning_rate": 5.554123840737402e-06, + "loss": 1.2773, + "step": 2430 + }, + { + "epoch": 1.97, + "learning_rate": 5.537498964225694e-06, + "loss": 1.2383, + "step": 2435 + }, + { + "epoch": 1.97, + "learning_rate": 5.520868073750261e-06, + "loss": 1.177, + "step": 2440 + }, + { + "epoch": 1.98, + "learning_rate": 5.50423135539066e-06, + "loss": 1.1607, + "step": 2445 + }, + { + "epoch": 1.98, + "learning_rate": 5.487588995291666e-06, + "loss": 1.2957, + "step": 2450 + }, + { + "epoch": 1.99, + "learning_rate": 5.47094117966117e-06, + "loss": 1.164, + "step": 2455 + }, + { + "epoch": 1.99, + "learning_rate": 5.454288094768108e-06, + "loss": 1.225, + "step": 2460 + }, + { + "epoch": 1.99, + "learning_rate": 5.437629926940367e-06, + "loss": 1.2602, + "step": 2465 + }, + { + "epoch": 2.0, + "learning_rate": 5.420966862562718e-06, + "loss": 1.2434, + "step": 2470 + }, + { + "epoch": 2.0, + "learning_rate": 5.404299088074702e-06, + "loss": 1.0836, + "step": 2475 + }, + { + "epoch": 2.01, + "learning_rate": 5.387626789968574e-06, + "loss": 1.0635, + "step": 2480 + }, + { + "epoch": 2.01, + "learning_rate": 5.370950154787195e-06, + "loss": 1.032, + "step": 2485 + }, + { + "epoch": 2.01, + "learning_rate": 5.354269369121958e-06, + "loss": 1.0236, + "step": 2490 + }, + { + "epoch": 2.02, + "learning_rate": 5.337584619610691e-06, + "loss": 1.0402, + "step": 2495 + }, + { + "epoch": 2.02, + "learning_rate": 5.320896092935575e-06, + "loss": 1.0713, + "step": 2500 + }, + { + "epoch": 2.03, + "learning_rate": 5.304203975821048e-06, + "loss": 1.0443, + "step": 2505 + }, + { + "epoch": 2.03, + "learning_rate": 5.287508455031729e-06, + "loss": 1.0523, + "step": 2510 + }, + { + "epoch": 2.03, + "learning_rate": 5.270809717370314e-06, + "loss": 1.0072, + "step": 2515 + }, + { + "epoch": 2.04, + "learning_rate": 5.254107949675493e-06, + "loss": 1.0473, + "step": 2520 + }, + { + "epoch": 2.04, + "learning_rate": 5.237403338819859e-06, + "loss": 1.0189, + "step": 2525 + }, + { + "epoch": 2.05, + "learning_rate": 5.220696071707816e-06, + "loss": 1.027, + "step": 2530 + }, + { + "epoch": 2.05, + "learning_rate": 5.20398633527349e-06, + "loss": 0.9773, + "step": 2535 + }, + { + "epoch": 2.06, + "learning_rate": 5.187274316478632e-06, + "loss": 0.9916, + "step": 2540 + }, + { + "epoch": 2.06, + "learning_rate": 5.170560202310536e-06, + "loss": 1.0252, + "step": 2545 + }, + { + "epoch": 2.06, + "learning_rate": 5.153844179779932e-06, + "loss": 1.0508, + "step": 2550 + }, + { + "epoch": 2.07, + "learning_rate": 5.137126435918912e-06, + "loss": 1.0217, + "step": 2555 + }, + { + "epoch": 2.07, + "learning_rate": 5.12040715777882e-06, + "loss": 1.0367, + "step": 2560 + }, + { + "epoch": 2.08, + "learning_rate": 5.1036865324281716e-06, + "loss": 1.0121, + "step": 2565 + }, + { + "epoch": 2.08, + "learning_rate": 5.08696474695055e-06, + "loss": 0.9992, + "step": 2570 + }, + { + "epoch": 2.08, + "learning_rate": 5.070241988442528e-06, + "loss": 1.0778, + "step": 2575 + }, + { + "epoch": 2.09, + "learning_rate": 5.053518444011557e-06, + "loss": 1.0703, + "step": 2580 + }, + { + "epoch": 2.09, + "learning_rate": 5.036794300773887e-06, + "loss": 1.017, + "step": 2585 + }, + { + "epoch": 2.1, + "learning_rate": 5.020069745852463e-06, + "loss": 0.9813, + "step": 2590 + }, + { + "epoch": 2.1, + "learning_rate": 5.003344966374843e-06, + "loss": 1.0287, + "step": 2595 + }, + { + "epoch": 2.1, + "learning_rate": 4.9866201494710934e-06, + "loss": 1.0617, + "step": 2600 + }, + { + "epoch": 2.11, + "learning_rate": 4.969895482271695e-06, + "loss": 1.1227, + "step": 2605 + }, + { + "epoch": 2.11, + "learning_rate": 4.953171151905466e-06, + "loss": 1.0496, + "step": 2610 + }, + { + "epoch": 2.12, + "learning_rate": 4.936447345497443e-06, + "loss": 1.0287, + "step": 2615 + }, + { + "epoch": 2.12, + "learning_rate": 4.919724250166808e-06, + "loss": 1.0656, + "step": 2620 + }, + { + "epoch": 2.12, + "learning_rate": 4.903002053024782e-06, + "loss": 1.0287, + "step": 2625 + }, + { + "epoch": 2.13, + "learning_rate": 4.886280941172539e-06, + "loss": 1.0293, + "step": 2630 + }, + { + "epoch": 2.13, + "learning_rate": 4.869561101699113e-06, + "loss": 1.0805, + "step": 2635 + }, + { + "epoch": 2.14, + "learning_rate": 4.852842721679293e-06, + "loss": 1.0068, + "step": 2640 + }, + { + "epoch": 2.14, + "learning_rate": 4.836125988171547e-06, + "loss": 1.0056, + "step": 2645 + }, + { + "epoch": 2.14, + "learning_rate": 4.8194110882159175e-06, + "loss": 1.0256, + "step": 2650 + }, + { + "epoch": 2.15, + "learning_rate": 4.802698208831929e-06, + "loss": 1.0551, + "step": 2655 + }, + { + "epoch": 2.15, + "learning_rate": 4.785987537016504e-06, + "loss": 1.002, + "step": 2660 + }, + { + "epoch": 2.16, + "learning_rate": 4.769279259741858e-06, + "loss": 1.0378, + "step": 2665 + }, + { + "epoch": 2.16, + "learning_rate": 4.752573563953422e-06, + "loss": 1.0088, + "step": 2670 + }, + { + "epoch": 2.16, + "learning_rate": 4.735870636567736e-06, + "loss": 0.9963, + "step": 2675 + }, + { + "epoch": 2.17, + "learning_rate": 4.719170664470371e-06, + "loss": 0.9977, + "step": 2680 + }, + { + "epoch": 2.17, + "learning_rate": 4.702473834513826e-06, + "loss": 1.0533, + "step": 2685 + }, + { + "epoch": 2.18, + "learning_rate": 4.685780333515449e-06, + "loss": 1.0148, + "step": 2690 + }, + { + "epoch": 2.18, + "learning_rate": 4.669090348255338e-06, + "loss": 1.0023, + "step": 2695 + }, + { + "epoch": 2.18, + "learning_rate": 4.652404065474257e-06, + "loss": 1.0227, + "step": 2700 + }, + { + "epoch": 2.19, + "learning_rate": 4.6357216718715375e-06, + "loss": 1.0236, + "step": 2705 + }, + { + "epoch": 2.19, + "learning_rate": 4.619043354103002e-06, + "loss": 1.01, + "step": 2710 + }, + { + "epoch": 2.2, + "learning_rate": 4.602369298778866e-06, + "loss": 1.0625, + "step": 2715 + }, + { + "epoch": 2.2, + "learning_rate": 4.585699692461655e-06, + "loss": 1.0154, + "step": 2720 + }, + { + "epoch": 2.2, + "learning_rate": 4.569034721664114e-06, + "loss": 1.0547, + "step": 2725 + }, + { + "epoch": 2.21, + "learning_rate": 4.552374572847122e-06, + "loss": 0.981, + "step": 2730 + }, + { + "epoch": 2.21, + "learning_rate": 4.535719432417612e-06, + "loss": 1.0691, + "step": 2735 + }, + { + "epoch": 2.22, + "learning_rate": 4.519069486726468e-06, + "loss": 1.0451, + "step": 2740 + }, + { + "epoch": 2.22, + "learning_rate": 4.502424922066462e-06, + "loss": 0.9773, + "step": 2745 + }, + { + "epoch": 2.22, + "learning_rate": 4.485785924670151e-06, + "loss": 0.9898, + "step": 2750 + }, + { + "epoch": 2.23, + "learning_rate": 4.469152680707804e-06, + "loss": 1.0496, + "step": 2755 + }, + { + "epoch": 2.23, + "learning_rate": 4.452525376285319e-06, + "loss": 1.0211, + "step": 2760 + }, + { + "epoch": 2.24, + "learning_rate": 4.435904197442131e-06, + "loss": 1.0961, + "step": 2765 + }, + { + "epoch": 2.24, + "learning_rate": 4.419289330149145e-06, + "loss": 1.0279, + "step": 2770 + }, + { + "epoch": 2.25, + "learning_rate": 4.4026809603066375e-06, + "loss": 1.0081, + "step": 2775 + }, + { + "epoch": 2.25, + "learning_rate": 4.386079273742199e-06, + "loss": 1.0764, + "step": 2780 + }, + { + "epoch": 2.25, + "learning_rate": 4.3694844562086325e-06, + "loss": 1.0342, + "step": 2785 + }, + { + "epoch": 2.26, + "learning_rate": 4.3528966933818865e-06, + "loss": 1.0707, + "step": 2790 + }, + { + "epoch": 2.26, + "learning_rate": 4.33631617085898e-06, + "loss": 1.0127, + "step": 2795 + }, + { + "epoch": 2.27, + "learning_rate": 4.319743074155916e-06, + "loss": 1.0658, + "step": 2800 + }, + { + "epoch": 2.27, + "learning_rate": 4.3031775887056176e-06, + "loss": 1.0881, + "step": 2805 + }, + { + "epoch": 2.27, + "learning_rate": 4.2866198998558404e-06, + "loss": 1.043, + "step": 2810 + }, + { + "epoch": 2.28, + "learning_rate": 4.2700701928671105e-06, + "loss": 1.008, + "step": 2815 + }, + { + "epoch": 2.28, + "learning_rate": 4.253528652910647e-06, + "loss": 1.0571, + "step": 2820 + }, + { + "epoch": 2.29, + "learning_rate": 4.236995465066287e-06, + "loss": 1.0859, + "step": 2825 + }, + { + "epoch": 2.29, + "learning_rate": 4.220470814320417e-06, + "loss": 1.0085, + "step": 2830 + }, + { + "epoch": 2.29, + "learning_rate": 4.203954885563909e-06, + "loss": 1.0146, + "step": 2835 + }, + { + "epoch": 2.3, + "learning_rate": 4.187447863590039e-06, + "loss": 1.0562, + "step": 2840 + }, + { + "epoch": 2.3, + "learning_rate": 4.170949933092432e-06, + "loss": 1.1096, + "step": 2845 + }, + { + "epoch": 2.31, + "learning_rate": 4.154461278662989e-06, + "loss": 1.0555, + "step": 2850 + }, + { + "epoch": 2.31, + "learning_rate": 4.137982084789823e-06, + "loss": 0.9902, + "step": 2855 + }, + { + "epoch": 2.31, + "learning_rate": 4.121512535855193e-06, + "loss": 1.06, + "step": 2860 + }, + { + "epoch": 2.32, + "learning_rate": 4.105052816133448e-06, + "loss": 1.0412, + "step": 2865 + }, + { + "epoch": 2.32, + "learning_rate": 4.0886031097889556e-06, + "loss": 1.0354, + "step": 2870 + }, + { + "epoch": 2.33, + "learning_rate": 4.072163600874045e-06, + "loss": 1.0928, + "step": 2875 + }, + { + "epoch": 2.33, + "learning_rate": 4.0557344733269505e-06, + "loss": 1.0645, + "step": 2880 + }, + { + "epoch": 2.33, + "learning_rate": 4.039315910969754e-06, + "loss": 0.9994, + "step": 2885 + }, + { + "epoch": 2.34, + "learning_rate": 4.02290809750632e-06, + "loss": 1.003, + "step": 2890 + }, + { + "epoch": 2.34, + "learning_rate": 4.006511216520251e-06, + "loss": 1.0512, + "step": 2895 + }, + { + "epoch": 2.35, + "learning_rate": 3.9901254514728225e-06, + "loss": 1.06, + "step": 2900 + }, + { + "epoch": 2.35, + "learning_rate": 3.973750985700943e-06, + "loss": 1.0541, + "step": 2905 + }, + { + "epoch": 2.35, + "learning_rate": 3.957388002415093e-06, + "loss": 1.0078, + "step": 2910 + }, + { + "epoch": 2.36, + "learning_rate": 3.941036684697274e-06, + "loss": 1.0104, + "step": 2915 + }, + { + "epoch": 2.36, + "learning_rate": 3.924697215498971e-06, + "loss": 1.0465, + "step": 2920 + }, + { + "epoch": 2.37, + "learning_rate": 3.908369777639091e-06, + "loss": 0.9527, + "step": 2925 + }, + { + "epoch": 2.37, + "learning_rate": 3.892054553801931e-06, + "loss": 1.0559, + "step": 2930 + }, + { + "epoch": 2.37, + "learning_rate": 3.875751726535124e-06, + "loss": 1.041, + "step": 2935 + }, + { + "epoch": 2.38, + "learning_rate": 3.8594614782476024e-06, + "loss": 1.0352, + "step": 2940 + }, + { + "epoch": 2.38, + "learning_rate": 3.843183991207551e-06, + "loss": 1.0175, + "step": 2945 + }, + { + "epoch": 2.39, + "learning_rate": 3.82691944754038e-06, + "loss": 0.9959, + "step": 2950 + }, + { + "epoch": 2.39, + "learning_rate": 3.8106680292266717e-06, + "loss": 1.0094, + "step": 2955 + }, + { + "epoch": 2.39, + "learning_rate": 3.7944299181001544e-06, + "loss": 1.0367, + "step": 2960 + }, + { + "epoch": 2.4, + "learning_rate": 3.778205295845663e-06, + "loss": 1.0443, + "step": 2965 + }, + { + "epoch": 2.4, + "learning_rate": 3.7619943439971107e-06, + "loss": 1.0074, + "step": 2970 + }, + { + "epoch": 2.41, + "learning_rate": 3.7457972439354526e-06, + "loss": 1.0396, + "step": 2975 + }, + { + "epoch": 2.41, + "learning_rate": 3.7296141768866635e-06, + "loss": 1.0506, + "step": 2980 + }, + { + "epoch": 2.42, + "learning_rate": 3.7134453239196987e-06, + "loss": 1.0268, + "step": 2985 + }, + { + "epoch": 2.42, + "learning_rate": 3.6972908659444828e-06, + "loss": 1.0101, + "step": 2990 + }, + { + "epoch": 2.42, + "learning_rate": 3.6811509837098756e-06, + "loss": 1.0076, + "step": 2995 + }, + { + "epoch": 2.43, + "learning_rate": 3.6650258578016474e-06, + "loss": 1.0602, + "step": 3000 + }, + { + "epoch": 2.43, + "learning_rate": 3.6489156686404683e-06, + "loss": 1.0418, + "step": 3005 + }, + { + "epoch": 2.44, + "learning_rate": 3.6328205964798822e-06, + "loss": 1.0498, + "step": 3010 + }, + { + "epoch": 2.44, + "learning_rate": 3.616740821404292e-06, + "loss": 1.0277, + "step": 3015 + }, + { + "epoch": 2.44, + "learning_rate": 3.600676523326946e-06, + "loss": 1.0979, + "step": 3020 + }, + { + "epoch": 2.45, + "learning_rate": 3.5846278819879197e-06, + "loss": 1.0467, + "step": 3025 + }, + { + "epoch": 2.45, + "learning_rate": 3.568595076952113e-06, + "loss": 1.0344, + "step": 3030 + }, + { + "epoch": 2.46, + "learning_rate": 3.552578287607237e-06, + "loss": 0.9874, + "step": 3035 + }, + { + "epoch": 2.46, + "learning_rate": 3.536577693161801e-06, + "loss": 1.0688, + "step": 3040 + }, + { + "epoch": 2.46, + "learning_rate": 3.520593472643122e-06, + "loss": 1.0023, + "step": 3045 + }, + { + "epoch": 2.47, + "learning_rate": 3.504625804895302e-06, + "loss": 1.0315, + "step": 3050 + }, + { + "epoch": 2.47, + "learning_rate": 3.488674868577246e-06, + "loss": 1.0318, + "step": 3055 + }, + { + "epoch": 2.48, + "learning_rate": 3.472740842160649e-06, + "loss": 1.057, + "step": 3060 + }, + { + "epoch": 2.48, + "learning_rate": 3.4568239039280094e-06, + "loss": 1.041, + "step": 3065 + }, + { + "epoch": 2.48, + "learning_rate": 3.4409242319706225e-06, + "loss": 1.126, + "step": 3070 + }, + { + "epoch": 2.49, + "learning_rate": 3.4250420041866057e-06, + "loss": 1.0151, + "step": 3075 + }, + { + "epoch": 2.49, + "learning_rate": 3.4091773982788867e-06, + "loss": 1.0395, + "step": 3080 + }, + { + "epoch": 2.5, + "learning_rate": 3.393330591753231e-06, + "loss": 1.0207, + "step": 3085 + }, + { + "epoch": 2.5, + "learning_rate": 3.377501761916249e-06, + "loss": 0.9663, + "step": 3090 + }, + { + "epoch": 2.5, + "learning_rate": 3.3616910858734143e-06, + "loss": 1.055, + "step": 3095 + }, + { + "epoch": 2.51, + "learning_rate": 3.3458987405270803e-06, + "loss": 1.017, + "step": 3100 + }, + { + "epoch": 2.51, + "learning_rate": 3.330124902574505e-06, + "loss": 1.0034, + "step": 3105 + }, + { + "epoch": 2.52, + "learning_rate": 3.3143697485058666e-06, + "loss": 1.0262, + "step": 3110 + }, + { + "epoch": 2.52, + "learning_rate": 3.2986334546022964e-06, + "loss": 1.0723, + "step": 3115 + }, + { + "epoch": 2.52, + "learning_rate": 3.282916196933904e-06, + "loss": 1.0314, + "step": 3120 + }, + { + "epoch": 2.53, + "learning_rate": 3.2672181513578038e-06, + "loss": 1.0613, + "step": 3125 + }, + { + "epoch": 2.53, + "learning_rate": 3.251539493516152e-06, + "loss": 1.0641, + "step": 3130 + }, + { + "epoch": 2.54, + "learning_rate": 3.2358803988341776e-06, + "loss": 1.0283, + "step": 3135 + }, + { + "epoch": 2.54, + "learning_rate": 3.220241042518223e-06, + "loss": 1.0502, + "step": 3140 + }, + { + "epoch": 2.54, + "learning_rate": 3.2046215995537837e-06, + "loss": 1.0416, + "step": 3145 + }, + { + "epoch": 2.55, + "learning_rate": 3.1890222447035444e-06, + "loss": 1.0549, + "step": 3150 + }, + { + "epoch": 2.55, + "learning_rate": 3.173443152505431e-06, + "loss": 1.034, + "step": 3155 + }, + { + "epoch": 2.56, + "learning_rate": 3.157884497270658e-06, + "loss": 1.0594, + "step": 3160 + }, + { + "epoch": 2.56, + "learning_rate": 3.1423464530817673e-06, + "loss": 1.0637, + "step": 3165 + }, + { + "epoch": 2.56, + "learning_rate": 3.1268291937906957e-06, + "loss": 1.0402, + "step": 3170 + }, + { + "epoch": 2.57, + "learning_rate": 3.1113328930168153e-06, + "loss": 1.0236, + "step": 3175 + }, + { + "epoch": 2.57, + "learning_rate": 3.095857724145004e-06, + "loss": 1.0414, + "step": 3180 + }, + { + "epoch": 2.58, + "learning_rate": 3.0804038603236943e-06, + "loss": 1.0465, + "step": 3185 + }, + { + "epoch": 2.58, + "learning_rate": 3.0649714744629454e-06, + "loss": 1.0561, + "step": 3190 + }, + { + "epoch": 2.58, + "learning_rate": 3.0495607392324987e-06, + "loss": 1.0414, + "step": 3195 + }, + { + "epoch": 2.59, + "learning_rate": 3.0341718270598557e-06, + "loss": 1.0492, + "step": 3200 + }, + { + "epoch": 2.59, + "learning_rate": 3.0188049101283433e-06, + "loss": 1.0053, + "step": 3205 + }, + { + "epoch": 2.6, + "learning_rate": 3.003460160375189e-06, + "loss": 1.0193, + "step": 3210 + }, + { + "epoch": 2.6, + "learning_rate": 2.9881377494895925e-06, + "loss": 1.093, + "step": 3215 + }, + { + "epoch": 2.61, + "learning_rate": 2.9728378489108135e-06, + "loss": 1.0285, + "step": 3220 + }, + { + "epoch": 2.61, + "learning_rate": 2.957560629826244e-06, + "loss": 1.0982, + "step": 3225 + }, + { + "epoch": 2.61, + "learning_rate": 2.942306263169502e-06, + "loss": 1.0438, + "step": 3230 + }, + { + "epoch": 2.62, + "learning_rate": 2.9270749196185095e-06, + "loss": 1.0695, + "step": 3235 + }, + { + "epoch": 2.62, + "learning_rate": 2.911866769593592e-06, + "loss": 1.0139, + "step": 3240 + }, + { + "epoch": 2.63, + "learning_rate": 2.896681983255565e-06, + "loss": 1.1477, + "step": 3245 + }, + { + "epoch": 2.63, + "learning_rate": 2.881520730503837e-06, + "loss": 1.0437, + "step": 3250 + }, + { + "epoch": 2.63, + "learning_rate": 2.866383180974498e-06, + "loss": 1.0455, + "step": 3255 + }, + { + "epoch": 2.64, + "learning_rate": 2.8512695040384287e-06, + "loss": 1.0014, + "step": 3260 + }, + { + "epoch": 2.64, + "learning_rate": 2.8361798687994097e-06, + "loss": 1.0016, + "step": 3265 + }, + { + "epoch": 2.65, + "learning_rate": 2.8211144440922176e-06, + "loss": 0.9983, + "step": 3270 + }, + { + "epoch": 2.65, + "learning_rate": 2.8060733984807466e-06, + "loss": 1.0927, + "step": 3275 + }, + { + "epoch": 2.65, + "learning_rate": 2.7910569002561137e-06, + "loss": 1.0424, + "step": 3280 + }, + { + "epoch": 2.66, + "learning_rate": 2.7760651174347854e-06, + "loss": 1.0555, + "step": 3285 + }, + { + "epoch": 2.66, + "learning_rate": 2.7610982177566926e-06, + "loss": 0.983, + "step": 3290 + }, + { + "epoch": 2.67, + "learning_rate": 2.7461563686833504e-06, + "loss": 0.9712, + "step": 3295 + }, + { + "epoch": 2.67, + "learning_rate": 2.7312397373959894e-06, + "loss": 1.04, + "step": 3300 + }, + { + "epoch": 2.67, + "learning_rate": 2.716348490793681e-06, + "loss": 1.092, + "step": 3305 + }, + { + "epoch": 2.68, + "learning_rate": 2.7014827954914814e-06, + "loss": 0.9855, + "step": 3310 + }, + { + "epoch": 2.68, + "learning_rate": 2.686642817818548e-06, + "loss": 1.0319, + "step": 3315 + }, + { + "epoch": 2.69, + "learning_rate": 2.6718287238162963e-06, + "loss": 0.9938, + "step": 3320 + }, + { + "epoch": 2.69, + "learning_rate": 2.6570406792365268e-06, + "loss": 1.0662, + "step": 3325 + }, + { + "epoch": 2.69, + "learning_rate": 2.6422788495395912e-06, + "loss": 1.0263, + "step": 3330 + }, + { + "epoch": 2.7, + "learning_rate": 2.6275433998925176e-06, + "loss": 1.0584, + "step": 3335 + }, + { + "epoch": 2.7, + "learning_rate": 2.612834495167177e-06, + "loss": 1.0334, + "step": 3340 + }, + { + "epoch": 2.71, + "learning_rate": 2.5981522999384323e-06, + "loss": 1.0426, + "step": 3345 + }, + { + "epoch": 2.71, + "learning_rate": 2.583496978482305e-06, + "loss": 1.0199, + "step": 3350 + }, + { + "epoch": 2.71, + "learning_rate": 2.568868694774127e-06, + "loss": 1.0363, + "step": 3355 + }, + { + "epoch": 2.72, + "learning_rate": 2.5542676124867103e-06, + "loss": 0.9959, + "step": 3360 + }, + { + "epoch": 2.72, + "learning_rate": 2.5396938949885163e-06, + "loss": 1.0357, + "step": 3365 + }, + { + "epoch": 2.73, + "learning_rate": 2.52514770534183e-06, + "loss": 1.0444, + "step": 3370 + }, + { + "epoch": 2.73, + "learning_rate": 2.510629206300933e-06, + "loss": 1.0627, + "step": 3375 + }, + { + "epoch": 2.73, + "learning_rate": 2.4961385603102794e-06, + "loss": 1.0535, + "step": 3380 + }, + { + "epoch": 2.74, + "learning_rate": 2.481675929502682e-06, + "loss": 1.0276, + "step": 3385 + }, + { + "epoch": 2.74, + "learning_rate": 2.467241475697498e-06, + "loss": 1.0057, + "step": 3390 + }, + { + "epoch": 2.75, + "learning_rate": 2.45283536039882e-06, + "loss": 1.0055, + "step": 3395 + }, + { + "epoch": 2.75, + "learning_rate": 2.438457744793665e-06, + "loss": 1.0001, + "step": 3400 + }, + { + "epoch": 2.75, + "learning_rate": 2.4241087897501703e-06, + "loss": 1.1129, + "step": 3405 + }, + { + "epoch": 2.76, + "learning_rate": 2.409788655815802e-06, + "loss": 0.9816, + "step": 3410 + }, + { + "epoch": 2.76, + "learning_rate": 2.395497503215551e-06, + "loss": 1.008, + "step": 3415 + }, + { + "epoch": 2.77, + "learning_rate": 2.3812354918501397e-06, + "loss": 1.0068, + "step": 3420 + }, + { + "epoch": 2.77, + "learning_rate": 2.3670027812942353e-06, + "loss": 1.0779, + "step": 3425 + }, + { + "epoch": 2.78, + "learning_rate": 2.3527995307946655e-06, + "loss": 1.0264, + "step": 3430 + }, + { + "epoch": 2.78, + "learning_rate": 2.338625899268638e-06, + "loss": 1.0395, + "step": 3435 + }, + { + "epoch": 2.78, + "learning_rate": 2.3244820453019566e-06, + "loss": 1.0604, + "step": 3440 + }, + { + "epoch": 2.79, + "learning_rate": 2.3103681271472516e-06, + "loss": 1.0236, + "step": 3445 + }, + { + "epoch": 2.79, + "learning_rate": 2.296284302722205e-06, + "loss": 1.0918, + "step": 3450 + }, + { + "epoch": 2.8, + "learning_rate": 2.28223072960779e-06, + "loss": 1.0504, + "step": 3455 + }, + { + "epoch": 2.8, + "learning_rate": 2.2682075650465063e-06, + "loss": 1.0361, + "step": 3460 + }, + { + "epoch": 2.8, + "learning_rate": 2.2542149659406126e-06, + "loss": 1.0268, + "step": 3465 + }, + { + "epoch": 2.81, + "learning_rate": 2.2402530888503783e-06, + "loss": 1.0434, + "step": 3470 + }, + { + "epoch": 2.81, + "learning_rate": 2.226322089992336e-06, + "loss": 1.0348, + "step": 3475 + }, + { + "epoch": 2.82, + "learning_rate": 2.2124221252375215e-06, + "loss": 1.0135, + "step": 3480 + }, + { + "epoch": 2.82, + "learning_rate": 2.1985533501097407e-06, + "loss": 1.0488, + "step": 3485 + }, + { + "epoch": 2.82, + "learning_rate": 2.1847159197838213e-06, + "loss": 0.9809, + "step": 3490 + }, + { + "epoch": 2.83, + "learning_rate": 2.1709099890838846e-06, + "loss": 1.0627, + "step": 3495 + }, + { + "epoch": 2.83, + "learning_rate": 2.1571357124816107e-06, + "loss": 1.0373, + "step": 3500 + }, + { + "epoch": 2.84, + "learning_rate": 2.1433932440945028e-06, + "loss": 1.0068, + "step": 3505 + }, + { + "epoch": 2.84, + "learning_rate": 2.129682737684171e-06, + "loss": 1.0604, + "step": 3510 + }, + { + "epoch": 2.84, + "learning_rate": 2.11600434665461e-06, + "loss": 1.0337, + "step": 3515 + }, + { + "epoch": 2.85, + "learning_rate": 2.1023582240504836e-06, + "loss": 1.0668, + "step": 3520 + }, + { + "epoch": 2.85, + "learning_rate": 2.088744522555409e-06, + "loss": 1.0088, + "step": 3525 + }, + { + "epoch": 2.86, + "learning_rate": 2.0751633944902487e-06, + "loss": 1.0436, + "step": 3530 + }, + { + "epoch": 2.86, + "learning_rate": 2.061614991811414e-06, + "loss": 1.0138, + "step": 3535 + }, + { + "epoch": 2.86, + "learning_rate": 2.0480994661091507e-06, + "loss": 1.1406, + "step": 3540 + }, + { + "epoch": 2.87, + "learning_rate": 2.0346169686058586e-06, + "loss": 1.0391, + "step": 3545 + }, + { + "epoch": 2.87, + "learning_rate": 2.0211676501543866e-06, + "loss": 1.0592, + "step": 3550 + }, + { + "epoch": 2.88, + "learning_rate": 2.00775166123635e-06, + "loss": 0.9783, + "step": 3555 + }, + { + "epoch": 2.88, + "learning_rate": 1.9943691519604523e-06, + "loss": 1.0473, + "step": 3560 + }, + { + "epoch": 2.88, + "learning_rate": 1.9810202720607945e-06, + "loss": 1.0555, + "step": 3565 + }, + { + "epoch": 2.89, + "learning_rate": 1.967705170895208e-06, + "loss": 1.0691, + "step": 3570 + }, + { + "epoch": 2.89, + "learning_rate": 1.9544239974435797e-06, + "loss": 1.026, + "step": 3575 + }, + { + "epoch": 2.9, + "learning_rate": 1.9411769003061874e-06, + "loss": 1.0588, + "step": 3580 + }, + { + "epoch": 2.9, + "learning_rate": 1.9279640277020396e-06, + "loss": 1.0635, + "step": 3585 + }, + { + "epoch": 2.9, + "learning_rate": 1.9147855274672073e-06, + "loss": 0.9919, + "step": 3590 + }, + { + "epoch": 2.91, + "learning_rate": 1.9016415470531773e-06, + "loss": 1.0053, + "step": 3595 + }, + { + "epoch": 2.91, + "learning_rate": 1.8885322335252076e-06, + "loss": 1.0461, + "step": 3600 + }, + { + "epoch": 2.92, + "learning_rate": 1.8754577335606689e-06, + "loss": 1.0051, + "step": 3605 + }, + { + "epoch": 2.92, + "learning_rate": 1.8624181934474117e-06, + "loss": 1.0521, + "step": 3610 + }, + { + "epoch": 2.92, + "learning_rate": 1.8494137590821282e-06, + "loss": 0.9926, + "step": 3615 + }, + { + "epoch": 2.93, + "learning_rate": 1.8364445759687233e-06, + "loss": 1.0264, + "step": 3620 + }, + { + "epoch": 2.93, + "learning_rate": 1.823510789216676e-06, + "loss": 1.0475, + "step": 3625 + }, + { + "epoch": 2.94, + "learning_rate": 1.8106125435394312e-06, + "loss": 1.012, + "step": 3630 + }, + { + "epoch": 2.94, + "learning_rate": 1.7977499832527655e-06, + "loss": 1.0269, + "step": 3635 + }, + { + "epoch": 2.94, + "learning_rate": 1.7849232522731797e-06, + "loss": 1.0463, + "step": 3640 + }, + { + "epoch": 2.95, + "learning_rate": 1.7721324941162933e-06, + "loss": 1.025, + "step": 3645 + }, + { + "epoch": 2.95, + "learning_rate": 1.7593778518952275e-06, + "loss": 1.0326, + "step": 3650 + }, + { + "epoch": 2.96, + "learning_rate": 1.7466594683190107e-06, + "loss": 1.0389, + "step": 3655 + }, + { + "epoch": 2.96, + "learning_rate": 1.7339774856909851e-06, + "loss": 1.0609, + "step": 3660 + }, + { + "epoch": 2.97, + "learning_rate": 1.7213320459072047e-06, + "loss": 0.9949, + "step": 3665 + }, + { + "epoch": 2.97, + "learning_rate": 1.7087232904548595e-06, + "loss": 1.0083, + "step": 3670 + }, + { + "epoch": 2.97, + "learning_rate": 1.69615136041068e-06, + "loss": 1.0377, + "step": 3675 + }, + { + "epoch": 2.98, + "learning_rate": 1.6836163964393664e-06, + "loss": 1.0514, + "step": 3680 + }, + { + "epoch": 2.98, + "learning_rate": 1.6711185387920176e-06, + "loss": 0.99, + "step": 3685 + }, + { + "epoch": 2.99, + "learning_rate": 1.6586579273045529e-06, + "loss": 1.0146, + "step": 3690 + }, + { + "epoch": 2.99, + "learning_rate": 1.6462347013961526e-06, + "loss": 1.0445, + "step": 3695 + }, + { + "epoch": 2.99, + "learning_rate": 1.6338490000676987e-06, + "loss": 1.0674, + "step": 3700 + }, + { + "epoch": 3.0, + "learning_rate": 1.6215009619002197e-06, + "loss": 1.0215, + "step": 3705 + }, + { + "epoch": 3.0, + "learning_rate": 1.609190725053335e-06, + "loss": 0.9832, + "step": 3710 + }, + { + "epoch": 3.01, + "learning_rate": 1.5969184272637184e-06, + "loss": 0.9313, + "step": 3715 + }, + { + "epoch": 3.01, + "learning_rate": 1.5846842058435457e-06, + "loss": 1.0244, + "step": 3720 + }, + { + "epoch": 3.01, + "learning_rate": 1.5724881976789696e-06, + "loss": 0.9002, + "step": 3725 + }, + { + "epoch": 3.02, + "learning_rate": 1.5603305392285785e-06, + "loss": 0.957, + "step": 3730 + }, + { + "epoch": 3.02, + "learning_rate": 1.548211366521875e-06, + "loss": 0.9404, + "step": 3735 + }, + { + "epoch": 3.03, + "learning_rate": 1.5361308151577526e-06, + "loss": 0.9199, + "step": 3740 + }, + { + "epoch": 3.03, + "learning_rate": 1.5240890203029813e-06, + "loss": 0.9224, + "step": 3745 + }, + { + "epoch": 3.03, + "learning_rate": 1.5120861166906869e-06, + "loss": 0.9822, + "step": 3750 + }, + { + "epoch": 3.04, + "learning_rate": 1.5001222386188573e-06, + "loss": 0.9063, + "step": 3755 + }, + { + "epoch": 3.04, + "learning_rate": 1.4881975199488247e-06, + "loss": 0.9455, + "step": 3760 + }, + { + "epoch": 3.05, + "learning_rate": 1.4763120941037757e-06, + "loss": 0.8986, + "step": 3765 + }, + { + "epoch": 3.05, + "learning_rate": 1.4644660940672628e-06, + "loss": 0.9297, + "step": 3770 + }, + { + "epoch": 3.05, + "learning_rate": 1.4526596523817066e-06, + "loss": 0.9889, + "step": 3775 + }, + { + "epoch": 3.06, + "learning_rate": 1.4408929011469175e-06, + "loss": 0.9387, + "step": 3780 + }, + { + "epoch": 3.06, + "learning_rate": 1.4291659720186218e-06, + "loss": 0.8889, + "step": 3785 + }, + { + "epoch": 3.07, + "learning_rate": 1.4174789962069808e-06, + "loss": 0.9965, + "step": 3790 + }, + { + "epoch": 3.07, + "learning_rate": 1.4058321044751255e-06, + "loss": 0.9279, + "step": 3795 + }, + { + "epoch": 3.07, + "learning_rate": 1.3942254271377004e-06, + "loss": 0.9621, + "step": 3800 + }, + { + "epoch": 3.08, + "learning_rate": 1.3826590940593926e-06, + "loss": 0.9081, + "step": 3805 + }, + { + "epoch": 3.08, + "learning_rate": 1.3711332346534916e-06, + "loss": 0.9201, + "step": 3810 + }, + { + "epoch": 3.09, + "learning_rate": 1.3596479778804312e-06, + "loss": 0.9013, + "step": 3815 + }, + { + "epoch": 3.09, + "learning_rate": 1.3482034522463522e-06, + "loss": 0.9255, + "step": 3820 + }, + { + "epoch": 3.09, + "learning_rate": 1.3367997858016619e-06, + "loss": 0.9678, + "step": 3825 + }, + { + "epoch": 3.1, + "learning_rate": 1.325437106139607e-06, + "loss": 0.9334, + "step": 3830 + }, + { + "epoch": 3.1, + "learning_rate": 1.3141155403948358e-06, + "loss": 0.9455, + "step": 3835 + }, + { + "epoch": 3.11, + "learning_rate": 1.3028352152419876e-06, + "loss": 0.9025, + "step": 3840 + }, + { + "epoch": 3.11, + "learning_rate": 1.291596256894263e-06, + "loss": 0.8933, + "step": 3845 + }, + { + "epoch": 3.11, + "learning_rate": 1.2803987911020239e-06, + "loss": 0.999, + "step": 3850 + }, + { + "epoch": 3.12, + "learning_rate": 1.269242943151377e-06, + "loss": 0.8996, + "step": 3855 + }, + { + "epoch": 3.12, + "learning_rate": 1.2581288378627759e-06, + "loss": 0.9594, + "step": 3860 + }, + { + "epoch": 3.13, + "learning_rate": 1.2470565995896244e-06, + "loss": 0.9385, + "step": 3865 + }, + { + "epoch": 3.13, + "learning_rate": 1.236026352216888e-06, + "loss": 0.9508, + "step": 3870 + }, + { + "epoch": 3.14, + "learning_rate": 1.2250382191597015e-06, + "loss": 0.9479, + "step": 3875 + }, + { + "epoch": 3.14, + "learning_rate": 1.21409232336199e-06, + "loss": 0.8861, + "step": 3880 + }, + { + "epoch": 3.14, + "learning_rate": 1.2031887872951004e-06, + "loss": 0.9539, + "step": 3885 + }, + { + "epoch": 3.15, + "learning_rate": 1.1923277329564192e-06, + "loss": 0.8969, + "step": 3890 + }, + { + "epoch": 3.15, + "learning_rate": 1.181509281868019e-06, + "loss": 0.9248, + "step": 3895 + }, + { + "epoch": 3.16, + "learning_rate": 1.1707335550752901e-06, + "loss": 0.8923, + "step": 3900 + }, + { + "epoch": 3.16, + "learning_rate": 1.1600006731455888e-06, + "loss": 0.8534, + "step": 3905 + }, + { + "epoch": 3.16, + "learning_rate": 1.1493107561668943e-06, + "loss": 0.9193, + "step": 3910 + }, + { + "epoch": 3.17, + "learning_rate": 1.1386639237464542e-06, + "loss": 0.9688, + "step": 3915 + }, + { + "epoch": 3.17, + "learning_rate": 1.1280602950094532e-06, + "loss": 0.8982, + "step": 3920 + }, + { + "epoch": 3.18, + "learning_rate": 1.1174999885976834e-06, + "loss": 0.9001, + "step": 3925 + }, + { + "epoch": 3.18, + "learning_rate": 1.106983122668206e-06, + "loss": 0.9189, + "step": 3930 + }, + { + "epoch": 3.18, + "learning_rate": 1.0965098148920422e-06, + "loss": 0.9842, + "step": 3935 + }, + { + "epoch": 3.19, + "learning_rate": 1.0860801824528443e-06, + "loss": 0.9438, + "step": 3940 + }, + { + "epoch": 3.19, + "learning_rate": 1.0756943420455934e-06, + "loss": 0.9412, + "step": 3945 + }, + { + "epoch": 3.2, + "learning_rate": 1.0653524098752894e-06, + "loss": 0.9695, + "step": 3950 + }, + { + "epoch": 3.2, + "learning_rate": 1.055054501655654e-06, + "loss": 0.9145, + "step": 3955 + }, + { + "epoch": 3.2, + "learning_rate": 1.0448007326078336e-06, + "loss": 0.9602, + "step": 3960 + }, + { + "epoch": 3.21, + "learning_rate": 1.0345912174591071e-06, + "loss": 0.9009, + "step": 3965 + }, + { + "epoch": 3.21, + "learning_rate": 1.0244260704416104e-06, + "loss": 0.9375, + "step": 3970 + }, + { + "epoch": 3.22, + "learning_rate": 1.0143054052910534e-06, + "loss": 0.9402, + "step": 3975 + }, + { + "epoch": 3.22, + "learning_rate": 1.0042293352454446e-06, + "loss": 0.9182, + "step": 3980 + }, + { + "epoch": 3.22, + "learning_rate": 9.94197973043829e-07, + "loss": 0.909, + "step": 3985 + }, + { + "epoch": 3.23, + "learning_rate": 9.842114309250222e-07, + "loss": 0.9285, + "step": 3990 + }, + { + "epoch": 3.23, + "learning_rate": 9.74269820626364e-07, + "loss": 0.9264, + "step": 3995 + }, + { + "epoch": 3.24, + "learning_rate": 9.643732533824545e-07, + "loss": 0.9205, + "step": 4000 + }, + { + "epoch": 3.24, + "learning_rate": 9.545218399239186e-07, + "loss": 0.96, + "step": 4005 + }, + { + "epoch": 3.24, + "learning_rate": 9.447156904761668e-07, + "loss": 0.9473, + "step": 4010 + }, + { + "epoch": 3.25, + "learning_rate": 9.349549147581571e-07, + "loss": 0.9281, + "step": 4015 + }, + { + "epoch": 3.25, + "learning_rate": 9.252396219811737e-07, + "loss": 0.9311, + "step": 4020 + }, + { + "epoch": 3.26, + "learning_rate": 9.155699208475988e-07, + "loss": 0.9789, + "step": 4025 + }, + { + "epoch": 3.26, + "learning_rate": 9.059459195496989e-07, + "loss": 0.8984, + "step": 4030 + }, + { + "epoch": 3.26, + "learning_rate": 8.963677257684184e-07, + "loss": 0.9564, + "step": 4035 + }, + { + "epoch": 3.27, + "learning_rate": 8.868354466721668e-07, + "loss": 0.9293, + "step": 4040 + }, + { + "epoch": 3.27, + "learning_rate": 8.773491889156254e-07, + "loss": 0.9678, + "step": 4045 + }, + { + "epoch": 3.28, + "learning_rate": 8.679090586385519e-07, + "loss": 0.9275, + "step": 4050 + }, + { + "epoch": 3.28, + "learning_rate": 8.585151614645942e-07, + "loss": 0.966, + "step": 4055 + }, + { + "epoch": 3.28, + "learning_rate": 8.491676025001083e-07, + "loss": 0.9049, + "step": 4060 + }, + { + "epoch": 3.29, + "learning_rate": 8.398664863329792e-07, + "loss": 0.9385, + "step": 4065 + }, + { + "epoch": 3.29, + "learning_rate": 8.306119170314553e-07, + "loss": 0.9529, + "step": 4070 + }, + { + "epoch": 3.3, + "learning_rate": 8.214039981429789e-07, + "loss": 0.9412, + "step": 4075 + }, + { + "epoch": 3.3, + "learning_rate": 8.122428326930348e-07, + "loss": 0.9852, + "step": 4080 + }, + { + "epoch": 3.31, + "learning_rate": 8.031285231839908e-07, + "loss": 0.9223, + "step": 4085 + }, + { + "epoch": 3.31, + "learning_rate": 7.940611715939522e-07, + "loss": 0.9592, + "step": 4090 + }, + { + "epoch": 3.31, + "learning_rate": 7.850408793756242e-07, + "loss": 0.9758, + "step": 4095 + }, + { + "epoch": 3.32, + "learning_rate": 7.760677474551759e-07, + "loss": 0.842, + "step": 4100 + }, + { + "epoch": 3.32, + "learning_rate": 7.67141876231105e-07, + "loss": 0.9406, + "step": 4105 + }, + { + "epoch": 3.33, + "learning_rate": 7.582633655731231e-07, + "loss": 0.9397, + "step": 4110 + }, + { + "epoch": 3.33, + "learning_rate": 7.494323148210303e-07, + "loss": 0.9193, + "step": 4115 + }, + { + "epoch": 3.33, + "learning_rate": 7.406488227836139e-07, + "loss": 0.9529, + "step": 4120 + }, + { + "epoch": 3.34, + "learning_rate": 7.319129877375314e-07, + "loss": 0.973, + "step": 4125 + }, + { + "epoch": 3.34, + "learning_rate": 7.232249074262176e-07, + "loss": 0.9596, + "step": 4130 + }, + { + "epoch": 3.35, + "learning_rate": 7.145846790587891e-07, + "loss": 0.9477, + "step": 4135 + }, + { + "epoch": 3.35, + "learning_rate": 7.059923993089585e-07, + "loss": 0.9809, + "step": 4140 + }, + { + "epoch": 3.35, + "learning_rate": 6.974481643139514e-07, + "loss": 0.9863, + "step": 4145 + }, + { + "epoch": 3.36, + "learning_rate": 6.889520696734297e-07, + "loss": 0.9666, + "step": 4150 + }, + { + "epoch": 3.36, + "learning_rate": 6.805042104484216e-07, + "loss": 0.9328, + "step": 4155 + }, + { + "epoch": 3.37, + "learning_rate": 6.721046811602622e-07, + "loss": 0.8867, + "step": 4160 + }, + { + "epoch": 3.37, + "learning_rate": 6.63753575789532e-07, + "loss": 0.9635, + "step": 4165 + }, + { + "epoch": 3.37, + "learning_rate": 6.554509877750042e-07, + "loss": 0.9605, + "step": 4170 + }, + { + "epoch": 3.38, + "learning_rate": 6.471970100126035e-07, + "loss": 0.989, + "step": 4175 + }, + { + "epoch": 3.38, + "learning_rate": 6.389917348543651e-07, + "loss": 0.9393, + "step": 4180 + }, + { + "epoch": 3.39, + "learning_rate": 6.308352541074014e-07, + "loss": 0.9385, + "step": 4185 + }, + { + "epoch": 3.39, + "learning_rate": 6.227276590328713e-07, + "loss": 0.9325, + "step": 4190 + }, + { + "epoch": 3.39, + "learning_rate": 6.146690403449646e-07, + "loss": 0.9801, + "step": 4195 + }, + { + "epoch": 3.4, + "learning_rate": 6.066594882098831e-07, + "loss": 0.976, + "step": 4200 + }, + { + "epoch": 3.4, + "learning_rate": 5.98699092244835e-07, + "loss": 0.9523, + "step": 4205 + }, + { + "epoch": 3.41, + "learning_rate": 5.907879415170287e-07, + "loss": 0.8773, + "step": 4210 + }, + { + "epoch": 3.41, + "learning_rate": 5.829261245426793e-07, + "loss": 0.8939, + "step": 4215 + }, + { + "epoch": 3.41, + "learning_rate": 5.751137292860126e-07, + "loss": 0.9383, + "step": 4220 + }, + { + "epoch": 3.42, + "learning_rate": 5.673508431582936e-07, + "loss": 0.9797, + "step": 4225 + }, + { + "epoch": 3.42, + "learning_rate": 5.596375530168329e-07, + "loss": 0.932, + "step": 4230 + }, + { + "epoch": 3.43, + "learning_rate": 5.519739451640238e-07, + "loss": 0.9015, + "step": 4235 + }, + { + "epoch": 3.43, + "learning_rate": 5.443601053463743e-07, + "loss": 0.966, + "step": 4240 + }, + { + "epoch": 3.43, + "learning_rate": 5.367961187535504e-07, + "loss": 0.9252, + "step": 4245 + }, + { + "epoch": 3.44, + "learning_rate": 5.292820700174189e-07, + "loss": 0.925, + "step": 4250 + }, + { + "epoch": 3.44, + "learning_rate": 5.218180432111026e-07, + "loss": 0.9445, + "step": 4255 + }, + { + "epoch": 3.45, + "learning_rate": 5.144041218480389e-07, + "loss": 0.9461, + "step": 4260 + }, + { + "epoch": 3.45, + "learning_rate": 5.070403888810471e-07, + "loss": 0.926, + "step": 4265 + }, + { + "epoch": 3.45, + "learning_rate": 4.997269267013993e-07, + "loss": 0.9242, + "step": 4270 + }, + { + "epoch": 3.46, + "learning_rate": 4.924638171378976e-07, + "loss": 0.9514, + "step": 4275 + }, + { + "epoch": 3.46, + "learning_rate": 4.852511414559575e-07, + "loss": 0.9877, + "step": 4280 + }, + { + "epoch": 3.47, + "learning_rate": 4.780889803567018e-07, + "loss": 0.9541, + "step": 4285 + }, + { + "epoch": 3.47, + "learning_rate": 4.7097741397605754e-07, + "loss": 0.9449, + "step": 4290 + }, + { + "epoch": 3.47, + "learning_rate": 4.639165218838559e-07, + "loss": 0.9361, + "step": 4295 + }, + { + "epoch": 3.48, + "learning_rate": 4.569063830829445e-07, + "loss": 0.9908, + "step": 4300 + }, + { + "epoch": 3.48, + "learning_rate": 4.49947076008303e-07, + "loss": 0.9355, + "step": 4305 + }, + { + "epoch": 3.49, + "learning_rate": 4.4303867852616755e-07, + "loss": 0.9096, + "step": 4310 + }, + { + "epoch": 3.49, + "learning_rate": 4.361812679331551e-07, + "loss": 0.9555, + "step": 4315 + }, + { + "epoch": 3.5, + "learning_rate": 4.2937492095540043e-07, + "loss": 0.9221, + "step": 4320 + }, + { + "epoch": 3.5, + "learning_rate": 4.2261971374769893e-07, + "loss": 0.9594, + "step": 4325 + }, + { + "epoch": 3.5, + "learning_rate": 4.159157218926557e-07, + "loss": 0.914, + "step": 4330 + }, + { + "epoch": 3.51, + "learning_rate": 4.09263020399836e-07, + "loss": 0.9935, + "step": 4335 + }, + { + "epoch": 3.51, + "learning_rate": 4.02661683704928e-07, + "loss": 0.9467, + "step": 4340 + }, + { + "epoch": 3.52, + "learning_rate": 3.9611178566890894e-07, + "loss": 0.943, + "step": 4345 + }, + { + "epoch": 3.52, + "learning_rate": 3.896133995772233e-07, + "loss": 0.9232, + "step": 4350 + }, + { + "epoch": 3.52, + "learning_rate": 3.8316659813895597e-07, + "loss": 0.9545, + "step": 4355 + }, + { + "epoch": 3.53, + "learning_rate": 3.767714534860223e-07, + "loss": 0.9242, + "step": 4360 + }, + { + "epoch": 3.53, + "learning_rate": 3.704280371723601e-07, + "loss": 0.9379, + "step": 4365 + }, + { + "epoch": 3.54, + "learning_rate": 3.6413642017313233e-07, + "loss": 0.9506, + "step": 4370 + }, + { + "epoch": 3.54, + "learning_rate": 3.5789667288392784e-07, + "loss": 0.9465, + "step": 4375 + }, + { + "epoch": 3.54, + "learning_rate": 3.517088651199768e-07, + "loss": 0.9365, + "step": 4380 + }, + { + "epoch": 3.55, + "learning_rate": 3.455730661153672e-07, + "loss": 0.9195, + "step": 4385 + }, + { + "epoch": 3.55, + "learning_rate": 3.394893445222752e-07, + "loss": 0.9746, + "step": 4390 + }, + { + "epoch": 3.56, + "learning_rate": 3.334577684101925e-07, + "loss": 0.9289, + "step": 4395 + }, + { + "epoch": 3.56, + "learning_rate": 3.2747840526516414e-07, + "loss": 0.9038, + "step": 4400 + }, + { + "epoch": 3.56, + "learning_rate": 3.215513219890365e-07, + "loss": 0.9098, + "step": 4405 + }, + { + "epoch": 3.57, + "learning_rate": 3.15676584898707e-07, + "loss": 0.9435, + "step": 4410 + }, + { + "epoch": 3.57, + "learning_rate": 3.0985425972538343e-07, + "loss": 0.9098, + "step": 4415 + }, + { + "epoch": 3.58, + "learning_rate": 3.040844116138475e-07, + "loss": 0.9318, + "step": 4420 + }, + { + "epoch": 3.58, + "learning_rate": 2.9836710512172353e-07, + "loss": 0.9592, + "step": 4425 + }, + { + "epoch": 3.58, + "learning_rate": 2.9270240421876204e-07, + "loss": 0.9756, + "step": 4430 + }, + { + "epoch": 3.59, + "learning_rate": 2.8709037228611903e-07, + "loss": 0.9189, + "step": 4435 + }, + { + "epoch": 3.59, + "learning_rate": 2.815310721156489e-07, + "loss": 0.9139, + "step": 4440 + }, + { + "epoch": 3.6, + "learning_rate": 2.7602456590920034e-07, + "loss": 0.9127, + "step": 4445 + }, + { + "epoch": 3.6, + "learning_rate": 2.7057091527792125e-07, + "loss": 0.9602, + "step": 4450 + }, + { + "epoch": 3.6, + "learning_rate": 2.6517018124157137e-07, + "loss": 0.9787, + "step": 4455 + }, + { + "epoch": 3.61, + "learning_rate": 2.598224242278369e-07, + "loss": 0.916, + "step": 4460 + }, + { + "epoch": 3.61, + "learning_rate": 2.545277040716537e-07, + "loss": 0.9846, + "step": 4465 + }, + { + "epoch": 3.62, + "learning_rate": 2.492860800145408e-07, + "loss": 0.9484, + "step": 4470 + }, + { + "epoch": 3.62, + "learning_rate": 2.4409761070393614e-07, + "loss": 0.9191, + "step": 4475 + }, + { + "epoch": 3.62, + "learning_rate": 2.389623541925407e-07, + "loss": 0.9266, + "step": 4480 + }, + { + "epoch": 3.63, + "learning_rate": 2.3388036793766723e-07, + "loss": 0.9034, + "step": 4485 + }, + { + "epoch": 3.63, + "learning_rate": 2.2885170880059758e-07, + "loss": 0.896, + "step": 4490 + }, + { + "epoch": 3.64, + "learning_rate": 2.2387643304595196e-07, + "loss": 0.9574, + "step": 4495 + }, + { + "epoch": 3.64, + "learning_rate": 2.189545963410511e-07, + "loss": 0.9387, + "step": 4500 + }, + { + "epoch": 3.64, + "learning_rate": 2.1408625375529845e-07, + "loss": 0.9322, + "step": 4505 + }, + { + "epoch": 3.65, + "learning_rate": 2.0927145975956297e-07, + "loss": 0.9088, + "step": 4510 + }, + { + "epoch": 3.65, + "learning_rate": 2.0451026822556952e-07, + "loss": 0.9168, + "step": 4515 + }, + { + "epoch": 3.66, + "learning_rate": 1.9980273242529825e-07, + "loss": 0.951, + "step": 4520 + }, + { + "epoch": 3.66, + "learning_rate": 1.951489050303834e-07, + "loss": 0.916, + "step": 4525 + }, + { + "epoch": 3.67, + "learning_rate": 1.9054883811152837e-07, + "loss": 0.8936, + "step": 4530 + }, + { + "epoch": 3.67, + "learning_rate": 1.8600258313792142e-07, + "loss": 0.9279, + "step": 4535 + }, + { + "epoch": 3.67, + "learning_rate": 1.8151019097666146e-07, + "loss": 0.9666, + "step": 4540 + }, + { + "epoch": 3.68, + "learning_rate": 1.7707171189218663e-07, + "loss": 0.9555, + "step": 4545 + }, + { + "epoch": 3.68, + "learning_rate": 1.7268719554571157e-07, + "loss": 0.945, + "step": 4550 + }, + { + "epoch": 3.69, + "learning_rate": 1.683566909946771e-07, + "loss": 0.9357, + "step": 4555 + }, + { + "epoch": 3.69, + "learning_rate": 1.640802466921926e-07, + "loss": 0.9528, + "step": 4560 + }, + { + "epoch": 3.69, + "learning_rate": 1.5985791048650223e-07, + "loss": 0.8418, + "step": 4565 + }, + { + "epoch": 3.7, + "learning_rate": 1.5568972962044405e-07, + "loss": 0.9797, + "step": 4570 + }, + { + "epoch": 3.7, + "learning_rate": 1.515757507309229e-07, + "loss": 0.9197, + "step": 4575 + }, + { + "epoch": 3.71, + "learning_rate": 1.4751601984839159e-07, + "loss": 1.0133, + "step": 4580 + }, + { + "epoch": 3.71, + "learning_rate": 1.4351058239633065e-07, + "loss": 0.9518, + "step": 4585 + }, + { + "epoch": 3.71, + "learning_rate": 1.3955948319074374e-07, + "loss": 0.881, + "step": 4590 + }, + { + "epoch": 3.72, + "learning_rate": 1.3566276643965538e-07, + "loss": 0.9238, + "step": 4595 + }, + { + "epoch": 3.72, + "learning_rate": 1.3182047574261557e-07, + "loss": 0.9002, + "step": 4600 + }, + { + "epoch": 3.73, + "learning_rate": 1.2803265409021436e-07, + "loss": 0.948, + "step": 4605 + }, + { + "epoch": 3.73, + "learning_rate": 1.2429934386359643e-07, + "loss": 0.9025, + "step": 4610 + }, + { + "epoch": 3.73, + "learning_rate": 1.2062058683399048e-07, + "loss": 0.9354, + "step": 4615 + }, + { + "epoch": 3.74, + "learning_rate": 1.1699642416224233e-07, + "loss": 0.9582, + "step": 4620 + }, + { + "epoch": 3.74, + "learning_rate": 1.1342689639835036e-07, + "loss": 0.9734, + "step": 4625 + }, + { + "epoch": 3.75, + "learning_rate": 1.0991204348101692e-07, + "loss": 0.9267, + "step": 4630 + }, + { + "epoch": 3.75, + "learning_rate": 1.0645190473719647e-07, + "loss": 0.9705, + "step": 4635 + }, + { + "epoch": 3.75, + "learning_rate": 1.0304651888166039e-07, + "loss": 0.9285, + "step": 4640 + }, + { + "epoch": 3.76, + "learning_rate": 9.969592401655903e-08, + "loss": 0.9494, + "step": 4645 + }, + { + "epoch": 3.76, + "learning_rate": 9.640015763100031e-08, + "loss": 0.8965, + "step": 4650 + }, + { + "epoch": 3.77, + "learning_rate": 9.315925660062619e-08, + "loss": 0.9922, + "step": 4655 + }, + { + "epoch": 3.77, + "learning_rate": 8.997325718720085e-08, + "loss": 0.9295, + "step": 4660 + }, + { + "epoch": 3.77, + "learning_rate": 8.684219503820756e-08, + "loss": 0.9564, + "step": 4665 + }, + { + "epoch": 3.78, + "learning_rate": 8.376610518644746e-08, + "loss": 0.9201, + "step": 4670 + }, + { + "epoch": 3.78, + "learning_rate": 8.074502204964696e-08, + "loss": 0.9303, + "step": 4675 + }, + { + "epoch": 3.79, + "learning_rate": 7.777897943007595e-08, + "loss": 0.9636, + "step": 4680 + }, + { + "epoch": 3.79, + "learning_rate": 7.486801051416525e-08, + "loss": 0.9542, + "step": 4685 + }, + { + "epoch": 3.79, + "learning_rate": 7.201214787213862e-08, + "loss": 0.9684, + "step": 4690 + }, + { + "epoch": 3.8, + "learning_rate": 6.921142345764798e-08, + "loss": 0.924, + "step": 4695 + }, + { + "epoch": 3.8, + "learning_rate": 6.646586860741322e-08, + "loss": 0.9271, + "step": 4700 + }, + { + "epoch": 3.81, + "learning_rate": 6.377551404087467e-08, + "loss": 0.9333, + "step": 4705 + }, + { + "epoch": 3.81, + "learning_rate": 6.114038985984894e-08, + "loss": 0.9413, + "step": 4710 + }, + { + "epoch": 3.81, + "learning_rate": 5.856052554818969e-08, + "loss": 0.9223, + "step": 4715 + }, + { + "epoch": 3.82, + "learning_rate": 5.603594997145967e-08, + "loss": 0.9301, + "step": 4720 + }, + { + "epoch": 3.82, + "learning_rate": 5.3566691376609744e-08, + "loss": 0.9072, + "step": 4725 + }, + { + "epoch": 3.83, + "learning_rate": 5.115277739165703e-08, + "loss": 0.9152, + "step": 4730 + }, + { + "epoch": 3.83, + "learning_rate": 4.8794235025383386e-08, + "loss": 0.9234, + "step": 4735 + }, + { + "epoch": 3.83, + "learning_rate": 4.6491090667025176e-08, + "loss": 0.943, + "step": 4740 + }, + { + "epoch": 3.84, + "learning_rate": 4.4243370085985114e-08, + "loss": 0.8847, + "step": 4745 + }, + { + "epoch": 3.84, + "learning_rate": 4.2051098431539764e-08, + "loss": 1.0156, + "step": 4750 + }, + { + "epoch": 3.85, + "learning_rate": 3.991430023255804e-08, + "loss": 0.866, + "step": 4755 + }, + { + "epoch": 3.85, + "learning_rate": 3.783299939722984e-08, + "loss": 0.9083, + "step": 4760 + }, + { + "epoch": 3.86, + "learning_rate": 3.580721921279562e-08, + "loss": 0.9077, + "step": 4765 + }, + { + "epoch": 3.86, + "learning_rate": 3.383698234528665e-08, + "loss": 0.9351, + "step": 4770 + }, + { + "epoch": 3.86, + "learning_rate": 3.1922310839272444e-08, + "loss": 0.9322, + "step": 4775 + }, + { + "epoch": 3.87, + "learning_rate": 3.006322611761314e-08, + "loss": 0.9379, + "step": 4780 + }, + { + "epoch": 3.87, + "learning_rate": 2.8259748981219194e-08, + "loss": 0.9136, + "step": 4785 + }, + { + "epoch": 3.88, + "learning_rate": 2.651189960882039e-08, + "loss": 0.9764, + "step": 4790 + }, + { + "epoch": 3.88, + "learning_rate": 2.4819697556737742e-08, + "loss": 0.9348, + "step": 4795 + }, + { + "epoch": 3.88, + "learning_rate": 2.318316175866697e-08, + "loss": 0.9345, + "step": 4800 + }, + { + "epoch": 3.89, + "learning_rate": 2.1602310525466464e-08, + "loss": 0.8879, + "step": 4805 + }, + { + "epoch": 3.89, + "learning_rate": 2.007716154494965e-08, + "loss": 0.9619, + "step": 4810 + }, + { + "epoch": 3.9, + "learning_rate": 1.8607731881690737e-08, + "loss": 0.9516, + "step": 4815 + }, + { + "epoch": 3.9, + "learning_rate": 1.7194037976831502e-08, + "loss": 0.9471, + "step": 4820 + }, + { + "epoch": 3.9, + "learning_rate": 1.583609564789812e-08, + "loss": 0.9197, + "step": 4825 + }, + { + "epoch": 3.91, + "learning_rate": 1.4533920088623533e-08, + "loss": 0.8611, + "step": 4830 + }, + { + "epoch": 3.91, + "learning_rate": 1.3287525868778128e-08, + "loss": 0.9449, + "step": 4835 + }, + { + "epoch": 3.92, + "learning_rate": 1.2096926934007103e-08, + "loss": 0.9418, + "step": 4840 + }, + { + "epoch": 3.92, + "learning_rate": 1.0962136605673357e-08, + "loss": 0.9337, + "step": 4845 + }, + { + "epoch": 3.92, + "learning_rate": 9.883167580709285e-09, + "loss": 0.9118, + "step": 4850 + }, + { + "epoch": 3.93, + "learning_rate": 8.860031931473555e-09, + "loss": 0.9563, + "step": 4855 + }, + { + "epoch": 3.93, + "learning_rate": 7.892741105617329e-09, + "loss": 0.9342, + "step": 4860 + }, + { + "epoch": 3.94, + "learning_rate": 6.981305925956583e-09, + "loss": 0.9553, + "step": 4865 + }, + { + "epoch": 3.94, + "learning_rate": 6.1257365903488745e-09, + "loss": 0.9455, + "step": 4870 + }, + { + "epoch": 3.94, + "learning_rate": 5.326042671580655e-09, + "loss": 0.8813, + "step": 4875 + }, + { + "epoch": 3.95, + "learning_rate": 4.582233117260693e-09, + "loss": 0.8929, + "step": 4880 + }, + { + "epoch": 3.95, + "learning_rate": 3.894316249717922e-09, + "loss": 0.9463, + "step": 4885 + }, + { + "epoch": 3.96, + "learning_rate": 3.2622997659120802e-09, + "loss": 0.9428, + "step": 4890 + }, + { + "epoch": 3.96, + "learning_rate": 2.6861907373432193e-09, + "loss": 0.866, + "step": 4895 + }, + { + "epoch": 3.96, + "learning_rate": 2.165995609973992e-09, + "loss": 0.94, + "step": 4900 + }, + { + "epoch": 3.97, + "learning_rate": 1.7017202041602621e-09, + "loss": 0.9525, + "step": 4905 + }, + { + "epoch": 3.97, + "learning_rate": 1.293369714582271e-09, + "loss": 0.9548, + "step": 4910 + }, + { + "epoch": 3.98, + "learning_rate": 9.409487101880167e-10, + "loss": 0.9668, + "step": 4915 + }, + { + "epoch": 3.98, + "learning_rate": 6.444611341432927e-10, + "loss": 0.9349, + "step": 4920 + }, + { + "epoch": 3.98, + "learning_rate": 4.0391030378561513e-10, + "loss": 0.974, + "step": 4925 + }, + { + "epoch": 3.99, + "learning_rate": 2.1929891058758424e-10, + "loss": 0.9563, + "step": 4930 + }, + { + "epoch": 3.99, + "learning_rate": 9.0629020127464e-11, + "loss": 0.9373, + "step": 4935 + }, + { + "epoch": 4.0, + "learning_rate": 1.790207206586736e-11, + "loss": 0.9326, + "step": 4940 + }, + { + "epoch": 4.0, + "step": 4944, + "total_flos": 2.446826463366742e+18, + "train_loss": 1.1695684537918436, + "train_runtime": 57751.42, + "train_samples_per_second": 5.478, + "train_steps_per_second": 0.086 } ], - "max_steps": 1851, - "num_train_epochs": 3, - "total_flos": 1.8340911973547377e+18, + "max_steps": 4944, + "num_train_epochs": 4, + "total_flos": 2.446826463366742e+18, "trial_name": null, "trial_params": null }