{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "global_step": 4944, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0161290322580645e-07, "loss": 1.7344, "step": 5 }, { "epoch": 0.01, "learning_rate": 4.032258064516129e-07, "loss": 1.6586, "step": 10 }, { "epoch": 0.01, "learning_rate": 6.048387096774194e-07, "loss": 1.6465, "step": 15 }, { "epoch": 0.02, "learning_rate": 8.064516129032258e-07, "loss": 1.5832, "step": 20 }, { "epoch": 0.02, "learning_rate": 1.0080645161290323e-06, "loss": 1.5574, "step": 25 }, { "epoch": 0.02, "learning_rate": 1.2096774193548388e-06, "loss": 1.4381, "step": 30 }, { "epoch": 0.03, "learning_rate": 1.4112903225806455e-06, "loss": 1.5215, "step": 35 }, { "epoch": 0.03, "learning_rate": 1.6129032258064516e-06, "loss": 1.5566, "step": 40 }, { "epoch": 0.04, "learning_rate": 1.8145161290322583e-06, "loss": 1.5641, "step": 45 }, { "epoch": 0.04, "learning_rate": 2.0161290322580646e-06, "loss": 1.4707, "step": 50 }, { "epoch": 0.04, "learning_rate": 2.217741935483871e-06, "loss": 1.4484, "step": 55 }, { "epoch": 0.05, "learning_rate": 2.4193548387096776e-06, "loss": 1.4656, "step": 60 }, { "epoch": 0.05, "learning_rate": 2.620967741935484e-06, "loss": 1.3938, "step": 65 }, { "epoch": 0.06, "learning_rate": 2.822580645161291e-06, "loss": 1.4152, "step": 70 }, { "epoch": 0.06, "learning_rate": 3.024193548387097e-06, "loss": 1.4182, "step": 75 }, { "epoch": 0.06, "learning_rate": 3.225806451612903e-06, "loss": 1.5051, "step": 80 }, { "epoch": 0.07, "learning_rate": 3.4274193548387097e-06, "loss": 1.4918, "step": 85 }, { "epoch": 0.07, "learning_rate": 3.6290322580645166e-06, "loss": 1.4738, "step": 90 }, { "epoch": 0.08, "learning_rate": 3.830645161290323e-06, "loss": 1.4035, "step": 95 }, { "epoch": 0.08, "learning_rate": 4.032258064516129e-06, "loss": 1.4367, "step": 100 }, { "epoch": 0.08, "learning_rate": 4.233870967741936e-06, "loss": 1.4076, "step": 105 }, { "epoch": 0.09, "learning_rate": 4.435483870967742e-06, "loss": 1.4902, "step": 110 }, { "epoch": 0.09, "learning_rate": 4.637096774193548e-06, "loss": 1.3578, "step": 115 }, { "epoch": 0.1, "learning_rate": 4.838709677419355e-06, "loss": 1.4467, "step": 120 }, { "epoch": 0.1, "learning_rate": 5.040322580645161e-06, "loss": 1.4766, "step": 125 }, { "epoch": 0.11, "learning_rate": 5.241935483870968e-06, "loss": 1.4828, "step": 130 }, { "epoch": 0.11, "learning_rate": 5.443548387096774e-06, "loss": 1.4258, "step": 135 }, { "epoch": 0.11, "learning_rate": 5.645161290322582e-06, "loss": 1.4602, "step": 140 }, { "epoch": 0.12, "learning_rate": 5.846774193548388e-06, "loss": 1.4902, "step": 145 }, { "epoch": 0.12, "learning_rate": 6.048387096774194e-06, "loss": 1.3729, "step": 150 }, { "epoch": 0.13, "learning_rate": 6.25e-06, "loss": 1.4902, "step": 155 }, { "epoch": 0.13, "learning_rate": 6.451612903225806e-06, "loss": 1.435, "step": 160 }, { "epoch": 0.13, "learning_rate": 6.653225806451613e-06, "loss": 1.4096, "step": 165 }, { "epoch": 0.14, "learning_rate": 6.854838709677419e-06, "loss": 1.4508, "step": 170 }, { "epoch": 0.14, "learning_rate": 7.056451612903227e-06, "loss": 1.4266, "step": 175 }, { "epoch": 0.15, "learning_rate": 7.258064516129033e-06, "loss": 1.4648, "step": 180 }, { "epoch": 0.15, "learning_rate": 7.459677419354839e-06, "loss": 1.3996, "step": 185 }, { "epoch": 0.15, "learning_rate": 7.661290322580646e-06, "loss": 1.4152, "step": 190 }, { "epoch": 0.16, "learning_rate": 7.862903225806451e-06, "loss": 1.4648, "step": 195 }, { "epoch": 0.16, "learning_rate": 8.064516129032258e-06, "loss": 1.3128, "step": 200 }, { "epoch": 0.17, "learning_rate": 8.266129032258065e-06, "loss": 1.4242, "step": 205 }, { "epoch": 0.17, "learning_rate": 8.467741935483872e-06, "loss": 1.4648, "step": 210 }, { "epoch": 0.17, "learning_rate": 8.669354838709677e-06, "loss": 1.4527, "step": 215 }, { "epoch": 0.18, "learning_rate": 8.870967741935484e-06, "loss": 1.4, "step": 220 }, { "epoch": 0.18, "learning_rate": 9.072580645161291e-06, "loss": 1.4984, "step": 225 }, { "epoch": 0.19, "learning_rate": 9.274193548387097e-06, "loss": 1.4297, "step": 230 }, { "epoch": 0.19, "learning_rate": 9.475806451612905e-06, "loss": 1.4645, "step": 235 }, { "epoch": 0.19, "learning_rate": 9.67741935483871e-06, "loss": 1.4107, "step": 240 }, { "epoch": 0.2, "learning_rate": 9.879032258064517e-06, "loss": 1.4328, "step": 245 }, { "epoch": 0.2, "learning_rate": 9.999995524479982e-06, "loss": 1.4279, "step": 250 }, { "epoch": 0.21, "learning_rate": 9.999945174971776e-06, "loss": 1.4738, "step": 255 }, { "epoch": 0.21, "learning_rate": 9.999838882120566e-06, "loss": 1.3898, "step": 260 }, { "epoch": 0.21, "learning_rate": 9.999676647115646e-06, "loss": 1.3346, "step": 265 }, { "epoch": 0.22, "learning_rate": 9.999458471772225e-06, "loss": 1.5164, "step": 270 }, { "epoch": 0.22, "learning_rate": 9.999184358531422e-06, "loss": 1.4148, "step": 275 }, { "epoch": 0.23, "learning_rate": 9.998854310460233e-06, "loss": 1.4273, "step": 280 }, { "epoch": 0.23, "learning_rate": 9.998468331251499e-06, "loss": 1.4592, "step": 285 }, { "epoch": 0.23, "learning_rate": 9.998026425223858e-06, "loss": 1.4504, "step": 290 }, { "epoch": 0.24, "learning_rate": 9.997528597321704e-06, "loss": 1.448, "step": 295 }, { "epoch": 0.24, "learning_rate": 9.996974853115132e-06, "loss": 1.4523, "step": 300 }, { "epoch": 0.25, "learning_rate": 9.996365198799868e-06, "loss": 1.5063, "step": 305 }, { "epoch": 0.25, "learning_rate": 9.99569964119721e-06, "loss": 1.4051, "step": 310 }, { "epoch": 0.25, "learning_rate": 9.99497818775394e-06, "loss": 1.4312, "step": 315 }, { "epoch": 0.26, "learning_rate": 9.994200846542251e-06, "loss": 1.4758, "step": 320 }, { "epoch": 0.26, "learning_rate": 9.993367626259652e-06, "loss": 1.468, "step": 325 }, { "epoch": 0.27, "learning_rate": 9.99247853622887e-06, "loss": 1.527, "step": 330 }, { "epoch": 0.27, "learning_rate": 9.991533586397751e-06, "loss": 1.4969, "step": 335 }, { "epoch": 0.28, "learning_rate": 9.990532787339137e-06, "loss": 1.4996, "step": 340 }, { "epoch": 0.28, "learning_rate": 9.98947615025076e-06, "loss": 1.5262, "step": 345 }, { "epoch": 0.28, "learning_rate": 9.988363686955116e-06, "loss": 1.5992, "step": 350 }, { "epoch": 0.29, "learning_rate": 9.987195409899322e-06, "loss": 1.4711, "step": 355 }, { "epoch": 0.29, "learning_rate": 9.985971332154985e-06, "loss": 1.45, "step": 360 }, { "epoch": 0.3, "learning_rate": 9.984691467418057e-06, "loss": 1.4863, "step": 365 }, { "epoch": 0.3, "learning_rate": 9.983355830008678e-06, "loss": 1.5219, "step": 370 }, { "epoch": 0.3, "learning_rate": 9.981964434871015e-06, "loss": 1.5977, "step": 375 }, { "epoch": 0.31, "learning_rate": 9.980517297573097e-06, "loss": 1.4539, "step": 380 }, { "epoch": 0.31, "learning_rate": 9.979014434306642e-06, "loss": 1.3713, "step": 385 }, { "epoch": 0.32, "learning_rate": 9.977455861886874e-06, "loss": 1.4434, "step": 390 }, { "epoch": 0.32, "learning_rate": 9.975841597752334e-06, "loss": 1.5469, "step": 395 }, { "epoch": 0.32, "learning_rate": 9.974171659964688e-06, "loss": 1.4531, "step": 400 }, { "epoch": 0.33, "learning_rate": 9.972446067208519e-06, "loss": 1.4828, "step": 405 }, { "epoch": 0.33, "learning_rate": 9.970664838791126e-06, "loss": 1.4512, "step": 410 }, { "epoch": 0.34, "learning_rate": 9.9688279946423e-06, "loss": 1.4076, "step": 415 }, { "epoch": 0.34, "learning_rate": 9.966935555314107e-06, "loss": 1.4969, "step": 420 }, { "epoch": 0.34, "learning_rate": 9.96498754198066e-06, "loss": 1.498, "step": 425 }, { "epoch": 0.35, "learning_rate": 9.962983976437868e-06, "loss": 1.4393, "step": 430 }, { "epoch": 0.35, "learning_rate": 9.96092488110321e-06, "loss": 1.5219, "step": 435 }, { "epoch": 0.36, "learning_rate": 9.958810279015474e-06, "loss": 1.4484, "step": 440 }, { "epoch": 0.36, "learning_rate": 9.956640193834501e-06, "loss": 1.4805, "step": 445 }, { "epoch": 0.36, "learning_rate": 9.954414649840922e-06, "loss": 1.4832, "step": 450 }, { "epoch": 0.37, "learning_rate": 9.952133671935885e-06, "loss": 1.3789, "step": 455 }, { "epoch": 0.37, "learning_rate": 9.949797285640771e-06, "loss": 1.4773, "step": 460 }, { "epoch": 0.38, "learning_rate": 9.947405517096927e-06, "loss": 1.5246, "step": 465 }, { "epoch": 0.38, "learning_rate": 9.944958393065343e-06, "loss": 1.5094, "step": 470 }, { "epoch": 0.38, "learning_rate": 9.942455940926384e-06, "loss": 1.4253, "step": 475 }, { "epoch": 0.39, "learning_rate": 9.939898188679465e-06, "loss": 1.4396, "step": 480 }, { "epoch": 0.39, "learning_rate": 9.93728516494274e-06, "loss": 1.3863, "step": 485 }, { "epoch": 0.4, "learning_rate": 9.934616898952787e-06, "loss": 1.4965, "step": 490 }, { "epoch": 0.4, "learning_rate": 9.931893420564277e-06, "loss": 1.5633, "step": 495 }, { "epoch": 0.4, "learning_rate": 9.929114760249642e-06, "loss": 1.3689, "step": 500 }, { "epoch": 0.41, "learning_rate": 9.926280949098732e-06, "loss": 1.4434, "step": 505 }, { "epoch": 0.41, "learning_rate": 9.923392018818467e-06, "loss": 1.4758, "step": 510 }, { "epoch": 0.42, "learning_rate": 9.92044800173249e-06, "loss": 1.4133, "step": 515 }, { "epoch": 0.42, "learning_rate": 9.917448930780786e-06, "loss": 1.451, "step": 520 }, { "epoch": 0.42, "learning_rate": 9.91439483951934e-06, "loss": 1.5117, "step": 525 }, { "epoch": 0.43, "learning_rate": 9.91128576211974e-06, "loss": 1.4949, "step": 530 }, { "epoch": 0.43, "learning_rate": 9.908121733368803e-06, "loss": 1.3984, "step": 535 }, { "epoch": 0.44, "learning_rate": 9.904902788668187e-06, "loss": 1.6094, "step": 540 }, { "epoch": 0.44, "learning_rate": 9.901628964033993e-06, "loss": 1.452, "step": 545 }, { "epoch": 0.44, "learning_rate": 9.89830029609636e-06, "loss": 1.4293, "step": 550 }, { "epoch": 0.45, "learning_rate": 9.894916822099062e-06, "loss": 1.4957, "step": 555 }, { "epoch": 0.45, "learning_rate": 9.89147857989908e-06, "loss": 1.507, "step": 560 }, { "epoch": 0.46, "learning_rate": 9.887985607966188e-06, "loss": 1.4379, "step": 565 }, { "epoch": 0.46, "learning_rate": 9.884437945382523e-06, "loss": 1.4812, "step": 570 }, { "epoch": 0.47, "learning_rate": 9.880835631842141e-06, "loss": 1.4477, "step": 575 }, { "epoch": 0.47, "learning_rate": 9.877178707650573e-06, "loss": 1.4484, "step": 580 }, { "epoch": 0.47, "learning_rate": 9.873467213724384e-06, "loss": 1.4897, "step": 585 }, { "epoch": 0.48, "learning_rate": 9.869701191590703e-06, "loss": 1.4641, "step": 590 }, { "epoch": 0.48, "learning_rate": 9.865880683386766e-06, "loss": 1.4809, "step": 595 }, { "epoch": 0.49, "learning_rate": 9.862005731859443e-06, "loss": 1.4742, "step": 600 }, { "epoch": 0.49, "learning_rate": 9.858076380364757e-06, "loss": 1.4902, "step": 605 }, { "epoch": 0.49, "learning_rate": 9.854092672867399e-06, "loss": 1.5898, "step": 610 }, { "epoch": 0.5, "learning_rate": 9.850054653940247e-06, "loss": 1.4984, "step": 615 }, { "epoch": 0.5, "learning_rate": 9.845962368763847e-06, "loss": 1.4898, "step": 620 }, { "epoch": 0.51, "learning_rate": 9.841815863125923e-06, "loss": 1.5082, "step": 625 }, { "epoch": 0.51, "learning_rate": 9.837615183420866e-06, "loss": 1.4992, "step": 630 }, { "epoch": 0.51, "learning_rate": 9.8333603766492e-06, "loss": 1.3744, "step": 635 }, { "epoch": 0.52, "learning_rate": 9.829051490417074e-06, "loss": 1.527, "step": 640 }, { "epoch": 0.52, "learning_rate": 9.824688572935713e-06, "loss": 1.5008, "step": 645 }, { "epoch": 0.53, "learning_rate": 9.820271673020891e-06, "loss": 1.4721, "step": 650 }, { "epoch": 0.53, "learning_rate": 9.81580084009238e-06, "loss": 1.5555, "step": 655 }, { "epoch": 0.53, "learning_rate": 9.811276124173395e-06, "loss": 1.5285, "step": 660 }, { "epoch": 0.54, "learning_rate": 9.806697575890043e-06, "loss": 1.4777, "step": 665 }, { "epoch": 0.54, "learning_rate": 9.802065246470738e-06, "loss": 1.4322, "step": 670 }, { "epoch": 0.55, "learning_rate": 9.797379187745652e-06, "loss": 1.5197, "step": 675 }, { "epoch": 0.55, "learning_rate": 9.792639452146116e-06, "loss": 1.4703, "step": 680 }, { "epoch": 0.55, "learning_rate": 9.787846092704043e-06, "loss": 1.4539, "step": 685 }, { "epoch": 0.56, "learning_rate": 9.782999163051332e-06, "loss": 1.4326, "step": 690 }, { "epoch": 0.56, "learning_rate": 9.778098717419266e-06, "loss": 1.3992, "step": 695 }, { "epoch": 0.57, "learning_rate": 9.773144810637908e-06, "loss": 1.457, "step": 700 }, { "epoch": 0.57, "learning_rate": 9.768137498135489e-06, "loss": 1.477, "step": 705 }, { "epoch": 0.57, "learning_rate": 9.763076835937782e-06, "loss": 1.4455, "step": 710 }, { "epoch": 0.58, "learning_rate": 9.75796288066748e-06, "loss": 1.442, "step": 715 }, { "epoch": 0.58, "learning_rate": 9.752795689543563e-06, "loss": 1.5156, "step": 720 }, { "epoch": 0.59, "learning_rate": 9.747575320380652e-06, "loss": 1.5129, "step": 725 }, { "epoch": 0.59, "learning_rate": 9.742301831588368e-06, "loss": 1.3873, "step": 730 }, { "epoch": 0.59, "learning_rate": 9.736975282170678e-06, "loss": 1.448, "step": 735 }, { "epoch": 0.6, "learning_rate": 9.731595731725232e-06, "loss": 1.4695, "step": 740 }, { "epoch": 0.6, "learning_rate": 9.726163240442695e-06, "loss": 1.4898, "step": 745 }, { "epoch": 0.61, "learning_rate": 9.720677869106077e-06, "loss": 1.4619, "step": 750 }, { "epoch": 0.61, "learning_rate": 9.715139679090057e-06, "loss": 1.5293, "step": 755 }, { "epoch": 0.61, "learning_rate": 9.709548732360286e-06, "loss": 1.4535, "step": 760 }, { "epoch": 0.62, "learning_rate": 9.703905091472698e-06, "loss": 1.3992, "step": 765 }, { "epoch": 0.62, "learning_rate": 9.698208819572815e-06, "loss": 1.5254, "step": 770 }, { "epoch": 0.63, "learning_rate": 9.692459980395034e-06, "loss": 1.351, "step": 775 }, { "epoch": 0.63, "learning_rate": 9.686658638261916e-06, "loss": 1.3639, "step": 780 }, { "epoch": 0.64, "learning_rate": 9.680804858083468e-06, "loss": 1.3813, "step": 785 }, { "epoch": 0.64, "learning_rate": 9.674898705356413e-06, "loss": 1.4016, "step": 790 }, { "epoch": 0.64, "learning_rate": 9.668940246163464e-06, "loss": 1.4611, "step": 795 }, { "epoch": 0.65, "learning_rate": 9.662929547172575e-06, "loss": 1.4902, "step": 800 }, { "epoch": 0.65, "learning_rate": 9.656866675636204e-06, "loss": 1.5445, "step": 805 }, { "epoch": 0.66, "learning_rate": 9.650751699390554e-06, "loss": 1.5363, "step": 810 }, { "epoch": 0.66, "learning_rate": 9.64458468685482e-06, "loss": 1.4508, "step": 815 }, { "epoch": 0.66, "learning_rate": 9.638365707030415e-06, "loss": 1.4367, "step": 820 }, { "epoch": 0.67, "learning_rate": 9.632094829500206e-06, "loss": 1.4594, "step": 825 }, { "epoch": 0.67, "learning_rate": 9.62577212442774e-06, "loss": 1.498, "step": 830 }, { "epoch": 0.68, "learning_rate": 9.619397662556434e-06, "loss": 1.4816, "step": 835 }, { "epoch": 0.68, "learning_rate": 9.61297151520882e-06, "loss": 1.4754, "step": 840 }, { "epoch": 0.68, "learning_rate": 9.606493754285712e-06, "loss": 1.4805, "step": 845 }, { "epoch": 0.69, "learning_rate": 9.599964452265434e-06, "loss": 1.5539, "step": 850 }, { "epoch": 0.69, "learning_rate": 9.593383682202974e-06, "loss": 1.4996, "step": 855 }, { "epoch": 0.7, "learning_rate": 9.586751517729203e-06, "loss": 1.484, "step": 860 }, { "epoch": 0.7, "learning_rate": 9.580068033050019e-06, "loss": 1.4781, "step": 865 }, { "epoch": 0.7, "learning_rate": 9.57333330294554e-06, "loss": 1.5156, "step": 870 }, { "epoch": 0.71, "learning_rate": 9.566547402769255e-06, "loss": 1.4734, "step": 875 }, { "epoch": 0.71, "learning_rate": 9.559710408447185e-06, "loss": 1.5398, "step": 880 }, { "epoch": 0.72, "learning_rate": 9.55282239647703e-06, "loss": 1.5621, "step": 885 }, { "epoch": 0.72, "learning_rate": 9.545883443927325e-06, "loss": 1.4793, "step": 890 }, { "epoch": 0.72, "learning_rate": 9.538893628436554e-06, "loss": 1.3898, "step": 895 }, { "epoch": 0.73, "learning_rate": 9.531853028212308e-06, "loss": 1.4719, "step": 900 }, { "epoch": 0.73, "learning_rate": 9.52476172203039e-06, "loss": 1.5215, "step": 905 }, { "epoch": 0.74, "learning_rate": 9.51761978923395e-06, "loss": 1.4264, "step": 910 }, { "epoch": 0.74, "learning_rate": 9.51042730973258e-06, "loss": 1.502, "step": 915 }, { "epoch": 0.74, "learning_rate": 9.503184364001432e-06, "loss": 1.4287, "step": 920 }, { "epoch": 0.75, "learning_rate": 9.495891033080315e-06, "loss": 1.4693, "step": 925 }, { "epoch": 0.75, "learning_rate": 9.488547398572787e-06, "loss": 1.5344, "step": 930 }, { "epoch": 0.76, "learning_rate": 9.48115354264524e-06, "loss": 1.3703, "step": 935 }, { "epoch": 0.76, "learning_rate": 9.473709548025987e-06, "loss": 1.6094, "step": 940 }, { "epoch": 0.76, "learning_rate": 9.466215498004328e-06, "loss": 1.4773, "step": 945 }, { "epoch": 0.77, "learning_rate": 9.458671476429624e-06, "loss": 1.4602, "step": 950 }, { "epoch": 0.77, "learning_rate": 9.45107756771036e-06, "loss": 1.4798, "step": 955 }, { "epoch": 0.78, "learning_rate": 9.443433856813197e-06, "loss": 1.516, "step": 960 }, { "epoch": 0.78, "learning_rate": 9.435740429262016e-06, "loss": 1.4941, "step": 965 }, { "epoch": 0.78, "learning_rate": 9.427997371136976e-06, "loss": 1.402, "step": 970 }, { "epoch": 0.79, "learning_rate": 9.420204769073538e-06, "loss": 1.5305, "step": 975 }, { "epoch": 0.79, "learning_rate": 9.4123627102615e-06, "loss": 1.4604, "step": 980 }, { "epoch": 0.8, "learning_rate": 9.404471282444019e-06, "loss": 1.5129, "step": 985 }, { "epoch": 0.8, "learning_rate": 9.396530573916636e-06, "loss": 1.5453, "step": 990 }, { "epoch": 0.81, "learning_rate": 9.38854067352628e-06, "loss": 1.4834, "step": 995 }, { "epoch": 0.81, "learning_rate": 9.38050167067028e-06, "loss": 1.4387, "step": 1000 }, { "epoch": 0.81, "learning_rate": 9.372413655295362e-06, "loss": 1.4383, "step": 1005 }, { "epoch": 0.82, "learning_rate": 9.364276717896639e-06, "loss": 1.602, "step": 1010 }, { "epoch": 0.82, "learning_rate": 9.356090949516608e-06, "loss": 1.4193, "step": 1015 }, { "epoch": 0.83, "learning_rate": 9.347856441744122e-06, "loss": 1.5031, "step": 1020 }, { "epoch": 0.83, "learning_rate": 9.339573286713369e-06, "loss": 1.4828, "step": 1025 }, { "epoch": 0.83, "learning_rate": 9.331241577102841e-06, "loss": 1.5191, "step": 1030 }, { "epoch": 0.84, "learning_rate": 9.322861406134302e-06, "loss": 1.4305, "step": 1035 }, { "epoch": 0.84, "learning_rate": 9.314432867571732e-06, "loss": 1.4625, "step": 1040 }, { "epoch": 0.85, "learning_rate": 9.30595605572029e-06, "loss": 1.5246, "step": 1045 }, { "epoch": 0.85, "learning_rate": 9.297431065425257e-06, "loss": 1.5227, "step": 1050 }, { "epoch": 0.85, "learning_rate": 9.28885799207097e-06, "loss": 1.5367, "step": 1055 }, { "epoch": 0.86, "learning_rate": 9.280236931579754e-06, "loss": 1.4879, "step": 1060 }, { "epoch": 0.86, "learning_rate": 9.271567980410859e-06, "loss": 1.5137, "step": 1065 }, { "epoch": 0.87, "learning_rate": 9.26285123555937e-06, "loss": 1.4449, "step": 1070 }, { "epoch": 0.87, "learning_rate": 9.254086794555121e-06, "loss": 1.4602, "step": 1075 }, { "epoch": 0.87, "learning_rate": 9.245274755461621e-06, "loss": 1.4187, "step": 1080 }, { "epoch": 0.88, "learning_rate": 9.23641521687493e-06, "loss": 1.5391, "step": 1085 }, { "epoch": 0.88, "learning_rate": 9.227508277922579e-06, "loss": 1.3988, "step": 1090 }, { "epoch": 0.89, "learning_rate": 9.218554038262448e-06, "loss": 1.5984, "step": 1095 }, { "epoch": 0.89, "learning_rate": 9.209552598081657e-06, "loss": 1.5109, "step": 1100 }, { "epoch": 0.89, "learning_rate": 9.200504058095439e-06, "loss": 1.5418, "step": 1105 }, { "epoch": 0.9, "learning_rate": 9.191408519546022e-06, "loss": 1.4275, "step": 1110 }, { "epoch": 0.9, "learning_rate": 9.182266084201486e-06, "loss": 1.4074, "step": 1115 }, { "epoch": 0.91, "learning_rate": 9.173076854354634e-06, "loss": 1.5016, "step": 1120 }, { "epoch": 0.91, "learning_rate": 9.16384093282184e-06, "loss": 1.5188, "step": 1125 }, { "epoch": 0.91, "learning_rate": 9.154558422941901e-06, "loss": 1.4738, "step": 1130 }, { "epoch": 0.92, "learning_rate": 9.145229428574886e-06, "loss": 1.4049, "step": 1135 }, { "epoch": 0.92, "learning_rate": 9.135854054100961e-06, "loss": 1.4363, "step": 1140 }, { "epoch": 0.93, "learning_rate": 9.126432404419239e-06, "loss": 1.5211, "step": 1145 }, { "epoch": 0.93, "learning_rate": 9.11696458494659e-06, "loss": 1.457, "step": 1150 }, { "epoch": 0.93, "learning_rate": 9.107450701616469e-06, "loss": 1.4543, "step": 1155 }, { "epoch": 0.94, "learning_rate": 9.097890860877732e-06, "loss": 1.534, "step": 1160 }, { "epoch": 0.94, "learning_rate": 9.088285169693442e-06, "loss": 1.5254, "step": 1165 }, { "epoch": 0.95, "learning_rate": 9.078633735539673e-06, "loss": 1.5371, "step": 1170 }, { "epoch": 0.95, "learning_rate": 9.068936666404307e-06, "loss": 1.475, "step": 1175 }, { "epoch": 0.95, "learning_rate": 9.059194070785823e-06, "loss": 1.457, "step": 1180 }, { "epoch": 0.96, "learning_rate": 9.049406057692097e-06, "loss": 1.4891, "step": 1185 }, { "epoch": 0.96, "learning_rate": 9.03957273663916e-06, "loss": 1.5648, "step": 1190 }, { "epoch": 0.97, "learning_rate": 9.02969421764999e-06, "loss": 1.5855, "step": 1195 }, { "epoch": 0.97, "learning_rate": 9.019770611253272e-06, "loss": 1.4734, "step": 1200 }, { "epoch": 0.97, "learning_rate": 9.009802028482169e-06, "loss": 1.4867, "step": 1205 }, { "epoch": 0.98, "learning_rate": 8.999788580873074e-06, "loss": 1.5094, "step": 1210 }, { "epoch": 0.98, "learning_rate": 8.989730380464362e-06, "loss": 1.4965, "step": 1215 }, { "epoch": 0.99, "learning_rate": 8.979627539795136e-06, "loss": 1.4887, "step": 1220 }, { "epoch": 0.99, "learning_rate": 8.969480171903973e-06, "loss": 1.4398, "step": 1225 }, { "epoch": 1.0, "learning_rate": 8.959288390327656e-06, "loss": 1.4301, "step": 1230 }, { "epoch": 1.0, "learning_rate": 8.949052309099897e-06, "loss": 1.5309, "step": 1235 }, { "epoch": 1.0, "learning_rate": 8.938772042750078e-06, "loss": 1.3054, "step": 1240 }, { "epoch": 1.01, "learning_rate": 8.928447706301951e-06, "loss": 1.2152, "step": 1245 }, { "epoch": 1.01, "learning_rate": 8.91807941527236e-06, "loss": 1.2238, "step": 1250 }, { "epoch": 1.02, "learning_rate": 8.907667285669955e-06, "loss": 1.1881, "step": 1255 }, { "epoch": 1.02, "learning_rate": 8.897211433993873e-06, "loss": 1.234, "step": 1260 }, { "epoch": 1.02, "learning_rate": 8.886711977232463e-06, "loss": 1.1724, "step": 1265 }, { "epoch": 1.03, "learning_rate": 8.87616903286195e-06, "loss": 1.3313, "step": 1270 }, { "epoch": 1.03, "learning_rate": 8.865582718845142e-06, "loss": 1.2277, "step": 1275 }, { "epoch": 1.04, "learning_rate": 8.854953153630097e-06, "loss": 1.2145, "step": 1280 }, { "epoch": 1.04, "learning_rate": 8.844280456148799e-06, "loss": 1.243, "step": 1285 }, { "epoch": 1.04, "learning_rate": 8.833564745815835e-06, "loss": 1.1816, "step": 1290 }, { "epoch": 1.05, "learning_rate": 8.82280614252705e-06, "loss": 1.1965, "step": 1295 }, { "epoch": 1.05, "learning_rate": 8.81200476665821e-06, "loss": 1.2035, "step": 1300 }, { "epoch": 1.06, "learning_rate": 8.801160739063657e-06, "loss": 1.2477, "step": 1305 }, { "epoch": 1.06, "learning_rate": 8.790274181074951e-06, "loss": 1.1686, "step": 1310 }, { "epoch": 1.06, "learning_rate": 8.779345214499517e-06, "loss": 1.1877, "step": 1315 }, { "epoch": 1.07, "learning_rate": 8.768373961619283e-06, "loss": 1.2209, "step": 1320 }, { "epoch": 1.07, "learning_rate": 8.757360545189308e-06, "loss": 1.2066, "step": 1325 }, { "epoch": 1.08, "learning_rate": 8.746305088436406e-06, "loss": 1.2484, "step": 1330 }, { "epoch": 1.08, "learning_rate": 8.735207715057779e-06, "loss": 1.2068, "step": 1335 }, { "epoch": 1.08, "learning_rate": 8.724068549219618e-06, "loss": 1.1803, "step": 1340 }, { "epoch": 1.09, "learning_rate": 8.712887715555728e-06, "loss": 1.234, "step": 1345 }, { "epoch": 1.09, "learning_rate": 8.701665339166122e-06, "loss": 1.2441, "step": 1350 }, { "epoch": 1.1, "learning_rate": 8.690401545615626e-06, "loss": 1.2082, "step": 1355 }, { "epoch": 1.1, "learning_rate": 8.679096460932477e-06, "loss": 1.2176, "step": 1360 }, { "epoch": 1.1, "learning_rate": 8.667750211606906e-06, "loss": 1.2516, "step": 1365 }, { "epoch": 1.11, "learning_rate": 8.65636292458973e-06, "loss": 1.1766, "step": 1370 }, { "epoch": 1.11, "learning_rate": 8.644934727290927e-06, "loss": 1.2277, "step": 1375 }, { "epoch": 1.12, "learning_rate": 8.63346574757821e-06, "loss": 1.2773, "step": 1380 }, { "epoch": 1.12, "learning_rate": 8.621956113775601e-06, "loss": 1.2162, "step": 1385 }, { "epoch": 1.12, "learning_rate": 8.610405954661988e-06, "loss": 1.2551, "step": 1390 }, { "epoch": 1.13, "learning_rate": 8.598815399469694e-06, "loss": 1.2625, "step": 1395 }, { "epoch": 1.13, "learning_rate": 8.587184577883018e-06, "loss": 1.2465, "step": 1400 }, { "epoch": 1.14, "learning_rate": 8.5755136200368e-06, "loss": 1.2008, "step": 1405 }, { "epoch": 1.14, "learning_rate": 8.563802656514946e-06, "loss": 1.1623, "step": 1410 }, { "epoch": 1.14, "learning_rate": 8.552051818348986e-06, "loss": 1.1625, "step": 1415 }, { "epoch": 1.15, "learning_rate": 8.540261237016597e-06, "loss": 1.1723, "step": 1420 }, { "epoch": 1.15, "learning_rate": 8.528431044440127e-06, "loss": 1.268, "step": 1425 }, { "epoch": 1.16, "learning_rate": 8.516561372985137e-06, "loss": 1.2488, "step": 1430 }, { "epoch": 1.16, "learning_rate": 8.504652355458901e-06, "loss": 1.298, "step": 1435 }, { "epoch": 1.17, "learning_rate": 8.492704125108933e-06, "loss": 1.2168, "step": 1440 }, { "epoch": 1.17, "learning_rate": 8.480716815621486e-06, "loss": 1.2166, "step": 1445 }, { "epoch": 1.17, "learning_rate": 8.468690561120064e-06, "loss": 1.201, "step": 1450 }, { "epoch": 1.18, "learning_rate": 8.456625496163921e-06, "loss": 1.2266, "step": 1455 }, { "epoch": 1.18, "learning_rate": 8.444521755746547e-06, "loss": 1.1812, "step": 1460 }, { "epoch": 1.19, "learning_rate": 8.43237947529417e-06, "loss": 1.1762, "step": 1465 }, { "epoch": 1.19, "learning_rate": 8.420198790664232e-06, "loss": 1.2473, "step": 1470 }, { "epoch": 1.19, "learning_rate": 8.407979838143869e-06, "loss": 1.1887, "step": 1475 }, { "epoch": 1.2, "learning_rate": 8.395722754448392e-06, "loss": 1.2277, "step": 1480 }, { "epoch": 1.2, "learning_rate": 8.38342767671975e-06, "loss": 1.2418, "step": 1485 }, { "epoch": 1.21, "learning_rate": 8.371094742525006e-06, "loss": 1.2081, "step": 1490 }, { "epoch": 1.21, "learning_rate": 8.358724089854784e-06, "loss": 1.3461, "step": 1495 }, { "epoch": 1.21, "learning_rate": 8.346315857121732e-06, "loss": 1.1977, "step": 1500 }, { "epoch": 1.22, "learning_rate": 8.33387018315898e-06, "loss": 1.2336, "step": 1505 }, { "epoch": 1.22, "learning_rate": 8.321387207218578e-06, "loss": 1.249, "step": 1510 }, { "epoch": 1.23, "learning_rate": 8.308867068969933e-06, "loss": 1.2188, "step": 1515 }, { "epoch": 1.23, "learning_rate": 8.296309908498264e-06, "loss": 1.1823, "step": 1520 }, { "epoch": 1.23, "learning_rate": 8.283715866303016e-06, "loss": 1.2462, "step": 1525 }, { "epoch": 1.24, "learning_rate": 8.271085083296295e-06, "loss": 1.218, "step": 1530 }, { "epoch": 1.24, "learning_rate": 8.258417700801301e-06, "loss": 1.249, "step": 1535 }, { "epoch": 1.25, "learning_rate": 8.245713860550734e-06, "loss": 1.2629, "step": 1540 }, { "epoch": 1.25, "learning_rate": 8.232973704685208e-06, "loss": 1.2605, "step": 1545 }, { "epoch": 1.25, "learning_rate": 8.220197375751667e-06, "loss": 1.2232, "step": 1550 }, { "epoch": 1.26, "learning_rate": 8.207385016701792e-06, "loss": 1.2242, "step": 1555 }, { "epoch": 1.26, "learning_rate": 8.194536770890392e-06, "loss": 1.1824, "step": 1560 }, { "epoch": 1.27, "learning_rate": 8.181652782073808e-06, "loss": 1.275, "step": 1565 }, { "epoch": 1.27, "learning_rate": 8.168733194408302e-06, "loss": 1.2164, "step": 1570 }, { "epoch": 1.27, "learning_rate": 8.155778152448443e-06, "loss": 1.2207, "step": 1575 }, { "epoch": 1.28, "learning_rate": 8.142787801145495e-06, "loss": 1.2266, "step": 1580 }, { "epoch": 1.28, "learning_rate": 8.129762285845784e-06, "loss": 1.1971, "step": 1585 }, { "epoch": 1.29, "learning_rate": 8.116701752289084e-06, "loss": 1.2107, "step": 1590 }, { "epoch": 1.29, "learning_rate": 8.103606346606978e-06, "loss": 1.1904, "step": 1595 }, { "epoch": 1.29, "learning_rate": 8.090476215321226e-06, "loss": 1.2895, "step": 1600 }, { "epoch": 1.3, "learning_rate": 8.07731150534213e-06, "loss": 1.235, "step": 1605 }, { "epoch": 1.3, "learning_rate": 8.064112363966877e-06, "loss": 1.2238, "step": 1610 }, { "epoch": 1.31, "learning_rate": 8.050878938877908e-06, "loss": 1.2535, "step": 1615 }, { "epoch": 1.31, "learning_rate": 8.037611378141257e-06, "loss": 1.2336, "step": 1620 }, { "epoch": 1.31, "learning_rate": 8.024309830204888e-06, "loss": 1.2285, "step": 1625 }, { "epoch": 1.32, "learning_rate": 8.010974443897046e-06, "loss": 1.25, "step": 1630 }, { "epoch": 1.32, "learning_rate": 7.997605368424585e-06, "loss": 1.2492, "step": 1635 }, { "epoch": 1.33, "learning_rate": 7.9842027533713e-06, "loss": 1.1783, "step": 1640 }, { "epoch": 1.33, "learning_rate": 7.970766748696254e-06, "loss": 1.171, "step": 1645 }, { "epoch": 1.33, "learning_rate": 7.9572975047321e-06, "loss": 1.2336, "step": 1650 }, { "epoch": 1.34, "learning_rate": 7.943795172183394e-06, "loss": 1.204, "step": 1655 }, { "epoch": 1.34, "learning_rate": 7.93025990212492e-06, "loss": 1.2342, "step": 1660 }, { "epoch": 1.35, "learning_rate": 7.916691845999986e-06, "loss": 1.1936, "step": 1665 }, { "epoch": 1.35, "learning_rate": 7.903091155618747e-06, "loss": 1.1992, "step": 1670 }, { "epoch": 1.36, "learning_rate": 7.889457983156484e-06, "loss": 1.1707, "step": 1675 }, { "epoch": 1.36, "learning_rate": 7.875792481151916e-06, "loss": 1.1835, "step": 1680 }, { "epoch": 1.36, "learning_rate": 7.862094802505498e-06, "loss": 1.2359, "step": 1685 }, { "epoch": 1.37, "learning_rate": 7.848365100477695e-06, "loss": 1.2754, "step": 1690 }, { "epoch": 1.37, "learning_rate": 7.834603528687277e-06, "loss": 1.2664, "step": 1695 }, { "epoch": 1.38, "learning_rate": 7.8208102411096e-06, "loss": 1.2535, "step": 1700 }, { "epoch": 1.38, "learning_rate": 7.806985392074877e-06, "loss": 1.2158, "step": 1705 }, { "epoch": 1.38, "learning_rate": 7.793129136266464e-06, "loss": 1.2504, "step": 1710 }, { "epoch": 1.39, "learning_rate": 7.779241628719108e-06, "loss": 1.1505, "step": 1715 }, { "epoch": 1.39, "learning_rate": 7.765323024817237e-06, "loss": 1.2695, "step": 1720 }, { "epoch": 1.4, "learning_rate": 7.751373480293205e-06, "loss": 1.2059, "step": 1725 }, { "epoch": 1.4, "learning_rate": 7.737393151225555e-06, "loss": 1.2547, "step": 1730 }, { "epoch": 1.4, "learning_rate": 7.723382194037266e-06, "loss": 1.2127, "step": 1735 }, { "epoch": 1.41, "learning_rate": 7.709340765494017e-06, "loss": 1.1734, "step": 1740 }, { "epoch": 1.41, "learning_rate": 7.695269022702425e-06, "loss": 1.2037, "step": 1745 }, { "epoch": 1.42, "learning_rate": 7.681167123108277e-06, "loss": 1.2891, "step": 1750 }, { "epoch": 1.42, "learning_rate": 7.667035224494787e-06, "loss": 1.2285, "step": 1755 }, { "epoch": 1.42, "learning_rate": 7.65287348498082e-06, "loss": 1.217, "step": 1760 }, { "epoch": 1.43, "learning_rate": 7.63868206301912e-06, "loss": 1.1856, "step": 1765 }, { "epoch": 1.43, "learning_rate": 7.62446111739455e-06, "loss": 1.2613, "step": 1770 }, { "epoch": 1.44, "learning_rate": 7.6102108072223e-06, "loss": 1.1617, "step": 1775 }, { "epoch": 1.44, "learning_rate": 7.595931291946116e-06, "loss": 1.2006, "step": 1780 }, { "epoch": 1.44, "learning_rate": 7.581622731336515e-06, "loss": 1.2543, "step": 1785 }, { "epoch": 1.45, "learning_rate": 7.567285285488994e-06, "loss": 1.2498, "step": 1790 }, { "epoch": 1.45, "learning_rate": 7.552919114822246e-06, "loss": 1.2484, "step": 1795 }, { "epoch": 1.46, "learning_rate": 7.5385243800763505e-06, "loss": 1.2543, "step": 1800 }, { "epoch": 1.46, "learning_rate": 7.524101242310993e-06, "loss": 1.2621, "step": 1805 }, { "epoch": 1.46, "learning_rate": 7.509649862903652e-06, "loss": 1.2176, "step": 1810 }, { "epoch": 1.47, "learning_rate": 7.495170403547797e-06, "loss": 1.2189, "step": 1815 }, { "epoch": 1.47, "learning_rate": 7.480663026251073e-06, "loss": 1.2503, "step": 1820 }, { "epoch": 1.48, "learning_rate": 7.466127893333498e-06, "loss": 1.2186, "step": 1825 }, { "epoch": 1.48, "learning_rate": 7.451565167425642e-06, "loss": 1.2805, "step": 1830 }, { "epoch": 1.48, "learning_rate": 7.436975011466805e-06, "loss": 1.2347, "step": 1835 }, { "epoch": 1.49, "learning_rate": 7.422357588703195e-06, "loss": 1.266, "step": 1840 }, { "epoch": 1.49, "learning_rate": 7.407713062686107e-06, "loss": 1.2496, "step": 1845 }, { "epoch": 1.5, "learning_rate": 7.393041597270085e-06, "loss": 1.2902, "step": 1850 }, { "epoch": 1.5, "learning_rate": 7.378343356611093e-06, "loss": 1.2367, "step": 1855 }, { "epoch": 1.5, "learning_rate": 7.363618505164678e-06, "loss": 1.274, "step": 1860 }, { "epoch": 1.51, "learning_rate": 7.348867207684132e-06, "loss": 1.2242, "step": 1865 }, { "epoch": 1.51, "learning_rate": 7.334089629218639e-06, "loss": 1.2844, "step": 1870 }, { "epoch": 1.52, "learning_rate": 7.319285935111444e-06, "loss": 1.2672, "step": 1875 }, { "epoch": 1.52, "learning_rate": 7.304456290997991e-06, "loss": 1.1542, "step": 1880 }, { "epoch": 1.53, "learning_rate": 7.289600862804069e-06, "loss": 1.15, "step": 1885 }, { "epoch": 1.53, "learning_rate": 7.274719816743967e-06, "loss": 1.2385, "step": 1890 }, { "epoch": 1.53, "learning_rate": 7.259813319318601e-06, "loss": 1.2348, "step": 1895 }, { "epoch": 1.54, "learning_rate": 7.244881537313664e-06, "loss": 1.2578, "step": 1900 }, { "epoch": 1.54, "learning_rate": 7.229924637797742e-06, "loss": 1.2191, "step": 1905 }, { "epoch": 1.55, "learning_rate": 7.214942788120466e-06, "loss": 1.251, "step": 1910 }, { "epoch": 1.55, "learning_rate": 7.1999361559106225e-06, "loss": 1.2031, "step": 1915 }, { "epoch": 1.55, "learning_rate": 7.184904909074293e-06, "loss": 1.2766, "step": 1920 }, { "epoch": 1.56, "learning_rate": 7.169849215792955e-06, "loss": 1.2299, "step": 1925 }, { "epoch": 1.56, "learning_rate": 7.15476924452162e-06, "loss": 1.2355, "step": 1930 }, { "epoch": 1.57, "learning_rate": 7.139665163986938e-06, "loss": 1.2336, "step": 1935 }, { "epoch": 1.57, "learning_rate": 7.124537143185317e-06, "loss": 1.3566, "step": 1940 }, { "epoch": 1.57, "learning_rate": 7.109385351381022e-06, "loss": 1.1423, "step": 1945 }, { "epoch": 1.58, "learning_rate": 7.09420995810429e-06, "loss": 1.2576, "step": 1950 }, { "epoch": 1.58, "learning_rate": 7.079011133149427e-06, "loss": 1.2563, "step": 1955 }, { "epoch": 1.59, "learning_rate": 7.0637890465729165e-06, "loss": 1.2695, "step": 1960 }, { "epoch": 1.59, "learning_rate": 7.048543868691506e-06, "loss": 1.1986, "step": 1965 }, { "epoch": 1.59, "learning_rate": 7.033275770080309e-06, "loss": 1.25, "step": 1970 }, { "epoch": 1.6, "learning_rate": 7.017984921570895e-06, "loss": 1.2025, "step": 1975 }, { "epoch": 1.6, "learning_rate": 7.002671494249376e-06, "loss": 1.2465, "step": 1980 }, { "epoch": 1.61, "learning_rate": 6.987335659454493e-06, "loss": 1.2336, "step": 1985 }, { "epoch": 1.61, "learning_rate": 6.971977588775703e-06, "loss": 1.2436, "step": 1990 }, { "epoch": 1.61, "learning_rate": 6.956597454051253e-06, "loss": 1.2429, "step": 1995 }, { "epoch": 1.62, "learning_rate": 6.941195427366259e-06, "loss": 1.2574, "step": 2000 }, { "epoch": 1.62, "learning_rate": 6.925771681050784e-06, "loss": 1.2465, "step": 2005 }, { "epoch": 1.63, "learning_rate": 6.910326387677906e-06, "loss": 1.2805, "step": 2010 }, { "epoch": 1.63, "learning_rate": 6.89485972006179e-06, "loss": 1.2664, "step": 2015 }, { "epoch": 1.63, "learning_rate": 6.879371851255747e-06, "loss": 1.1826, "step": 2020 }, { "epoch": 1.64, "learning_rate": 6.863862954550315e-06, "loss": 1.2441, "step": 2025 }, { "epoch": 1.64, "learning_rate": 6.8483332034713006e-06, "loss": 1.191, "step": 2030 }, { "epoch": 1.65, "learning_rate": 6.832782771777846e-06, "loss": 1.2574, "step": 2035 }, { "epoch": 1.65, "learning_rate": 6.817211833460484e-06, "loss": 1.2865, "step": 2040 }, { "epoch": 1.65, "learning_rate": 6.801620562739197e-06, "loss": 1.2504, "step": 2045 }, { "epoch": 1.66, "learning_rate": 6.7860091340614575e-06, "loss": 1.2084, "step": 2050 }, { "epoch": 1.66, "learning_rate": 6.770377722100284e-06, "loss": 1.2609, "step": 2055 }, { "epoch": 1.67, "learning_rate": 6.75472650175228e-06, "loss": 1.2723, "step": 2060 }, { "epoch": 1.67, "learning_rate": 6.739055648135685e-06, "loss": 1.1243, "step": 2065 }, { "epoch": 1.67, "learning_rate": 6.723365336588409e-06, "loss": 1.2529, "step": 2070 }, { "epoch": 1.68, "learning_rate": 6.707655742666074e-06, "loss": 1.3047, "step": 2075 }, { "epoch": 1.68, "learning_rate": 6.691927042140044e-06, "loss": 1.257, "step": 2080 }, { "epoch": 1.69, "learning_rate": 6.6761794109954714e-06, "loss": 1.2086, "step": 2085 }, { "epoch": 1.69, "learning_rate": 6.660413025429312e-06, "loss": 1.2711, "step": 2090 }, { "epoch": 1.69, "learning_rate": 6.644628061848363e-06, "loss": 1.1157, "step": 2095 }, { "epoch": 1.7, "learning_rate": 6.628824696867286e-06, "loss": 1.2309, "step": 2100 }, { "epoch": 1.7, "learning_rate": 6.613003107306637e-06, "loss": 1.2363, "step": 2105 }, { "epoch": 1.71, "learning_rate": 6.597163470190877e-06, "loss": 1.207, "step": 2110 }, { "epoch": 1.71, "learning_rate": 6.5813059627464e-06, "loss": 1.2641, "step": 2115 }, { "epoch": 1.72, "learning_rate": 6.565430762399546e-06, "loss": 1.252, "step": 2120 }, { "epoch": 1.72, "learning_rate": 6.549538046774621e-06, "loss": 1.2586, "step": 2125 }, { "epoch": 1.72, "learning_rate": 6.533627993691901e-06, "loss": 1.3012, "step": 2130 }, { "epoch": 1.73, "learning_rate": 6.517700781165649e-06, "loss": 1.1842, "step": 2135 }, { "epoch": 1.73, "learning_rate": 6.501756587402124e-06, "loss": 1.2016, "step": 2140 }, { "epoch": 1.74, "learning_rate": 6.485795590797579e-06, "loss": 1.2988, "step": 2145 }, { "epoch": 1.74, "learning_rate": 6.469817969936277e-06, "loss": 1.2547, "step": 2150 }, { "epoch": 1.74, "learning_rate": 6.453823903588481e-06, "loss": 1.2309, "step": 2155 }, { "epoch": 1.75, "learning_rate": 6.437813570708463e-06, "loss": 1.2855, "step": 2160 }, { "epoch": 1.75, "learning_rate": 6.421787150432493e-06, "loss": 1.1488, "step": 2165 }, { "epoch": 1.76, "learning_rate": 6.405744822076845e-06, "loss": 1.2115, "step": 2170 }, { "epoch": 1.76, "learning_rate": 6.389686765135782e-06, "loss": 1.2336, "step": 2175 }, { "epoch": 1.76, "learning_rate": 6.3736131592795525e-06, "loss": 1.2746, "step": 2180 }, { "epoch": 1.77, "learning_rate": 6.357524184352375e-06, "loss": 1.201, "step": 2185 }, { "epoch": 1.77, "learning_rate": 6.341420020370435e-06, "loss": 1.2703, "step": 2190 }, { "epoch": 1.78, "learning_rate": 6.325300847519859e-06, "loss": 1.2441, "step": 2195 }, { "epoch": 1.78, "learning_rate": 6.309166846154713e-06, "loss": 1.2684, "step": 2200 }, { "epoch": 1.78, "learning_rate": 6.293018196794964e-06, "loss": 1.2449, "step": 2205 }, { "epoch": 1.79, "learning_rate": 6.276855080124483e-06, "loss": 1.268, "step": 2210 }, { "epoch": 1.79, "learning_rate": 6.260677676989008e-06, "loss": 1.2906, "step": 2215 }, { "epoch": 1.8, "learning_rate": 6.24448616839413e-06, "loss": 1.307, "step": 2220 }, { "epoch": 1.8, "learning_rate": 6.228280735503254e-06, "loss": 1.2699, "step": 2225 }, { "epoch": 1.8, "learning_rate": 6.212061559635588e-06, "loss": 1.24, "step": 2230 }, { "epoch": 1.81, "learning_rate": 6.195828822264107e-06, "loss": 1.2605, "step": 2235 }, { "epoch": 1.81, "learning_rate": 6.179582705013519e-06, "loss": 1.3457, "step": 2240 }, { "epoch": 1.82, "learning_rate": 6.163323389658242e-06, "loss": 1.2301, "step": 2245 }, { "epoch": 1.82, "learning_rate": 6.147051058120359e-06, "loss": 1.257, "step": 2250 }, { "epoch": 1.82, "learning_rate": 6.130765892467595e-06, "loss": 1.1584, "step": 2255 }, { "epoch": 1.83, "learning_rate": 6.114468074911265e-06, "loss": 1.2537, "step": 2260 }, { "epoch": 1.83, "learning_rate": 6.098157787804252e-06, "loss": 1.2559, "step": 2265 }, { "epoch": 1.84, "learning_rate": 6.081835213638951e-06, "loss": 1.2307, "step": 2270 }, { "epoch": 1.84, "learning_rate": 6.0655005350452414e-06, "loss": 1.1664, "step": 2275 }, { "epoch": 1.84, "learning_rate": 6.049153934788429e-06, "loss": 1.2146, "step": 2280 }, { "epoch": 1.85, "learning_rate": 6.032795595767214e-06, "loss": 1.2498, "step": 2285 }, { "epoch": 1.85, "learning_rate": 6.016425701011637e-06, "loss": 1.2379, "step": 2290 }, { "epoch": 1.86, "learning_rate": 6.000044433681034e-06, "loss": 1.2584, "step": 2295 }, { "epoch": 1.86, "learning_rate": 5.9836519770619865e-06, "loss": 1.2805, "step": 2300 }, { "epoch": 1.86, "learning_rate": 5.967248514566271e-06, "loss": 1.2348, "step": 2305 }, { "epoch": 1.87, "learning_rate": 5.9508342297288035e-06, "loss": 1.2572, "step": 2310 }, { "epoch": 1.87, "learning_rate": 5.934409306205593e-06, "loss": 1.2018, "step": 2315 }, { "epoch": 1.88, "learning_rate": 5.917973927771678e-06, "loss": 1.2641, "step": 2320 }, { "epoch": 1.88, "learning_rate": 5.901528278319083e-06, "loss": 1.2293, "step": 2325 }, { "epoch": 1.89, "learning_rate": 5.885072541854742e-06, "loss": 1.3113, "step": 2330 }, { "epoch": 1.89, "learning_rate": 5.868606902498457e-06, "loss": 1.26, "step": 2335 }, { "epoch": 1.89, "learning_rate": 5.852131544480831e-06, "loss": 1.2092, "step": 2340 }, { "epoch": 1.9, "learning_rate": 5.835646652141208e-06, "loss": 1.2535, "step": 2345 }, { "epoch": 1.9, "learning_rate": 5.8191524099256035e-06, "loss": 1.2535, "step": 2350 }, { "epoch": 1.91, "learning_rate": 5.802649002384655e-06, "loss": 1.2629, "step": 2355 }, { "epoch": 1.91, "learning_rate": 5.786136614171542e-06, "loss": 1.233, "step": 2360 }, { "epoch": 1.91, "learning_rate": 5.769615430039931e-06, "loss": 1.2375, "step": 2365 }, { "epoch": 1.92, "learning_rate": 5.753085634841903e-06, "loss": 1.2312, "step": 2370 }, { "epoch": 1.92, "learning_rate": 5.736547413525888e-06, "loss": 1.1715, "step": 2375 }, { "epoch": 1.93, "learning_rate": 5.72000095113459e-06, "loss": 1.2695, "step": 2380 }, { "epoch": 1.93, "learning_rate": 5.703446432802924e-06, "loss": 1.1672, "step": 2385 }, { "epoch": 1.93, "learning_rate": 5.686884043755942e-06, "loss": 1.2637, "step": 2390 }, { "epoch": 1.94, "learning_rate": 5.6703139693067554e-06, "loss": 1.1591, "step": 2395 }, { "epoch": 1.94, "learning_rate": 5.653736394854471e-06, "loss": 1.2343, "step": 2400 }, { "epoch": 1.95, "learning_rate": 5.637151505882109e-06, "loss": 1.2172, "step": 2405 }, { "epoch": 1.95, "learning_rate": 5.620559487954531e-06, "loss": 1.2121, "step": 2410 }, { "epoch": 1.95, "learning_rate": 5.603960526716361e-06, "loss": 1.178, "step": 2415 }, { "epoch": 1.96, "learning_rate": 5.587354807889913e-06, "loss": 1.2256, "step": 2420 }, { "epoch": 1.96, "learning_rate": 5.570742517273109e-06, "loss": 1.241, "step": 2425 }, { "epoch": 1.97, "learning_rate": 5.554123840737402e-06, "loss": 1.2773, "step": 2430 }, { "epoch": 1.97, "learning_rate": 5.537498964225694e-06, "loss": 1.2383, "step": 2435 }, { "epoch": 1.97, "learning_rate": 5.520868073750261e-06, "loss": 1.177, "step": 2440 }, { "epoch": 1.98, "learning_rate": 5.50423135539066e-06, "loss": 1.1607, "step": 2445 }, { "epoch": 1.98, "learning_rate": 5.487588995291666e-06, "loss": 1.2957, "step": 2450 }, { "epoch": 1.99, "learning_rate": 5.47094117966117e-06, "loss": 1.164, "step": 2455 }, { "epoch": 1.99, "learning_rate": 5.454288094768108e-06, "loss": 1.225, "step": 2460 }, { "epoch": 1.99, "learning_rate": 5.437629926940367e-06, "loss": 1.2602, "step": 2465 }, { "epoch": 2.0, "learning_rate": 5.420966862562718e-06, "loss": 1.2434, "step": 2470 }, { "epoch": 2.0, "learning_rate": 5.404299088074702e-06, "loss": 1.0836, "step": 2475 }, { "epoch": 2.01, "learning_rate": 5.387626789968574e-06, "loss": 1.0635, "step": 2480 }, { "epoch": 2.01, "learning_rate": 5.370950154787195e-06, "loss": 1.032, "step": 2485 }, { "epoch": 2.01, "learning_rate": 5.354269369121958e-06, "loss": 1.0236, "step": 2490 }, { "epoch": 2.02, "learning_rate": 5.337584619610691e-06, "loss": 1.0402, "step": 2495 }, { "epoch": 2.02, "learning_rate": 5.320896092935575e-06, "loss": 1.0713, "step": 2500 }, { "epoch": 2.03, "learning_rate": 5.304203975821048e-06, "loss": 1.0443, "step": 2505 }, { "epoch": 2.03, "learning_rate": 5.287508455031729e-06, "loss": 1.0523, "step": 2510 }, { "epoch": 2.03, "learning_rate": 5.270809717370314e-06, "loss": 1.0072, "step": 2515 }, { "epoch": 2.04, "learning_rate": 5.254107949675493e-06, "loss": 1.0473, "step": 2520 }, { "epoch": 2.04, "learning_rate": 5.237403338819859e-06, "loss": 1.0189, "step": 2525 }, { "epoch": 2.05, "learning_rate": 5.220696071707816e-06, "loss": 1.027, "step": 2530 }, { "epoch": 2.05, "learning_rate": 5.20398633527349e-06, "loss": 0.9773, "step": 2535 }, { "epoch": 2.06, "learning_rate": 5.187274316478632e-06, "loss": 0.9916, "step": 2540 }, { "epoch": 2.06, "learning_rate": 5.170560202310536e-06, "loss": 1.0252, "step": 2545 }, { "epoch": 2.06, "learning_rate": 5.153844179779932e-06, "loss": 1.0508, "step": 2550 }, { "epoch": 2.07, "learning_rate": 5.137126435918912e-06, "loss": 1.0217, "step": 2555 }, { "epoch": 2.07, "learning_rate": 5.12040715777882e-06, "loss": 1.0367, "step": 2560 }, { "epoch": 2.08, "learning_rate": 5.1036865324281716e-06, "loss": 1.0121, "step": 2565 }, { "epoch": 2.08, "learning_rate": 5.08696474695055e-06, "loss": 0.9992, "step": 2570 }, { "epoch": 2.08, "learning_rate": 5.070241988442528e-06, "loss": 1.0778, "step": 2575 }, { "epoch": 2.09, "learning_rate": 5.053518444011557e-06, "loss": 1.0703, "step": 2580 }, { "epoch": 2.09, "learning_rate": 5.036794300773887e-06, "loss": 1.017, "step": 2585 }, { "epoch": 2.1, "learning_rate": 5.020069745852463e-06, "loss": 0.9813, "step": 2590 }, { "epoch": 2.1, "learning_rate": 5.003344966374843e-06, "loss": 1.0287, "step": 2595 }, { "epoch": 2.1, "learning_rate": 4.9866201494710934e-06, "loss": 1.0617, "step": 2600 }, { "epoch": 2.11, "learning_rate": 4.969895482271695e-06, "loss": 1.1227, "step": 2605 }, { "epoch": 2.11, "learning_rate": 4.953171151905466e-06, "loss": 1.0496, "step": 2610 }, { "epoch": 2.12, "learning_rate": 4.936447345497443e-06, "loss": 1.0287, "step": 2615 }, { "epoch": 2.12, "learning_rate": 4.919724250166808e-06, "loss": 1.0656, "step": 2620 }, { "epoch": 2.12, "learning_rate": 4.903002053024782e-06, "loss": 1.0287, "step": 2625 }, { "epoch": 2.13, "learning_rate": 4.886280941172539e-06, "loss": 1.0293, "step": 2630 }, { "epoch": 2.13, "learning_rate": 4.869561101699113e-06, "loss": 1.0805, "step": 2635 }, { "epoch": 2.14, "learning_rate": 4.852842721679293e-06, "loss": 1.0068, "step": 2640 }, { "epoch": 2.14, "learning_rate": 4.836125988171547e-06, "loss": 1.0056, "step": 2645 }, { "epoch": 2.14, "learning_rate": 4.8194110882159175e-06, "loss": 1.0256, "step": 2650 }, { "epoch": 2.15, "learning_rate": 4.802698208831929e-06, "loss": 1.0551, "step": 2655 }, { "epoch": 2.15, "learning_rate": 4.785987537016504e-06, "loss": 1.002, "step": 2660 }, { "epoch": 2.16, "learning_rate": 4.769279259741858e-06, "loss": 1.0378, "step": 2665 }, { "epoch": 2.16, "learning_rate": 4.752573563953422e-06, "loss": 1.0088, "step": 2670 }, { "epoch": 2.16, "learning_rate": 4.735870636567736e-06, "loss": 0.9963, "step": 2675 }, { "epoch": 2.17, "learning_rate": 4.719170664470371e-06, "loss": 0.9977, "step": 2680 }, { "epoch": 2.17, "learning_rate": 4.702473834513826e-06, "loss": 1.0533, "step": 2685 }, { "epoch": 2.18, "learning_rate": 4.685780333515449e-06, "loss": 1.0148, "step": 2690 }, { "epoch": 2.18, "learning_rate": 4.669090348255338e-06, "loss": 1.0023, "step": 2695 }, { "epoch": 2.18, "learning_rate": 4.652404065474257e-06, "loss": 1.0227, "step": 2700 }, { "epoch": 2.19, "learning_rate": 4.6357216718715375e-06, "loss": 1.0236, "step": 2705 }, { "epoch": 2.19, "learning_rate": 4.619043354103002e-06, "loss": 1.01, "step": 2710 }, { "epoch": 2.2, "learning_rate": 4.602369298778866e-06, "loss": 1.0625, "step": 2715 }, { "epoch": 2.2, "learning_rate": 4.585699692461655e-06, "loss": 1.0154, "step": 2720 }, { "epoch": 2.2, "learning_rate": 4.569034721664114e-06, "loss": 1.0547, "step": 2725 }, { "epoch": 2.21, "learning_rate": 4.552374572847122e-06, "loss": 0.981, "step": 2730 }, { "epoch": 2.21, "learning_rate": 4.535719432417612e-06, "loss": 1.0691, "step": 2735 }, { "epoch": 2.22, "learning_rate": 4.519069486726468e-06, "loss": 1.0451, "step": 2740 }, { "epoch": 2.22, "learning_rate": 4.502424922066462e-06, "loss": 0.9773, "step": 2745 }, { "epoch": 2.22, "learning_rate": 4.485785924670151e-06, "loss": 0.9898, "step": 2750 }, { "epoch": 2.23, "learning_rate": 4.469152680707804e-06, "loss": 1.0496, "step": 2755 }, { "epoch": 2.23, "learning_rate": 4.452525376285319e-06, "loss": 1.0211, "step": 2760 }, { "epoch": 2.24, "learning_rate": 4.435904197442131e-06, "loss": 1.0961, "step": 2765 }, { "epoch": 2.24, "learning_rate": 4.419289330149145e-06, "loss": 1.0279, "step": 2770 }, { "epoch": 2.25, "learning_rate": 4.4026809603066375e-06, "loss": 1.0081, "step": 2775 }, { "epoch": 2.25, "learning_rate": 4.386079273742199e-06, "loss": 1.0764, "step": 2780 }, { "epoch": 2.25, "learning_rate": 4.3694844562086325e-06, "loss": 1.0342, "step": 2785 }, { "epoch": 2.26, "learning_rate": 4.3528966933818865e-06, "loss": 1.0707, "step": 2790 }, { "epoch": 2.26, "learning_rate": 4.33631617085898e-06, "loss": 1.0127, "step": 2795 }, { "epoch": 2.27, "learning_rate": 4.319743074155916e-06, "loss": 1.0658, "step": 2800 }, { "epoch": 2.27, "learning_rate": 4.3031775887056176e-06, "loss": 1.0881, "step": 2805 }, { "epoch": 2.27, "learning_rate": 4.2866198998558404e-06, "loss": 1.043, "step": 2810 }, { "epoch": 2.28, "learning_rate": 4.2700701928671105e-06, "loss": 1.008, "step": 2815 }, { "epoch": 2.28, "learning_rate": 4.253528652910647e-06, "loss": 1.0571, "step": 2820 }, { "epoch": 2.29, "learning_rate": 4.236995465066287e-06, "loss": 1.0859, "step": 2825 }, { "epoch": 2.29, "learning_rate": 4.220470814320417e-06, "loss": 1.0085, "step": 2830 }, { "epoch": 2.29, "learning_rate": 4.203954885563909e-06, "loss": 1.0146, "step": 2835 }, { "epoch": 2.3, "learning_rate": 4.187447863590039e-06, "loss": 1.0562, "step": 2840 }, { "epoch": 2.3, "learning_rate": 4.170949933092432e-06, "loss": 1.1096, "step": 2845 }, { "epoch": 2.31, "learning_rate": 4.154461278662989e-06, "loss": 1.0555, "step": 2850 }, { "epoch": 2.31, "learning_rate": 4.137982084789823e-06, "loss": 0.9902, "step": 2855 }, { "epoch": 2.31, "learning_rate": 4.121512535855193e-06, "loss": 1.06, "step": 2860 }, { "epoch": 2.32, "learning_rate": 4.105052816133448e-06, "loss": 1.0412, "step": 2865 }, { "epoch": 2.32, "learning_rate": 4.0886031097889556e-06, "loss": 1.0354, "step": 2870 }, { "epoch": 2.33, "learning_rate": 4.072163600874045e-06, "loss": 1.0928, "step": 2875 }, { "epoch": 2.33, "learning_rate": 4.0557344733269505e-06, "loss": 1.0645, "step": 2880 }, { "epoch": 2.33, "learning_rate": 4.039315910969754e-06, "loss": 0.9994, "step": 2885 }, { "epoch": 2.34, "learning_rate": 4.02290809750632e-06, "loss": 1.003, "step": 2890 }, { "epoch": 2.34, "learning_rate": 4.006511216520251e-06, "loss": 1.0512, "step": 2895 }, { "epoch": 2.35, "learning_rate": 3.9901254514728225e-06, "loss": 1.06, "step": 2900 }, { "epoch": 2.35, "learning_rate": 3.973750985700943e-06, "loss": 1.0541, "step": 2905 }, { "epoch": 2.35, "learning_rate": 3.957388002415093e-06, "loss": 1.0078, "step": 2910 }, { "epoch": 2.36, "learning_rate": 3.941036684697274e-06, "loss": 1.0104, "step": 2915 }, { "epoch": 2.36, "learning_rate": 3.924697215498971e-06, "loss": 1.0465, "step": 2920 }, { "epoch": 2.37, "learning_rate": 3.908369777639091e-06, "loss": 0.9527, "step": 2925 }, { "epoch": 2.37, "learning_rate": 3.892054553801931e-06, "loss": 1.0559, "step": 2930 }, { "epoch": 2.37, "learning_rate": 3.875751726535124e-06, "loss": 1.041, "step": 2935 }, { "epoch": 2.38, "learning_rate": 3.8594614782476024e-06, "loss": 1.0352, "step": 2940 }, { "epoch": 2.38, "learning_rate": 3.843183991207551e-06, "loss": 1.0175, "step": 2945 }, { "epoch": 2.39, "learning_rate": 3.82691944754038e-06, "loss": 0.9959, "step": 2950 }, { "epoch": 2.39, "learning_rate": 3.8106680292266717e-06, "loss": 1.0094, "step": 2955 }, { "epoch": 2.39, "learning_rate": 3.7944299181001544e-06, "loss": 1.0367, "step": 2960 }, { "epoch": 2.4, "learning_rate": 3.778205295845663e-06, "loss": 1.0443, "step": 2965 }, { "epoch": 2.4, "learning_rate": 3.7619943439971107e-06, "loss": 1.0074, "step": 2970 }, { "epoch": 2.41, "learning_rate": 3.7457972439354526e-06, "loss": 1.0396, "step": 2975 }, { "epoch": 2.41, "learning_rate": 3.7296141768866635e-06, "loss": 1.0506, "step": 2980 }, { "epoch": 2.42, "learning_rate": 3.7134453239196987e-06, "loss": 1.0268, "step": 2985 }, { "epoch": 2.42, "learning_rate": 3.6972908659444828e-06, "loss": 1.0101, "step": 2990 }, { "epoch": 2.42, "learning_rate": 3.6811509837098756e-06, "loss": 1.0076, "step": 2995 }, { "epoch": 2.43, "learning_rate": 3.6650258578016474e-06, "loss": 1.0602, "step": 3000 }, { "epoch": 2.43, "learning_rate": 3.6489156686404683e-06, "loss": 1.0418, "step": 3005 }, { "epoch": 2.44, "learning_rate": 3.6328205964798822e-06, "loss": 1.0498, "step": 3010 }, { "epoch": 2.44, "learning_rate": 3.616740821404292e-06, "loss": 1.0277, "step": 3015 }, { "epoch": 2.44, "learning_rate": 3.600676523326946e-06, "loss": 1.0979, "step": 3020 }, { "epoch": 2.45, "learning_rate": 3.5846278819879197e-06, "loss": 1.0467, "step": 3025 }, { "epoch": 2.45, "learning_rate": 3.568595076952113e-06, "loss": 1.0344, "step": 3030 }, { "epoch": 2.46, "learning_rate": 3.552578287607237e-06, "loss": 0.9874, "step": 3035 }, { "epoch": 2.46, "learning_rate": 3.536577693161801e-06, "loss": 1.0688, "step": 3040 }, { "epoch": 2.46, "learning_rate": 3.520593472643122e-06, "loss": 1.0023, "step": 3045 }, { "epoch": 2.47, "learning_rate": 3.504625804895302e-06, "loss": 1.0315, "step": 3050 }, { "epoch": 2.47, "learning_rate": 3.488674868577246e-06, "loss": 1.0318, "step": 3055 }, { "epoch": 2.48, "learning_rate": 3.472740842160649e-06, "loss": 1.057, "step": 3060 }, { "epoch": 2.48, "learning_rate": 3.4568239039280094e-06, "loss": 1.041, "step": 3065 }, { "epoch": 2.48, "learning_rate": 3.4409242319706225e-06, "loss": 1.126, "step": 3070 }, { "epoch": 2.49, "learning_rate": 3.4250420041866057e-06, "loss": 1.0151, "step": 3075 }, { "epoch": 2.49, "learning_rate": 3.4091773982788867e-06, "loss": 1.0395, "step": 3080 }, { "epoch": 2.5, "learning_rate": 3.393330591753231e-06, "loss": 1.0207, "step": 3085 }, { "epoch": 2.5, "learning_rate": 3.377501761916249e-06, "loss": 0.9663, "step": 3090 }, { "epoch": 2.5, "learning_rate": 3.3616910858734143e-06, "loss": 1.055, "step": 3095 }, { "epoch": 2.51, "learning_rate": 3.3458987405270803e-06, "loss": 1.017, "step": 3100 }, { "epoch": 2.51, "learning_rate": 3.330124902574505e-06, "loss": 1.0034, "step": 3105 }, { "epoch": 2.52, "learning_rate": 3.3143697485058666e-06, "loss": 1.0262, "step": 3110 }, { "epoch": 2.52, "learning_rate": 3.2986334546022964e-06, "loss": 1.0723, "step": 3115 }, { "epoch": 2.52, "learning_rate": 3.282916196933904e-06, "loss": 1.0314, "step": 3120 }, { "epoch": 2.53, "learning_rate": 3.2672181513578038e-06, "loss": 1.0613, "step": 3125 }, { "epoch": 2.53, "learning_rate": 3.251539493516152e-06, "loss": 1.0641, "step": 3130 }, { "epoch": 2.54, "learning_rate": 3.2358803988341776e-06, "loss": 1.0283, "step": 3135 }, { "epoch": 2.54, "learning_rate": 3.220241042518223e-06, "loss": 1.0502, "step": 3140 }, { "epoch": 2.54, "learning_rate": 3.2046215995537837e-06, "loss": 1.0416, "step": 3145 }, { "epoch": 2.55, "learning_rate": 3.1890222447035444e-06, "loss": 1.0549, "step": 3150 }, { "epoch": 2.55, "learning_rate": 3.173443152505431e-06, "loss": 1.034, "step": 3155 }, { "epoch": 2.56, "learning_rate": 3.157884497270658e-06, "loss": 1.0594, "step": 3160 }, { "epoch": 2.56, "learning_rate": 3.1423464530817673e-06, "loss": 1.0637, "step": 3165 }, { "epoch": 2.56, "learning_rate": 3.1268291937906957e-06, "loss": 1.0402, "step": 3170 }, { "epoch": 2.57, "learning_rate": 3.1113328930168153e-06, "loss": 1.0236, "step": 3175 }, { "epoch": 2.57, "learning_rate": 3.095857724145004e-06, "loss": 1.0414, "step": 3180 }, { "epoch": 2.58, "learning_rate": 3.0804038603236943e-06, "loss": 1.0465, "step": 3185 }, { "epoch": 2.58, "learning_rate": 3.0649714744629454e-06, "loss": 1.0561, "step": 3190 }, { "epoch": 2.58, "learning_rate": 3.0495607392324987e-06, "loss": 1.0414, "step": 3195 }, { "epoch": 2.59, "learning_rate": 3.0341718270598557e-06, "loss": 1.0492, "step": 3200 }, { "epoch": 2.59, "learning_rate": 3.0188049101283433e-06, "loss": 1.0053, "step": 3205 }, { "epoch": 2.6, "learning_rate": 3.003460160375189e-06, "loss": 1.0193, "step": 3210 }, { "epoch": 2.6, "learning_rate": 2.9881377494895925e-06, "loss": 1.093, "step": 3215 }, { "epoch": 2.61, "learning_rate": 2.9728378489108135e-06, "loss": 1.0285, "step": 3220 }, { "epoch": 2.61, "learning_rate": 2.957560629826244e-06, "loss": 1.0982, "step": 3225 }, { "epoch": 2.61, "learning_rate": 2.942306263169502e-06, "loss": 1.0438, "step": 3230 }, { "epoch": 2.62, "learning_rate": 2.9270749196185095e-06, "loss": 1.0695, "step": 3235 }, { "epoch": 2.62, "learning_rate": 2.911866769593592e-06, "loss": 1.0139, "step": 3240 }, { "epoch": 2.63, "learning_rate": 2.896681983255565e-06, "loss": 1.1477, "step": 3245 }, { "epoch": 2.63, "learning_rate": 2.881520730503837e-06, "loss": 1.0437, "step": 3250 }, { "epoch": 2.63, "learning_rate": 2.866383180974498e-06, "loss": 1.0455, "step": 3255 }, { "epoch": 2.64, "learning_rate": 2.8512695040384287e-06, "loss": 1.0014, "step": 3260 }, { "epoch": 2.64, "learning_rate": 2.8361798687994097e-06, "loss": 1.0016, "step": 3265 }, { "epoch": 2.65, "learning_rate": 2.8211144440922176e-06, "loss": 0.9983, "step": 3270 }, { "epoch": 2.65, "learning_rate": 2.8060733984807466e-06, "loss": 1.0927, "step": 3275 }, { "epoch": 2.65, "learning_rate": 2.7910569002561137e-06, "loss": 1.0424, "step": 3280 }, { "epoch": 2.66, "learning_rate": 2.7760651174347854e-06, "loss": 1.0555, "step": 3285 }, { "epoch": 2.66, "learning_rate": 2.7610982177566926e-06, "loss": 0.983, "step": 3290 }, { "epoch": 2.67, "learning_rate": 2.7461563686833504e-06, "loss": 0.9712, "step": 3295 }, { "epoch": 2.67, "learning_rate": 2.7312397373959894e-06, "loss": 1.04, "step": 3300 }, { "epoch": 2.67, "learning_rate": 2.716348490793681e-06, "loss": 1.092, "step": 3305 }, { "epoch": 2.68, "learning_rate": 2.7014827954914814e-06, "loss": 0.9855, "step": 3310 }, { "epoch": 2.68, "learning_rate": 2.686642817818548e-06, "loss": 1.0319, "step": 3315 }, { "epoch": 2.69, "learning_rate": 2.6718287238162963e-06, "loss": 0.9938, "step": 3320 }, { "epoch": 2.69, "learning_rate": 2.6570406792365268e-06, "loss": 1.0662, "step": 3325 }, { "epoch": 2.69, "learning_rate": 2.6422788495395912e-06, "loss": 1.0263, "step": 3330 }, { "epoch": 2.7, "learning_rate": 2.6275433998925176e-06, "loss": 1.0584, "step": 3335 }, { "epoch": 2.7, "learning_rate": 2.612834495167177e-06, "loss": 1.0334, "step": 3340 }, { "epoch": 2.71, "learning_rate": 2.5981522999384323e-06, "loss": 1.0426, "step": 3345 }, { "epoch": 2.71, "learning_rate": 2.583496978482305e-06, "loss": 1.0199, "step": 3350 }, { "epoch": 2.71, "learning_rate": 2.568868694774127e-06, "loss": 1.0363, "step": 3355 }, { "epoch": 2.72, "learning_rate": 2.5542676124867103e-06, "loss": 0.9959, "step": 3360 }, { "epoch": 2.72, "learning_rate": 2.5396938949885163e-06, "loss": 1.0357, "step": 3365 }, { "epoch": 2.73, "learning_rate": 2.52514770534183e-06, "loss": 1.0444, "step": 3370 }, { "epoch": 2.73, "learning_rate": 2.510629206300933e-06, "loss": 1.0627, "step": 3375 }, { "epoch": 2.73, "learning_rate": 2.4961385603102794e-06, "loss": 1.0535, "step": 3380 }, { "epoch": 2.74, "learning_rate": 2.481675929502682e-06, "loss": 1.0276, "step": 3385 }, { "epoch": 2.74, "learning_rate": 2.467241475697498e-06, "loss": 1.0057, "step": 3390 }, { "epoch": 2.75, "learning_rate": 2.45283536039882e-06, "loss": 1.0055, "step": 3395 }, { "epoch": 2.75, "learning_rate": 2.438457744793665e-06, "loss": 1.0001, "step": 3400 }, { "epoch": 2.75, "learning_rate": 2.4241087897501703e-06, "loss": 1.1129, "step": 3405 }, { "epoch": 2.76, "learning_rate": 2.409788655815802e-06, "loss": 0.9816, "step": 3410 }, { "epoch": 2.76, "learning_rate": 2.395497503215551e-06, "loss": 1.008, "step": 3415 }, { "epoch": 2.77, "learning_rate": 2.3812354918501397e-06, "loss": 1.0068, "step": 3420 }, { "epoch": 2.77, "learning_rate": 2.3670027812942353e-06, "loss": 1.0779, "step": 3425 }, { "epoch": 2.78, "learning_rate": 2.3527995307946655e-06, "loss": 1.0264, "step": 3430 }, { "epoch": 2.78, "learning_rate": 2.338625899268638e-06, "loss": 1.0395, "step": 3435 }, { "epoch": 2.78, "learning_rate": 2.3244820453019566e-06, "loss": 1.0604, "step": 3440 }, { "epoch": 2.79, "learning_rate": 2.3103681271472516e-06, "loss": 1.0236, "step": 3445 }, { "epoch": 2.79, "learning_rate": 2.296284302722205e-06, "loss": 1.0918, "step": 3450 }, { "epoch": 2.8, "learning_rate": 2.28223072960779e-06, "loss": 1.0504, "step": 3455 }, { "epoch": 2.8, "learning_rate": 2.2682075650465063e-06, "loss": 1.0361, "step": 3460 }, { "epoch": 2.8, "learning_rate": 2.2542149659406126e-06, "loss": 1.0268, "step": 3465 }, { "epoch": 2.81, "learning_rate": 2.2402530888503783e-06, "loss": 1.0434, "step": 3470 }, { "epoch": 2.81, "learning_rate": 2.226322089992336e-06, "loss": 1.0348, "step": 3475 }, { "epoch": 2.82, "learning_rate": 2.2124221252375215e-06, "loss": 1.0135, "step": 3480 }, { "epoch": 2.82, "learning_rate": 2.1985533501097407e-06, "loss": 1.0488, "step": 3485 }, { "epoch": 2.82, "learning_rate": 2.1847159197838213e-06, "loss": 0.9809, "step": 3490 }, { "epoch": 2.83, "learning_rate": 2.1709099890838846e-06, "loss": 1.0627, "step": 3495 }, { "epoch": 2.83, "learning_rate": 2.1571357124816107e-06, "loss": 1.0373, "step": 3500 }, { "epoch": 2.84, "learning_rate": 2.1433932440945028e-06, "loss": 1.0068, "step": 3505 }, { "epoch": 2.84, "learning_rate": 2.129682737684171e-06, "loss": 1.0604, "step": 3510 }, { "epoch": 2.84, "learning_rate": 2.11600434665461e-06, "loss": 1.0337, "step": 3515 }, { "epoch": 2.85, "learning_rate": 2.1023582240504836e-06, "loss": 1.0668, "step": 3520 }, { "epoch": 2.85, "learning_rate": 2.088744522555409e-06, "loss": 1.0088, "step": 3525 }, { "epoch": 2.86, "learning_rate": 2.0751633944902487e-06, "loss": 1.0436, "step": 3530 }, { "epoch": 2.86, "learning_rate": 2.061614991811414e-06, "loss": 1.0138, "step": 3535 }, { "epoch": 2.86, "learning_rate": 2.0480994661091507e-06, "loss": 1.1406, "step": 3540 }, { "epoch": 2.87, "learning_rate": 2.0346169686058586e-06, "loss": 1.0391, "step": 3545 }, { "epoch": 2.87, "learning_rate": 2.0211676501543866e-06, "loss": 1.0592, "step": 3550 }, { "epoch": 2.88, "learning_rate": 2.00775166123635e-06, "loss": 0.9783, "step": 3555 }, { "epoch": 2.88, "learning_rate": 1.9943691519604523e-06, "loss": 1.0473, "step": 3560 }, { "epoch": 2.88, "learning_rate": 1.9810202720607945e-06, "loss": 1.0555, "step": 3565 }, { "epoch": 2.89, "learning_rate": 1.967705170895208e-06, "loss": 1.0691, "step": 3570 }, { "epoch": 2.89, "learning_rate": 1.9544239974435797e-06, "loss": 1.026, "step": 3575 }, { "epoch": 2.9, "learning_rate": 1.9411769003061874e-06, "loss": 1.0588, "step": 3580 }, { "epoch": 2.9, "learning_rate": 1.9279640277020396e-06, "loss": 1.0635, "step": 3585 }, { "epoch": 2.9, "learning_rate": 1.9147855274672073e-06, "loss": 0.9919, "step": 3590 }, { "epoch": 2.91, "learning_rate": 1.9016415470531773e-06, "loss": 1.0053, "step": 3595 }, { "epoch": 2.91, "learning_rate": 1.8885322335252076e-06, "loss": 1.0461, "step": 3600 }, { "epoch": 2.92, "learning_rate": 1.8754577335606689e-06, "loss": 1.0051, "step": 3605 }, { "epoch": 2.92, "learning_rate": 1.8624181934474117e-06, "loss": 1.0521, "step": 3610 }, { "epoch": 2.92, "learning_rate": 1.8494137590821282e-06, "loss": 0.9926, "step": 3615 }, { "epoch": 2.93, "learning_rate": 1.8364445759687233e-06, "loss": 1.0264, "step": 3620 }, { "epoch": 2.93, "learning_rate": 1.823510789216676e-06, "loss": 1.0475, "step": 3625 }, { "epoch": 2.94, "learning_rate": 1.8106125435394312e-06, "loss": 1.012, "step": 3630 }, { "epoch": 2.94, "learning_rate": 1.7977499832527655e-06, "loss": 1.0269, "step": 3635 }, { "epoch": 2.94, "learning_rate": 1.7849232522731797e-06, "loss": 1.0463, "step": 3640 }, { "epoch": 2.95, "learning_rate": 1.7721324941162933e-06, "loss": 1.025, "step": 3645 }, { "epoch": 2.95, "learning_rate": 1.7593778518952275e-06, "loss": 1.0326, "step": 3650 }, { "epoch": 2.96, "learning_rate": 1.7466594683190107e-06, "loss": 1.0389, "step": 3655 }, { "epoch": 2.96, "learning_rate": 1.7339774856909851e-06, "loss": 1.0609, "step": 3660 }, { "epoch": 2.97, "learning_rate": 1.7213320459072047e-06, "loss": 0.9949, "step": 3665 }, { "epoch": 2.97, "learning_rate": 1.7087232904548595e-06, "loss": 1.0083, "step": 3670 }, { "epoch": 2.97, "learning_rate": 1.69615136041068e-06, "loss": 1.0377, "step": 3675 }, { "epoch": 2.98, "learning_rate": 1.6836163964393664e-06, "loss": 1.0514, "step": 3680 }, { "epoch": 2.98, "learning_rate": 1.6711185387920176e-06, "loss": 0.99, "step": 3685 }, { "epoch": 2.99, "learning_rate": 1.6586579273045529e-06, "loss": 1.0146, "step": 3690 }, { "epoch": 2.99, "learning_rate": 1.6462347013961526e-06, "loss": 1.0445, "step": 3695 }, { "epoch": 2.99, "learning_rate": 1.6338490000676987e-06, "loss": 1.0674, "step": 3700 }, { "epoch": 3.0, "learning_rate": 1.6215009619002197e-06, "loss": 1.0215, "step": 3705 }, { "epoch": 3.0, "learning_rate": 1.609190725053335e-06, "loss": 0.9832, "step": 3710 }, { "epoch": 3.01, "learning_rate": 1.5969184272637184e-06, "loss": 0.9313, "step": 3715 }, { "epoch": 3.01, "learning_rate": 1.5846842058435457e-06, "loss": 1.0244, "step": 3720 }, { "epoch": 3.01, "learning_rate": 1.5724881976789696e-06, "loss": 0.9002, "step": 3725 }, { "epoch": 3.02, "learning_rate": 1.5603305392285785e-06, "loss": 0.957, "step": 3730 }, { "epoch": 3.02, "learning_rate": 1.548211366521875e-06, "loss": 0.9404, "step": 3735 }, { "epoch": 3.03, "learning_rate": 1.5361308151577526e-06, "loss": 0.9199, "step": 3740 }, { "epoch": 3.03, "learning_rate": 1.5240890203029813e-06, "loss": 0.9224, "step": 3745 }, { "epoch": 3.03, "learning_rate": 1.5120861166906869e-06, "loss": 0.9822, "step": 3750 }, { "epoch": 3.04, "learning_rate": 1.5001222386188573e-06, "loss": 0.9063, "step": 3755 }, { "epoch": 3.04, "learning_rate": 1.4881975199488247e-06, "loss": 0.9455, "step": 3760 }, { "epoch": 3.05, "learning_rate": 1.4763120941037757e-06, "loss": 0.8986, "step": 3765 }, { "epoch": 3.05, "learning_rate": 1.4644660940672628e-06, "loss": 0.9297, "step": 3770 }, { "epoch": 3.05, "learning_rate": 1.4526596523817066e-06, "loss": 0.9889, "step": 3775 }, { "epoch": 3.06, "learning_rate": 1.4408929011469175e-06, "loss": 0.9387, "step": 3780 }, { "epoch": 3.06, "learning_rate": 1.4291659720186218e-06, "loss": 0.8889, "step": 3785 }, { "epoch": 3.07, "learning_rate": 1.4174789962069808e-06, "loss": 0.9965, "step": 3790 }, { "epoch": 3.07, "learning_rate": 1.4058321044751255e-06, "loss": 0.9279, "step": 3795 }, { "epoch": 3.07, "learning_rate": 1.3942254271377004e-06, "loss": 0.9621, "step": 3800 }, { "epoch": 3.08, "learning_rate": 1.3826590940593926e-06, "loss": 0.9081, "step": 3805 }, { "epoch": 3.08, "learning_rate": 1.3711332346534916e-06, "loss": 0.9201, "step": 3810 }, { "epoch": 3.09, "learning_rate": 1.3596479778804312e-06, "loss": 0.9013, "step": 3815 }, { "epoch": 3.09, "learning_rate": 1.3482034522463522e-06, "loss": 0.9255, "step": 3820 }, { "epoch": 3.09, "learning_rate": 1.3367997858016619e-06, "loss": 0.9678, "step": 3825 }, { "epoch": 3.1, "learning_rate": 1.325437106139607e-06, "loss": 0.9334, "step": 3830 }, { "epoch": 3.1, "learning_rate": 1.3141155403948358e-06, "loss": 0.9455, "step": 3835 }, { "epoch": 3.11, "learning_rate": 1.3028352152419876e-06, "loss": 0.9025, "step": 3840 }, { "epoch": 3.11, "learning_rate": 1.291596256894263e-06, "loss": 0.8933, "step": 3845 }, { "epoch": 3.11, "learning_rate": 1.2803987911020239e-06, "loss": 0.999, "step": 3850 }, { "epoch": 3.12, "learning_rate": 1.269242943151377e-06, "loss": 0.8996, "step": 3855 }, { "epoch": 3.12, "learning_rate": 1.2581288378627759e-06, "loss": 0.9594, "step": 3860 }, { "epoch": 3.13, "learning_rate": 1.2470565995896244e-06, "loss": 0.9385, "step": 3865 }, { "epoch": 3.13, "learning_rate": 1.236026352216888e-06, "loss": 0.9508, "step": 3870 }, { "epoch": 3.14, "learning_rate": 1.2250382191597015e-06, "loss": 0.9479, "step": 3875 }, { "epoch": 3.14, "learning_rate": 1.21409232336199e-06, "loss": 0.8861, "step": 3880 }, { "epoch": 3.14, "learning_rate": 1.2031887872951004e-06, "loss": 0.9539, "step": 3885 }, { "epoch": 3.15, "learning_rate": 1.1923277329564192e-06, "loss": 0.8969, "step": 3890 }, { "epoch": 3.15, "learning_rate": 1.181509281868019e-06, "loss": 0.9248, "step": 3895 }, { "epoch": 3.16, "learning_rate": 1.1707335550752901e-06, "loss": 0.8923, "step": 3900 }, { "epoch": 3.16, "learning_rate": 1.1600006731455888e-06, "loss": 0.8534, "step": 3905 }, { "epoch": 3.16, "learning_rate": 1.1493107561668943e-06, "loss": 0.9193, "step": 3910 }, { "epoch": 3.17, "learning_rate": 1.1386639237464542e-06, "loss": 0.9688, "step": 3915 }, { "epoch": 3.17, "learning_rate": 1.1280602950094532e-06, "loss": 0.8982, "step": 3920 }, { "epoch": 3.18, "learning_rate": 1.1174999885976834e-06, "loss": 0.9001, "step": 3925 }, { "epoch": 3.18, "learning_rate": 1.106983122668206e-06, "loss": 0.9189, "step": 3930 }, { "epoch": 3.18, "learning_rate": 1.0965098148920422e-06, "loss": 0.9842, "step": 3935 }, { "epoch": 3.19, "learning_rate": 1.0860801824528443e-06, "loss": 0.9438, "step": 3940 }, { "epoch": 3.19, "learning_rate": 1.0756943420455934e-06, "loss": 0.9412, "step": 3945 }, { "epoch": 3.2, "learning_rate": 1.0653524098752894e-06, "loss": 0.9695, "step": 3950 }, { "epoch": 3.2, "learning_rate": 1.055054501655654e-06, "loss": 0.9145, "step": 3955 }, { "epoch": 3.2, "learning_rate": 1.0448007326078336e-06, "loss": 0.9602, "step": 3960 }, { "epoch": 3.21, "learning_rate": 1.0345912174591071e-06, "loss": 0.9009, "step": 3965 }, { "epoch": 3.21, "learning_rate": 1.0244260704416104e-06, "loss": 0.9375, "step": 3970 }, { "epoch": 3.22, "learning_rate": 1.0143054052910534e-06, "loss": 0.9402, "step": 3975 }, { "epoch": 3.22, "learning_rate": 1.0042293352454446e-06, "loss": 0.9182, "step": 3980 }, { "epoch": 3.22, "learning_rate": 9.94197973043829e-07, "loss": 0.909, "step": 3985 }, { "epoch": 3.23, "learning_rate": 9.842114309250222e-07, "loss": 0.9285, "step": 3990 }, { "epoch": 3.23, "learning_rate": 9.74269820626364e-07, "loss": 0.9264, "step": 3995 }, { "epoch": 3.24, "learning_rate": 9.643732533824545e-07, "loss": 0.9205, "step": 4000 }, { "epoch": 3.24, "learning_rate": 9.545218399239186e-07, "loss": 0.96, "step": 4005 }, { "epoch": 3.24, "learning_rate": 9.447156904761668e-07, "loss": 0.9473, "step": 4010 }, { "epoch": 3.25, "learning_rate": 9.349549147581571e-07, "loss": 0.9281, "step": 4015 }, { "epoch": 3.25, "learning_rate": 9.252396219811737e-07, "loss": 0.9311, "step": 4020 }, { "epoch": 3.26, "learning_rate": 9.155699208475988e-07, "loss": 0.9789, "step": 4025 }, { "epoch": 3.26, "learning_rate": 9.059459195496989e-07, "loss": 0.8984, "step": 4030 }, { "epoch": 3.26, "learning_rate": 8.963677257684184e-07, "loss": 0.9564, "step": 4035 }, { "epoch": 3.27, "learning_rate": 8.868354466721668e-07, "loss": 0.9293, "step": 4040 }, { "epoch": 3.27, "learning_rate": 8.773491889156254e-07, "loss": 0.9678, "step": 4045 }, { "epoch": 3.28, "learning_rate": 8.679090586385519e-07, "loss": 0.9275, "step": 4050 }, { "epoch": 3.28, "learning_rate": 8.585151614645942e-07, "loss": 0.966, "step": 4055 }, { "epoch": 3.28, "learning_rate": 8.491676025001083e-07, "loss": 0.9049, "step": 4060 }, { "epoch": 3.29, "learning_rate": 8.398664863329792e-07, "loss": 0.9385, "step": 4065 }, { "epoch": 3.29, "learning_rate": 8.306119170314553e-07, "loss": 0.9529, "step": 4070 }, { "epoch": 3.3, "learning_rate": 8.214039981429789e-07, "loss": 0.9412, "step": 4075 }, { "epoch": 3.3, "learning_rate": 8.122428326930348e-07, "loss": 0.9852, "step": 4080 }, { "epoch": 3.31, "learning_rate": 8.031285231839908e-07, "loss": 0.9223, "step": 4085 }, { "epoch": 3.31, "learning_rate": 7.940611715939522e-07, "loss": 0.9592, "step": 4090 }, { "epoch": 3.31, "learning_rate": 7.850408793756242e-07, "loss": 0.9758, "step": 4095 }, { "epoch": 3.32, "learning_rate": 7.760677474551759e-07, "loss": 0.842, "step": 4100 }, { "epoch": 3.32, "learning_rate": 7.67141876231105e-07, "loss": 0.9406, "step": 4105 }, { "epoch": 3.33, "learning_rate": 7.582633655731231e-07, "loss": 0.9397, "step": 4110 }, { "epoch": 3.33, "learning_rate": 7.494323148210303e-07, "loss": 0.9193, "step": 4115 }, { "epoch": 3.33, "learning_rate": 7.406488227836139e-07, "loss": 0.9529, "step": 4120 }, { "epoch": 3.34, "learning_rate": 7.319129877375314e-07, "loss": 0.973, "step": 4125 }, { "epoch": 3.34, "learning_rate": 7.232249074262176e-07, "loss": 0.9596, "step": 4130 }, { "epoch": 3.35, "learning_rate": 7.145846790587891e-07, "loss": 0.9477, "step": 4135 }, { "epoch": 3.35, "learning_rate": 7.059923993089585e-07, "loss": 0.9809, "step": 4140 }, { "epoch": 3.35, "learning_rate": 6.974481643139514e-07, "loss": 0.9863, "step": 4145 }, { "epoch": 3.36, "learning_rate": 6.889520696734297e-07, "loss": 0.9666, "step": 4150 }, { "epoch": 3.36, "learning_rate": 6.805042104484216e-07, "loss": 0.9328, "step": 4155 }, { "epoch": 3.37, "learning_rate": 6.721046811602622e-07, "loss": 0.8867, "step": 4160 }, { "epoch": 3.37, "learning_rate": 6.63753575789532e-07, "loss": 0.9635, "step": 4165 }, { "epoch": 3.37, "learning_rate": 6.554509877750042e-07, "loss": 0.9605, "step": 4170 }, { "epoch": 3.38, "learning_rate": 6.471970100126035e-07, "loss": 0.989, "step": 4175 }, { "epoch": 3.38, "learning_rate": 6.389917348543651e-07, "loss": 0.9393, "step": 4180 }, { "epoch": 3.39, "learning_rate": 6.308352541074014e-07, "loss": 0.9385, "step": 4185 }, { "epoch": 3.39, "learning_rate": 6.227276590328713e-07, "loss": 0.9325, "step": 4190 }, { "epoch": 3.39, "learning_rate": 6.146690403449646e-07, "loss": 0.9801, "step": 4195 }, { "epoch": 3.4, "learning_rate": 6.066594882098831e-07, "loss": 0.976, "step": 4200 }, { "epoch": 3.4, "learning_rate": 5.98699092244835e-07, "loss": 0.9523, "step": 4205 }, { "epoch": 3.41, "learning_rate": 5.907879415170287e-07, "loss": 0.8773, "step": 4210 }, { "epoch": 3.41, "learning_rate": 5.829261245426793e-07, "loss": 0.8939, "step": 4215 }, { "epoch": 3.41, "learning_rate": 5.751137292860126e-07, "loss": 0.9383, "step": 4220 }, { "epoch": 3.42, "learning_rate": 5.673508431582936e-07, "loss": 0.9797, "step": 4225 }, { "epoch": 3.42, "learning_rate": 5.596375530168329e-07, "loss": 0.932, "step": 4230 }, { "epoch": 3.43, "learning_rate": 5.519739451640238e-07, "loss": 0.9015, "step": 4235 }, { "epoch": 3.43, "learning_rate": 5.443601053463743e-07, "loss": 0.966, "step": 4240 }, { "epoch": 3.43, "learning_rate": 5.367961187535504e-07, "loss": 0.9252, "step": 4245 }, { "epoch": 3.44, "learning_rate": 5.292820700174189e-07, "loss": 0.925, "step": 4250 }, { "epoch": 3.44, "learning_rate": 5.218180432111026e-07, "loss": 0.9445, "step": 4255 }, { "epoch": 3.45, "learning_rate": 5.144041218480389e-07, "loss": 0.9461, "step": 4260 }, { "epoch": 3.45, "learning_rate": 5.070403888810471e-07, "loss": 0.926, "step": 4265 }, { "epoch": 3.45, "learning_rate": 4.997269267013993e-07, "loss": 0.9242, "step": 4270 }, { "epoch": 3.46, "learning_rate": 4.924638171378976e-07, "loss": 0.9514, "step": 4275 }, { "epoch": 3.46, "learning_rate": 4.852511414559575e-07, "loss": 0.9877, "step": 4280 }, { "epoch": 3.47, "learning_rate": 4.780889803567018e-07, "loss": 0.9541, "step": 4285 }, { "epoch": 3.47, "learning_rate": 4.7097741397605754e-07, "loss": 0.9449, "step": 4290 }, { "epoch": 3.47, "learning_rate": 4.639165218838559e-07, "loss": 0.9361, "step": 4295 }, { "epoch": 3.48, "learning_rate": 4.569063830829445e-07, "loss": 0.9908, "step": 4300 }, { "epoch": 3.48, "learning_rate": 4.49947076008303e-07, "loss": 0.9355, "step": 4305 }, { "epoch": 3.49, "learning_rate": 4.4303867852616755e-07, "loss": 0.9096, "step": 4310 }, { "epoch": 3.49, "learning_rate": 4.361812679331551e-07, "loss": 0.9555, "step": 4315 }, { "epoch": 3.5, "learning_rate": 4.2937492095540043e-07, "loss": 0.9221, "step": 4320 }, { "epoch": 3.5, "learning_rate": 4.2261971374769893e-07, "loss": 0.9594, "step": 4325 }, { "epoch": 3.5, "learning_rate": 4.159157218926557e-07, "loss": 0.914, "step": 4330 }, { "epoch": 3.51, "learning_rate": 4.09263020399836e-07, "loss": 0.9935, "step": 4335 }, { "epoch": 3.51, "learning_rate": 4.02661683704928e-07, "loss": 0.9467, "step": 4340 }, { "epoch": 3.52, "learning_rate": 3.9611178566890894e-07, "loss": 0.943, "step": 4345 }, { "epoch": 3.52, "learning_rate": 3.896133995772233e-07, "loss": 0.9232, "step": 4350 }, { "epoch": 3.52, "learning_rate": 3.8316659813895597e-07, "loss": 0.9545, "step": 4355 }, { "epoch": 3.53, "learning_rate": 3.767714534860223e-07, "loss": 0.9242, "step": 4360 }, { "epoch": 3.53, "learning_rate": 3.704280371723601e-07, "loss": 0.9379, "step": 4365 }, { "epoch": 3.54, "learning_rate": 3.6413642017313233e-07, "loss": 0.9506, "step": 4370 }, { "epoch": 3.54, "learning_rate": 3.5789667288392784e-07, "loss": 0.9465, "step": 4375 }, { "epoch": 3.54, "learning_rate": 3.517088651199768e-07, "loss": 0.9365, "step": 4380 }, { "epoch": 3.55, "learning_rate": 3.455730661153672e-07, "loss": 0.9195, "step": 4385 }, { "epoch": 3.55, "learning_rate": 3.394893445222752e-07, "loss": 0.9746, "step": 4390 }, { "epoch": 3.56, "learning_rate": 3.334577684101925e-07, "loss": 0.9289, "step": 4395 }, { "epoch": 3.56, "learning_rate": 3.2747840526516414e-07, "loss": 0.9038, "step": 4400 }, { "epoch": 3.56, "learning_rate": 3.215513219890365e-07, "loss": 0.9098, "step": 4405 }, { "epoch": 3.57, "learning_rate": 3.15676584898707e-07, "loss": 0.9435, "step": 4410 }, { "epoch": 3.57, "learning_rate": 3.0985425972538343e-07, "loss": 0.9098, "step": 4415 }, { "epoch": 3.58, "learning_rate": 3.040844116138475e-07, "loss": 0.9318, "step": 4420 }, { "epoch": 3.58, "learning_rate": 2.9836710512172353e-07, "loss": 0.9592, "step": 4425 }, { "epoch": 3.58, "learning_rate": 2.9270240421876204e-07, "loss": 0.9756, "step": 4430 }, { "epoch": 3.59, "learning_rate": 2.8709037228611903e-07, "loss": 0.9189, "step": 4435 }, { "epoch": 3.59, "learning_rate": 2.815310721156489e-07, "loss": 0.9139, "step": 4440 }, { "epoch": 3.6, "learning_rate": 2.7602456590920034e-07, "loss": 0.9127, "step": 4445 }, { "epoch": 3.6, "learning_rate": 2.7057091527792125e-07, "loss": 0.9602, "step": 4450 }, { "epoch": 3.6, "learning_rate": 2.6517018124157137e-07, "loss": 0.9787, "step": 4455 }, { "epoch": 3.61, "learning_rate": 2.598224242278369e-07, "loss": 0.916, "step": 4460 }, { "epoch": 3.61, "learning_rate": 2.545277040716537e-07, "loss": 0.9846, "step": 4465 }, { "epoch": 3.62, "learning_rate": 2.492860800145408e-07, "loss": 0.9484, "step": 4470 }, { "epoch": 3.62, "learning_rate": 2.4409761070393614e-07, "loss": 0.9191, "step": 4475 }, { "epoch": 3.62, "learning_rate": 2.389623541925407e-07, "loss": 0.9266, "step": 4480 }, { "epoch": 3.63, "learning_rate": 2.3388036793766723e-07, "loss": 0.9034, "step": 4485 }, { "epoch": 3.63, "learning_rate": 2.2885170880059758e-07, "loss": 0.896, "step": 4490 }, { "epoch": 3.64, "learning_rate": 2.2387643304595196e-07, "loss": 0.9574, "step": 4495 }, { "epoch": 3.64, "learning_rate": 2.189545963410511e-07, "loss": 0.9387, "step": 4500 }, { "epoch": 3.64, "learning_rate": 2.1408625375529845e-07, "loss": 0.9322, "step": 4505 }, { "epoch": 3.65, "learning_rate": 2.0927145975956297e-07, "loss": 0.9088, "step": 4510 }, { "epoch": 3.65, "learning_rate": 2.0451026822556952e-07, "loss": 0.9168, "step": 4515 }, { "epoch": 3.66, "learning_rate": 1.9980273242529825e-07, "loss": 0.951, "step": 4520 }, { "epoch": 3.66, "learning_rate": 1.951489050303834e-07, "loss": 0.916, "step": 4525 }, { "epoch": 3.67, "learning_rate": 1.9054883811152837e-07, "loss": 0.8936, "step": 4530 }, { "epoch": 3.67, "learning_rate": 1.8600258313792142e-07, "loss": 0.9279, "step": 4535 }, { "epoch": 3.67, "learning_rate": 1.8151019097666146e-07, "loss": 0.9666, "step": 4540 }, { "epoch": 3.68, "learning_rate": 1.7707171189218663e-07, "loss": 0.9555, "step": 4545 }, { "epoch": 3.68, "learning_rate": 1.7268719554571157e-07, "loss": 0.945, "step": 4550 }, { "epoch": 3.69, "learning_rate": 1.683566909946771e-07, "loss": 0.9357, "step": 4555 }, { "epoch": 3.69, "learning_rate": 1.640802466921926e-07, "loss": 0.9528, "step": 4560 }, { "epoch": 3.69, "learning_rate": 1.5985791048650223e-07, "loss": 0.8418, "step": 4565 }, { "epoch": 3.7, "learning_rate": 1.5568972962044405e-07, "loss": 0.9797, "step": 4570 }, { "epoch": 3.7, "learning_rate": 1.515757507309229e-07, "loss": 0.9197, "step": 4575 }, { "epoch": 3.71, "learning_rate": 1.4751601984839159e-07, "loss": 1.0133, "step": 4580 }, { "epoch": 3.71, "learning_rate": 1.4351058239633065e-07, "loss": 0.9518, "step": 4585 }, { "epoch": 3.71, "learning_rate": 1.3955948319074374e-07, "loss": 0.881, "step": 4590 }, { "epoch": 3.72, "learning_rate": 1.3566276643965538e-07, "loss": 0.9238, "step": 4595 }, { "epoch": 3.72, "learning_rate": 1.3182047574261557e-07, "loss": 0.9002, "step": 4600 }, { "epoch": 3.73, "learning_rate": 1.2803265409021436e-07, "loss": 0.948, "step": 4605 }, { "epoch": 3.73, "learning_rate": 1.2429934386359643e-07, "loss": 0.9025, "step": 4610 }, { "epoch": 3.73, "learning_rate": 1.2062058683399048e-07, "loss": 0.9354, "step": 4615 }, { "epoch": 3.74, "learning_rate": 1.1699642416224233e-07, "loss": 0.9582, "step": 4620 }, { "epoch": 3.74, "learning_rate": 1.1342689639835036e-07, "loss": 0.9734, "step": 4625 }, { "epoch": 3.75, "learning_rate": 1.0991204348101692e-07, "loss": 0.9267, "step": 4630 }, { "epoch": 3.75, "learning_rate": 1.0645190473719647e-07, "loss": 0.9705, "step": 4635 }, { "epoch": 3.75, "learning_rate": 1.0304651888166039e-07, "loss": 0.9285, "step": 4640 }, { "epoch": 3.76, "learning_rate": 9.969592401655903e-08, "loss": 0.9494, "step": 4645 }, { "epoch": 3.76, "learning_rate": 9.640015763100031e-08, "loss": 0.8965, "step": 4650 }, { "epoch": 3.77, "learning_rate": 9.315925660062619e-08, "loss": 0.9922, "step": 4655 }, { "epoch": 3.77, "learning_rate": 8.997325718720085e-08, "loss": 0.9295, "step": 4660 }, { "epoch": 3.77, "learning_rate": 8.684219503820756e-08, "loss": 0.9564, "step": 4665 }, { "epoch": 3.78, "learning_rate": 8.376610518644746e-08, "loss": 0.9201, "step": 4670 }, { "epoch": 3.78, "learning_rate": 8.074502204964696e-08, "loss": 0.9303, "step": 4675 }, { "epoch": 3.79, "learning_rate": 7.777897943007595e-08, "loss": 0.9636, "step": 4680 }, { "epoch": 3.79, "learning_rate": 7.486801051416525e-08, "loss": 0.9542, "step": 4685 }, { "epoch": 3.79, "learning_rate": 7.201214787213862e-08, "loss": 0.9684, "step": 4690 }, { "epoch": 3.8, "learning_rate": 6.921142345764798e-08, "loss": 0.924, "step": 4695 }, { "epoch": 3.8, "learning_rate": 6.646586860741322e-08, "loss": 0.9271, "step": 4700 }, { "epoch": 3.81, "learning_rate": 6.377551404087467e-08, "loss": 0.9333, "step": 4705 }, { "epoch": 3.81, "learning_rate": 6.114038985984894e-08, "loss": 0.9413, "step": 4710 }, { "epoch": 3.81, "learning_rate": 5.856052554818969e-08, "loss": 0.9223, "step": 4715 }, { "epoch": 3.82, "learning_rate": 5.603594997145967e-08, "loss": 0.9301, "step": 4720 }, { "epoch": 3.82, "learning_rate": 5.3566691376609744e-08, "loss": 0.9072, "step": 4725 }, { "epoch": 3.83, "learning_rate": 5.115277739165703e-08, "loss": 0.9152, "step": 4730 }, { "epoch": 3.83, "learning_rate": 4.8794235025383386e-08, "loss": 0.9234, "step": 4735 }, { "epoch": 3.83, "learning_rate": 4.6491090667025176e-08, "loss": 0.943, "step": 4740 }, { "epoch": 3.84, "learning_rate": 4.4243370085985114e-08, "loss": 0.8847, "step": 4745 }, { "epoch": 3.84, "learning_rate": 4.2051098431539764e-08, "loss": 1.0156, "step": 4750 }, { "epoch": 3.85, "learning_rate": 3.991430023255804e-08, "loss": 0.866, "step": 4755 }, { "epoch": 3.85, "learning_rate": 3.783299939722984e-08, "loss": 0.9083, "step": 4760 }, { "epoch": 3.86, "learning_rate": 3.580721921279562e-08, "loss": 0.9077, "step": 4765 }, { "epoch": 3.86, "learning_rate": 3.383698234528665e-08, "loss": 0.9351, "step": 4770 }, { "epoch": 3.86, "learning_rate": 3.1922310839272444e-08, "loss": 0.9322, "step": 4775 }, { "epoch": 3.87, "learning_rate": 3.006322611761314e-08, "loss": 0.9379, "step": 4780 }, { "epoch": 3.87, "learning_rate": 2.8259748981219194e-08, "loss": 0.9136, "step": 4785 }, { "epoch": 3.88, "learning_rate": 2.651189960882039e-08, "loss": 0.9764, "step": 4790 }, { "epoch": 3.88, "learning_rate": 2.4819697556737742e-08, "loss": 0.9348, "step": 4795 }, { "epoch": 3.88, "learning_rate": 2.318316175866697e-08, "loss": 0.9345, "step": 4800 }, { "epoch": 3.89, "learning_rate": 2.1602310525466464e-08, "loss": 0.8879, "step": 4805 }, { "epoch": 3.89, "learning_rate": 2.007716154494965e-08, "loss": 0.9619, "step": 4810 }, { "epoch": 3.9, "learning_rate": 1.8607731881690737e-08, "loss": 0.9516, "step": 4815 }, { "epoch": 3.9, "learning_rate": 1.7194037976831502e-08, "loss": 0.9471, "step": 4820 }, { "epoch": 3.9, "learning_rate": 1.583609564789812e-08, "loss": 0.9197, "step": 4825 }, { "epoch": 3.91, "learning_rate": 1.4533920088623533e-08, "loss": 0.8611, "step": 4830 }, { "epoch": 3.91, "learning_rate": 1.3287525868778128e-08, "loss": 0.9449, "step": 4835 }, { "epoch": 3.92, "learning_rate": 1.2096926934007103e-08, "loss": 0.9418, "step": 4840 }, { "epoch": 3.92, "learning_rate": 1.0962136605673357e-08, "loss": 0.9337, "step": 4845 }, { "epoch": 3.92, "learning_rate": 9.883167580709285e-09, "loss": 0.9118, "step": 4850 }, { "epoch": 3.93, "learning_rate": 8.860031931473555e-09, "loss": 0.9563, "step": 4855 }, { "epoch": 3.93, "learning_rate": 7.892741105617329e-09, "loss": 0.9342, "step": 4860 }, { "epoch": 3.94, "learning_rate": 6.981305925956583e-09, "loss": 0.9553, "step": 4865 }, { "epoch": 3.94, "learning_rate": 6.1257365903488745e-09, "loss": 0.9455, "step": 4870 }, { "epoch": 3.94, "learning_rate": 5.326042671580655e-09, "loss": 0.8813, "step": 4875 }, { "epoch": 3.95, "learning_rate": 4.582233117260693e-09, "loss": 0.8929, "step": 4880 }, { "epoch": 3.95, "learning_rate": 3.894316249717922e-09, "loss": 0.9463, "step": 4885 }, { "epoch": 3.96, "learning_rate": 3.2622997659120802e-09, "loss": 0.9428, "step": 4890 }, { "epoch": 3.96, "learning_rate": 2.6861907373432193e-09, "loss": 0.866, "step": 4895 }, { "epoch": 3.96, "learning_rate": 2.165995609973992e-09, "loss": 0.94, "step": 4900 }, { "epoch": 3.97, "learning_rate": 1.7017202041602621e-09, "loss": 0.9525, "step": 4905 }, { "epoch": 3.97, "learning_rate": 1.293369714582271e-09, "loss": 0.9548, "step": 4910 }, { "epoch": 3.98, "learning_rate": 9.409487101880167e-10, "loss": 0.9668, "step": 4915 }, { "epoch": 3.98, "learning_rate": 6.444611341432927e-10, "loss": 0.9349, "step": 4920 }, { "epoch": 3.98, "learning_rate": 4.0391030378561513e-10, "loss": 0.974, "step": 4925 }, { "epoch": 3.99, "learning_rate": 2.1929891058758424e-10, "loss": 0.9563, "step": 4930 }, { "epoch": 3.99, "learning_rate": 9.0629020127464e-11, "loss": 0.9373, "step": 4935 }, { "epoch": 4.0, "learning_rate": 1.790207206586736e-11, "loss": 0.9326, "step": 4940 }, { "epoch": 4.0, "step": 4944, "total_flos": 2.446826463366742e+18, "train_loss": 1.1695684537918436, "train_runtime": 57751.42, "train_samples_per_second": 5.478, "train_steps_per_second": 0.086 } ], "max_steps": 4944, "num_train_epochs": 4, "total_flos": 2.446826463366742e+18, "trial_name": null, "trial_params": null }