{ "best_metric": 0.5362423368193292, "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset\\checkpoint-29760", "epoch": 4.2, "eval_steps": 500, "global_step": 29760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.6801075268817206e-07, "loss": 4.8645, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.360215053763441e-07, "loss": 4.8248, "step": 20 }, { "epoch": 0.0, "learning_rate": 5.040322580645161e-07, "loss": 4.8447, "step": 30 }, { "epoch": 0.0, "learning_rate": 6.720430107526882e-07, "loss": 4.7971, "step": 40 }, { "epoch": 0.0, "learning_rate": 8.400537634408602e-07, "loss": 4.9621, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.0080645161290323e-06, "loss": 4.9151, "step": 60 }, { "epoch": 0.0, "learning_rate": 1.1760752688172044e-06, "loss": 4.8636, "step": 70 }, { "epoch": 0.0, "learning_rate": 1.3440860215053765e-06, "loss": 4.7778, "step": 80 }, { "epoch": 0.0, "learning_rate": 1.5120967741935486e-06, "loss": 4.8105, "step": 90 }, { "epoch": 0.0, "learning_rate": 1.6801075268817204e-06, "loss": 4.8286, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.8481182795698927e-06, "loss": 4.9951, "step": 110 }, { "epoch": 0.0, "learning_rate": 2.0161290322580646e-06, "loss": 4.8796, "step": 120 }, { "epoch": 0.0, "learning_rate": 2.184139784946237e-06, "loss": 4.8575, "step": 130 }, { "epoch": 0.0, "learning_rate": 2.3521505376344088e-06, "loss": 4.8098, "step": 140 }, { "epoch": 0.01, "learning_rate": 2.5201612903225806e-06, "loss": 4.8508, "step": 150 }, { "epoch": 0.01, "learning_rate": 2.688172043010753e-06, "loss": 4.9504, "step": 160 }, { "epoch": 0.01, "learning_rate": 2.856182795698925e-06, "loss": 4.7738, "step": 170 }, { "epoch": 0.01, "learning_rate": 3.024193548387097e-06, "loss": 4.9224, "step": 180 }, { "epoch": 0.01, "learning_rate": 3.192204301075269e-06, "loss": 4.9128, "step": 190 }, { "epoch": 0.01, "learning_rate": 3.360215053763441e-06, "loss": 4.9313, "step": 200 }, { "epoch": 0.01, "learning_rate": 3.5282258064516136e-06, "loss": 4.9543, "step": 210 }, { "epoch": 0.01, "learning_rate": 3.6962365591397855e-06, "loss": 4.8187, "step": 220 }, { "epoch": 0.01, "learning_rate": 3.864247311827957e-06, "loss": 4.7944, "step": 230 }, { "epoch": 0.01, "learning_rate": 4.032258064516129e-06, "loss": 4.9147, "step": 240 }, { "epoch": 0.01, "learning_rate": 4.200268817204301e-06, "loss": 4.844, "step": 250 }, { "epoch": 0.01, "learning_rate": 4.368279569892474e-06, "loss": 4.8138, "step": 260 }, { "epoch": 0.01, "learning_rate": 4.536290322580646e-06, "loss": 4.8867, "step": 270 }, { "epoch": 0.01, "learning_rate": 4.7043010752688175e-06, "loss": 4.861, "step": 280 }, { "epoch": 0.01, "learning_rate": 4.872311827956989e-06, "loss": 4.8429, "step": 290 }, { "epoch": 0.01, "learning_rate": 5.040322580645161e-06, "loss": 4.8064, "step": 300 }, { "epoch": 0.01, "learning_rate": 5.208333333333334e-06, "loss": 4.9924, "step": 310 }, { "epoch": 0.01, "learning_rate": 5.376344086021506e-06, "loss": 4.8166, "step": 320 }, { "epoch": 0.01, "learning_rate": 5.544354838709678e-06, "loss": 4.7953, "step": 330 }, { "epoch": 0.01, "learning_rate": 5.71236559139785e-06, "loss": 4.8624, "step": 340 }, { "epoch": 0.01, "learning_rate": 5.880376344086022e-06, "loss": 4.9109, "step": 350 }, { "epoch": 0.01, "learning_rate": 6.048387096774194e-06, "loss": 4.785, "step": 360 }, { "epoch": 0.01, "learning_rate": 6.216397849462366e-06, "loss": 4.8804, "step": 370 }, { "epoch": 0.01, "learning_rate": 6.384408602150538e-06, "loss": 4.8781, "step": 380 }, { "epoch": 0.01, "learning_rate": 6.55241935483871e-06, "loss": 4.9384, "step": 390 }, { "epoch": 0.01, "learning_rate": 6.720430107526882e-06, "loss": 4.8682, "step": 400 }, { "epoch": 0.01, "learning_rate": 6.888440860215054e-06, "loss": 4.9125, "step": 410 }, { "epoch": 0.01, "learning_rate": 7.056451612903227e-06, "loss": 4.9808, "step": 420 }, { "epoch": 0.01, "learning_rate": 7.224462365591399e-06, "loss": 4.9357, "step": 430 }, { "epoch": 0.01, "learning_rate": 7.392473118279571e-06, "loss": 4.8108, "step": 440 }, { "epoch": 0.02, "learning_rate": 7.560483870967743e-06, "loss": 4.8287, "step": 450 }, { "epoch": 0.02, "learning_rate": 7.728494623655915e-06, "loss": 4.829, "step": 460 }, { "epoch": 0.02, "learning_rate": 7.896505376344086e-06, "loss": 4.8015, "step": 470 }, { "epoch": 0.02, "learning_rate": 8.064516129032258e-06, "loss": 4.8736, "step": 480 }, { "epoch": 0.02, "learning_rate": 8.23252688172043e-06, "loss": 4.7864, "step": 490 }, { "epoch": 0.02, "learning_rate": 8.400537634408602e-06, "loss": 4.898, "step": 500 }, { "epoch": 0.02, "learning_rate": 8.568548387096773e-06, "loss": 4.8142, "step": 510 }, { "epoch": 0.02, "learning_rate": 8.736559139784948e-06, "loss": 4.8607, "step": 520 }, { "epoch": 0.02, "learning_rate": 8.904569892473119e-06, "loss": 4.9351, "step": 530 }, { "epoch": 0.02, "learning_rate": 9.072580645161291e-06, "loss": 4.7266, "step": 540 }, { "epoch": 0.02, "learning_rate": 9.240591397849462e-06, "loss": 4.8632, "step": 550 }, { "epoch": 0.02, "learning_rate": 9.408602150537635e-06, "loss": 4.8303, "step": 560 }, { "epoch": 0.02, "learning_rate": 9.576612903225806e-06, "loss": 4.8754, "step": 570 }, { "epoch": 0.02, "learning_rate": 9.744623655913979e-06, "loss": 4.7738, "step": 580 }, { "epoch": 0.02, "learning_rate": 9.91263440860215e-06, "loss": 4.9598, "step": 590 }, { "epoch": 0.02, "learning_rate": 1.0080645161290323e-05, "loss": 4.9171, "step": 600 }, { "epoch": 0.02, "learning_rate": 1.0248655913978495e-05, "loss": 4.8422, "step": 610 }, { "epoch": 0.02, "learning_rate": 1.0416666666666668e-05, "loss": 4.8404, "step": 620 }, { "epoch": 0.02, "learning_rate": 1.0584677419354839e-05, "loss": 4.914, "step": 630 }, { "epoch": 0.02, "learning_rate": 1.0752688172043012e-05, "loss": 4.8882, "step": 640 }, { "epoch": 0.02, "learning_rate": 1.0920698924731183e-05, "loss": 4.8929, "step": 650 }, { "epoch": 0.02, "learning_rate": 1.1088709677419356e-05, "loss": 4.8637, "step": 660 }, { "epoch": 0.02, "learning_rate": 1.1256720430107527e-05, "loss": 4.8609, "step": 670 }, { "epoch": 0.02, "learning_rate": 1.14247311827957e-05, "loss": 4.7739, "step": 680 }, { "epoch": 0.02, "learning_rate": 1.159274193548387e-05, "loss": 4.8239, "step": 690 }, { "epoch": 0.02, "learning_rate": 1.1760752688172045e-05, "loss": 4.8057, "step": 700 }, { "epoch": 0.02, "learning_rate": 1.1928763440860216e-05, "loss": 4.8922, "step": 710 }, { "epoch": 0.02, "learning_rate": 1.2096774193548388e-05, "loss": 4.7895, "step": 720 }, { "epoch": 0.02, "learning_rate": 1.226478494623656e-05, "loss": 4.8597, "step": 730 }, { "epoch": 0.02, "learning_rate": 1.2432795698924732e-05, "loss": 4.9904, "step": 740 }, { "epoch": 0.03, "learning_rate": 1.2600806451612903e-05, "loss": 4.839, "step": 750 }, { "epoch": 0.03, "learning_rate": 1.2768817204301076e-05, "loss": 4.8108, "step": 760 }, { "epoch": 0.03, "learning_rate": 1.2936827956989247e-05, "loss": 4.8368, "step": 770 }, { "epoch": 0.03, "learning_rate": 1.310483870967742e-05, "loss": 4.8011, "step": 780 }, { "epoch": 0.03, "learning_rate": 1.327284946236559e-05, "loss": 4.8465, "step": 790 }, { "epoch": 0.03, "learning_rate": 1.3440860215053763e-05, "loss": 4.86, "step": 800 }, { "epoch": 0.03, "learning_rate": 1.3608870967741934e-05, "loss": 4.9349, "step": 810 }, { "epoch": 0.03, "learning_rate": 1.3776881720430107e-05, "loss": 4.8882, "step": 820 }, { "epoch": 0.03, "learning_rate": 1.3944892473118278e-05, "loss": 4.8517, "step": 830 }, { "epoch": 0.03, "learning_rate": 1.4112903225806454e-05, "loss": 4.7464, "step": 840 }, { "epoch": 0.03, "learning_rate": 1.4280913978494625e-05, "loss": 4.7981, "step": 850 }, { "epoch": 0.03, "learning_rate": 1.4448924731182798e-05, "loss": 4.8219, "step": 860 }, { "epoch": 0.03, "learning_rate": 1.4616935483870969e-05, "loss": 4.8314, "step": 870 }, { "epoch": 0.03, "learning_rate": 1.4784946236559142e-05, "loss": 4.862, "step": 880 }, { "epoch": 0.03, "learning_rate": 1.4952956989247313e-05, "loss": 4.885, "step": 890 }, { "epoch": 0.03, "learning_rate": 1.5120967741935486e-05, "loss": 4.7596, "step": 900 }, { "epoch": 0.03, "learning_rate": 1.5288978494623658e-05, "loss": 4.8378, "step": 910 }, { "epoch": 0.03, "learning_rate": 1.545698924731183e-05, "loss": 4.8938, "step": 920 }, { "epoch": 0.03, "learning_rate": 1.5625e-05, "loss": 4.7002, "step": 930 }, { "epoch": 0.03, "learning_rate": 1.579301075268817e-05, "loss": 4.781, "step": 940 }, { "epoch": 0.03, "learning_rate": 1.5961021505376346e-05, "loss": 4.9581, "step": 950 }, { "epoch": 0.03, "learning_rate": 1.6129032258064517e-05, "loss": 4.8939, "step": 960 }, { "epoch": 0.03, "learning_rate": 1.6297043010752688e-05, "loss": 4.8499, "step": 970 }, { "epoch": 0.03, "learning_rate": 1.646505376344086e-05, "loss": 4.7838, "step": 980 }, { "epoch": 0.03, "learning_rate": 1.6633064516129033e-05, "loss": 4.8885, "step": 990 }, { "epoch": 0.03, "learning_rate": 1.6801075268817204e-05, "loss": 4.9412, "step": 1000 }, { "epoch": 0.03, "learning_rate": 1.6969086021505375e-05, "loss": 4.8294, "step": 1010 }, { "epoch": 0.03, "learning_rate": 1.7137096774193546e-05, "loss": 4.7796, "step": 1020 }, { "epoch": 0.03, "learning_rate": 1.7305107526881724e-05, "loss": 4.8445, "step": 1030 }, { "epoch": 0.03, "learning_rate": 1.7473118279569895e-05, "loss": 4.919, "step": 1040 }, { "epoch": 0.04, "learning_rate": 1.7641129032258066e-05, "loss": 4.8516, "step": 1050 }, { "epoch": 0.04, "learning_rate": 1.7809139784946237e-05, "loss": 4.9053, "step": 1060 }, { "epoch": 0.04, "learning_rate": 1.797715053763441e-05, "loss": 4.844, "step": 1070 }, { "epoch": 0.04, "learning_rate": 1.8145161290322583e-05, "loss": 4.8267, "step": 1080 }, { "epoch": 0.04, "learning_rate": 1.8313172043010754e-05, "loss": 4.793, "step": 1090 }, { "epoch": 0.04, "learning_rate": 1.8481182795698925e-05, "loss": 4.786, "step": 1100 }, { "epoch": 0.04, "learning_rate": 1.86491935483871e-05, "loss": 4.7492, "step": 1110 }, { "epoch": 0.04, "learning_rate": 1.881720430107527e-05, "loss": 4.9087, "step": 1120 }, { "epoch": 0.04, "learning_rate": 1.898521505376344e-05, "loss": 4.8693, "step": 1130 }, { "epoch": 0.04, "learning_rate": 1.9153225806451612e-05, "loss": 4.794, "step": 1140 }, { "epoch": 0.04, "learning_rate": 1.9321236559139787e-05, "loss": 4.8913, "step": 1150 }, { "epoch": 0.04, "learning_rate": 1.9489247311827958e-05, "loss": 4.7268, "step": 1160 }, { "epoch": 0.04, "learning_rate": 1.965725806451613e-05, "loss": 4.85, "step": 1170 }, { "epoch": 0.04, "learning_rate": 1.98252688172043e-05, "loss": 4.879, "step": 1180 }, { "epoch": 0.04, "learning_rate": 1.9993279569892474e-05, "loss": 4.8086, "step": 1190 }, { "epoch": 0.04, "learning_rate": 2.0161290322580645e-05, "loss": 4.8877, "step": 1200 }, { "epoch": 0.04, "learning_rate": 2.032930107526882e-05, "loss": 4.8549, "step": 1210 }, { "epoch": 0.04, "learning_rate": 2.049731182795699e-05, "loss": 4.837, "step": 1220 }, { "epoch": 0.04, "learning_rate": 2.0665322580645165e-05, "loss": 4.8102, "step": 1230 }, { "epoch": 0.04, "learning_rate": 2.0833333333333336e-05, "loss": 4.7385, "step": 1240 }, { "epoch": 0.04, "learning_rate": 2.1001344086021507e-05, "loss": 4.7322, "step": 1250 }, { "epoch": 0.04, "learning_rate": 2.1169354838709678e-05, "loss": 4.7797, "step": 1260 }, { "epoch": 0.04, "learning_rate": 2.1337365591397852e-05, "loss": 4.7483, "step": 1270 }, { "epoch": 0.04, "learning_rate": 2.1505376344086024e-05, "loss": 4.9216, "step": 1280 }, { "epoch": 0.04, "learning_rate": 2.1673387096774195e-05, "loss": 4.9207, "step": 1290 }, { "epoch": 0.04, "learning_rate": 2.1841397849462366e-05, "loss": 4.7954, "step": 1300 }, { "epoch": 0.04, "learning_rate": 2.200940860215054e-05, "loss": 4.7491, "step": 1310 }, { "epoch": 0.04, "learning_rate": 2.217741935483871e-05, "loss": 4.6397, "step": 1320 }, { "epoch": 0.04, "learning_rate": 2.2345430107526882e-05, "loss": 4.8405, "step": 1330 }, { "epoch": 0.05, "learning_rate": 2.2513440860215053e-05, "loss": 4.6964, "step": 1340 }, { "epoch": 0.05, "learning_rate": 2.2681451612903227e-05, "loss": 4.7082, "step": 1350 }, { "epoch": 0.05, "learning_rate": 2.28494623655914e-05, "loss": 4.7111, "step": 1360 }, { "epoch": 0.05, "learning_rate": 2.301747311827957e-05, "loss": 4.7278, "step": 1370 }, { "epoch": 0.05, "learning_rate": 2.318548387096774e-05, "loss": 4.7585, "step": 1380 }, { "epoch": 0.05, "learning_rate": 2.3353494623655915e-05, "loss": 4.8247, "step": 1390 }, { "epoch": 0.05, "learning_rate": 2.352150537634409e-05, "loss": 4.7762, "step": 1400 }, { "epoch": 0.05, "learning_rate": 2.368951612903226e-05, "loss": 4.7153, "step": 1410 }, { "epoch": 0.05, "learning_rate": 2.385752688172043e-05, "loss": 4.7031, "step": 1420 }, { "epoch": 0.05, "learning_rate": 2.4025537634408606e-05, "loss": 4.6676, "step": 1430 }, { "epoch": 0.05, "learning_rate": 2.4193548387096777e-05, "loss": 4.7177, "step": 1440 }, { "epoch": 0.05, "learning_rate": 2.4361559139784948e-05, "loss": 4.8695, "step": 1450 }, { "epoch": 0.05, "learning_rate": 2.452956989247312e-05, "loss": 4.8219, "step": 1460 }, { "epoch": 0.05, "learning_rate": 2.4697580645161293e-05, "loss": 4.7481, "step": 1470 }, { "epoch": 0.05, "learning_rate": 2.4865591397849464e-05, "loss": 4.8556, "step": 1480 }, { "epoch": 0.05, "learning_rate": 2.503360215053764e-05, "loss": 4.7572, "step": 1490 }, { "epoch": 0.05, "learning_rate": 2.5201612903225806e-05, "loss": 4.6192, "step": 1500 }, { "epoch": 0.05, "learning_rate": 2.536962365591398e-05, "loss": 4.723, "step": 1510 }, { "epoch": 0.05, "learning_rate": 2.5537634408602152e-05, "loss": 4.8091, "step": 1520 }, { "epoch": 0.05, "learning_rate": 2.5705645161290326e-05, "loss": 4.7191, "step": 1530 }, { "epoch": 0.05, "learning_rate": 2.5873655913978494e-05, "loss": 4.658, "step": 1540 }, { "epoch": 0.05, "learning_rate": 2.604166666666667e-05, "loss": 4.8029, "step": 1550 }, { "epoch": 0.05, "learning_rate": 2.620967741935484e-05, "loss": 4.5458, "step": 1560 }, { "epoch": 0.05, "learning_rate": 2.6377688172043014e-05, "loss": 4.9067, "step": 1570 }, { "epoch": 0.05, "learning_rate": 2.654569892473118e-05, "loss": 4.4242, "step": 1580 }, { "epoch": 0.05, "learning_rate": 2.6713709677419356e-05, "loss": 4.6519, "step": 1590 }, { "epoch": 0.05, "learning_rate": 2.6881720430107527e-05, "loss": 4.6038, "step": 1600 }, { "epoch": 0.05, "learning_rate": 2.70497311827957e-05, "loss": 4.7452, "step": 1610 }, { "epoch": 0.05, "learning_rate": 2.721774193548387e-05, "loss": 4.707, "step": 1620 }, { "epoch": 0.05, "learning_rate": 2.7385752688172043e-05, "loss": 4.6429, "step": 1630 }, { "epoch": 0.06, "learning_rate": 2.7553763440860214e-05, "loss": 4.8403, "step": 1640 }, { "epoch": 0.06, "learning_rate": 2.772177419354839e-05, "loss": 4.5688, "step": 1650 }, { "epoch": 0.06, "learning_rate": 2.7889784946236556e-05, "loss": 4.7081, "step": 1660 }, { "epoch": 0.06, "learning_rate": 2.8057795698924734e-05, "loss": 4.6276, "step": 1670 }, { "epoch": 0.06, "learning_rate": 2.822580645161291e-05, "loss": 4.6528, "step": 1680 }, { "epoch": 0.06, "learning_rate": 2.8393817204301076e-05, "loss": 4.8381, "step": 1690 }, { "epoch": 0.06, "learning_rate": 2.856182795698925e-05, "loss": 4.9323, "step": 1700 }, { "epoch": 0.06, "learning_rate": 2.8729838709677422e-05, "loss": 4.5868, "step": 1710 }, { "epoch": 0.06, "learning_rate": 2.8897849462365596e-05, "loss": 4.9298, "step": 1720 }, { "epoch": 0.06, "learning_rate": 2.9065860215053764e-05, "loss": 4.6908, "step": 1730 }, { "epoch": 0.06, "learning_rate": 2.9233870967741938e-05, "loss": 4.6427, "step": 1740 }, { "epoch": 0.06, "learning_rate": 2.940188172043011e-05, "loss": 4.6611, "step": 1750 }, { "epoch": 0.06, "learning_rate": 2.9569892473118284e-05, "loss": 4.6569, "step": 1760 }, { "epoch": 0.06, "learning_rate": 2.973790322580645e-05, "loss": 4.4677, "step": 1770 }, { "epoch": 0.06, "learning_rate": 2.9905913978494626e-05, "loss": 4.8696, "step": 1780 }, { "epoch": 0.06, "learning_rate": 3.0073924731182797e-05, "loss": 4.6955, "step": 1790 }, { "epoch": 0.06, "learning_rate": 3.024193548387097e-05, "loss": 4.5123, "step": 1800 }, { "epoch": 0.06, "learning_rate": 3.040994623655914e-05, "loss": 4.8245, "step": 1810 }, { "epoch": 0.06, "learning_rate": 3.0577956989247317e-05, "loss": 4.6848, "step": 1820 }, { "epoch": 0.06, "learning_rate": 3.074596774193548e-05, "loss": 4.6223, "step": 1830 }, { "epoch": 0.06, "learning_rate": 3.091397849462366e-05, "loss": 4.8519, "step": 1840 }, { "epoch": 0.06, "learning_rate": 3.108198924731183e-05, "loss": 4.6654, "step": 1850 }, { "epoch": 0.06, "learning_rate": 3.125e-05, "loss": 4.7357, "step": 1860 }, { "epoch": 0.06, "learning_rate": 3.141801075268818e-05, "loss": 4.7567, "step": 1870 }, { "epoch": 0.06, "learning_rate": 3.158602150537634e-05, "loss": 4.7637, "step": 1880 }, { "epoch": 0.06, "learning_rate": 3.175403225806452e-05, "loss": 4.6489, "step": 1890 }, { "epoch": 0.06, "learning_rate": 3.192204301075269e-05, "loss": 4.7715, "step": 1900 }, { "epoch": 0.06, "learning_rate": 3.209005376344086e-05, "loss": 4.5322, "step": 1910 }, { "epoch": 0.06, "learning_rate": 3.2258064516129034e-05, "loss": 4.7145, "step": 1920 }, { "epoch": 0.06, "learning_rate": 3.2426075268817205e-05, "loss": 4.8788, "step": 1930 }, { "epoch": 0.07, "learning_rate": 3.2594086021505376e-05, "loss": 4.7815, "step": 1940 }, { "epoch": 0.07, "learning_rate": 3.2762096774193553e-05, "loss": 4.6122, "step": 1950 }, { "epoch": 0.07, "learning_rate": 3.293010752688172e-05, "loss": 4.6607, "step": 1960 }, { "epoch": 0.07, "learning_rate": 3.3098118279569895e-05, "loss": 4.7085, "step": 1970 }, { "epoch": 0.07, "learning_rate": 3.3266129032258067e-05, "loss": 4.7526, "step": 1980 }, { "epoch": 0.07, "learning_rate": 3.343413978494624e-05, "loss": 4.4635, "step": 1990 }, { "epoch": 0.07, "learning_rate": 3.360215053763441e-05, "loss": 4.8464, "step": 2000 }, { "epoch": 0.07, "learning_rate": 3.377016129032258e-05, "loss": 4.8352, "step": 2010 }, { "epoch": 0.07, "learning_rate": 3.393817204301075e-05, "loss": 4.7684, "step": 2020 }, { "epoch": 0.07, "learning_rate": 3.410618279569893e-05, "loss": 4.4726, "step": 2030 }, { "epoch": 0.07, "learning_rate": 3.427419354838709e-05, "loss": 4.8434, "step": 2040 }, { "epoch": 0.07, "learning_rate": 3.444220430107527e-05, "loss": 4.5217, "step": 2050 }, { "epoch": 0.07, "learning_rate": 3.461021505376345e-05, "loss": 4.5219, "step": 2060 }, { "epoch": 0.07, "learning_rate": 3.477822580645161e-05, "loss": 4.7795, "step": 2070 }, { "epoch": 0.07, "learning_rate": 3.494623655913979e-05, "loss": 4.1376, "step": 2080 }, { "epoch": 0.07, "learning_rate": 3.511424731182796e-05, "loss": 4.9154, "step": 2090 }, { "epoch": 0.07, "learning_rate": 3.528225806451613e-05, "loss": 4.7189, "step": 2100 }, { "epoch": 0.07, "learning_rate": 3.5450268817204303e-05, "loss": 4.8433, "step": 2110 }, { "epoch": 0.07, "learning_rate": 3.5618279569892474e-05, "loss": 4.7001, "step": 2120 }, { "epoch": 0.07, "learning_rate": 3.5786290322580645e-05, "loss": 4.6825, "step": 2130 }, { "epoch": 0.07, "learning_rate": 3.595430107526882e-05, "loss": 4.7596, "step": 2140 }, { "epoch": 0.07, "learning_rate": 3.612231182795699e-05, "loss": 4.7217, "step": 2150 }, { "epoch": 0.07, "learning_rate": 3.6290322580645165e-05, "loss": 4.7864, "step": 2160 }, { "epoch": 0.07, "learning_rate": 3.6458333333333336e-05, "loss": 4.8844, "step": 2170 }, { "epoch": 0.07, "learning_rate": 3.662634408602151e-05, "loss": 4.828, "step": 2180 }, { "epoch": 0.07, "learning_rate": 3.679435483870968e-05, "loss": 4.5349, "step": 2190 }, { "epoch": 0.07, "learning_rate": 3.696236559139785e-05, "loss": 4.7096, "step": 2200 }, { "epoch": 0.07, "learning_rate": 3.713037634408602e-05, "loss": 4.7755, "step": 2210 }, { "epoch": 0.07, "learning_rate": 3.72983870967742e-05, "loss": 4.7531, "step": 2220 }, { "epoch": 0.07, "learning_rate": 3.746639784946236e-05, "loss": 4.5409, "step": 2230 }, { "epoch": 0.08, "learning_rate": 3.763440860215054e-05, "loss": 4.6511, "step": 2240 }, { "epoch": 0.08, "learning_rate": 3.780241935483871e-05, "loss": 4.5239, "step": 2250 }, { "epoch": 0.08, "learning_rate": 3.797043010752688e-05, "loss": 4.6094, "step": 2260 }, { "epoch": 0.08, "learning_rate": 3.813844086021506e-05, "loss": 4.4195, "step": 2270 }, { "epoch": 0.08, "learning_rate": 3.8306451612903224e-05, "loss": 4.5219, "step": 2280 }, { "epoch": 0.08, "learning_rate": 3.84744623655914e-05, "loss": 4.7089, "step": 2290 }, { "epoch": 0.08, "learning_rate": 3.864247311827957e-05, "loss": 4.7481, "step": 2300 }, { "epoch": 0.08, "learning_rate": 3.8810483870967744e-05, "loss": 4.7719, "step": 2310 }, { "epoch": 0.08, "learning_rate": 3.8978494623655915e-05, "loss": 4.4219, "step": 2320 }, { "epoch": 0.08, "learning_rate": 3.9146505376344086e-05, "loss": 4.4863, "step": 2330 }, { "epoch": 0.08, "learning_rate": 3.931451612903226e-05, "loss": 4.6597, "step": 2340 }, { "epoch": 0.08, "learning_rate": 3.9482526881720435e-05, "loss": 4.5431, "step": 2350 }, { "epoch": 0.08, "learning_rate": 3.96505376344086e-05, "loss": 4.8282, "step": 2360 }, { "epoch": 0.08, "learning_rate": 3.981854838709678e-05, "loss": 4.6723, "step": 2370 }, { "epoch": 0.08, "learning_rate": 3.998655913978495e-05, "loss": 4.7072, "step": 2380 }, { "epoch": 0.08, "learning_rate": 4.015456989247312e-05, "loss": 4.405, "step": 2390 }, { "epoch": 0.08, "learning_rate": 4.032258064516129e-05, "loss": 4.6087, "step": 2400 }, { "epoch": 0.08, "learning_rate": 4.049059139784946e-05, "loss": 4.4848, "step": 2410 }, { "epoch": 0.08, "learning_rate": 4.065860215053764e-05, "loss": 4.7085, "step": 2420 }, { "epoch": 0.08, "learning_rate": 4.082661290322581e-05, "loss": 4.5809, "step": 2430 }, { "epoch": 0.08, "learning_rate": 4.099462365591398e-05, "loss": 4.5694, "step": 2440 }, { "epoch": 0.08, "learning_rate": 4.116263440860215e-05, "loss": 4.6146, "step": 2450 }, { "epoch": 0.08, "learning_rate": 4.133064516129033e-05, "loss": 4.6111, "step": 2460 }, { "epoch": 0.08, "learning_rate": 4.1498655913978494e-05, "loss": 4.5915, "step": 2470 }, { "epoch": 0.08, "learning_rate": 4.166666666666667e-05, "loss": 4.5425, "step": 2480 }, { "epoch": 0.08, "learning_rate": 4.1834677419354836e-05, "loss": 4.344, "step": 2490 }, { "epoch": 0.08, "learning_rate": 4.2002688172043014e-05, "loss": 4.3079, "step": 2500 }, { "epoch": 0.08, "learning_rate": 4.2170698924731185e-05, "loss": 4.3301, "step": 2510 }, { "epoch": 0.08, "learning_rate": 4.2338709677419356e-05, "loss": 4.7686, "step": 2520 }, { "epoch": 0.09, "learning_rate": 4.250672043010753e-05, "loss": 4.6731, "step": 2530 }, { "epoch": 0.09, "learning_rate": 4.2674731182795705e-05, "loss": 4.3259, "step": 2540 }, { "epoch": 0.09, "learning_rate": 4.284274193548387e-05, "loss": 4.8506, "step": 2550 }, { "epoch": 0.09, "learning_rate": 4.301075268817205e-05, "loss": 4.5715, "step": 2560 }, { "epoch": 0.09, "learning_rate": 4.317876344086022e-05, "loss": 4.6516, "step": 2570 }, { "epoch": 0.09, "learning_rate": 4.334677419354839e-05, "loss": 4.6917, "step": 2580 }, { "epoch": 0.09, "learning_rate": 4.351478494623656e-05, "loss": 4.6217, "step": 2590 }, { "epoch": 0.09, "learning_rate": 4.368279569892473e-05, "loss": 4.568, "step": 2600 }, { "epoch": 0.09, "learning_rate": 4.385080645161291e-05, "loss": 4.5524, "step": 2610 }, { "epoch": 0.09, "learning_rate": 4.401881720430108e-05, "loss": 4.4783, "step": 2620 }, { "epoch": 0.09, "learning_rate": 4.418682795698925e-05, "loss": 4.6619, "step": 2630 }, { "epoch": 0.09, "learning_rate": 4.435483870967742e-05, "loss": 4.5148, "step": 2640 }, { "epoch": 0.09, "learning_rate": 4.452284946236559e-05, "loss": 4.5686, "step": 2650 }, { "epoch": 0.09, "learning_rate": 4.4690860215053764e-05, "loss": 4.6879, "step": 2660 }, { "epoch": 0.09, "learning_rate": 4.485887096774194e-05, "loss": 4.7926, "step": 2670 }, { "epoch": 0.09, "learning_rate": 4.5026881720430106e-05, "loss": 4.4609, "step": 2680 }, { "epoch": 0.09, "learning_rate": 4.5194892473118284e-05, "loss": 4.281, "step": 2690 }, { "epoch": 0.09, "learning_rate": 4.5362903225806455e-05, "loss": 4.4718, "step": 2700 }, { "epoch": 0.09, "learning_rate": 4.5530913978494626e-05, "loss": 4.6328, "step": 2710 }, { "epoch": 0.09, "learning_rate": 4.56989247311828e-05, "loss": 4.5215, "step": 2720 }, { "epoch": 0.09, "learning_rate": 4.586693548387097e-05, "loss": 4.3572, "step": 2730 }, { "epoch": 0.09, "learning_rate": 4.603494623655914e-05, "loss": 4.6574, "step": 2740 }, { "epoch": 0.09, "learning_rate": 4.620295698924732e-05, "loss": 4.6526, "step": 2750 }, { "epoch": 0.09, "learning_rate": 4.637096774193548e-05, "loss": 4.716, "step": 2760 }, { "epoch": 0.09, "learning_rate": 4.653897849462366e-05, "loss": 4.4768, "step": 2770 }, { "epoch": 0.09, "learning_rate": 4.670698924731183e-05, "loss": 4.6641, "step": 2780 }, { "epoch": 0.09, "learning_rate": 4.6875e-05, "loss": 4.2275, "step": 2790 }, { "epoch": 0.09, "learning_rate": 4.704301075268818e-05, "loss": 4.4817, "step": 2800 }, { "epoch": 0.09, "learning_rate": 4.721102150537634e-05, "loss": 4.5784, "step": 2810 }, { "epoch": 0.09, "learning_rate": 4.737903225806452e-05, "loss": 4.4939, "step": 2820 }, { "epoch": 0.1, "learning_rate": 4.754704301075269e-05, "loss": 4.623, "step": 2830 }, { "epoch": 0.1, "learning_rate": 4.771505376344086e-05, "loss": 4.4391, "step": 2840 }, { "epoch": 0.1, "learning_rate": 4.7883064516129034e-05, "loss": 4.5203, "step": 2850 }, { "epoch": 0.1, "learning_rate": 4.805107526881721e-05, "loss": 4.2879, "step": 2860 }, { "epoch": 0.1, "learning_rate": 4.8219086021505376e-05, "loss": 4.5818, "step": 2870 }, { "epoch": 0.1, "learning_rate": 4.8387096774193554e-05, "loss": 4.4625, "step": 2880 }, { "epoch": 0.1, "learning_rate": 4.855510752688172e-05, "loss": 4.6607, "step": 2890 }, { "epoch": 0.1, "learning_rate": 4.8723118279569896e-05, "loss": 4.2406, "step": 2900 }, { "epoch": 0.1, "learning_rate": 4.889112903225807e-05, "loss": 4.4246, "step": 2910 }, { "epoch": 0.1, "learning_rate": 4.905913978494624e-05, "loss": 4.6208, "step": 2920 }, { "epoch": 0.1, "learning_rate": 4.922715053763441e-05, "loss": 4.3164, "step": 2930 }, { "epoch": 0.1, "learning_rate": 4.939516129032259e-05, "loss": 4.3962, "step": 2940 }, { "epoch": 0.1, "learning_rate": 4.956317204301075e-05, "loss": 4.7987, "step": 2950 }, { "epoch": 0.1, "learning_rate": 4.973118279569893e-05, "loss": 4.5732, "step": 2960 }, { "epoch": 0.1, "learning_rate": 4.98991935483871e-05, "loss": 4.4208, "step": 2970 }, { "epoch": 0.1, "learning_rate": 4.9992532855436084e-05, "loss": 4.7173, "step": 2980 }, { "epoch": 0.1, "learning_rate": 4.997386499402629e-05, "loss": 4.4456, "step": 2990 }, { "epoch": 0.1, "learning_rate": 4.995519713261649e-05, "loss": 4.2572, "step": 3000 }, { "epoch": 0.1, "learning_rate": 4.993652927120669e-05, "loss": 4.7157, "step": 3010 }, { "epoch": 0.1, "learning_rate": 4.9917861409796897e-05, "loss": 4.4885, "step": 3020 }, { "epoch": 0.1, "learning_rate": 4.98991935483871e-05, "loss": 4.2594, "step": 3030 }, { "epoch": 0.1, "learning_rate": 4.98805256869773e-05, "loss": 4.6185, "step": 3040 }, { "epoch": 0.1, "learning_rate": 4.9861857825567506e-05, "loss": 4.5417, "step": 3050 }, { "epoch": 0.1, "learning_rate": 4.984318996415771e-05, "loss": 4.4005, "step": 3060 }, { "epoch": 0.1, "learning_rate": 4.982452210274791e-05, "loss": 4.7185, "step": 3070 }, { "epoch": 0.1, "learning_rate": 4.980585424133811e-05, "loss": 4.5395, "step": 3080 }, { "epoch": 0.1, "learning_rate": 4.978718637992832e-05, "loss": 4.331, "step": 3090 }, { "epoch": 0.1, "learning_rate": 4.976851851851852e-05, "loss": 4.4664, "step": 3100 }, { "epoch": 0.1, "learning_rate": 4.9749850657108726e-05, "loss": 4.3807, "step": 3110 }, { "epoch": 0.1, "learning_rate": 4.973118279569893e-05, "loss": 4.2413, "step": 3120 }, { "epoch": 0.11, "learning_rate": 4.9712514934289125e-05, "loss": 4.5774, "step": 3130 }, { "epoch": 0.11, "learning_rate": 4.9693847072879335e-05, "loss": 4.5053, "step": 3140 }, { "epoch": 0.11, "learning_rate": 4.967517921146954e-05, "loss": 4.6295, "step": 3150 }, { "epoch": 0.11, "learning_rate": 4.965651135005974e-05, "loss": 4.775, "step": 3160 }, { "epoch": 0.11, "learning_rate": 4.9637843488649945e-05, "loss": 5.0108, "step": 3170 }, { "epoch": 0.11, "learning_rate": 4.961917562724014e-05, "loss": 4.3621, "step": 3180 }, { "epoch": 0.11, "learning_rate": 4.960050776583035e-05, "loss": 4.4008, "step": 3190 }, { "epoch": 0.11, "learning_rate": 4.9581839904420555e-05, "loss": 4.4939, "step": 3200 }, { "epoch": 0.11, "learning_rate": 4.956317204301075e-05, "loss": 4.5212, "step": 3210 }, { "epoch": 0.11, "learning_rate": 4.954450418160096e-05, "loss": 4.6402, "step": 3220 }, { "epoch": 0.11, "learning_rate": 4.952583632019116e-05, "loss": 4.4374, "step": 3230 }, { "epoch": 0.11, "learning_rate": 4.950716845878137e-05, "loss": 4.052, "step": 3240 }, { "epoch": 0.11, "learning_rate": 4.948850059737157e-05, "loss": 4.3175, "step": 3250 }, { "epoch": 0.11, "learning_rate": 4.946983273596177e-05, "loss": 4.4946, "step": 3260 }, { "epoch": 0.11, "learning_rate": 4.945116487455198e-05, "loss": 4.5503, "step": 3270 }, { "epoch": 0.11, "learning_rate": 4.9432497013142173e-05, "loss": 4.4767, "step": 3280 }, { "epoch": 0.11, "learning_rate": 4.9413829151732383e-05, "loss": 4.3991, "step": 3290 }, { "epoch": 0.11, "learning_rate": 4.939516129032259e-05, "loss": 4.6221, "step": 3300 }, { "epoch": 0.11, "learning_rate": 4.937649342891278e-05, "loss": 4.4263, "step": 3310 }, { "epoch": 0.11, "learning_rate": 4.935782556750299e-05, "loss": 4.4661, "step": 3320 }, { "epoch": 0.11, "learning_rate": 4.933915770609319e-05, "loss": 4.7345, "step": 3330 }, { "epoch": 0.11, "learning_rate": 4.932048984468339e-05, "loss": 4.5323, "step": 3340 }, { "epoch": 0.11, "learning_rate": 4.93018219832736e-05, "loss": 4.5434, "step": 3350 }, { "epoch": 0.11, "learning_rate": 4.92831541218638e-05, "loss": 4.5242, "step": 3360 }, { "epoch": 0.11, "learning_rate": 4.926448626045401e-05, "loss": 4.4653, "step": 3370 }, { "epoch": 0.11, "learning_rate": 4.9245818399044206e-05, "loss": 4.2482, "step": 3380 }, { "epoch": 0.11, "learning_rate": 4.922715053763441e-05, "loss": 4.4161, "step": 3390 }, { "epoch": 0.11, "learning_rate": 4.920848267622461e-05, "loss": 4.459, "step": 3400 }, { "epoch": 0.11, "learning_rate": 4.9189814814814815e-05, "loss": 4.6974, "step": 3410 }, { "epoch": 0.11, "learning_rate": 4.9171146953405025e-05, "loss": 4.7484, "step": 3420 }, { "epoch": 0.12, "learning_rate": 4.915247909199522e-05, "loss": 4.553, "step": 3430 }, { "epoch": 0.12, "learning_rate": 4.9133811230585425e-05, "loss": 4.5069, "step": 3440 }, { "epoch": 0.12, "learning_rate": 4.911514336917563e-05, "loss": 4.5806, "step": 3450 }, { "epoch": 0.12, "learning_rate": 4.909647550776583e-05, "loss": 4.3154, "step": 3460 }, { "epoch": 0.12, "learning_rate": 4.9077807646356035e-05, "loss": 4.4244, "step": 3470 }, { "epoch": 0.12, "learning_rate": 4.905913978494624e-05, "loss": 4.4991, "step": 3480 }, { "epoch": 0.12, "learning_rate": 4.904047192353644e-05, "loss": 4.1619, "step": 3490 }, { "epoch": 0.12, "learning_rate": 4.9021804062126644e-05, "loss": 4.2876, "step": 3500 }, { "epoch": 0.12, "learning_rate": 4.900313620071685e-05, "loss": 4.2716, "step": 3510 }, { "epoch": 0.12, "learning_rate": 4.898446833930705e-05, "loss": 4.2517, "step": 3520 }, { "epoch": 0.12, "learning_rate": 4.8965800477897254e-05, "loss": 4.4254, "step": 3530 }, { "epoch": 0.12, "learning_rate": 4.894713261648746e-05, "loss": 4.5187, "step": 3540 }, { "epoch": 0.12, "learning_rate": 4.892846475507766e-05, "loss": 4.3443, "step": 3550 }, { "epoch": 0.12, "learning_rate": 4.8909796893667864e-05, "loss": 4.4591, "step": 3560 }, { "epoch": 0.12, "learning_rate": 4.889112903225807e-05, "loss": 4.2675, "step": 3570 }, { "epoch": 0.12, "learning_rate": 4.887246117084827e-05, "loss": 4.4332, "step": 3580 }, { "epoch": 0.12, "learning_rate": 4.885379330943847e-05, "loss": 4.6878, "step": 3590 }, { "epoch": 0.12, "learning_rate": 4.8835125448028677e-05, "loss": 4.2547, "step": 3600 }, { "epoch": 0.12, "learning_rate": 4.881645758661888e-05, "loss": 4.485, "step": 3610 }, { "epoch": 0.12, "learning_rate": 4.879778972520908e-05, "loss": 4.3487, "step": 3620 }, { "epoch": 0.12, "learning_rate": 4.8779121863799286e-05, "loss": 4.081, "step": 3630 }, { "epoch": 0.12, "learning_rate": 4.876045400238949e-05, "loss": 4.349, "step": 3640 }, { "epoch": 0.12, "learning_rate": 4.874178614097969e-05, "loss": 4.3416, "step": 3650 }, { "epoch": 0.12, "learning_rate": 4.8723118279569896e-05, "loss": 4.3979, "step": 3660 }, { "epoch": 0.12, "learning_rate": 4.87044504181601e-05, "loss": 4.4437, "step": 3670 }, { "epoch": 0.12, "learning_rate": 4.86857825567503e-05, "loss": 4.5158, "step": 3680 }, { "epoch": 0.12, "learning_rate": 4.8667114695340505e-05, "loss": 4.3471, "step": 3690 }, { "epoch": 0.12, "learning_rate": 4.864844683393071e-05, "loss": 4.4189, "step": 3700 }, { "epoch": 0.12, "learning_rate": 4.862977897252091e-05, "loss": 4.4054, "step": 3710 }, { "epoch": 0.12, "learning_rate": 4.8611111111111115e-05, "loss": 4.3472, "step": 3720 }, { "epoch": 0.13, "learning_rate": 4.859244324970131e-05, "loss": 4.2868, "step": 3730 }, { "epoch": 0.13, "learning_rate": 4.857377538829152e-05, "loss": 4.5381, "step": 3740 }, { "epoch": 0.13, "learning_rate": 4.855510752688172e-05, "loss": 4.2602, "step": 3750 }, { "epoch": 0.13, "learning_rate": 4.853643966547193e-05, "loss": 4.393, "step": 3760 }, { "epoch": 0.13, "learning_rate": 4.851777180406213e-05, "loss": 4.155, "step": 3770 }, { "epoch": 0.13, "learning_rate": 4.849910394265233e-05, "loss": 4.5707, "step": 3780 }, { "epoch": 0.13, "learning_rate": 4.848043608124254e-05, "loss": 4.2793, "step": 3790 }, { "epoch": 0.13, "learning_rate": 4.8461768219832734e-05, "loss": 4.2988, "step": 3800 }, { "epoch": 0.13, "learning_rate": 4.8443100358422944e-05, "loss": 4.5829, "step": 3810 }, { "epoch": 0.13, "learning_rate": 4.842443249701315e-05, "loss": 4.2576, "step": 3820 }, { "epoch": 0.13, "learning_rate": 4.8405764635603344e-05, "loss": 4.3333, "step": 3830 }, { "epoch": 0.13, "learning_rate": 4.8387096774193554e-05, "loss": 4.2648, "step": 3840 }, { "epoch": 0.13, "learning_rate": 4.836842891278375e-05, "loss": 4.1792, "step": 3850 }, { "epoch": 0.13, "learning_rate": 4.834976105137396e-05, "loss": 4.2942, "step": 3860 }, { "epoch": 0.13, "learning_rate": 4.8331093189964163e-05, "loss": 4.3512, "step": 3870 }, { "epoch": 0.13, "learning_rate": 4.831242532855436e-05, "loss": 4.6654, "step": 3880 }, { "epoch": 0.13, "learning_rate": 4.829375746714457e-05, "loss": 4.3735, "step": 3890 }, { "epoch": 0.13, "learning_rate": 4.8275089605734766e-05, "loss": 4.3627, "step": 3900 }, { "epoch": 0.13, "learning_rate": 4.825642174432497e-05, "loss": 4.4475, "step": 3910 }, { "epoch": 0.13, "learning_rate": 4.823775388291518e-05, "loss": 4.3226, "step": 3920 }, { "epoch": 0.13, "learning_rate": 4.8219086021505376e-05, "loss": 4.2341, "step": 3930 }, { "epoch": 0.13, "learning_rate": 4.8200418160095586e-05, "loss": 4.2264, "step": 3940 }, { "epoch": 0.13, "learning_rate": 4.818175029868578e-05, "loss": 4.1749, "step": 3950 }, { "epoch": 0.13, "learning_rate": 4.8163082437275986e-05, "loss": 4.4451, "step": 3960 }, { "epoch": 0.13, "learning_rate": 4.8144414575866196e-05, "loss": 4.4556, "step": 3970 }, { "epoch": 0.13, "learning_rate": 4.812574671445639e-05, "loss": 4.3421, "step": 3980 }, { "epoch": 0.13, "learning_rate": 4.81070788530466e-05, "loss": 4.4947, "step": 3990 }, { "epoch": 0.13, "learning_rate": 4.80884109916368e-05, "loss": 4.2745, "step": 4000 }, { "epoch": 0.13, "learning_rate": 4.8069743130227e-05, "loss": 4.0593, "step": 4010 }, { "epoch": 0.14, "learning_rate": 4.805107526881721e-05, "loss": 4.3374, "step": 4020 }, { "epoch": 0.14, "learning_rate": 4.803240740740741e-05, "loss": 4.2332, "step": 4030 }, { "epoch": 0.14, "learning_rate": 4.801373954599761e-05, "loss": 4.407, "step": 4040 }, { "epoch": 0.14, "learning_rate": 4.7995071684587815e-05, "loss": 3.8267, "step": 4050 }, { "epoch": 0.14, "learning_rate": 4.797640382317802e-05, "loss": 4.2591, "step": 4060 }, { "epoch": 0.14, "learning_rate": 4.795773596176822e-05, "loss": 4.2383, "step": 4070 }, { "epoch": 0.14, "learning_rate": 4.7939068100358424e-05, "loss": 3.9536, "step": 4080 }, { "epoch": 0.14, "learning_rate": 4.792040023894863e-05, "loss": 4.5887, "step": 4090 }, { "epoch": 0.14, "learning_rate": 4.790173237753883e-05, "loss": 4.3877, "step": 4100 }, { "epoch": 0.14, "learning_rate": 4.7883064516129034e-05, "loss": 4.3213, "step": 4110 }, { "epoch": 0.14, "learning_rate": 4.786439665471924e-05, "loss": 4.0475, "step": 4120 }, { "epoch": 0.14, "learning_rate": 4.784572879330944e-05, "loss": 4.3858, "step": 4130 }, { "epoch": 0.14, "learning_rate": 4.7827060931899644e-05, "loss": 4.2672, "step": 4140 }, { "epoch": 0.14, "learning_rate": 4.780839307048985e-05, "loss": 4.4997, "step": 4150 }, { "epoch": 0.14, "learning_rate": 4.778972520908005e-05, "loss": 4.3638, "step": 4160 }, { "epoch": 0.14, "learning_rate": 4.777105734767025e-05, "loss": 4.3172, "step": 4170 }, { "epoch": 0.14, "learning_rate": 4.7752389486260456e-05, "loss": 4.068, "step": 4180 }, { "epoch": 0.14, "learning_rate": 4.773372162485066e-05, "loss": 4.4311, "step": 4190 }, { "epoch": 0.14, "learning_rate": 4.771505376344086e-05, "loss": 4.5892, "step": 4200 }, { "epoch": 0.14, "learning_rate": 4.7696385902031066e-05, "loss": 4.2604, "step": 4210 }, { "epoch": 0.14, "learning_rate": 4.767771804062127e-05, "loss": 4.2857, "step": 4220 }, { "epoch": 0.14, "learning_rate": 4.765905017921147e-05, "loss": 4.3303, "step": 4230 }, { "epoch": 0.14, "learning_rate": 4.7640382317801676e-05, "loss": 4.2141, "step": 4240 }, { "epoch": 0.14, "learning_rate": 4.762171445639188e-05, "loss": 4.2023, "step": 4250 }, { "epoch": 0.14, "learning_rate": 4.760304659498208e-05, "loss": 4.2967, "step": 4260 }, { "epoch": 0.14, "learning_rate": 4.7584378733572285e-05, "loss": 4.1328, "step": 4270 }, { "epoch": 0.14, "learning_rate": 4.756571087216249e-05, "loss": 4.3922, "step": 4280 }, { "epoch": 0.14, "learning_rate": 4.754704301075269e-05, "loss": 4.3267, "step": 4290 }, { "epoch": 0.14, "learning_rate": 4.752837514934289e-05, "loss": 4.3216, "step": 4300 }, { "epoch": 0.14, "learning_rate": 4.75097072879331e-05, "loss": 4.1175, "step": 4310 }, { "epoch": 0.15, "learning_rate": 4.74910394265233e-05, "loss": 4.1491, "step": 4320 }, { "epoch": 0.15, "learning_rate": 4.7472371565113505e-05, "loss": 4.6231, "step": 4330 }, { "epoch": 0.15, "learning_rate": 4.745370370370371e-05, "loss": 4.0371, "step": 4340 }, { "epoch": 0.15, "learning_rate": 4.7435035842293904e-05, "loss": 4.3377, "step": 4350 }, { "epoch": 0.15, "learning_rate": 4.7416367980884114e-05, "loss": 4.4208, "step": 4360 }, { "epoch": 0.15, "learning_rate": 4.739770011947431e-05, "loss": 4.3935, "step": 4370 }, { "epoch": 0.15, "learning_rate": 4.737903225806452e-05, "loss": 4.0609, "step": 4380 }, { "epoch": 0.15, "learning_rate": 4.7360364396654724e-05, "loss": 4.2408, "step": 4390 }, { "epoch": 0.15, "learning_rate": 4.734169653524492e-05, "loss": 4.5171, "step": 4400 }, { "epoch": 0.15, "learning_rate": 4.732302867383513e-05, "loss": 4.2476, "step": 4410 }, { "epoch": 0.15, "learning_rate": 4.730436081242533e-05, "loss": 4.3547, "step": 4420 }, { "epoch": 0.15, "learning_rate": 4.728569295101554e-05, "loss": 4.0966, "step": 4430 }, { "epoch": 0.15, "learning_rate": 4.726702508960574e-05, "loss": 4.1857, "step": 4440 }, { "epoch": 0.15, "learning_rate": 4.7248357228195937e-05, "loss": 4.352, "step": 4450 }, { "epoch": 0.15, "learning_rate": 4.7229689366786147e-05, "loss": 4.1158, "step": 4460 }, { "epoch": 0.15, "learning_rate": 4.721102150537634e-05, "loss": 4.1918, "step": 4470 }, { "epoch": 0.15, "learning_rate": 4.7192353643966546e-05, "loss": 4.2308, "step": 4480 }, { "epoch": 0.15, "learning_rate": 4.7173685782556756e-05, "loss": 4.3485, "step": 4490 }, { "epoch": 0.15, "learning_rate": 4.715501792114695e-05, "loss": 4.094, "step": 4500 }, { "epoch": 0.15, "learning_rate": 4.713635005973716e-05, "loss": 4.0731, "step": 4510 }, { "epoch": 0.15, "learning_rate": 4.711768219832736e-05, "loss": 4.3649, "step": 4520 }, { "epoch": 0.15, "learning_rate": 4.709901433691756e-05, "loss": 3.8606, "step": 4530 }, { "epoch": 0.15, "learning_rate": 4.708034647550777e-05, "loss": 4.3086, "step": 4540 }, { "epoch": 0.15, "learning_rate": 4.706167861409797e-05, "loss": 4.1081, "step": 4550 }, { "epoch": 0.15, "learning_rate": 4.704301075268818e-05, "loss": 4.3555, "step": 4560 }, { "epoch": 0.15, "learning_rate": 4.7024342891278375e-05, "loss": 3.8921, "step": 4570 }, { "epoch": 0.15, "learning_rate": 4.700567502986858e-05, "loss": 4.4176, "step": 4580 }, { "epoch": 0.15, "learning_rate": 4.698700716845879e-05, "loss": 4.6405, "step": 4590 }, { "epoch": 0.15, "learning_rate": 4.6968339307048985e-05, "loss": 4.0693, "step": 4600 }, { "epoch": 0.15, "learning_rate": 4.694967144563919e-05, "loss": 4.5621, "step": 4610 }, { "epoch": 0.16, "learning_rate": 4.693100358422939e-05, "loss": 4.3265, "step": 4620 }, { "epoch": 0.16, "learning_rate": 4.6912335722819595e-05, "loss": 4.2263, "step": 4630 }, { "epoch": 0.16, "learning_rate": 4.6893667861409805e-05, "loss": 4.4782, "step": 4640 }, { "epoch": 0.16, "learning_rate": 4.6875e-05, "loss": 4.5211, "step": 4650 }, { "epoch": 0.16, "learning_rate": 4.6856332138590204e-05, "loss": 4.4426, "step": 4660 }, { "epoch": 0.16, "learning_rate": 4.683766427718041e-05, "loss": 4.3813, "step": 4670 }, { "epoch": 0.16, "learning_rate": 4.681899641577061e-05, "loss": 3.9564, "step": 4680 }, { "epoch": 0.16, "learning_rate": 4.6800328554360814e-05, "loss": 4.4241, "step": 4690 }, { "epoch": 0.16, "learning_rate": 4.678166069295102e-05, "loss": 4.3802, "step": 4700 }, { "epoch": 0.16, "learning_rate": 4.676299283154122e-05, "loss": 4.2065, "step": 4710 }, { "epoch": 0.16, "learning_rate": 4.6744324970131424e-05, "loss": 4.0179, "step": 4720 }, { "epoch": 0.16, "learning_rate": 4.672565710872163e-05, "loss": 4.445, "step": 4730 }, { "epoch": 0.16, "learning_rate": 4.670698924731183e-05, "loss": 4.3923, "step": 4740 }, { "epoch": 0.16, "learning_rate": 4.668832138590203e-05, "loss": 3.9034, "step": 4750 }, { "epoch": 0.16, "learning_rate": 4.6669653524492236e-05, "loss": 4.0292, "step": 4760 }, { "epoch": 0.16, "learning_rate": 4.665098566308244e-05, "loss": 4.4023, "step": 4770 }, { "epoch": 0.16, "learning_rate": 4.663231780167264e-05, "loss": 3.7686, "step": 4780 }, { "epoch": 0.16, "learning_rate": 4.6613649940262846e-05, "loss": 4.0862, "step": 4790 }, { "epoch": 0.16, "learning_rate": 4.659498207885305e-05, "loss": 4.0931, "step": 4800 }, { "epoch": 0.16, "learning_rate": 4.657631421744325e-05, "loss": 4.3211, "step": 4810 }, { "epoch": 0.16, "learning_rate": 4.6557646356033456e-05, "loss": 4.1455, "step": 4820 }, { "epoch": 0.16, "learning_rate": 4.653897849462366e-05, "loss": 4.396, "step": 4830 }, { "epoch": 0.16, "learning_rate": 4.652031063321386e-05, "loss": 4.1785, "step": 4840 }, { "epoch": 0.16, "learning_rate": 4.6501642771804065e-05, "loss": 4.2009, "step": 4850 }, { "epoch": 0.16, "learning_rate": 4.648297491039427e-05, "loss": 4.2842, "step": 4860 }, { "epoch": 0.16, "learning_rate": 4.6464307048984465e-05, "loss": 4.4221, "step": 4870 }, { "epoch": 0.16, "learning_rate": 4.6445639187574675e-05, "loss": 4.1758, "step": 4880 }, { "epoch": 0.16, "learning_rate": 4.642697132616488e-05, "loss": 4.3337, "step": 4890 }, { "epoch": 0.16, "learning_rate": 4.640830346475508e-05, "loss": 4.5078, "step": 4900 }, { "epoch": 0.16, "learning_rate": 4.6389635603345285e-05, "loss": 4.0794, "step": 4910 }, { "epoch": 0.17, "learning_rate": 4.637096774193548e-05, "loss": 4.1977, "step": 4920 }, { "epoch": 0.17, "learning_rate": 4.635229988052569e-05, "loss": 4.1703, "step": 4930 }, { "epoch": 0.17, "learning_rate": 4.6333632019115894e-05, "loss": 4.0087, "step": 4940 }, { "epoch": 0.17, "learning_rate": 4.63149641577061e-05, "loss": 4.2186, "step": 4950 }, { "epoch": 0.17, "learning_rate": 4.62962962962963e-05, "loss": 4.2193, "step": 4960 }, { "epoch": 0.17, "learning_rate": 4.62776284348865e-05, "loss": 4.0618, "step": 4970 }, { "epoch": 0.17, "learning_rate": 4.625896057347671e-05, "loss": 4.2286, "step": 4980 }, { "epoch": 0.17, "learning_rate": 4.624029271206691e-05, "loss": 4.4534, "step": 4990 }, { "epoch": 0.17, "learning_rate": 4.6221624850657114e-05, "loss": 4.1912, "step": 5000 }, { "epoch": 0.17, "learning_rate": 4.620295698924732e-05, "loss": 4.2098, "step": 5010 }, { "epoch": 0.17, "learning_rate": 4.618428912783751e-05, "loss": 4.4808, "step": 5020 }, { "epoch": 0.17, "learning_rate": 4.616562126642772e-05, "loss": 4.3189, "step": 5030 }, { "epoch": 0.17, "learning_rate": 4.614695340501792e-05, "loss": 3.9401, "step": 5040 }, { "epoch": 0.17, "learning_rate": 4.612828554360812e-05, "loss": 4.1829, "step": 5050 }, { "epoch": 0.17, "learning_rate": 4.610961768219833e-05, "loss": 4.4423, "step": 5060 }, { "epoch": 0.17, "learning_rate": 4.609094982078853e-05, "loss": 4.0158, "step": 5070 }, { "epoch": 0.17, "learning_rate": 4.607228195937874e-05, "loss": 3.8608, "step": 5080 }, { "epoch": 0.17, "learning_rate": 4.6053614097968936e-05, "loss": 4.0689, "step": 5090 }, { "epoch": 0.17, "learning_rate": 4.603494623655914e-05, "loss": 4.2897, "step": 5100 }, { "epoch": 0.17, "learning_rate": 4.601627837514935e-05, "loss": 4.2973, "step": 5110 }, { "epoch": 0.17, "learning_rate": 4.5997610513739546e-05, "loss": 4.1113, "step": 5120 }, { "epoch": 0.17, "learning_rate": 4.5978942652329756e-05, "loss": 4.2283, "step": 5130 }, { "epoch": 0.17, "learning_rate": 4.596027479091995e-05, "loss": 4.2546, "step": 5140 }, { "epoch": 0.17, "learning_rate": 4.5941606929510155e-05, "loss": 4.4958, "step": 5150 }, { "epoch": 0.17, "learning_rate": 4.5922939068100365e-05, "loss": 4.1367, "step": 5160 }, { "epoch": 0.17, "learning_rate": 4.590427120669056e-05, "loss": 3.6439, "step": 5170 }, { "epoch": 0.17, "learning_rate": 4.5885603345280765e-05, "loss": 4.2327, "step": 5180 }, { "epoch": 0.17, "learning_rate": 4.586693548387097e-05, "loss": 4.0844, "step": 5190 }, { "epoch": 0.17, "learning_rate": 4.584826762246117e-05, "loss": 4.2107, "step": 5200 }, { "epoch": 0.18, "learning_rate": 4.582959976105138e-05, "loss": 4.2222, "step": 5210 }, { "epoch": 0.18, "learning_rate": 4.581093189964158e-05, "loss": 4.1383, "step": 5220 }, { "epoch": 0.18, "learning_rate": 4.579226403823178e-05, "loss": 4.145, "step": 5230 }, { "epoch": 0.18, "learning_rate": 4.5773596176821984e-05, "loss": 4.0463, "step": 5240 }, { "epoch": 0.18, "learning_rate": 4.575492831541219e-05, "loss": 4.3782, "step": 5250 }, { "epoch": 0.18, "learning_rate": 4.57362604540024e-05, "loss": 4.4342, "step": 5260 }, { "epoch": 0.18, "learning_rate": 4.5717592592592594e-05, "loss": 3.8822, "step": 5270 }, { "epoch": 0.18, "learning_rate": 4.56989247311828e-05, "loss": 4.1924, "step": 5280 }, { "epoch": 0.18, "learning_rate": 4.5680256869773e-05, "loss": 4.3946, "step": 5290 }, { "epoch": 0.18, "learning_rate": 4.5661589008363203e-05, "loss": 4.2242, "step": 5300 }, { "epoch": 0.18, "learning_rate": 4.564292114695341e-05, "loss": 4.3488, "step": 5310 }, { "epoch": 0.18, "learning_rate": 4.562425328554361e-05, "loss": 4.2119, "step": 5320 }, { "epoch": 0.18, "learning_rate": 4.560558542413381e-05, "loss": 4.3024, "step": 5330 }, { "epoch": 0.18, "learning_rate": 4.5586917562724016e-05, "loss": 3.9396, "step": 5340 }, { "epoch": 0.18, "learning_rate": 4.556824970131422e-05, "loss": 3.9281, "step": 5350 }, { "epoch": 0.18, "learning_rate": 4.554958183990442e-05, "loss": 4.232, "step": 5360 }, { "epoch": 0.18, "learning_rate": 4.5530913978494626e-05, "loss": 3.6128, "step": 5370 }, { "epoch": 0.18, "learning_rate": 4.551224611708483e-05, "loss": 4.0242, "step": 5380 }, { "epoch": 0.18, "learning_rate": 4.549357825567503e-05, "loss": 3.8623, "step": 5390 }, { "epoch": 0.18, "learning_rate": 4.5474910394265236e-05, "loss": 4.1662, "step": 5400 }, { "epoch": 0.18, "learning_rate": 4.545624253285544e-05, "loss": 3.9453, "step": 5410 }, { "epoch": 0.18, "learning_rate": 4.543757467144564e-05, "loss": 4.2344, "step": 5420 }, { "epoch": 0.18, "learning_rate": 4.5418906810035845e-05, "loss": 4.2854, "step": 5430 }, { "epoch": 0.18, "learning_rate": 4.540023894862604e-05, "loss": 4.0162, "step": 5440 }, { "epoch": 0.18, "learning_rate": 4.538157108721625e-05, "loss": 4.3484, "step": 5450 }, { "epoch": 0.18, "learning_rate": 4.5362903225806455e-05, "loss": 4.3394, "step": 5460 }, { "epoch": 0.18, "learning_rate": 4.534423536439666e-05, "loss": 3.9772, "step": 5470 }, { "epoch": 0.18, "learning_rate": 4.532556750298686e-05, "loss": 4.2683, "step": 5480 }, { "epoch": 0.18, "learning_rate": 4.530689964157706e-05, "loss": 4.2409, "step": 5490 }, { "epoch": 0.18, "learning_rate": 4.528823178016727e-05, "loss": 4.1268, "step": 5500 }, { "epoch": 0.19, "learning_rate": 4.526956391875747e-05, "loss": 4.1671, "step": 5510 }, { "epoch": 0.19, "learning_rate": 4.5250896057347674e-05, "loss": 3.6939, "step": 5520 }, { "epoch": 0.19, "learning_rate": 4.523222819593788e-05, "loss": 4.0335, "step": 5530 }, { "epoch": 0.19, "learning_rate": 4.5213560334528074e-05, "loss": 4.1694, "step": 5540 }, { "epoch": 0.19, "learning_rate": 4.5194892473118284e-05, "loss": 4.0742, "step": 5550 }, { "epoch": 0.19, "learning_rate": 4.517622461170849e-05, "loss": 4.1111, "step": 5560 }, { "epoch": 0.19, "learning_rate": 4.515755675029869e-05, "loss": 4.1056, "step": 5570 }, { "epoch": 0.19, "learning_rate": 4.5138888888888894e-05, "loss": 4.1699, "step": 5580 }, { "epoch": 0.19, "learning_rate": 4.512022102747909e-05, "loss": 4.1817, "step": 5590 }, { "epoch": 0.19, "learning_rate": 4.51015531660693e-05, "loss": 3.8425, "step": 5600 }, { "epoch": 0.19, "learning_rate": 4.50828853046595e-05, "loss": 4.0771, "step": 5610 }, { "epoch": 0.19, "learning_rate": 4.50642174432497e-05, "loss": 3.8866, "step": 5620 }, { "epoch": 0.19, "learning_rate": 4.504554958183991e-05, "loss": 4.0063, "step": 5630 }, { "epoch": 0.19, "learning_rate": 4.5026881720430106e-05, "loss": 3.9252, "step": 5640 }, { "epoch": 0.19, "learning_rate": 4.5008213859020316e-05, "loss": 4.1584, "step": 5650 }, { "epoch": 0.19, "learning_rate": 4.498954599761052e-05, "loss": 4.1171, "step": 5660 }, { "epoch": 0.19, "learning_rate": 4.4970878136200716e-05, "loss": 4.1672, "step": 5670 }, { "epoch": 0.19, "learning_rate": 4.4952210274790926e-05, "loss": 4.5783, "step": 5680 }, { "epoch": 0.19, "learning_rate": 4.493354241338112e-05, "loss": 3.8408, "step": 5690 }, { "epoch": 0.19, "learning_rate": 4.491487455197133e-05, "loss": 3.8826, "step": 5700 }, { "epoch": 0.19, "learning_rate": 4.489620669056153e-05, "loss": 4.0903, "step": 5710 }, { "epoch": 0.19, "learning_rate": 4.487753882915173e-05, "loss": 3.8766, "step": 5720 }, { "epoch": 0.19, "learning_rate": 4.485887096774194e-05, "loss": 4.1665, "step": 5730 }, { "epoch": 0.19, "learning_rate": 4.484020310633214e-05, "loss": 4.4071, "step": 5740 }, { "epoch": 0.19, "learning_rate": 4.482153524492234e-05, "loss": 4.1926, "step": 5750 }, { "epoch": 0.19, "learning_rate": 4.4802867383512545e-05, "loss": 4.1452, "step": 5760 }, { "epoch": 0.19, "learning_rate": 4.478419952210275e-05, "loss": 4.1673, "step": 5770 }, { "epoch": 0.19, "learning_rate": 4.476553166069296e-05, "loss": 4.1794, "step": 5780 }, { "epoch": 0.19, "learning_rate": 4.4746863799283154e-05, "loss": 3.9736, "step": 5790 }, { "epoch": 0.19, "learning_rate": 4.472819593787336e-05, "loss": 4.1541, "step": 5800 }, { "epoch": 0.2, "learning_rate": 4.470952807646356e-05, "loss": 4.1792, "step": 5810 }, { "epoch": 0.2, "learning_rate": 4.4690860215053764e-05, "loss": 4.1334, "step": 5820 }, { "epoch": 0.2, "learning_rate": 4.4672192353643974e-05, "loss": 4.0146, "step": 5830 }, { "epoch": 0.2, "learning_rate": 4.465352449223417e-05, "loss": 3.9846, "step": 5840 }, { "epoch": 0.2, "learning_rate": 4.4634856630824374e-05, "loss": 4.2558, "step": 5850 }, { "epoch": 0.2, "learning_rate": 4.461618876941458e-05, "loss": 4.2142, "step": 5860 }, { "epoch": 0.2, "learning_rate": 4.459752090800478e-05, "loss": 3.9264, "step": 5870 }, { "epoch": 0.2, "learning_rate": 4.4578853046594983e-05, "loss": 3.9934, "step": 5880 }, { "epoch": 0.2, "learning_rate": 4.456018518518519e-05, "loss": 4.0109, "step": 5890 }, { "epoch": 0.2, "learning_rate": 4.454151732377539e-05, "loss": 4.5819, "step": 5900 }, { "epoch": 0.2, "learning_rate": 4.452284946236559e-05, "loss": 4.112, "step": 5910 }, { "epoch": 0.2, "learning_rate": 4.4504181600955796e-05, "loss": 4.11, "step": 5920 }, { "epoch": 0.2, "learning_rate": 4.4485513739546e-05, "loss": 4.2611, "step": 5930 }, { "epoch": 0.2, "learning_rate": 4.44668458781362e-05, "loss": 4.2696, "step": 5940 }, { "epoch": 0.2, "learning_rate": 4.4448178016726406e-05, "loss": 4.1471, "step": 5950 }, { "epoch": 0.2, "eval_accuracy": 0.03822574828705373, "eval_loss": 4.160111904144287, "eval_runtime": 1070.1177, "eval_samples_per_second": 2.591, "eval_steps_per_second": 1.296, "step": 5952 }, { "epoch": 1.0, "learning_rate": 4.442951015531661e-05, "loss": 4.1187, "step": 5960 }, { "epoch": 1.0, "learning_rate": 4.441084229390681e-05, "loss": 4.1273, "step": 5970 }, { "epoch": 1.0, "learning_rate": 4.4392174432497016e-05, "loss": 3.9333, "step": 5980 }, { "epoch": 1.0, "learning_rate": 4.437350657108722e-05, "loss": 3.8852, "step": 5990 }, { "epoch": 1.0, "learning_rate": 4.435483870967742e-05, "loss": 4.3789, "step": 6000 }, { "epoch": 1.0, "learning_rate": 4.4336170848267625e-05, "loss": 4.0991, "step": 6010 }, { "epoch": 1.0, "learning_rate": 4.431750298685783e-05, "loss": 3.9749, "step": 6020 }, { "epoch": 1.0, "learning_rate": 4.429883512544803e-05, "loss": 4.0504, "step": 6030 }, { "epoch": 1.0, "learning_rate": 4.4280167264038235e-05, "loss": 4.2472, "step": 6040 }, { "epoch": 1.0, "learning_rate": 4.426149940262844e-05, "loss": 4.1108, "step": 6050 }, { "epoch": 1.0, "learning_rate": 4.4242831541218635e-05, "loss": 3.9754, "step": 6060 }, { "epoch": 1.0, "learning_rate": 4.4224163679808845e-05, "loss": 3.9436, "step": 6070 }, { "epoch": 1.0, "learning_rate": 4.420549581839905e-05, "loss": 4.1744, "step": 6080 }, { "epoch": 1.0, "learning_rate": 4.418682795698925e-05, "loss": 4.377, "step": 6090 }, { "epoch": 1.0, "learning_rate": 4.4168160095579454e-05, "loss": 4.3518, "step": 6100 }, { "epoch": 1.01, "learning_rate": 4.414949223416965e-05, "loss": 4.1371, "step": 6110 }, { "epoch": 1.01, "learning_rate": 4.413082437275986e-05, "loss": 4.0343, "step": 6120 }, { "epoch": 1.01, "learning_rate": 4.4112156511350064e-05, "loss": 4.198, "step": 6130 }, { "epoch": 1.01, "learning_rate": 4.409348864994026e-05, "loss": 4.0281, "step": 6140 }, { "epoch": 1.01, "learning_rate": 4.407482078853047e-05, "loss": 4.2912, "step": 6150 }, { "epoch": 1.01, "learning_rate": 4.405615292712067e-05, "loss": 3.6893, "step": 6160 }, { "epoch": 1.01, "learning_rate": 4.403748506571088e-05, "loss": 4.6309, "step": 6170 }, { "epoch": 1.01, "learning_rate": 4.401881720430108e-05, "loss": 4.4654, "step": 6180 }, { "epoch": 1.01, "learning_rate": 4.4000149342891276e-05, "loss": 3.9596, "step": 6190 }, { "epoch": 1.01, "learning_rate": 4.3981481481481486e-05, "loss": 4.1032, "step": 6200 }, { "epoch": 1.01, "learning_rate": 4.396281362007168e-05, "loss": 3.9217, "step": 6210 }, { "epoch": 1.01, "learning_rate": 4.394414575866189e-05, "loss": 3.861, "step": 6220 }, { "epoch": 1.01, "learning_rate": 4.3925477897252096e-05, "loss": 4.1733, "step": 6230 }, { "epoch": 1.01, "learning_rate": 4.390681003584229e-05, "loss": 4.2278, "step": 6240 }, { "epoch": 1.01, "learning_rate": 4.38881421744325e-05, "loss": 4.2659, "step": 6250 }, { "epoch": 1.01, "learning_rate": 4.38694743130227e-05, "loss": 4.0317, "step": 6260 }, { "epoch": 1.01, "learning_rate": 4.385080645161291e-05, "loss": 4.4773, "step": 6270 }, { "epoch": 1.01, "learning_rate": 4.383213859020311e-05, "loss": 3.9262, "step": 6280 }, { "epoch": 1.01, "learning_rate": 4.381347072879331e-05, "loss": 3.8025, "step": 6290 }, { "epoch": 1.01, "learning_rate": 4.379480286738352e-05, "loss": 4.1975, "step": 6300 }, { "epoch": 1.01, "learning_rate": 4.3776135005973715e-05, "loss": 4.1544, "step": 6310 }, { "epoch": 1.01, "learning_rate": 4.375746714456392e-05, "loss": 4.0071, "step": 6320 }, { "epoch": 1.01, "learning_rate": 4.373879928315413e-05, "loss": 3.8359, "step": 6330 }, { "epoch": 1.01, "learning_rate": 4.3720131421744325e-05, "loss": 3.814, "step": 6340 }, { "epoch": 1.01, "learning_rate": 4.3701463560334535e-05, "loss": 3.8951, "step": 6350 }, { "epoch": 1.01, "learning_rate": 4.368279569892473e-05, "loss": 3.9571, "step": 6360 }, { "epoch": 1.01, "learning_rate": 4.3664127837514934e-05, "loss": 4.1305, "step": 6370 }, { "epoch": 1.01, "learning_rate": 4.364545997610514e-05, "loss": 3.6083, "step": 6380 }, { "epoch": 1.01, "learning_rate": 4.362679211469534e-05, "loss": 4.2703, "step": 6390 }, { "epoch": 1.02, "learning_rate": 4.360812425328555e-05, "loss": 4.0742, "step": 6400 }, { "epoch": 1.02, "learning_rate": 4.358945639187575e-05, "loss": 3.394, "step": 6410 }, { "epoch": 1.02, "learning_rate": 4.357078853046595e-05, "loss": 4.2712, "step": 6420 }, { "epoch": 1.02, "learning_rate": 4.3552120669056154e-05, "loss": 4.4349, "step": 6430 }, { "epoch": 1.02, "learning_rate": 4.353345280764636e-05, "loss": 4.148, "step": 6440 }, { "epoch": 1.02, "learning_rate": 4.351478494623656e-05, "loss": 3.9457, "step": 6450 }, { "epoch": 1.02, "learning_rate": 4.349611708482676e-05, "loss": 3.9034, "step": 6460 }, { "epoch": 1.02, "learning_rate": 4.3477449223416967e-05, "loss": 4.2363, "step": 6470 }, { "epoch": 1.02, "learning_rate": 4.345878136200717e-05, "loss": 4.2118, "step": 6480 }, { "epoch": 1.02, "learning_rate": 4.344011350059737e-05, "loss": 3.8898, "step": 6490 }, { "epoch": 1.02, "learning_rate": 4.3421445639187576e-05, "loss": 4.0214, "step": 6500 }, { "epoch": 1.02, "learning_rate": 4.340277777777778e-05, "loss": 4.164, "step": 6510 }, { "epoch": 1.02, "learning_rate": 4.338410991636798e-05, "loss": 4.0053, "step": 6520 }, { "epoch": 1.02, "learning_rate": 4.3365442054958186e-05, "loss": 3.9659, "step": 6530 }, { "epoch": 1.02, "learning_rate": 4.334677419354839e-05, "loss": 3.9184, "step": 6540 }, { "epoch": 1.02, "learning_rate": 4.332810633213859e-05, "loss": 3.9838, "step": 6550 }, { "epoch": 1.02, "learning_rate": 4.3309438470728796e-05, "loss": 3.9563, "step": 6560 }, { "epoch": 1.02, "learning_rate": 4.3290770609319e-05, "loss": 3.9839, "step": 6570 }, { "epoch": 1.02, "learning_rate": 4.32721027479092e-05, "loss": 4.3452, "step": 6580 }, { "epoch": 1.02, "learning_rate": 4.3253434886499405e-05, "loss": 4.166, "step": 6590 }, { "epoch": 1.02, "learning_rate": 4.323476702508961e-05, "loss": 3.9137, "step": 6600 }, { "epoch": 1.02, "learning_rate": 4.321609916367981e-05, "loss": 4.0799, "step": 6610 }, { "epoch": 1.02, "learning_rate": 4.3197431302270015e-05, "loss": 4.1999, "step": 6620 }, { "epoch": 1.02, "learning_rate": 4.317876344086022e-05, "loss": 4.0544, "step": 6630 }, { "epoch": 1.02, "learning_rate": 4.316009557945042e-05, "loss": 3.6268, "step": 6640 }, { "epoch": 1.02, "learning_rate": 4.3141427718040625e-05, "loss": 4.1647, "step": 6650 }, { "epoch": 1.02, "learning_rate": 4.312275985663083e-05, "loss": 3.9552, "step": 6660 }, { "epoch": 1.02, "learning_rate": 4.310409199522103e-05, "loss": 4.2799, "step": 6670 }, { "epoch": 1.02, "learning_rate": 4.308542413381123e-05, "loss": 3.928, "step": 6680 }, { "epoch": 1.02, "learning_rate": 4.306675627240144e-05, "loss": 4.0199, "step": 6690 }, { "epoch": 1.03, "learning_rate": 4.304808841099164e-05, "loss": 4.1996, "step": 6700 }, { "epoch": 1.03, "learning_rate": 4.302942054958184e-05, "loss": 3.9552, "step": 6710 }, { "epoch": 1.03, "learning_rate": 4.301075268817205e-05, "loss": 4.1223, "step": 6720 }, { "epoch": 1.03, "learning_rate": 4.2992084826762243e-05, "loss": 3.8597, "step": 6730 }, { "epoch": 1.03, "learning_rate": 4.2973416965352453e-05, "loss": 3.7483, "step": 6740 }, { "epoch": 1.03, "learning_rate": 4.295474910394266e-05, "loss": 3.8141, "step": 6750 }, { "epoch": 1.03, "learning_rate": 4.293608124253285e-05, "loss": 4.0109, "step": 6760 }, { "epoch": 1.03, "learning_rate": 4.291741338112306e-05, "loss": 4.3201, "step": 6770 }, { "epoch": 1.03, "learning_rate": 4.289874551971326e-05, "loss": 4.1932, "step": 6780 }, { "epoch": 1.03, "learning_rate": 4.288007765830347e-05, "loss": 3.7116, "step": 6790 }, { "epoch": 1.03, "learning_rate": 4.286140979689367e-05, "loss": 3.733, "step": 6800 }, { "epoch": 1.03, "learning_rate": 4.284274193548387e-05, "loss": 3.9251, "step": 6810 }, { "epoch": 1.03, "learning_rate": 4.282407407407408e-05, "loss": 4.0688, "step": 6820 }, { "epoch": 1.03, "learning_rate": 4.2805406212664276e-05, "loss": 3.9903, "step": 6830 }, { "epoch": 1.03, "learning_rate": 4.2786738351254486e-05, "loss": 3.7179, "step": 6840 }, { "epoch": 1.03, "learning_rate": 4.276807048984469e-05, "loss": 3.9756, "step": 6850 }, { "epoch": 1.03, "learning_rate": 4.2749402628434885e-05, "loss": 4.2888, "step": 6860 }, { "epoch": 1.03, "learning_rate": 4.2730734767025095e-05, "loss": 3.8294, "step": 6870 }, { "epoch": 1.03, "learning_rate": 4.271206690561529e-05, "loss": 3.9385, "step": 6880 }, { "epoch": 1.03, "learning_rate": 4.2693399044205495e-05, "loss": 4.2638, "step": 6890 }, { "epoch": 1.03, "learning_rate": 4.2674731182795705e-05, "loss": 4.1053, "step": 6900 }, { "epoch": 1.03, "learning_rate": 4.26560633213859e-05, "loss": 4.2088, "step": 6910 }, { "epoch": 1.03, "learning_rate": 4.263739545997611e-05, "loss": 4.1388, "step": 6920 }, { "epoch": 1.03, "learning_rate": 4.261872759856631e-05, "loss": 3.8907, "step": 6930 }, { "epoch": 1.03, "learning_rate": 4.260005973715651e-05, "loss": 3.7858, "step": 6940 }, { "epoch": 1.03, "learning_rate": 4.258139187574672e-05, "loss": 3.8084, "step": 6950 }, { "epoch": 1.03, "learning_rate": 4.256272401433692e-05, "loss": 3.9804, "step": 6960 }, { "epoch": 1.03, "learning_rate": 4.254405615292713e-05, "loss": 4.1962, "step": 6970 }, { "epoch": 1.03, "learning_rate": 4.2525388291517324e-05, "loss": 3.9086, "step": 6980 }, { "epoch": 1.03, "learning_rate": 4.250672043010753e-05, "loss": 4.0937, "step": 6990 }, { "epoch": 1.04, "learning_rate": 4.248805256869773e-05, "loss": 4.0512, "step": 7000 }, { "epoch": 1.04, "learning_rate": 4.2469384707287934e-05, "loss": 3.7795, "step": 7010 }, { "epoch": 1.04, "learning_rate": 4.245071684587814e-05, "loss": 3.9607, "step": 7020 }, { "epoch": 1.04, "learning_rate": 4.243204898446834e-05, "loss": 4.0437, "step": 7030 }, { "epoch": 1.04, "learning_rate": 4.241338112305854e-05, "loss": 4.0527, "step": 7040 }, { "epoch": 1.04, "learning_rate": 4.2394713261648746e-05, "loss": 4.0317, "step": 7050 }, { "epoch": 1.04, "learning_rate": 4.237604540023895e-05, "loss": 4.0935, "step": 7060 }, { "epoch": 1.04, "learning_rate": 4.235737753882915e-05, "loss": 3.8888, "step": 7070 }, { "epoch": 1.04, "learning_rate": 4.2338709677419356e-05, "loss": 3.8422, "step": 7080 }, { "epoch": 1.04, "learning_rate": 4.232004181600956e-05, "loss": 3.6444, "step": 7090 }, { "epoch": 1.04, "learning_rate": 4.230137395459976e-05, "loss": 3.9428, "step": 7100 }, { "epoch": 1.04, "learning_rate": 4.2282706093189966e-05, "loss": 3.9388, "step": 7110 }, { "epoch": 1.04, "learning_rate": 4.226403823178017e-05, "loss": 4.0284, "step": 7120 }, { "epoch": 1.04, "learning_rate": 4.224537037037037e-05, "loss": 4.1524, "step": 7130 }, { "epoch": 1.04, "learning_rate": 4.2226702508960575e-05, "loss": 3.7295, "step": 7140 }, { "epoch": 1.04, "learning_rate": 4.220803464755078e-05, "loss": 3.8585, "step": 7150 }, { "epoch": 1.04, "learning_rate": 4.218936678614098e-05, "loss": 4.1146, "step": 7160 }, { "epoch": 1.04, "learning_rate": 4.2170698924731185e-05, "loss": 3.8945, "step": 7170 }, { "epoch": 1.04, "learning_rate": 4.215203106332139e-05, "loss": 4.1032, "step": 7180 }, { "epoch": 1.04, "learning_rate": 4.213336320191159e-05, "loss": 3.7509, "step": 7190 }, { "epoch": 1.04, "learning_rate": 4.2114695340501795e-05, "loss": 4.1754, "step": 7200 }, { "epoch": 1.04, "learning_rate": 4.2096027479092e-05, "loss": 3.8621, "step": 7210 }, { "epoch": 1.04, "learning_rate": 4.20773596176822e-05, "loss": 3.9213, "step": 7220 }, { "epoch": 1.04, "learning_rate": 4.2058691756272404e-05, "loss": 3.8323, "step": 7230 }, { "epoch": 1.04, "learning_rate": 4.204002389486261e-05, "loss": 4.1141, "step": 7240 }, { "epoch": 1.04, "learning_rate": 4.202135603345281e-05, "loss": 4.2356, "step": 7250 }, { "epoch": 1.04, "learning_rate": 4.2002688172043014e-05, "loss": 4.2664, "step": 7260 }, { "epoch": 1.04, "learning_rate": 4.198402031063322e-05, "loss": 3.9595, "step": 7270 }, { "epoch": 1.04, "learning_rate": 4.1965352449223414e-05, "loss": 4.2968, "step": 7280 }, { "epoch": 1.04, "learning_rate": 4.1946684587813624e-05, "loss": 4.1469, "step": 7290 }, { "epoch": 1.05, "learning_rate": 4.192801672640383e-05, "loss": 3.9717, "step": 7300 }, { "epoch": 1.05, "learning_rate": 4.190934886499403e-05, "loss": 3.9437, "step": 7310 }, { "epoch": 1.05, "learning_rate": 4.1890681003584233e-05, "loss": 3.4816, "step": 7320 }, { "epoch": 1.05, "learning_rate": 4.187201314217443e-05, "loss": 3.983, "step": 7330 }, { "epoch": 1.05, "learning_rate": 4.185334528076464e-05, "loss": 4.1878, "step": 7340 }, { "epoch": 1.05, "learning_rate": 4.1834677419354836e-05, "loss": 3.945, "step": 7350 }, { "epoch": 1.05, "learning_rate": 4.1816009557945046e-05, "loss": 4.1313, "step": 7360 }, { "epoch": 1.05, "learning_rate": 4.179734169653525e-05, "loss": 4.0865, "step": 7370 }, { "epoch": 1.05, "learning_rate": 4.1778673835125446e-05, "loss": 3.9683, "step": 7380 }, { "epoch": 1.05, "learning_rate": 4.1760005973715656e-05, "loss": 4.078, "step": 7390 }, { "epoch": 1.05, "learning_rate": 4.174133811230585e-05, "loss": 4.1362, "step": 7400 }, { "epoch": 1.05, "learning_rate": 4.172267025089606e-05, "loss": 3.7782, "step": 7410 }, { "epoch": 1.05, "learning_rate": 4.1704002389486266e-05, "loss": 3.964, "step": 7420 }, { "epoch": 1.05, "learning_rate": 4.168533452807646e-05, "loss": 4.1653, "step": 7430 }, { "epoch": 1.05, "learning_rate": 4.166666666666667e-05, "loss": 4.1821, "step": 7440 }, { "epoch": 1.05, "learning_rate": 4.164799880525687e-05, "loss": 3.8559, "step": 7450 }, { "epoch": 1.05, "learning_rate": 4.162933094384707e-05, "loss": 3.8399, "step": 7460 }, { "epoch": 1.05, "learning_rate": 4.161066308243728e-05, "loss": 3.9547, "step": 7470 }, { "epoch": 1.05, "learning_rate": 4.159199522102748e-05, "loss": 3.8021, "step": 7480 }, { "epoch": 1.05, "learning_rate": 4.157332735961769e-05, "loss": 4.1396, "step": 7490 }, { "epoch": 1.05, "learning_rate": 4.1554659498207885e-05, "loss": 4.1667, "step": 7500 }, { "epoch": 1.05, "learning_rate": 4.153599163679809e-05, "loss": 4.1119, "step": 7510 }, { "epoch": 1.05, "learning_rate": 4.15173237753883e-05, "loss": 3.8454, "step": 7520 }, { "epoch": 1.05, "learning_rate": 4.1498655913978494e-05, "loss": 3.9409, "step": 7530 }, { "epoch": 1.05, "learning_rate": 4.1479988052568704e-05, "loss": 3.4948, "step": 7540 }, { "epoch": 1.05, "learning_rate": 4.14613201911589e-05, "loss": 3.9529, "step": 7550 }, { "epoch": 1.05, "learning_rate": 4.1442652329749104e-05, "loss": 3.7625, "step": 7560 }, { "epoch": 1.05, "learning_rate": 4.1423984468339314e-05, "loss": 4.1669, "step": 7570 }, { "epoch": 1.05, "learning_rate": 4.140531660692951e-05, "loss": 4.3768, "step": 7580 }, { "epoch": 1.06, "learning_rate": 4.1386648745519714e-05, "loss": 4.0217, "step": 7590 }, { "epoch": 1.06, "learning_rate": 4.136798088410992e-05, "loss": 4.0772, "step": 7600 }, { "epoch": 1.06, "learning_rate": 4.134931302270012e-05, "loss": 4.3164, "step": 7610 }, { "epoch": 1.06, "learning_rate": 4.133064516129033e-05, "loss": 4.1382, "step": 7620 }, { "epoch": 1.06, "learning_rate": 4.1311977299880526e-05, "loss": 4.1516, "step": 7630 }, { "epoch": 1.06, "learning_rate": 4.129330943847073e-05, "loss": 3.7171, "step": 7640 }, { "epoch": 1.06, "learning_rate": 4.127464157706093e-05, "loss": 4.0279, "step": 7650 }, { "epoch": 1.06, "learning_rate": 4.1255973715651136e-05, "loss": 3.9548, "step": 7660 }, { "epoch": 1.06, "learning_rate": 4.123730585424134e-05, "loss": 3.8773, "step": 7670 }, { "epoch": 1.06, "learning_rate": 4.121863799283154e-05, "loss": 4.0106, "step": 7680 }, { "epoch": 1.06, "learning_rate": 4.1199970131421746e-05, "loss": 3.7444, "step": 7690 }, { "epoch": 1.06, "learning_rate": 4.118130227001195e-05, "loss": 3.8094, "step": 7700 }, { "epoch": 1.06, "learning_rate": 4.116263440860215e-05, "loss": 3.9831, "step": 7710 }, { "epoch": 1.06, "learning_rate": 4.1143966547192355e-05, "loss": 3.8809, "step": 7720 }, { "epoch": 1.06, "learning_rate": 4.112529868578256e-05, "loss": 3.7986, "step": 7730 }, { "epoch": 1.06, "learning_rate": 4.110663082437276e-05, "loss": 4.1253, "step": 7740 }, { "epoch": 1.06, "learning_rate": 4.1087962962962965e-05, "loss": 3.7705, "step": 7750 }, { "epoch": 1.06, "learning_rate": 4.106929510155317e-05, "loss": 4.3931, "step": 7760 }, { "epoch": 1.06, "learning_rate": 4.105062724014337e-05, "loss": 3.8068, "step": 7770 }, { "epoch": 1.06, "learning_rate": 4.1031959378733575e-05, "loss": 4.0711, "step": 7780 }, { "epoch": 1.06, "learning_rate": 4.101329151732378e-05, "loss": 3.9627, "step": 7790 }, { "epoch": 1.06, "learning_rate": 4.099462365591398e-05, "loss": 3.8308, "step": 7800 }, { "epoch": 1.06, "learning_rate": 4.0975955794504184e-05, "loss": 3.6867, "step": 7810 }, { "epoch": 1.06, "learning_rate": 4.095728793309439e-05, "loss": 4.1285, "step": 7820 }, { "epoch": 1.06, "learning_rate": 4.093862007168459e-05, "loss": 4.0034, "step": 7830 }, { "epoch": 1.06, "learning_rate": 4.0919952210274794e-05, "loss": 3.8995, "step": 7840 }, { "epoch": 1.06, "learning_rate": 4.090128434886499e-05, "loss": 3.8953, "step": 7850 }, { "epoch": 1.06, "learning_rate": 4.08826164874552e-05, "loss": 3.5996, "step": 7860 }, { "epoch": 1.06, "learning_rate": 4.0863948626045404e-05, "loss": 3.7437, "step": 7870 }, { "epoch": 1.06, "learning_rate": 4.084528076463561e-05, "loss": 4.1237, "step": 7880 }, { "epoch": 1.07, "learning_rate": 4.082661290322581e-05, "loss": 3.5073, "step": 7890 }, { "epoch": 1.07, "learning_rate": 4.0807945041816007e-05, "loss": 3.6698, "step": 7900 }, { "epoch": 1.07, "learning_rate": 4.0789277180406217e-05, "loss": 3.707, "step": 7910 }, { "epoch": 1.07, "learning_rate": 4.077060931899642e-05, "loss": 3.8075, "step": 7920 }, { "epoch": 1.07, "learning_rate": 4.075194145758662e-05, "loss": 3.8448, "step": 7930 }, { "epoch": 1.07, "learning_rate": 4.0733273596176826e-05, "loss": 3.6987, "step": 7940 }, { "epoch": 1.07, "learning_rate": 4.071460573476702e-05, "loss": 3.8019, "step": 7950 }, { "epoch": 1.07, "learning_rate": 4.069593787335723e-05, "loss": 3.8111, "step": 7960 }, { "epoch": 1.07, "learning_rate": 4.0677270011947436e-05, "loss": 4.0772, "step": 7970 }, { "epoch": 1.07, "learning_rate": 4.065860215053764e-05, "loss": 3.9595, "step": 7980 }, { "epoch": 1.07, "learning_rate": 4.063993428912784e-05, "loss": 3.9321, "step": 7990 }, { "epoch": 1.07, "learning_rate": 4.062126642771804e-05, "loss": 4.337, "step": 8000 }, { "epoch": 1.07, "learning_rate": 4.060259856630825e-05, "loss": 3.8624, "step": 8010 }, { "epoch": 1.07, "learning_rate": 4.0583930704898445e-05, "loss": 3.9942, "step": 8020 }, { "epoch": 1.07, "learning_rate": 4.056526284348865e-05, "loss": 4.0728, "step": 8030 }, { "epoch": 1.07, "learning_rate": 4.054659498207886e-05, "loss": 4.2469, "step": 8040 }, { "epoch": 1.07, "learning_rate": 4.0527927120669055e-05, "loss": 3.8322, "step": 8050 }, { "epoch": 1.07, "learning_rate": 4.0509259259259265e-05, "loss": 3.6763, "step": 8060 }, { "epoch": 1.07, "learning_rate": 4.049059139784946e-05, "loss": 4.0425, "step": 8070 }, { "epoch": 1.07, "learning_rate": 4.0471923536439665e-05, "loss": 3.8562, "step": 8080 }, { "epoch": 1.07, "learning_rate": 4.0453255675029875e-05, "loss": 4.1258, "step": 8090 }, { "epoch": 1.07, "learning_rate": 4.043458781362007e-05, "loss": 3.6832, "step": 8100 }, { "epoch": 1.07, "learning_rate": 4.041591995221028e-05, "loss": 3.7075, "step": 8110 }, { "epoch": 1.07, "learning_rate": 4.039725209080048e-05, "loss": 3.7246, "step": 8120 }, { "epoch": 1.07, "learning_rate": 4.037858422939068e-05, "loss": 3.923, "step": 8130 }, { "epoch": 1.07, "learning_rate": 4.035991636798089e-05, "loss": 3.8349, "step": 8140 }, { "epoch": 1.07, "learning_rate": 4.034124850657109e-05, "loss": 3.7014, "step": 8150 }, { "epoch": 1.07, "learning_rate": 4.032258064516129e-05, "loss": 3.7002, "step": 8160 }, { "epoch": 1.07, "learning_rate": 4.0303912783751494e-05, "loss": 4.0162, "step": 8170 }, { "epoch": 1.07, "learning_rate": 4.02852449223417e-05, "loss": 4.0623, "step": 8180 }, { "epoch": 1.08, "learning_rate": 4.026657706093191e-05, "loss": 3.8674, "step": 8190 }, { "epoch": 1.08, "learning_rate": 4.02479091995221e-05, "loss": 4.124, "step": 8200 }, { "epoch": 1.08, "learning_rate": 4.0229241338112306e-05, "loss": 3.6832, "step": 8210 }, { "epoch": 1.08, "learning_rate": 4.021057347670251e-05, "loss": 3.5502, "step": 8220 }, { "epoch": 1.08, "learning_rate": 4.019190561529271e-05, "loss": 4.0664, "step": 8230 }, { "epoch": 1.08, "learning_rate": 4.017323775388292e-05, "loss": 3.7843, "step": 8240 }, { "epoch": 1.08, "learning_rate": 4.015456989247312e-05, "loss": 3.5732, "step": 8250 }, { "epoch": 1.08, "learning_rate": 4.013590203106332e-05, "loss": 4.0164, "step": 8260 }, { "epoch": 1.08, "learning_rate": 4.0117234169653526e-05, "loss": 4.0487, "step": 8270 }, { "epoch": 1.08, "learning_rate": 4.009856630824373e-05, "loss": 3.9529, "step": 8280 }, { "epoch": 1.08, "learning_rate": 4.007989844683393e-05, "loss": 3.7276, "step": 8290 }, { "epoch": 1.08, "learning_rate": 4.0061230585424135e-05, "loss": 3.8644, "step": 8300 }, { "epoch": 1.08, "learning_rate": 4.004256272401434e-05, "loss": 3.8298, "step": 8310 }, { "epoch": 1.08, "learning_rate": 4.002389486260454e-05, "loss": 4.0049, "step": 8320 }, { "epoch": 1.08, "learning_rate": 4.0005227001194745e-05, "loss": 3.863, "step": 8330 }, { "epoch": 1.08, "learning_rate": 3.998655913978495e-05, "loss": 3.8132, "step": 8340 }, { "epoch": 1.08, "learning_rate": 3.996789127837515e-05, "loss": 4.1679, "step": 8350 }, { "epoch": 1.08, "learning_rate": 3.9949223416965355e-05, "loss": 3.8609, "step": 8360 }, { "epoch": 1.08, "learning_rate": 3.993055555555556e-05, "loss": 3.8277, "step": 8370 }, { "epoch": 1.08, "learning_rate": 3.991188769414576e-05, "loss": 3.6151, "step": 8380 }, { "epoch": 1.08, "learning_rate": 3.9893219832735964e-05, "loss": 3.7163, "step": 8390 }, { "epoch": 1.08, "learning_rate": 3.987455197132617e-05, "loss": 3.8806, "step": 8400 }, { "epoch": 1.08, "learning_rate": 3.985588410991637e-05, "loss": 3.8521, "step": 8410 }, { "epoch": 1.08, "learning_rate": 3.983721624850657e-05, "loss": 3.7674, "step": 8420 }, { "epoch": 1.08, "learning_rate": 3.981854838709678e-05, "loss": 3.7362, "step": 8430 }, { "epoch": 1.08, "learning_rate": 3.979988052568698e-05, "loss": 3.9856, "step": 8440 }, { "epoch": 1.08, "learning_rate": 3.9781212664277184e-05, "loss": 3.7352, "step": 8450 }, { "epoch": 1.08, "learning_rate": 3.976254480286739e-05, "loss": 3.5098, "step": 8460 }, { "epoch": 1.08, "learning_rate": 3.974387694145758e-05, "loss": 3.7088, "step": 8470 }, { "epoch": 1.08, "learning_rate": 3.972520908004779e-05, "loss": 3.9955, "step": 8480 }, { "epoch": 1.09, "learning_rate": 3.9706541218637997e-05, "loss": 4.0419, "step": 8490 }, { "epoch": 1.09, "learning_rate": 3.96878733572282e-05, "loss": 4.0806, "step": 8500 }, { "epoch": 1.09, "learning_rate": 3.96692054958184e-05, "loss": 4.0337, "step": 8510 }, { "epoch": 1.09, "learning_rate": 3.96505376344086e-05, "loss": 3.9145, "step": 8520 }, { "epoch": 1.09, "learning_rate": 3.963186977299881e-05, "loss": 3.6839, "step": 8530 }, { "epoch": 1.09, "learning_rate": 3.961320191158901e-05, "loss": 4.1887, "step": 8540 }, { "epoch": 1.09, "learning_rate": 3.9594534050179216e-05, "loss": 3.4189, "step": 8550 }, { "epoch": 1.09, "learning_rate": 3.957586618876942e-05, "loss": 4.0207, "step": 8560 }, { "epoch": 1.09, "learning_rate": 3.9557198327359616e-05, "loss": 3.7435, "step": 8570 }, { "epoch": 1.09, "learning_rate": 3.9538530465949825e-05, "loss": 3.7173, "step": 8580 }, { "epoch": 1.09, "learning_rate": 3.951986260454003e-05, "loss": 4.3753, "step": 8590 }, { "epoch": 1.09, "learning_rate": 3.9501194743130225e-05, "loss": 3.632, "step": 8600 }, { "epoch": 1.09, "learning_rate": 3.9482526881720435e-05, "loss": 3.7338, "step": 8610 }, { "epoch": 1.09, "learning_rate": 3.946385902031063e-05, "loss": 3.9785, "step": 8620 }, { "epoch": 1.09, "learning_rate": 3.944519115890084e-05, "loss": 3.8019, "step": 8630 }, { "epoch": 1.09, "learning_rate": 3.9426523297491045e-05, "loss": 3.8236, "step": 8640 }, { "epoch": 1.09, "learning_rate": 3.940785543608124e-05, "loss": 3.5959, "step": 8650 }, { "epoch": 1.09, "learning_rate": 3.938918757467145e-05, "loss": 3.4537, "step": 8660 }, { "epoch": 1.09, "learning_rate": 3.937051971326165e-05, "loss": 3.6574, "step": 8670 }, { "epoch": 1.09, "learning_rate": 3.935185185185186e-05, "loss": 4.2991, "step": 8680 }, { "epoch": 1.09, "learning_rate": 3.9333183990442054e-05, "loss": 3.9068, "step": 8690 }, { "epoch": 1.09, "learning_rate": 3.931451612903226e-05, "loss": 3.7217, "step": 8700 }, { "epoch": 1.09, "learning_rate": 3.929584826762247e-05, "loss": 3.984, "step": 8710 }, { "epoch": 1.09, "learning_rate": 3.9277180406212664e-05, "loss": 3.7337, "step": 8720 }, { "epoch": 1.09, "learning_rate": 3.925851254480287e-05, "loss": 3.794, "step": 8730 }, { "epoch": 1.09, "learning_rate": 3.923984468339307e-05, "loss": 3.7158, "step": 8740 }, { "epoch": 1.09, "learning_rate": 3.9221176821983273e-05, "loss": 3.8296, "step": 8750 }, { "epoch": 1.09, "learning_rate": 3.9202508960573483e-05, "loss": 3.8269, "step": 8760 }, { "epoch": 1.09, "learning_rate": 3.918384109916368e-05, "loss": 3.9781, "step": 8770 }, { "epoch": 1.1, "learning_rate": 3.916517323775388e-05, "loss": 3.9848, "step": 8780 }, { "epoch": 1.1, "learning_rate": 3.9146505376344086e-05, "loss": 3.5484, "step": 8790 }, { "epoch": 1.1, "learning_rate": 3.912783751493429e-05, "loss": 3.6333, "step": 8800 }, { "epoch": 1.1, "learning_rate": 3.91091696535245e-05, "loss": 3.6082, "step": 8810 }, { "epoch": 1.1, "learning_rate": 3.9090501792114696e-05, "loss": 3.8744, "step": 8820 }, { "epoch": 1.1, "learning_rate": 3.90718339307049e-05, "loss": 3.8798, "step": 8830 }, { "epoch": 1.1, "learning_rate": 3.90531660692951e-05, "loss": 3.6393, "step": 8840 }, { "epoch": 1.1, "learning_rate": 3.9034498207885306e-05, "loss": 3.577, "step": 8850 }, { "epoch": 1.1, "learning_rate": 3.901583034647551e-05, "loss": 3.9424, "step": 8860 }, { "epoch": 1.1, "learning_rate": 3.899716248506571e-05, "loss": 3.9708, "step": 8870 }, { "epoch": 1.1, "learning_rate": 3.8978494623655915e-05, "loss": 3.7679, "step": 8880 }, { "epoch": 1.1, "learning_rate": 3.895982676224612e-05, "loss": 4.0508, "step": 8890 }, { "epoch": 1.1, "learning_rate": 3.894115890083632e-05, "loss": 3.6616, "step": 8900 }, { "epoch": 1.1, "learning_rate": 3.8922491039426525e-05, "loss": 4.2915, "step": 8910 }, { "epoch": 1.1, "learning_rate": 3.890382317801673e-05, "loss": 3.8738, "step": 8920 }, { "epoch": 1.1, "learning_rate": 3.888515531660693e-05, "loss": 3.9764, "step": 8930 }, { "epoch": 1.1, "learning_rate": 3.8866487455197135e-05, "loss": 3.9438, "step": 8940 }, { "epoch": 1.1, "learning_rate": 3.884781959378734e-05, "loss": 3.7961, "step": 8950 }, { "epoch": 1.1, "learning_rate": 3.882915173237754e-05, "loss": 3.8157, "step": 8960 }, { "epoch": 1.1, "learning_rate": 3.8810483870967744e-05, "loss": 3.585, "step": 8970 }, { "epoch": 1.1, "learning_rate": 3.879181600955795e-05, "loss": 3.668, "step": 8980 }, { "epoch": 1.1, "learning_rate": 3.877314814814815e-05, "loss": 3.6332, "step": 8990 }, { "epoch": 1.1, "learning_rate": 3.8754480286738354e-05, "loss": 3.594, "step": 9000 }, { "epoch": 1.1, "learning_rate": 3.873581242532856e-05, "loss": 4.0134, "step": 9010 }, { "epoch": 1.1, "learning_rate": 3.871714456391876e-05, "loss": 3.6043, "step": 9020 }, { "epoch": 1.1, "learning_rate": 3.8698476702508964e-05, "loss": 3.8317, "step": 9030 }, { "epoch": 1.1, "learning_rate": 3.867980884109916e-05, "loss": 3.8492, "step": 9040 }, { "epoch": 1.1, "learning_rate": 3.866114097968937e-05, "loss": 3.6899, "step": 9050 }, { "epoch": 1.1, "learning_rate": 3.864247311827957e-05, "loss": 3.6604, "step": 9060 }, { "epoch": 1.1, "learning_rate": 3.8623805256869776e-05, "loss": 3.676, "step": 9070 }, { "epoch": 1.11, "learning_rate": 3.860513739545998e-05, "loss": 3.8921, "step": 9080 }, { "epoch": 1.11, "learning_rate": 3.8586469534050176e-05, "loss": 3.9841, "step": 9090 }, { "epoch": 1.11, "learning_rate": 3.8567801672640386e-05, "loss": 3.6641, "step": 9100 }, { "epoch": 1.11, "learning_rate": 3.854913381123059e-05, "loss": 3.6656, "step": 9110 }, { "epoch": 1.11, "learning_rate": 3.8530465949820786e-05, "loss": 3.9319, "step": 9120 }, { "epoch": 1.11, "learning_rate": 3.8511798088410996e-05, "loss": 3.5589, "step": 9130 }, { "epoch": 1.11, "learning_rate": 3.849313022700119e-05, "loss": 3.6389, "step": 9140 }, { "epoch": 1.11, "learning_rate": 3.84744623655914e-05, "loss": 3.2645, "step": 9150 }, { "epoch": 1.11, "learning_rate": 3.8455794504181605e-05, "loss": 3.7645, "step": 9160 }, { "epoch": 1.11, "learning_rate": 3.84371266427718e-05, "loss": 3.9175, "step": 9170 }, { "epoch": 1.11, "learning_rate": 3.841845878136201e-05, "loss": 3.8478, "step": 9180 }, { "epoch": 1.11, "learning_rate": 3.839979091995221e-05, "loss": 4.0945, "step": 9190 }, { "epoch": 1.11, "learning_rate": 3.838112305854242e-05, "loss": 3.9738, "step": 9200 }, { "epoch": 1.11, "learning_rate": 3.836245519713262e-05, "loss": 3.7934, "step": 9210 }, { "epoch": 1.11, "learning_rate": 3.834378733572282e-05, "loss": 3.9975, "step": 9220 }, { "epoch": 1.11, "learning_rate": 3.832511947431303e-05, "loss": 3.9221, "step": 9230 }, { "epoch": 1.11, "learning_rate": 3.8306451612903224e-05, "loss": 3.8509, "step": 9240 }, { "epoch": 1.11, "learning_rate": 3.8287783751493434e-05, "loss": 4.1145, "step": 9250 }, { "epoch": 1.11, "learning_rate": 3.826911589008364e-05, "loss": 3.6662, "step": 9260 }, { "epoch": 1.11, "learning_rate": 3.8250448028673834e-05, "loss": 3.6177, "step": 9270 }, { "epoch": 1.11, "learning_rate": 3.8231780167264044e-05, "loss": 3.7416, "step": 9280 }, { "epoch": 1.11, "learning_rate": 3.821311230585424e-05, "loss": 4.2665, "step": 9290 }, { "epoch": 1.11, "learning_rate": 3.8194444444444444e-05, "loss": 3.7638, "step": 9300 }, { "epoch": 1.11, "learning_rate": 3.817577658303465e-05, "loss": 3.7183, "step": 9310 }, { "epoch": 1.11, "learning_rate": 3.815710872162485e-05, "loss": 3.8718, "step": 9320 }, { "epoch": 1.11, "learning_rate": 3.813844086021506e-05, "loss": 3.605, "step": 9330 }, { "epoch": 1.11, "learning_rate": 3.8119772998805257e-05, "loss": 3.9948, "step": 9340 }, { "epoch": 1.11, "learning_rate": 3.810110513739546e-05, "loss": 3.6158, "step": 9350 }, { "epoch": 1.11, "learning_rate": 3.808243727598566e-05, "loss": 4.2222, "step": 9360 }, { "epoch": 1.11, "learning_rate": 3.8063769414575866e-05, "loss": 3.5607, "step": 9370 }, { "epoch": 1.12, "learning_rate": 3.8045101553166076e-05, "loss": 4.0389, "step": 9380 }, { "epoch": 1.12, "learning_rate": 3.802643369175627e-05, "loss": 3.5809, "step": 9390 }, { "epoch": 1.12, "learning_rate": 3.8007765830346476e-05, "loss": 3.6368, "step": 9400 }, { "epoch": 1.12, "learning_rate": 3.798909796893668e-05, "loss": 3.9134, "step": 9410 }, { "epoch": 1.12, "learning_rate": 3.797043010752688e-05, "loss": 3.6072, "step": 9420 }, { "epoch": 1.12, "learning_rate": 3.7951762246117086e-05, "loss": 3.6682, "step": 9430 }, { "epoch": 1.12, "learning_rate": 3.793309438470729e-05, "loss": 3.8541, "step": 9440 }, { "epoch": 1.12, "learning_rate": 3.791442652329749e-05, "loss": 3.8763, "step": 9450 }, { "epoch": 1.12, "learning_rate": 3.7895758661887695e-05, "loss": 3.9722, "step": 9460 }, { "epoch": 1.12, "learning_rate": 3.78770908004779e-05, "loss": 4.0616, "step": 9470 }, { "epoch": 1.12, "learning_rate": 3.78584229390681e-05, "loss": 3.6531, "step": 9480 }, { "epoch": 1.12, "learning_rate": 3.7839755077658305e-05, "loss": 3.7011, "step": 9490 }, { "epoch": 1.12, "learning_rate": 3.782108721624851e-05, "loss": 3.8015, "step": 9500 }, { "epoch": 1.12, "learning_rate": 3.780241935483871e-05, "loss": 3.9305, "step": 9510 }, { "epoch": 1.12, "learning_rate": 3.7783751493428915e-05, "loss": 3.718, "step": 9520 }, { "epoch": 1.12, "learning_rate": 3.776508363201912e-05, "loss": 4.0746, "step": 9530 }, { "epoch": 1.12, "learning_rate": 3.774641577060932e-05, "loss": 3.9653, "step": 9540 }, { "epoch": 1.12, "learning_rate": 3.7727747909199524e-05, "loss": 3.8337, "step": 9550 }, { "epoch": 1.12, "learning_rate": 3.770908004778973e-05, "loss": 3.9944, "step": 9560 }, { "epoch": 1.12, "learning_rate": 3.769041218637993e-05, "loss": 3.8524, "step": 9570 }, { "epoch": 1.12, "learning_rate": 3.7671744324970134e-05, "loss": 3.7066, "step": 9580 }, { "epoch": 1.12, "learning_rate": 3.765307646356034e-05, "loss": 3.7592, "step": 9590 }, { "epoch": 1.12, "learning_rate": 3.763440860215054e-05, "loss": 3.8453, "step": 9600 }, { "epoch": 1.12, "learning_rate": 3.7615740740740744e-05, "loss": 4.0889, "step": 9610 }, { "epoch": 1.12, "learning_rate": 3.759707287933095e-05, "loss": 3.7718, "step": 9620 }, { "epoch": 1.12, "learning_rate": 3.757840501792115e-05, "loss": 4.0248, "step": 9630 }, { "epoch": 1.12, "learning_rate": 3.755973715651135e-05, "loss": 3.9773, "step": 9640 }, { "epoch": 1.12, "learning_rate": 3.7541069295101556e-05, "loss": 3.7009, "step": 9650 }, { "epoch": 1.12, "learning_rate": 3.752240143369175e-05, "loss": 4.4664, "step": 9660 }, { "epoch": 1.12, "learning_rate": 3.750373357228196e-05, "loss": 3.5295, "step": 9670 }, { "epoch": 1.13, "learning_rate": 3.7485065710872166e-05, "loss": 3.8009, "step": 9680 }, { "epoch": 1.13, "learning_rate": 3.746639784946236e-05, "loss": 3.9869, "step": 9690 }, { "epoch": 1.13, "learning_rate": 3.744772998805257e-05, "loss": 3.5928, "step": 9700 }, { "epoch": 1.13, "learning_rate": 3.742906212664277e-05, "loss": 3.6676, "step": 9710 }, { "epoch": 1.13, "learning_rate": 3.741039426523298e-05, "loss": 3.5248, "step": 9720 }, { "epoch": 1.13, "learning_rate": 3.739172640382318e-05, "loss": 3.4531, "step": 9730 }, { "epoch": 1.13, "learning_rate": 3.737305854241338e-05, "loss": 3.6246, "step": 9740 }, { "epoch": 1.13, "learning_rate": 3.735439068100359e-05, "loss": 3.631, "step": 9750 }, { "epoch": 1.13, "learning_rate": 3.7335722819593785e-05, "loss": 3.8826, "step": 9760 }, { "epoch": 1.13, "learning_rate": 3.7317054958183995e-05, "loss": 3.8901, "step": 9770 }, { "epoch": 1.13, "learning_rate": 3.72983870967742e-05, "loss": 3.2209, "step": 9780 }, { "epoch": 1.13, "learning_rate": 3.7279719235364395e-05, "loss": 3.904, "step": 9790 }, { "epoch": 1.13, "learning_rate": 3.7261051373954605e-05, "loss": 3.7013, "step": 9800 }, { "epoch": 1.13, "learning_rate": 3.72423835125448e-05, "loss": 3.6786, "step": 9810 }, { "epoch": 1.13, "learning_rate": 3.722371565113501e-05, "loss": 3.788, "step": 9820 }, { "epoch": 1.13, "learning_rate": 3.7205047789725214e-05, "loss": 3.9666, "step": 9830 }, { "epoch": 1.13, "learning_rate": 3.718637992831541e-05, "loss": 3.83, "step": 9840 }, { "epoch": 1.13, "learning_rate": 3.716771206690562e-05, "loss": 3.6834, "step": 9850 }, { "epoch": 1.13, "learning_rate": 3.714904420549582e-05, "loss": 3.693, "step": 9860 }, { "epoch": 1.13, "learning_rate": 3.713037634408602e-05, "loss": 3.362, "step": 9870 }, { "epoch": 1.13, "learning_rate": 3.711170848267623e-05, "loss": 3.5185, "step": 9880 }, { "epoch": 1.13, "learning_rate": 3.709304062126643e-05, "loss": 3.637, "step": 9890 }, { "epoch": 1.13, "learning_rate": 3.707437275985664e-05, "loss": 3.7918, "step": 9900 }, { "epoch": 1.13, "learning_rate": 3.705570489844683e-05, "loss": 3.726, "step": 9910 }, { "epoch": 1.13, "learning_rate": 3.7037037037037037e-05, "loss": 3.6951, "step": 9920 }, { "epoch": 1.13, "learning_rate": 3.7018369175627247e-05, "loss": 3.8542, "step": 9930 }, { "epoch": 1.13, "learning_rate": 3.699970131421744e-05, "loss": 3.7629, "step": 9940 }, { "epoch": 1.13, "learning_rate": 3.698103345280765e-05, "loss": 3.3457, "step": 9950 }, { "epoch": 1.13, "learning_rate": 3.696236559139785e-05, "loss": 4.022, "step": 9960 }, { "epoch": 1.14, "learning_rate": 3.694369772998805e-05, "loss": 3.4156, "step": 9970 }, { "epoch": 1.14, "learning_rate": 3.6925029868578256e-05, "loss": 3.6386, "step": 9980 }, { "epoch": 1.14, "learning_rate": 3.690636200716846e-05, "loss": 3.922, "step": 9990 }, { "epoch": 1.14, "learning_rate": 3.688769414575866e-05, "loss": 3.8244, "step": 10000 }, { "epoch": 1.14, "learning_rate": 3.6869026284348866e-05, "loss": 3.8524, "step": 10010 }, { "epoch": 1.14, "learning_rate": 3.685035842293907e-05, "loss": 3.2009, "step": 10020 }, { "epoch": 1.14, "learning_rate": 3.683169056152927e-05, "loss": 3.9708, "step": 10030 }, { "epoch": 1.14, "learning_rate": 3.6813022700119475e-05, "loss": 3.771, "step": 10040 }, { "epoch": 1.14, "learning_rate": 3.679435483870968e-05, "loss": 4.0235, "step": 10050 }, { "epoch": 1.14, "learning_rate": 3.677568697729988e-05, "loss": 3.6662, "step": 10060 }, { "epoch": 1.14, "learning_rate": 3.6757019115890085e-05, "loss": 3.9909, "step": 10070 }, { "epoch": 1.14, "learning_rate": 3.673835125448029e-05, "loss": 3.9069, "step": 10080 }, { "epoch": 1.14, "learning_rate": 3.671968339307049e-05, "loss": 3.8864, "step": 10090 }, { "epoch": 1.14, "learning_rate": 3.6701015531660695e-05, "loss": 3.8127, "step": 10100 }, { "epoch": 1.14, "learning_rate": 3.66823476702509e-05, "loss": 3.9281, "step": 10110 }, { "epoch": 1.14, "learning_rate": 3.66636798088411e-05, "loss": 3.5872, "step": 10120 }, { "epoch": 1.14, "learning_rate": 3.6645011947431304e-05, "loss": 3.6721, "step": 10130 }, { "epoch": 1.14, "learning_rate": 3.662634408602151e-05, "loss": 3.6174, "step": 10140 }, { "epoch": 1.14, "learning_rate": 3.660767622461171e-05, "loss": 3.7824, "step": 10150 }, { "epoch": 1.14, "learning_rate": 3.6589008363201914e-05, "loss": 3.6468, "step": 10160 }, { "epoch": 1.14, "learning_rate": 3.657034050179212e-05, "loss": 3.1382, "step": 10170 }, { "epoch": 1.14, "learning_rate": 3.655167264038232e-05, "loss": 3.9309, "step": 10180 }, { "epoch": 1.14, "learning_rate": 3.6533004778972523e-05, "loss": 3.7067, "step": 10190 }, { "epoch": 1.14, "learning_rate": 3.651433691756273e-05, "loss": 3.6668, "step": 10200 }, { "epoch": 1.14, "learning_rate": 3.649566905615293e-05, "loss": 3.8249, "step": 10210 }, { "epoch": 1.14, "learning_rate": 3.647700119474313e-05, "loss": 3.622, "step": 10220 }, { "epoch": 1.14, "learning_rate": 3.6458333333333336e-05, "loss": 3.7141, "step": 10230 }, { "epoch": 1.14, "learning_rate": 3.643966547192354e-05, "loss": 3.9518, "step": 10240 }, { "epoch": 1.14, "learning_rate": 3.642099761051374e-05, "loss": 3.6652, "step": 10250 }, { "epoch": 1.14, "learning_rate": 3.640232974910394e-05, "loss": 3.4699, "step": 10260 }, { "epoch": 1.15, "learning_rate": 3.638366188769415e-05, "loss": 3.755, "step": 10270 }, { "epoch": 1.15, "learning_rate": 3.636499402628435e-05, "loss": 3.5821, "step": 10280 }, { "epoch": 1.15, "learning_rate": 3.6346326164874556e-05, "loss": 3.9781, "step": 10290 }, { "epoch": 1.15, "learning_rate": 3.632765830346476e-05, "loss": 3.6877, "step": 10300 }, { "epoch": 1.15, "learning_rate": 3.6308990442054955e-05, "loss": 3.6512, "step": 10310 }, { "epoch": 1.15, "learning_rate": 3.6290322580645165e-05, "loss": 3.1832, "step": 10320 }, { "epoch": 1.15, "learning_rate": 3.627165471923536e-05, "loss": 3.4906, "step": 10330 }, { "epoch": 1.15, "learning_rate": 3.625298685782557e-05, "loss": 3.525, "step": 10340 }, { "epoch": 1.15, "learning_rate": 3.6234318996415775e-05, "loss": 3.3593, "step": 10350 }, { "epoch": 1.15, "learning_rate": 3.621565113500597e-05, "loss": 3.4422, "step": 10360 }, { "epoch": 1.15, "learning_rate": 3.619698327359618e-05, "loss": 3.678, "step": 10370 }, { "epoch": 1.15, "learning_rate": 3.617831541218638e-05, "loss": 3.757, "step": 10380 }, { "epoch": 1.15, "learning_rate": 3.615964755077659e-05, "loss": 3.1855, "step": 10390 }, { "epoch": 1.15, "learning_rate": 3.614097968936679e-05, "loss": 3.8261, "step": 10400 }, { "epoch": 1.15, "learning_rate": 3.612231182795699e-05, "loss": 3.738, "step": 10410 }, { "epoch": 1.15, "learning_rate": 3.61036439665472e-05, "loss": 3.8054, "step": 10420 }, { "epoch": 1.15, "learning_rate": 3.6084976105137394e-05, "loss": 3.8857, "step": 10430 }, { "epoch": 1.15, "learning_rate": 3.60663082437276e-05, "loss": 3.2592, "step": 10440 }, { "epoch": 1.15, "learning_rate": 3.604764038231781e-05, "loss": 3.7933, "step": 10450 }, { "epoch": 1.15, "learning_rate": 3.6028972520908004e-05, "loss": 3.6397, "step": 10460 }, { "epoch": 1.15, "learning_rate": 3.6010304659498214e-05, "loss": 3.9235, "step": 10470 }, { "epoch": 1.15, "learning_rate": 3.599163679808841e-05, "loss": 3.8427, "step": 10480 }, { "epoch": 1.15, "learning_rate": 3.597296893667861e-05, "loss": 3.7712, "step": 10490 }, { "epoch": 1.15, "learning_rate": 3.595430107526882e-05, "loss": 3.5808, "step": 10500 }, { "epoch": 1.15, "learning_rate": 3.593563321385902e-05, "loss": 3.3982, "step": 10510 }, { "epoch": 1.15, "learning_rate": 3.591696535244923e-05, "loss": 3.7063, "step": 10520 }, { "epoch": 1.15, "learning_rate": 3.5898297491039426e-05, "loss": 3.6352, "step": 10530 }, { "epoch": 1.15, "learning_rate": 3.587962962962963e-05, "loss": 3.5817, "step": 10540 }, { "epoch": 1.15, "learning_rate": 3.586096176821984e-05, "loss": 3.9715, "step": 10550 }, { "epoch": 1.15, "learning_rate": 3.5842293906810036e-05, "loss": 3.5751, "step": 10560 }, { "epoch": 1.16, "learning_rate": 3.582362604540024e-05, "loss": 3.3962, "step": 10570 }, { "epoch": 1.16, "learning_rate": 3.580495818399044e-05, "loss": 3.5391, "step": 10580 }, { "epoch": 1.16, "learning_rate": 3.5786290322580645e-05, "loss": 3.2171, "step": 10590 }, { "epoch": 1.16, "learning_rate": 3.5767622461170855e-05, "loss": 3.9271, "step": 10600 }, { "epoch": 1.16, "learning_rate": 3.574895459976105e-05, "loss": 3.3747, "step": 10610 }, { "epoch": 1.16, "learning_rate": 3.5730286738351255e-05, "loss": 3.5508, "step": 10620 }, { "epoch": 1.16, "learning_rate": 3.571161887694146e-05, "loss": 3.603, "step": 10630 }, { "epoch": 1.16, "learning_rate": 3.569295101553166e-05, "loss": 3.4428, "step": 10640 }, { "epoch": 1.16, "learning_rate": 3.5674283154121865e-05, "loss": 3.3248, "step": 10650 }, { "epoch": 1.16, "learning_rate": 3.565561529271207e-05, "loss": 3.37, "step": 10660 }, { "epoch": 1.16, "learning_rate": 3.563694743130227e-05, "loss": 3.7291, "step": 10670 }, { "epoch": 1.16, "learning_rate": 3.5618279569892474e-05, "loss": 3.7927, "step": 10680 }, { "epoch": 1.16, "learning_rate": 3.559961170848268e-05, "loss": 3.9348, "step": 10690 }, { "epoch": 1.16, "learning_rate": 3.558094384707288e-05, "loss": 3.844, "step": 10700 }, { "epoch": 1.16, "learning_rate": 3.5562275985663084e-05, "loss": 3.4631, "step": 10710 }, { "epoch": 1.16, "learning_rate": 3.554360812425329e-05, "loss": 3.6684, "step": 10720 }, { "epoch": 1.16, "learning_rate": 3.552494026284349e-05, "loss": 3.5042, "step": 10730 }, { "epoch": 1.16, "learning_rate": 3.5506272401433694e-05, "loss": 4.056, "step": 10740 }, { "epoch": 1.16, "learning_rate": 3.54876045400239e-05, "loss": 3.9195, "step": 10750 }, { "epoch": 1.16, "learning_rate": 3.54689366786141e-05, "loss": 3.6629, "step": 10760 }, { "epoch": 1.16, "learning_rate": 3.5450268817204303e-05, "loss": 3.7106, "step": 10770 }, { "epoch": 1.16, "learning_rate": 3.543160095579451e-05, "loss": 3.5783, "step": 10780 }, { "epoch": 1.16, "learning_rate": 3.541293309438471e-05, "loss": 3.6773, "step": 10790 }, { "epoch": 1.16, "learning_rate": 3.539426523297491e-05, "loss": 3.6787, "step": 10800 }, { "epoch": 1.16, "learning_rate": 3.5375597371565116e-05, "loss": 3.3561, "step": 10810 }, { "epoch": 1.16, "learning_rate": 3.535692951015532e-05, "loss": 3.2967, "step": 10820 }, { "epoch": 1.16, "learning_rate": 3.5338261648745516e-05, "loss": 3.3411, "step": 10830 }, { "epoch": 1.16, "learning_rate": 3.5319593787335726e-05, "loss": 3.9638, "step": 10840 }, { "epoch": 1.16, "learning_rate": 3.530092592592593e-05, "loss": 3.7934, "step": 10850 }, { "epoch": 1.16, "learning_rate": 3.528225806451613e-05, "loss": 3.5194, "step": 10860 }, { "epoch": 1.17, "learning_rate": 3.5263590203106336e-05, "loss": 3.3668, "step": 10870 }, { "epoch": 1.17, "learning_rate": 3.524492234169653e-05, "loss": 3.2587, "step": 10880 }, { "epoch": 1.17, "learning_rate": 3.522625448028674e-05, "loss": 3.454, "step": 10890 }, { "epoch": 1.17, "learning_rate": 3.5207586618876945e-05, "loss": 3.5761, "step": 10900 }, { "epoch": 1.17, "learning_rate": 3.518891875746715e-05, "loss": 3.3618, "step": 10910 }, { "epoch": 1.17, "learning_rate": 3.517025089605735e-05, "loss": 3.7184, "step": 10920 }, { "epoch": 1.17, "learning_rate": 3.515158303464755e-05, "loss": 3.9273, "step": 10930 }, { "epoch": 1.17, "learning_rate": 3.513291517323776e-05, "loss": 3.539, "step": 10940 }, { "epoch": 1.17, "learning_rate": 3.511424731182796e-05, "loss": 3.8599, "step": 10950 }, { "epoch": 1.17, "learning_rate": 3.5095579450418165e-05, "loss": 3.5001, "step": 10960 }, { "epoch": 1.17, "learning_rate": 3.507691158900837e-05, "loss": 3.7675, "step": 10970 }, { "epoch": 1.17, "learning_rate": 3.5058243727598564e-05, "loss": 3.5171, "step": 10980 }, { "epoch": 1.17, "learning_rate": 3.5039575866188774e-05, "loss": 3.3035, "step": 10990 }, { "epoch": 1.17, "learning_rate": 3.502090800477897e-05, "loss": 3.6594, "step": 11000 }, { "epoch": 1.17, "learning_rate": 3.5002240143369174e-05, "loss": 3.4233, "step": 11010 }, { "epoch": 1.17, "learning_rate": 3.4983572281959384e-05, "loss": 3.8459, "step": 11020 }, { "epoch": 1.17, "learning_rate": 3.496490442054958e-05, "loss": 3.7127, "step": 11030 }, { "epoch": 1.17, "learning_rate": 3.494623655913979e-05, "loss": 3.0463, "step": 11040 }, { "epoch": 1.17, "learning_rate": 3.492756869772999e-05, "loss": 3.4694, "step": 11050 }, { "epoch": 1.17, "learning_rate": 3.490890083632019e-05, "loss": 3.3425, "step": 11060 }, { "epoch": 1.17, "learning_rate": 3.48902329749104e-05, "loss": 3.5626, "step": 11070 }, { "epoch": 1.17, "learning_rate": 3.4871565113500596e-05, "loss": 3.329, "step": 11080 }, { "epoch": 1.17, "learning_rate": 3.4852897252090806e-05, "loss": 3.9924, "step": 11090 }, { "epoch": 1.17, "learning_rate": 3.4834229390681e-05, "loss": 3.5672, "step": 11100 }, { "epoch": 1.17, "learning_rate": 3.4815561529271206e-05, "loss": 3.7732, "step": 11110 }, { "epoch": 1.17, "learning_rate": 3.4796893667861416e-05, "loss": 3.2531, "step": 11120 }, { "epoch": 1.17, "learning_rate": 3.477822580645161e-05, "loss": 3.5789, "step": 11130 }, { "epoch": 1.17, "learning_rate": 3.4759557945041816e-05, "loss": 3.4121, "step": 11140 }, { "epoch": 1.17, "learning_rate": 3.474089008363202e-05, "loss": 3.8584, "step": 11150 }, { "epoch": 1.18, "learning_rate": 3.472222222222222e-05, "loss": 3.8955, "step": 11160 }, { "epoch": 1.18, "learning_rate": 3.470355436081243e-05, "loss": 3.4261, "step": 11170 }, { "epoch": 1.18, "learning_rate": 3.468488649940263e-05, "loss": 3.6651, "step": 11180 }, { "epoch": 1.18, "learning_rate": 3.466621863799283e-05, "loss": 3.6174, "step": 11190 }, { "epoch": 1.18, "learning_rate": 3.4647550776583035e-05, "loss": 3.7792, "step": 11200 }, { "epoch": 1.18, "learning_rate": 3.462888291517324e-05, "loss": 3.4339, "step": 11210 }, { "epoch": 1.18, "learning_rate": 3.461021505376345e-05, "loss": 3.677, "step": 11220 }, { "epoch": 1.18, "learning_rate": 3.4591547192353645e-05, "loss": 3.7155, "step": 11230 }, { "epoch": 1.18, "learning_rate": 3.457287933094385e-05, "loss": 3.8755, "step": 11240 }, { "epoch": 1.18, "learning_rate": 3.455421146953405e-05, "loss": 3.6512, "step": 11250 }, { "epoch": 1.18, "learning_rate": 3.4535543608124254e-05, "loss": 3.6222, "step": 11260 }, { "epoch": 1.18, "learning_rate": 3.451687574671446e-05, "loss": 3.8166, "step": 11270 }, { "epoch": 1.18, "learning_rate": 3.449820788530466e-05, "loss": 3.5914, "step": 11280 }, { "epoch": 1.18, "learning_rate": 3.4479540023894864e-05, "loss": 3.237, "step": 11290 }, { "epoch": 1.18, "learning_rate": 3.446087216248507e-05, "loss": 3.6738, "step": 11300 }, { "epoch": 1.18, "learning_rate": 3.444220430107527e-05, "loss": 3.653, "step": 11310 }, { "epoch": 1.18, "learning_rate": 3.4423536439665474e-05, "loss": 3.3309, "step": 11320 }, { "epoch": 1.18, "learning_rate": 3.440486857825568e-05, "loss": 3.5133, "step": 11330 }, { "epoch": 1.18, "learning_rate": 3.438620071684588e-05, "loss": 3.3535, "step": 11340 }, { "epoch": 1.18, "learning_rate": 3.436753285543608e-05, "loss": 3.7432, "step": 11350 }, { "epoch": 1.18, "learning_rate": 3.4348864994026287e-05, "loss": 3.1155, "step": 11360 }, { "epoch": 1.18, "learning_rate": 3.433019713261649e-05, "loss": 3.2939, "step": 11370 }, { "epoch": 1.18, "learning_rate": 3.431152927120669e-05, "loss": 3.1033, "step": 11380 }, { "epoch": 1.18, "learning_rate": 3.4292861409796896e-05, "loss": 3.658, "step": 11390 }, { "epoch": 1.18, "learning_rate": 3.427419354838709e-05, "loss": 3.3786, "step": 11400 }, { "epoch": 1.18, "learning_rate": 3.42555256869773e-05, "loss": 3.4903, "step": 11410 }, { "epoch": 1.18, "learning_rate": 3.4236857825567506e-05, "loss": 3.6807, "step": 11420 }, { "epoch": 1.18, "learning_rate": 3.421818996415771e-05, "loss": 3.6701, "step": 11430 }, { "epoch": 1.18, "learning_rate": 3.419952210274791e-05, "loss": 3.47, "step": 11440 }, { "epoch": 1.18, "learning_rate": 3.418085424133811e-05, "loss": 3.647, "step": 11450 }, { "epoch": 1.19, "learning_rate": 3.416218637992832e-05, "loss": 3.6745, "step": 11460 }, { "epoch": 1.19, "learning_rate": 3.414351851851852e-05, "loss": 3.5417, "step": 11470 }, { "epoch": 1.19, "learning_rate": 3.4124850657108725e-05, "loss": 3.331, "step": 11480 }, { "epoch": 1.19, "learning_rate": 3.410618279569893e-05, "loss": 3.3018, "step": 11490 }, { "epoch": 1.19, "learning_rate": 3.4087514934289125e-05, "loss": 3.3233, "step": 11500 }, { "epoch": 1.19, "learning_rate": 3.4068847072879335e-05, "loss": 3.3328, "step": 11510 }, { "epoch": 1.19, "learning_rate": 3.405017921146954e-05, "loss": 3.2761, "step": 11520 }, { "epoch": 1.19, "learning_rate": 3.403151135005974e-05, "loss": 3.4464, "step": 11530 }, { "epoch": 1.19, "learning_rate": 3.4012843488649945e-05, "loss": 3.81, "step": 11540 }, { "epoch": 1.19, "learning_rate": 3.399417562724014e-05, "loss": 3.5985, "step": 11550 }, { "epoch": 1.19, "learning_rate": 3.397550776583035e-05, "loss": 3.3498, "step": 11560 }, { "epoch": 1.19, "learning_rate": 3.3956839904420554e-05, "loss": 3.554, "step": 11570 }, { "epoch": 1.19, "learning_rate": 3.393817204301075e-05, "loss": 3.1683, "step": 11580 }, { "epoch": 1.19, "learning_rate": 3.391950418160096e-05, "loss": 3.4795, "step": 11590 }, { "epoch": 1.19, "learning_rate": 3.390083632019116e-05, "loss": 3.4782, "step": 11600 }, { "epoch": 1.19, "learning_rate": 3.388216845878137e-05, "loss": 3.5869, "step": 11610 }, { "epoch": 1.19, "learning_rate": 3.3863500597371564e-05, "loss": 3.3082, "step": 11620 }, { "epoch": 1.19, "learning_rate": 3.384483273596177e-05, "loss": 3.5159, "step": 11630 }, { "epoch": 1.19, "learning_rate": 3.382616487455198e-05, "loss": 3.2907, "step": 11640 }, { "epoch": 1.19, "learning_rate": 3.380749701314217e-05, "loss": 3.8893, "step": 11650 }, { "epoch": 1.19, "learning_rate": 3.378882915173238e-05, "loss": 3.6042, "step": 11660 }, { "epoch": 1.19, "learning_rate": 3.377016129032258e-05, "loss": 3.666, "step": 11670 }, { "epoch": 1.19, "learning_rate": 3.375149342891278e-05, "loss": 3.4671, "step": 11680 }, { "epoch": 1.19, "learning_rate": 3.373282556750299e-05, "loss": 3.4878, "step": 11690 }, { "epoch": 1.19, "learning_rate": 3.371415770609319e-05, "loss": 3.6441, "step": 11700 }, { "epoch": 1.19, "learning_rate": 3.369548984468339e-05, "loss": 3.3459, "step": 11710 }, { "epoch": 1.19, "learning_rate": 3.3676821983273596e-05, "loss": 2.9724, "step": 11720 }, { "epoch": 1.19, "learning_rate": 3.36581541218638e-05, "loss": 3.0756, "step": 11730 }, { "epoch": 1.19, "learning_rate": 3.363948626045401e-05, "loss": 3.7122, "step": 11740 }, { "epoch": 1.19, "learning_rate": 3.3620818399044205e-05, "loss": 3.5976, "step": 11750 }, { "epoch": 1.2, "learning_rate": 3.360215053763441e-05, "loss": 3.2686, "step": 11760 }, { "epoch": 1.2, "learning_rate": 3.358348267622461e-05, "loss": 3.2859, "step": 11770 }, { "epoch": 1.2, "learning_rate": 3.3564814814814815e-05, "loss": 3.0577, "step": 11780 }, { "epoch": 1.2, "learning_rate": 3.3546146953405025e-05, "loss": 3.4569, "step": 11790 }, { "epoch": 1.2, "learning_rate": 3.352747909199522e-05, "loss": 3.8069, "step": 11800 }, { "epoch": 1.2, "learning_rate": 3.3508811230585425e-05, "loss": 3.8182, "step": 11810 }, { "epoch": 1.2, "learning_rate": 3.349014336917563e-05, "loss": 3.6163, "step": 11820 }, { "epoch": 1.2, "learning_rate": 3.347147550776583e-05, "loss": 3.5424, "step": 11830 }, { "epoch": 1.2, "learning_rate": 3.3452807646356034e-05, "loss": 3.8679, "step": 11840 }, { "epoch": 1.2, "learning_rate": 3.343413978494624e-05, "loss": 3.353, "step": 11850 }, { "epoch": 1.2, "learning_rate": 3.341547192353644e-05, "loss": 3.6672, "step": 11860 }, { "epoch": 1.2, "learning_rate": 3.3396804062126644e-05, "loss": 3.3789, "step": 11870 }, { "epoch": 1.2, "learning_rate": 3.337813620071685e-05, "loss": 3.7498, "step": 11880 }, { "epoch": 1.2, "learning_rate": 3.335946833930705e-05, "loss": 3.6776, "step": 11890 }, { "epoch": 1.2, "learning_rate": 3.3340800477897254e-05, "loss": 4.0729, "step": 11900 }, { "epoch": 1.2, "eval_accuracy": 0.1013342949873783, "eval_loss": 3.613358736038208, "eval_runtime": 776.6446, "eval_samples_per_second": 3.57, "eval_steps_per_second": 1.786, "step": 11904 }, { "epoch": 2.0, "learning_rate": 3.332213261648746e-05, "loss": 3.6047, "step": 11910 }, { "epoch": 2.0, "learning_rate": 3.330346475507766e-05, "loss": 3.775, "step": 11920 }, { "epoch": 2.0, "learning_rate": 3.328479689366786e-05, "loss": 3.3869, "step": 11930 }, { "epoch": 2.0, "learning_rate": 3.3266129032258067e-05, "loss": 3.7641, "step": 11940 }, { "epoch": 2.0, "learning_rate": 3.324746117084827e-05, "loss": 3.0311, "step": 11950 }, { "epoch": 2.0, "learning_rate": 3.322879330943847e-05, "loss": 3.552, "step": 11960 }, { "epoch": 2.0, "learning_rate": 3.321012544802867e-05, "loss": 3.6877, "step": 11970 }, { "epoch": 2.0, "learning_rate": 3.319145758661888e-05, "loss": 3.2318, "step": 11980 }, { "epoch": 2.0, "learning_rate": 3.317278972520908e-05, "loss": 3.3082, "step": 11990 }, { "epoch": 2.0, "learning_rate": 3.3154121863799286e-05, "loss": 3.2824, "step": 12000 }, { "epoch": 2.0, "learning_rate": 3.313545400238949e-05, "loss": 3.6182, "step": 12010 }, { "epoch": 2.0, "learning_rate": 3.3116786140979685e-05, "loss": 3.609, "step": 12020 }, { "epoch": 2.0, "learning_rate": 3.3098118279569895e-05, "loss": 3.3955, "step": 12030 }, { "epoch": 2.0, "learning_rate": 3.30794504181601e-05, "loss": 3.361, "step": 12040 }, { "epoch": 2.0, "learning_rate": 3.30607825567503e-05, "loss": 3.4298, "step": 12050 }, { "epoch": 2.01, "learning_rate": 3.3042114695340505e-05, "loss": 3.7064, "step": 12060 }, { "epoch": 2.01, "learning_rate": 3.30234468339307e-05, "loss": 2.9035, "step": 12070 }, { "epoch": 2.01, "learning_rate": 3.300477897252091e-05, "loss": 3.2812, "step": 12080 }, { "epoch": 2.01, "learning_rate": 3.2986111111111115e-05, "loss": 3.6022, "step": 12090 }, { "epoch": 2.01, "learning_rate": 3.296744324970131e-05, "loss": 3.3532, "step": 12100 }, { "epoch": 2.01, "learning_rate": 3.294877538829152e-05, "loss": 3.8214, "step": 12110 }, { "epoch": 2.01, "learning_rate": 3.293010752688172e-05, "loss": 3.6251, "step": 12120 }, { "epoch": 2.01, "learning_rate": 3.291143966547193e-05, "loss": 3.4907, "step": 12130 }, { "epoch": 2.01, "learning_rate": 3.289277180406213e-05, "loss": 3.0279, "step": 12140 }, { "epoch": 2.01, "learning_rate": 3.287410394265233e-05, "loss": 3.6172, "step": 12150 }, { "epoch": 2.01, "learning_rate": 3.285543608124254e-05, "loss": 3.1314, "step": 12160 }, { "epoch": 2.01, "learning_rate": 3.2836768219832734e-05, "loss": 3.4209, "step": 12170 }, { "epoch": 2.01, "learning_rate": 3.2818100358422944e-05, "loss": 3.1885, "step": 12180 }, { "epoch": 2.01, "learning_rate": 3.279943249701315e-05, "loss": 4.0893, "step": 12190 }, { "epoch": 2.01, "learning_rate": 3.2780764635603343e-05, "loss": 3.1239, "step": 12200 }, { "epoch": 2.01, "learning_rate": 3.2762096774193553e-05, "loss": 3.3743, "step": 12210 }, { "epoch": 2.01, "learning_rate": 3.274342891278375e-05, "loss": 3.3845, "step": 12220 }, { "epoch": 2.01, "learning_rate": 3.272476105137396e-05, "loss": 2.9713, "step": 12230 }, { "epoch": 2.01, "learning_rate": 3.270609318996416e-05, "loss": 3.3186, "step": 12240 }, { "epoch": 2.01, "learning_rate": 3.268742532855436e-05, "loss": 3.4701, "step": 12250 }, { "epoch": 2.01, "learning_rate": 3.266875746714457e-05, "loss": 3.2487, "step": 12260 }, { "epoch": 2.01, "learning_rate": 3.2650089605734766e-05, "loss": 3.5087, "step": 12270 }, { "epoch": 2.01, "learning_rate": 3.263142174432497e-05, "loss": 3.4492, "step": 12280 }, { "epoch": 2.01, "learning_rate": 3.261275388291517e-05, "loss": 3.1935, "step": 12290 }, { "epoch": 2.01, "learning_rate": 3.2594086021505376e-05, "loss": 3.2086, "step": 12300 }, { "epoch": 2.01, "learning_rate": 3.2575418160095586e-05, "loss": 2.8732, "step": 12310 }, { "epoch": 2.01, "learning_rate": 3.255675029868578e-05, "loss": 4.0379, "step": 12320 }, { "epoch": 2.01, "learning_rate": 3.2538082437275985e-05, "loss": 3.3803, "step": 12330 }, { "epoch": 2.01, "learning_rate": 3.251941457586619e-05, "loss": 3.4455, "step": 12340 }, { "epoch": 2.01, "learning_rate": 3.250074671445639e-05, "loss": 3.6026, "step": 12350 }, { "epoch": 2.02, "learning_rate": 3.24820788530466e-05, "loss": 3.6799, "step": 12360 }, { "epoch": 2.02, "learning_rate": 3.24634109916368e-05, "loss": 3.2898, "step": 12370 }, { "epoch": 2.02, "learning_rate": 3.2444743130227e-05, "loss": 3.6444, "step": 12380 }, { "epoch": 2.02, "learning_rate": 3.2426075268817205e-05, "loss": 3.1618, "step": 12390 }, { "epoch": 2.02, "learning_rate": 3.240740740740741e-05, "loss": 3.4954, "step": 12400 }, { "epoch": 2.02, "learning_rate": 3.238873954599761e-05, "loss": 3.4859, "step": 12410 }, { "epoch": 2.02, "learning_rate": 3.2370071684587814e-05, "loss": 3.2952, "step": 12420 }, { "epoch": 2.02, "learning_rate": 3.235140382317802e-05, "loss": 3.8386, "step": 12430 }, { "epoch": 2.02, "learning_rate": 3.233273596176822e-05, "loss": 2.5948, "step": 12440 }, { "epoch": 2.02, "learning_rate": 3.2314068100358424e-05, "loss": 3.5557, "step": 12450 }, { "epoch": 2.02, "learning_rate": 3.229540023894863e-05, "loss": 3.4629, "step": 12460 }, { "epoch": 2.02, "learning_rate": 3.227673237753883e-05, "loss": 3.1708, "step": 12470 }, { "epoch": 2.02, "learning_rate": 3.2258064516129034e-05, "loss": 3.5862, "step": 12480 }, { "epoch": 2.02, "learning_rate": 3.223939665471924e-05, "loss": 3.5196, "step": 12490 }, { "epoch": 2.02, "learning_rate": 3.222072879330944e-05, "loss": 3.5019, "step": 12500 }, { "epoch": 2.02, "learning_rate": 3.220206093189964e-05, "loss": 3.4852, "step": 12510 }, { "epoch": 2.02, "learning_rate": 3.2183393070489846e-05, "loss": 3.1762, "step": 12520 }, { "epoch": 2.02, "learning_rate": 3.216472520908005e-05, "loss": 3.278, "step": 12530 }, { "epoch": 2.02, "learning_rate": 3.214605734767025e-05, "loss": 3.1641, "step": 12540 }, { "epoch": 2.02, "learning_rate": 3.2127389486260456e-05, "loss": 3.6227, "step": 12550 }, { "epoch": 2.02, "learning_rate": 3.210872162485066e-05, "loss": 3.0787, "step": 12560 }, { "epoch": 2.02, "learning_rate": 3.209005376344086e-05, "loss": 3.4675, "step": 12570 }, { "epoch": 2.02, "learning_rate": 3.2071385902031066e-05, "loss": 3.2655, "step": 12580 }, { "epoch": 2.02, "learning_rate": 3.205271804062127e-05, "loss": 2.8608, "step": 12590 }, { "epoch": 2.02, "learning_rate": 3.203405017921147e-05, "loss": 3.5838, "step": 12600 }, { "epoch": 2.02, "learning_rate": 3.2015382317801675e-05, "loss": 2.9237, "step": 12610 }, { "epoch": 2.02, "learning_rate": 3.199671445639188e-05, "loss": 3.1729, "step": 12620 }, { "epoch": 2.02, "learning_rate": 3.197804659498208e-05, "loss": 3.4304, "step": 12630 }, { "epoch": 2.02, "learning_rate": 3.195937873357228e-05, "loss": 3.1927, "step": 12640 }, { "epoch": 2.03, "learning_rate": 3.194071087216249e-05, "loss": 3.5452, "step": 12650 }, { "epoch": 2.03, "learning_rate": 3.192204301075269e-05, "loss": 3.9063, "step": 12660 }, { "epoch": 2.03, "learning_rate": 3.190337514934289e-05, "loss": 3.3028, "step": 12670 }, { "epoch": 2.03, "learning_rate": 3.18847072879331e-05, "loss": 3.4593, "step": 12680 }, { "epoch": 2.03, "learning_rate": 3.1866039426523294e-05, "loss": 3.5563, "step": 12690 }, { "epoch": 2.03, "learning_rate": 3.1847371565113504e-05, "loss": 3.439, "step": 12700 }, { "epoch": 2.03, "learning_rate": 3.182870370370371e-05, "loss": 3.0588, "step": 12710 }, { "epoch": 2.03, "learning_rate": 3.1810035842293904e-05, "loss": 3.6409, "step": 12720 }, { "epoch": 2.03, "learning_rate": 3.1791367980884114e-05, "loss": 3.8135, "step": 12730 }, { "epoch": 2.03, "learning_rate": 3.177270011947431e-05, "loss": 3.4293, "step": 12740 }, { "epoch": 2.03, "learning_rate": 3.175403225806452e-05, "loss": 3.1347, "step": 12750 }, { "epoch": 2.03, "learning_rate": 3.1735364396654724e-05, "loss": 3.4132, "step": 12760 }, { "epoch": 2.03, "learning_rate": 3.171669653524492e-05, "loss": 2.8791, "step": 12770 }, { "epoch": 2.03, "learning_rate": 3.169802867383513e-05, "loss": 3.6841, "step": 12780 }, { "epoch": 2.03, "learning_rate": 3.1679360812425327e-05, "loss": 3.2535, "step": 12790 }, { "epoch": 2.03, "learning_rate": 3.1660692951015537e-05, "loss": 3.7857, "step": 12800 }, { "epoch": 2.03, "learning_rate": 3.164202508960574e-05, "loss": 3.5299, "step": 12810 }, { "epoch": 2.03, "learning_rate": 3.1623357228195936e-05, "loss": 3.1984, "step": 12820 }, { "epoch": 2.03, "learning_rate": 3.1604689366786146e-05, "loss": 3.3357, "step": 12830 }, { "epoch": 2.03, "learning_rate": 3.158602150537634e-05, "loss": 2.9768, "step": 12840 }, { "epoch": 2.03, "learning_rate": 3.1567353643966546e-05, "loss": 3.16, "step": 12850 }, { "epoch": 2.03, "learning_rate": 3.1548685782556756e-05, "loss": 3.0956, "step": 12860 }, { "epoch": 2.03, "learning_rate": 3.153001792114695e-05, "loss": 3.3066, "step": 12870 }, { "epoch": 2.03, "learning_rate": 3.151135005973716e-05, "loss": 3.2043, "step": 12880 }, { "epoch": 2.03, "learning_rate": 3.149268219832736e-05, "loss": 3.0773, "step": 12890 }, { "epoch": 2.03, "learning_rate": 3.147401433691756e-05, "loss": 3.7041, "step": 12900 }, { "epoch": 2.03, "learning_rate": 3.145534647550777e-05, "loss": 3.146, "step": 12910 }, { "epoch": 2.03, "learning_rate": 3.143667861409797e-05, "loss": 3.7534, "step": 12920 }, { "epoch": 2.03, "learning_rate": 3.141801075268818e-05, "loss": 3.2105, "step": 12930 }, { "epoch": 2.03, "learning_rate": 3.1399342891278375e-05, "loss": 3.3437, "step": 12940 }, { "epoch": 2.04, "learning_rate": 3.138067502986858e-05, "loss": 3.191, "step": 12950 }, { "epoch": 2.04, "learning_rate": 3.136200716845878e-05, "loss": 3.2255, "step": 12960 }, { "epoch": 2.04, "learning_rate": 3.1343339307048985e-05, "loss": 3.5001, "step": 12970 }, { "epoch": 2.04, "learning_rate": 3.132467144563919e-05, "loss": 3.2496, "step": 12980 }, { "epoch": 2.04, "learning_rate": 3.130600358422939e-05, "loss": 3.3881, "step": 12990 }, { "epoch": 2.04, "learning_rate": 3.1287335722819594e-05, "loss": 3.5971, "step": 13000 }, { "epoch": 2.04, "learning_rate": 3.12686678614098e-05, "loss": 3.1389, "step": 13010 }, { "epoch": 2.04, "learning_rate": 3.125e-05, "loss": 3.7135, "step": 13020 }, { "epoch": 2.04, "learning_rate": 3.1231332138590204e-05, "loss": 3.5104, "step": 13030 }, { "epoch": 2.04, "learning_rate": 3.121266427718041e-05, "loss": 3.4578, "step": 13040 }, { "epoch": 2.04, "learning_rate": 3.119399641577061e-05, "loss": 3.3797, "step": 13050 }, { "epoch": 2.04, "learning_rate": 3.1175328554360814e-05, "loss": 3.2117, "step": 13060 }, { "epoch": 2.04, "learning_rate": 3.115666069295102e-05, "loss": 3.5261, "step": 13070 }, { "epoch": 2.04, "learning_rate": 3.113799283154122e-05, "loss": 3.0737, "step": 13080 }, { "epoch": 2.04, "learning_rate": 3.111932497013142e-05, "loss": 3.6288, "step": 13090 }, { "epoch": 2.04, "learning_rate": 3.1100657108721626e-05, "loss": 3.4274, "step": 13100 }, { "epoch": 2.04, "learning_rate": 3.108198924731183e-05, "loss": 3.2464, "step": 13110 }, { "epoch": 2.04, "learning_rate": 3.106332138590203e-05, "loss": 3.5461, "step": 13120 }, { "epoch": 2.04, "learning_rate": 3.1044653524492236e-05, "loss": 3.7019, "step": 13130 }, { "epoch": 2.04, "learning_rate": 3.102598566308244e-05, "loss": 2.8702, "step": 13140 }, { "epoch": 2.04, "learning_rate": 3.100731780167264e-05, "loss": 3.6125, "step": 13150 }, { "epoch": 2.04, "learning_rate": 3.0988649940262846e-05, "loss": 3.2217, "step": 13160 }, { "epoch": 2.04, "learning_rate": 3.096998207885305e-05, "loss": 3.1566, "step": 13170 }, { "epoch": 2.04, "learning_rate": 3.095131421744325e-05, "loss": 3.2496, "step": 13180 }, { "epoch": 2.04, "learning_rate": 3.0932646356033455e-05, "loss": 2.8286, "step": 13190 }, { "epoch": 2.04, "learning_rate": 3.091397849462366e-05, "loss": 3.3238, "step": 13200 }, { "epoch": 2.04, "learning_rate": 3.089531063321386e-05, "loss": 3.0222, "step": 13210 }, { "epoch": 2.04, "learning_rate": 3.0876642771804065e-05, "loss": 3.5437, "step": 13220 }, { "epoch": 2.04, "learning_rate": 3.085797491039427e-05, "loss": 3.1671, "step": 13230 }, { "epoch": 2.04, "learning_rate": 3.0839307048984465e-05, "loss": 3.3449, "step": 13240 }, { "epoch": 2.05, "learning_rate": 3.0820639187574675e-05, "loss": 3.3196, "step": 13250 }, { "epoch": 2.05, "learning_rate": 3.080197132616488e-05, "loss": 3.3471, "step": 13260 }, { "epoch": 2.05, "learning_rate": 3.078330346475508e-05, "loss": 3.3781, "step": 13270 }, { "epoch": 2.05, "learning_rate": 3.0764635603345284e-05, "loss": 3.6882, "step": 13280 }, { "epoch": 2.05, "learning_rate": 3.074596774193548e-05, "loss": 3.5759, "step": 13290 }, { "epoch": 2.05, "learning_rate": 3.072729988052569e-05, "loss": 3.3492, "step": 13300 }, { "epoch": 2.05, "learning_rate": 3.070863201911589e-05, "loss": 3.6678, "step": 13310 }, { "epoch": 2.05, "learning_rate": 3.06899641577061e-05, "loss": 3.4061, "step": 13320 }, { "epoch": 2.05, "learning_rate": 3.06712962962963e-05, "loss": 3.4863, "step": 13330 }, { "epoch": 2.05, "learning_rate": 3.06526284348865e-05, "loss": 3.4559, "step": 13340 }, { "epoch": 2.05, "learning_rate": 3.063396057347671e-05, "loss": 3.2953, "step": 13350 }, { "epoch": 2.05, "learning_rate": 3.06152927120669e-05, "loss": 3.3999, "step": 13360 }, { "epoch": 2.05, "learning_rate": 3.059662485065711e-05, "loss": 3.2553, "step": 13370 }, { "epoch": 2.05, "learning_rate": 3.0577956989247317e-05, "loss": 3.032, "step": 13380 }, { "epoch": 2.05, "learning_rate": 3.055928912783751e-05, "loss": 3.3624, "step": 13390 }, { "epoch": 2.05, "learning_rate": 3.054062126642772e-05, "loss": 2.9003, "step": 13400 }, { "epoch": 2.05, "learning_rate": 3.052195340501792e-05, "loss": 3.0083, "step": 13410 }, { "epoch": 2.05, "learning_rate": 3.0503285543608123e-05, "loss": 2.6751, "step": 13420 }, { "epoch": 2.05, "learning_rate": 3.048461768219833e-05, "loss": 2.481, "step": 13430 }, { "epoch": 2.05, "learning_rate": 3.046594982078853e-05, "loss": 3.3208, "step": 13440 }, { "epoch": 2.05, "learning_rate": 3.0447281959378736e-05, "loss": 3.2543, "step": 13450 }, { "epoch": 2.05, "learning_rate": 3.042861409796894e-05, "loss": 2.9154, "step": 13460 }, { "epoch": 2.05, "learning_rate": 3.040994623655914e-05, "loss": 2.8689, "step": 13470 }, { "epoch": 2.05, "learning_rate": 3.0391278375149345e-05, "loss": 3.3488, "step": 13480 }, { "epoch": 2.05, "learning_rate": 3.0372610513739545e-05, "loss": 2.9796, "step": 13490 }, { "epoch": 2.05, "learning_rate": 3.0353942652329752e-05, "loss": 3.3013, "step": 13500 }, { "epoch": 2.05, "learning_rate": 3.0335274790919955e-05, "loss": 3.0864, "step": 13510 }, { "epoch": 2.05, "learning_rate": 3.0316606929510155e-05, "loss": 3.0855, "step": 13520 }, { "epoch": 2.05, "learning_rate": 3.029793906810036e-05, "loss": 3.1442, "step": 13530 }, { "epoch": 2.05, "learning_rate": 3.027927120669056e-05, "loss": 3.1092, "step": 13540 }, { "epoch": 2.06, "learning_rate": 3.0260603345280764e-05, "loss": 3.4295, "step": 13550 }, { "epoch": 2.06, "learning_rate": 3.024193548387097e-05, "loss": 3.3333, "step": 13560 }, { "epoch": 2.06, "learning_rate": 3.022326762246117e-05, "loss": 3.3782, "step": 13570 }, { "epoch": 2.06, "learning_rate": 3.0204599761051378e-05, "loss": 2.9689, "step": 13580 }, { "epoch": 2.06, "learning_rate": 3.0185931899641577e-05, "loss": 3.4291, "step": 13590 }, { "epoch": 2.06, "learning_rate": 3.016726403823178e-05, "loss": 2.9132, "step": 13600 }, { "epoch": 2.06, "learning_rate": 3.0148596176821987e-05, "loss": 3.3764, "step": 13610 }, { "epoch": 2.06, "learning_rate": 3.0129928315412187e-05, "loss": 3.759, "step": 13620 }, { "epoch": 2.06, "learning_rate": 3.0111260454002394e-05, "loss": 3.4126, "step": 13630 }, { "epoch": 2.06, "learning_rate": 3.0092592592592593e-05, "loss": 3.2763, "step": 13640 }, { "epoch": 2.06, "learning_rate": 3.0073924731182797e-05, "loss": 3.4596, "step": 13650 }, { "epoch": 2.06, "learning_rate": 3.0055256869773003e-05, "loss": 3.4691, "step": 13660 }, { "epoch": 2.06, "learning_rate": 3.0036589008363203e-05, "loss": 3.4544, "step": 13670 }, { "epoch": 2.06, "learning_rate": 3.0017921146953403e-05, "loss": 2.9391, "step": 13680 }, { "epoch": 2.06, "learning_rate": 2.999925328554361e-05, "loss": 3.4471, "step": 13690 }, { "epoch": 2.06, "learning_rate": 2.9980585424133813e-05, "loss": 3.2507, "step": 13700 }, { "epoch": 2.06, "learning_rate": 2.996191756272402e-05, "loss": 3.1831, "step": 13710 }, { "epoch": 2.06, "learning_rate": 2.994324970131422e-05, "loss": 3.1339, "step": 13720 }, { "epoch": 2.06, "learning_rate": 2.992458183990442e-05, "loss": 3.1629, "step": 13730 }, { "epoch": 2.06, "learning_rate": 2.9905913978494626e-05, "loss": 3.4887, "step": 13740 }, { "epoch": 2.06, "learning_rate": 2.988724611708483e-05, "loss": 3.5253, "step": 13750 }, { "epoch": 2.06, "learning_rate": 2.9868578255675032e-05, "loss": 3.4942, "step": 13760 }, { "epoch": 2.06, "learning_rate": 2.9849910394265235e-05, "loss": 3.6667, "step": 13770 }, { "epoch": 2.06, "learning_rate": 2.9831242532855435e-05, "loss": 3.4804, "step": 13780 }, { "epoch": 2.06, "learning_rate": 2.9812574671445642e-05, "loss": 3.1605, "step": 13790 }, { "epoch": 2.06, "learning_rate": 2.979390681003584e-05, "loss": 3.1899, "step": 13800 }, { "epoch": 2.06, "learning_rate": 2.9775238948626045e-05, "loss": 3.2916, "step": 13810 }, { "epoch": 2.06, "learning_rate": 2.975657108721625e-05, "loss": 3.6614, "step": 13820 }, { "epoch": 2.06, "learning_rate": 2.973790322580645e-05, "loss": 3.5104, "step": 13830 }, { "epoch": 2.07, "learning_rate": 2.9719235364396658e-05, "loss": 3.6256, "step": 13840 }, { "epoch": 2.07, "learning_rate": 2.9700567502986858e-05, "loss": 3.3907, "step": 13850 }, { "epoch": 2.07, "learning_rate": 2.968189964157706e-05, "loss": 3.0289, "step": 13860 }, { "epoch": 2.07, "learning_rate": 2.9663231780167268e-05, "loss": 3.3316, "step": 13870 }, { "epoch": 2.07, "learning_rate": 2.9644563918757467e-05, "loss": 2.9692, "step": 13880 }, { "epoch": 2.07, "learning_rate": 2.9625896057347674e-05, "loss": 3.084, "step": 13890 }, { "epoch": 2.07, "learning_rate": 2.9607228195937874e-05, "loss": 3.2942, "step": 13900 }, { "epoch": 2.07, "learning_rate": 2.9588560334528077e-05, "loss": 3.1822, "step": 13910 }, { "epoch": 2.07, "learning_rate": 2.9569892473118284e-05, "loss": 2.8381, "step": 13920 }, { "epoch": 2.07, "learning_rate": 2.9551224611708483e-05, "loss": 3.6579, "step": 13930 }, { "epoch": 2.07, "learning_rate": 2.953255675029869e-05, "loss": 2.8121, "step": 13940 }, { "epoch": 2.07, "learning_rate": 2.951388888888889e-05, "loss": 2.922, "step": 13950 }, { "epoch": 2.07, "learning_rate": 2.9495221027479093e-05, "loss": 3.0235, "step": 13960 }, { "epoch": 2.07, "learning_rate": 2.94765531660693e-05, "loss": 2.9917, "step": 13970 }, { "epoch": 2.07, "learning_rate": 2.94578853046595e-05, "loss": 3.4005, "step": 13980 }, { "epoch": 2.07, "learning_rate": 2.94392174432497e-05, "loss": 3.6017, "step": 13990 }, { "epoch": 2.07, "learning_rate": 2.9420549581839906e-05, "loss": 2.9952, "step": 14000 }, { "epoch": 2.07, "learning_rate": 2.940188172043011e-05, "loss": 3.4786, "step": 14010 }, { "epoch": 2.07, "learning_rate": 2.9383213859020316e-05, "loss": 3.096, "step": 14020 }, { "epoch": 2.07, "learning_rate": 2.9364545997610516e-05, "loss": 3.2039, "step": 14030 }, { "epoch": 2.07, "learning_rate": 2.9345878136200715e-05, "loss": 3.4426, "step": 14040 }, { "epoch": 2.07, "learning_rate": 2.9327210274790922e-05, "loss": 3.1788, "step": 14050 }, { "epoch": 2.07, "learning_rate": 2.9308542413381125e-05, "loss": 3.2147, "step": 14060 }, { "epoch": 2.07, "learning_rate": 2.9289874551971332e-05, "loss": 2.8619, "step": 14070 }, { "epoch": 2.07, "learning_rate": 2.9271206690561532e-05, "loss": 3.4715, "step": 14080 }, { "epoch": 2.07, "learning_rate": 2.925253882915173e-05, "loss": 3.5248, "step": 14090 }, { "epoch": 2.07, "learning_rate": 2.9233870967741938e-05, "loss": 3.261, "step": 14100 }, { "epoch": 2.07, "learning_rate": 2.9215203106332138e-05, "loss": 3.0448, "step": 14110 }, { "epoch": 2.07, "learning_rate": 2.919653524492234e-05, "loss": 3.126, "step": 14120 }, { "epoch": 2.07, "learning_rate": 2.9177867383512548e-05, "loss": 3.4233, "step": 14130 }, { "epoch": 2.08, "learning_rate": 2.9159199522102748e-05, "loss": 3.2681, "step": 14140 }, { "epoch": 2.08, "learning_rate": 2.9140531660692954e-05, "loss": 3.6574, "step": 14150 }, { "epoch": 2.08, "learning_rate": 2.9121863799283154e-05, "loss": 3.3647, "step": 14160 }, { "epoch": 2.08, "learning_rate": 2.9103195937873357e-05, "loss": 3.2597, "step": 14170 }, { "epoch": 2.08, "learning_rate": 2.9084528076463564e-05, "loss": 3.1914, "step": 14180 }, { "epoch": 2.08, "learning_rate": 2.9065860215053764e-05, "loss": 3.4186, "step": 14190 }, { "epoch": 2.08, "learning_rate": 2.904719235364397e-05, "loss": 3.0038, "step": 14200 }, { "epoch": 2.08, "learning_rate": 2.902852449223417e-05, "loss": 3.192, "step": 14210 }, { "epoch": 2.08, "learning_rate": 2.9009856630824373e-05, "loss": 2.9701, "step": 14220 }, { "epoch": 2.08, "learning_rate": 2.899118876941458e-05, "loss": 3.3854, "step": 14230 }, { "epoch": 2.08, "learning_rate": 2.897252090800478e-05, "loss": 2.9638, "step": 14240 }, { "epoch": 2.08, "learning_rate": 2.895385304659498e-05, "loss": 3.0477, "step": 14250 }, { "epoch": 2.08, "learning_rate": 2.8935185185185186e-05, "loss": 3.0335, "step": 14260 }, { "epoch": 2.08, "learning_rate": 2.891651732377539e-05, "loss": 3.1699, "step": 14270 }, { "epoch": 2.08, "learning_rate": 2.8897849462365596e-05, "loss": 3.3295, "step": 14280 }, { "epoch": 2.08, "learning_rate": 2.8879181600955796e-05, "loss": 3.3094, "step": 14290 }, { "epoch": 2.08, "learning_rate": 2.8860513739545996e-05, "loss": 3.0889, "step": 14300 }, { "epoch": 2.08, "learning_rate": 2.8841845878136202e-05, "loss": 2.8619, "step": 14310 }, { "epoch": 2.08, "learning_rate": 2.8823178016726406e-05, "loss": 3.1476, "step": 14320 }, { "epoch": 2.08, "learning_rate": 2.8804510155316612e-05, "loss": 2.9974, "step": 14330 }, { "epoch": 2.08, "learning_rate": 2.8785842293906812e-05, "loss": 3.5473, "step": 14340 }, { "epoch": 2.08, "learning_rate": 2.8767174432497012e-05, "loss": 3.3988, "step": 14350 }, { "epoch": 2.08, "learning_rate": 2.874850657108722e-05, "loss": 3.2914, "step": 14360 }, { "epoch": 2.08, "learning_rate": 2.8729838709677422e-05, "loss": 2.8615, "step": 14370 }, { "epoch": 2.08, "learning_rate": 2.871117084826762e-05, "loss": 3.0331, "step": 14380 }, { "epoch": 2.08, "learning_rate": 2.8692502986857828e-05, "loss": 3.3181, "step": 14390 }, { "epoch": 2.08, "learning_rate": 2.8673835125448028e-05, "loss": 3.0743, "step": 14400 }, { "epoch": 2.08, "learning_rate": 2.8655167264038235e-05, "loss": 3.303, "step": 14410 }, { "epoch": 2.08, "learning_rate": 2.8636499402628438e-05, "loss": 3.7401, "step": 14420 }, { "epoch": 2.08, "learning_rate": 2.8617831541218638e-05, "loss": 3.0898, "step": 14430 }, { "epoch": 2.09, "learning_rate": 2.8599163679808844e-05, "loss": 3.1395, "step": 14440 }, { "epoch": 2.09, "learning_rate": 2.8580495818399044e-05, "loss": 2.7102, "step": 14450 }, { "epoch": 2.09, "learning_rate": 2.856182795698925e-05, "loss": 3.362, "step": 14460 }, { "epoch": 2.09, "learning_rate": 2.854316009557945e-05, "loss": 3.2069, "step": 14470 }, { "epoch": 2.09, "learning_rate": 2.8524492234169654e-05, "loss": 3.3179, "step": 14480 }, { "epoch": 2.09, "learning_rate": 2.850582437275986e-05, "loss": 3.1048, "step": 14490 }, { "epoch": 2.09, "learning_rate": 2.848715651135006e-05, "loss": 3.3805, "step": 14500 }, { "epoch": 2.09, "learning_rate": 2.846848864994026e-05, "loss": 3.4358, "step": 14510 }, { "epoch": 2.09, "learning_rate": 2.8449820788530467e-05, "loss": 3.1769, "step": 14520 }, { "epoch": 2.09, "learning_rate": 2.843115292712067e-05, "loss": 2.6418, "step": 14530 }, { "epoch": 2.09, "learning_rate": 2.8412485065710876e-05, "loss": 3.3678, "step": 14540 }, { "epoch": 2.09, "learning_rate": 2.8393817204301076e-05, "loss": 3.0592, "step": 14550 }, { "epoch": 2.09, "learning_rate": 2.8375149342891276e-05, "loss": 3.4816, "step": 14560 }, { "epoch": 2.09, "learning_rate": 2.8356481481481483e-05, "loss": 3.0546, "step": 14570 }, { "epoch": 2.09, "learning_rate": 2.8337813620071686e-05, "loss": 3.0405, "step": 14580 }, { "epoch": 2.09, "learning_rate": 2.8319145758661893e-05, "loss": 2.321, "step": 14590 }, { "epoch": 2.09, "learning_rate": 2.8300477897252092e-05, "loss": 3.3545, "step": 14600 }, { "epoch": 2.09, "learning_rate": 2.8281810035842292e-05, "loss": 3.4742, "step": 14610 }, { "epoch": 2.09, "learning_rate": 2.82631421744325e-05, "loss": 3.3887, "step": 14620 }, { "epoch": 2.09, "learning_rate": 2.8244474313022702e-05, "loss": 3.3564, "step": 14630 }, { "epoch": 2.09, "learning_rate": 2.822580645161291e-05, "loss": 3.3283, "step": 14640 }, { "epoch": 2.09, "learning_rate": 2.820713859020311e-05, "loss": 3.0305, "step": 14650 }, { "epoch": 2.09, "learning_rate": 2.8188470728793308e-05, "loss": 3.0675, "step": 14660 }, { "epoch": 2.09, "learning_rate": 2.8169802867383515e-05, "loss": 2.8462, "step": 14670 }, { "epoch": 2.09, "learning_rate": 2.8151135005973718e-05, "loss": 3.0996, "step": 14680 }, { "epoch": 2.09, "learning_rate": 2.8132467144563918e-05, "loss": 2.7608, "step": 14690 }, { "epoch": 2.09, "learning_rate": 2.8113799283154125e-05, "loss": 2.9715, "step": 14700 }, { "epoch": 2.09, "learning_rate": 2.8095131421744324e-05, "loss": 3.2812, "step": 14710 }, { "epoch": 2.09, "learning_rate": 2.807646356033453e-05, "loss": 3.4222, "step": 14720 }, { "epoch": 2.09, "learning_rate": 2.8057795698924734e-05, "loss": 2.9256, "step": 14730 }, { "epoch": 2.1, "learning_rate": 2.8039127837514934e-05, "loss": 3.3173, "step": 14740 }, { "epoch": 2.1, "learning_rate": 2.802045997610514e-05, "loss": 3.1419, "step": 14750 }, { "epoch": 2.1, "learning_rate": 2.800179211469534e-05, "loss": 3.4477, "step": 14760 }, { "epoch": 2.1, "learning_rate": 2.7983124253285547e-05, "loss": 3.3932, "step": 14770 }, { "epoch": 2.1, "learning_rate": 2.7964456391875747e-05, "loss": 2.7912, "step": 14780 }, { "epoch": 2.1, "learning_rate": 2.794578853046595e-05, "loss": 3.2495, "step": 14790 }, { "epoch": 2.1, "learning_rate": 2.7927120669056157e-05, "loss": 2.8159, "step": 14800 }, { "epoch": 2.1, "learning_rate": 2.7908452807646357e-05, "loss": 3.0967, "step": 14810 }, { "epoch": 2.1, "learning_rate": 2.7889784946236556e-05, "loss": 2.7874, "step": 14820 }, { "epoch": 2.1, "learning_rate": 2.7871117084826763e-05, "loss": 2.918, "step": 14830 }, { "epoch": 2.1, "learning_rate": 2.7852449223416966e-05, "loss": 2.785, "step": 14840 }, { "epoch": 2.1, "learning_rate": 2.7833781362007173e-05, "loss": 3.0679, "step": 14850 }, { "epoch": 2.1, "learning_rate": 2.7815113500597373e-05, "loss": 3.1987, "step": 14860 }, { "epoch": 2.1, "learning_rate": 2.7796445639187573e-05, "loss": 2.7207, "step": 14870 }, { "epoch": 2.1, "learning_rate": 2.777777777777778e-05, "loss": 3.1899, "step": 14880 }, { "epoch": 2.1, "learning_rate": 2.7759109916367982e-05, "loss": 2.7725, "step": 14890 }, { "epoch": 2.1, "learning_rate": 2.774044205495819e-05, "loss": 2.8578, "step": 14900 }, { "epoch": 2.1, "learning_rate": 2.772177419354839e-05, "loss": 2.8793, "step": 14910 }, { "epoch": 2.1, "learning_rate": 2.770310633213859e-05, "loss": 3.122, "step": 14920 }, { "epoch": 2.1, "learning_rate": 2.7684438470728795e-05, "loss": 3.1156, "step": 14930 }, { "epoch": 2.1, "learning_rate": 2.7665770609319e-05, "loss": 3.4035, "step": 14940 }, { "epoch": 2.1, "learning_rate": 2.7647102747909198e-05, "loss": 3.0444, "step": 14950 }, { "epoch": 2.1, "learning_rate": 2.7628434886499405e-05, "loss": 3.4004, "step": 14960 }, { "epoch": 2.1, "learning_rate": 2.7609767025089605e-05, "loss": 3.0023, "step": 14970 }, { "epoch": 2.1, "learning_rate": 2.759109916367981e-05, "loss": 3.1442, "step": 14980 }, { "epoch": 2.1, "learning_rate": 2.7572431302270015e-05, "loss": 2.9599, "step": 14990 }, { "epoch": 2.1, "learning_rate": 2.7553763440860214e-05, "loss": 2.9346, "step": 15000 }, { "epoch": 2.1, "learning_rate": 2.753509557945042e-05, "loss": 3.3563, "step": 15010 }, { "epoch": 2.1, "learning_rate": 2.751642771804062e-05, "loss": 3.1207, "step": 15020 }, { "epoch": 2.11, "learning_rate": 2.7497759856630827e-05, "loss": 2.9311, "step": 15030 }, { "epoch": 2.11, "learning_rate": 2.747909199522103e-05, "loss": 2.9882, "step": 15040 }, { "epoch": 2.11, "learning_rate": 2.746042413381123e-05, "loss": 3.3397, "step": 15050 }, { "epoch": 2.11, "learning_rate": 2.7441756272401437e-05, "loss": 2.9564, "step": 15060 }, { "epoch": 2.11, "learning_rate": 2.7423088410991637e-05, "loss": 2.941, "step": 15070 }, { "epoch": 2.11, "learning_rate": 2.740442054958184e-05, "loss": 3.4236, "step": 15080 }, { "epoch": 2.11, "learning_rate": 2.7385752688172043e-05, "loss": 3.3045, "step": 15090 }, { "epoch": 2.11, "learning_rate": 2.7367084826762247e-05, "loss": 2.8175, "step": 15100 }, { "epoch": 2.11, "learning_rate": 2.7348416965352453e-05, "loss": 2.4572, "step": 15110 }, { "epoch": 2.11, "learning_rate": 2.7329749103942653e-05, "loss": 3.1204, "step": 15120 }, { "epoch": 2.11, "learning_rate": 2.7311081242532853e-05, "loss": 3.4336, "step": 15130 }, { "epoch": 2.11, "learning_rate": 2.729241338112306e-05, "loss": 3.1992, "step": 15140 }, { "epoch": 2.11, "learning_rate": 2.7273745519713263e-05, "loss": 3.6503, "step": 15150 }, { "epoch": 2.11, "learning_rate": 2.725507765830347e-05, "loss": 3.1687, "step": 15160 }, { "epoch": 2.11, "learning_rate": 2.723640979689367e-05, "loss": 3.0529, "step": 15170 }, { "epoch": 2.11, "learning_rate": 2.721774193548387e-05, "loss": 3.186, "step": 15180 }, { "epoch": 2.11, "learning_rate": 2.7199074074074076e-05, "loss": 2.8333, "step": 15190 }, { "epoch": 2.11, "learning_rate": 2.718040621266428e-05, "loss": 2.8004, "step": 15200 }, { "epoch": 2.11, "learning_rate": 2.7161738351254485e-05, "loss": 3.7368, "step": 15210 }, { "epoch": 2.11, "learning_rate": 2.7143070489844685e-05, "loss": 2.6955, "step": 15220 }, { "epoch": 2.11, "learning_rate": 2.7124402628434885e-05, "loss": 2.8241, "step": 15230 }, { "epoch": 2.11, "learning_rate": 2.710573476702509e-05, "loss": 2.9161, "step": 15240 }, { "epoch": 2.11, "learning_rate": 2.7087066905615295e-05, "loss": 2.6601, "step": 15250 }, { "epoch": 2.11, "learning_rate": 2.7068399044205495e-05, "loss": 3.1068, "step": 15260 }, { "epoch": 2.11, "learning_rate": 2.70497311827957e-05, "loss": 2.6962, "step": 15270 }, { "epoch": 2.11, "learning_rate": 2.70310633213859e-05, "loss": 2.8732, "step": 15280 }, { "epoch": 2.11, "learning_rate": 2.7012395459976108e-05, "loss": 2.9584, "step": 15290 }, { "epoch": 2.11, "learning_rate": 2.699372759856631e-05, "loss": 3.3231, "step": 15300 }, { "epoch": 2.11, "learning_rate": 2.697505973715651e-05, "loss": 3.2559, "step": 15310 }, { "epoch": 2.11, "learning_rate": 2.6956391875746717e-05, "loss": 3.1895, "step": 15320 }, { "epoch": 2.12, "learning_rate": 2.6937724014336917e-05, "loss": 3.3419, "step": 15330 }, { "epoch": 2.12, "learning_rate": 2.6919056152927124e-05, "loss": 3.279, "step": 15340 }, { "epoch": 2.12, "learning_rate": 2.6900388291517327e-05, "loss": 3.3604, "step": 15350 }, { "epoch": 2.12, "learning_rate": 2.6881720430107527e-05, "loss": 3.1441, "step": 15360 }, { "epoch": 2.12, "learning_rate": 2.6863052568697733e-05, "loss": 2.7756, "step": 15370 }, { "epoch": 2.12, "learning_rate": 2.6844384707287933e-05, "loss": 3.0425, "step": 15380 }, { "epoch": 2.12, "learning_rate": 2.6825716845878137e-05, "loss": 3.2714, "step": 15390 }, { "epoch": 2.12, "learning_rate": 2.6807048984468343e-05, "loss": 2.9571, "step": 15400 }, { "epoch": 2.12, "learning_rate": 2.6788381123058543e-05, "loss": 3.1644, "step": 15410 }, { "epoch": 2.12, "learning_rate": 2.676971326164875e-05, "loss": 3.0494, "step": 15420 }, { "epoch": 2.12, "learning_rate": 2.675104540023895e-05, "loss": 3.3804, "step": 15430 }, { "epoch": 2.12, "learning_rate": 2.673237753882915e-05, "loss": 3.0539, "step": 15440 }, { "epoch": 2.12, "learning_rate": 2.6713709677419356e-05, "loss": 2.8719, "step": 15450 }, { "epoch": 2.12, "learning_rate": 2.669504181600956e-05, "loss": 2.2724, "step": 15460 }, { "epoch": 2.12, "learning_rate": 2.6676373954599766e-05, "loss": 2.6922, "step": 15470 }, { "epoch": 2.12, "learning_rate": 2.6657706093189965e-05, "loss": 2.657, "step": 15480 }, { "epoch": 2.12, "learning_rate": 2.6639038231780165e-05, "loss": 3.1571, "step": 15490 }, { "epoch": 2.12, "learning_rate": 2.6620370370370372e-05, "loss": 3.2733, "step": 15500 }, { "epoch": 2.12, "learning_rate": 2.6601702508960575e-05, "loss": 2.7808, "step": 15510 }, { "epoch": 2.12, "learning_rate": 2.6583034647550775e-05, "loss": 2.7793, "step": 15520 }, { "epoch": 2.12, "learning_rate": 2.656436678614098e-05, "loss": 3.3887, "step": 15530 }, { "epoch": 2.12, "learning_rate": 2.654569892473118e-05, "loss": 2.9146, "step": 15540 }, { "epoch": 2.12, "learning_rate": 2.6527031063321388e-05, "loss": 3.5791, "step": 15550 }, { "epoch": 2.12, "learning_rate": 2.650836320191159e-05, "loss": 2.8775, "step": 15560 }, { "epoch": 2.12, "learning_rate": 2.648969534050179e-05, "loss": 3.1055, "step": 15570 }, { "epoch": 2.12, "learning_rate": 2.6471027479091998e-05, "loss": 3.2177, "step": 15580 }, { "epoch": 2.12, "learning_rate": 2.6452359617682198e-05, "loss": 3.2396, "step": 15590 }, { "epoch": 2.12, "learning_rate": 2.6433691756272404e-05, "loss": 3.3748, "step": 15600 }, { "epoch": 2.12, "learning_rate": 2.6415023894862607e-05, "loss": 3.3006, "step": 15610 }, { "epoch": 2.12, "learning_rate": 2.6396356033452807e-05, "loss": 3.0114, "step": 15620 }, { "epoch": 2.13, "learning_rate": 2.6377688172043014e-05, "loss": 3.006, "step": 15630 }, { "epoch": 2.13, "learning_rate": 2.6359020310633214e-05, "loss": 3.1925, "step": 15640 }, { "epoch": 2.13, "learning_rate": 2.6340352449223417e-05, "loss": 3.2708, "step": 15650 }, { "epoch": 2.13, "learning_rate": 2.6321684587813623e-05, "loss": 3.1134, "step": 15660 }, { "epoch": 2.13, "learning_rate": 2.6303016726403823e-05, "loss": 2.6587, "step": 15670 }, { "epoch": 2.13, "learning_rate": 2.628434886499403e-05, "loss": 2.9197, "step": 15680 }, { "epoch": 2.13, "learning_rate": 2.626568100358423e-05, "loss": 3.1758, "step": 15690 }, { "epoch": 2.13, "learning_rate": 2.6247013142174433e-05, "loss": 3.1256, "step": 15700 }, { "epoch": 2.13, "learning_rate": 2.622834528076464e-05, "loss": 3.1082, "step": 15710 }, { "epoch": 2.13, "learning_rate": 2.620967741935484e-05, "loss": 3.2053, "step": 15720 }, { "epoch": 2.13, "learning_rate": 2.6191009557945046e-05, "loss": 3.3624, "step": 15730 }, { "epoch": 2.13, "learning_rate": 2.6172341696535246e-05, "loss": 2.9262, "step": 15740 }, { "epoch": 2.13, "learning_rate": 2.615367383512545e-05, "loss": 3.1878, "step": 15750 }, { "epoch": 2.13, "learning_rate": 2.6135005973715652e-05, "loss": 2.8616, "step": 15760 }, { "epoch": 2.13, "learning_rate": 2.6116338112305855e-05, "loss": 3.8151, "step": 15770 }, { "epoch": 2.13, "learning_rate": 2.6097670250896062e-05, "loss": 2.8967, "step": 15780 }, { "epoch": 2.13, "learning_rate": 2.6079002389486262e-05, "loss": 2.987, "step": 15790 }, { "epoch": 2.13, "learning_rate": 2.6060334528076462e-05, "loss": 3.0226, "step": 15800 }, { "epoch": 2.13, "learning_rate": 2.604166666666667e-05, "loss": 2.5813, "step": 15810 }, { "epoch": 2.13, "learning_rate": 2.602299880525687e-05, "loss": 2.9065, "step": 15820 }, { "epoch": 2.13, "learning_rate": 2.600433094384707e-05, "loss": 2.8649, "step": 15830 }, { "epoch": 2.13, "learning_rate": 2.5985663082437278e-05, "loss": 3.0972, "step": 15840 }, { "epoch": 2.13, "learning_rate": 2.5966995221027478e-05, "loss": 2.5581, "step": 15850 }, { "epoch": 2.13, "learning_rate": 2.5948327359617684e-05, "loss": 3.4753, "step": 15860 }, { "epoch": 2.13, "learning_rate": 2.5929659498207888e-05, "loss": 2.5078, "step": 15870 }, { "epoch": 2.13, "learning_rate": 2.5910991636798087e-05, "loss": 3.1008, "step": 15880 }, { "epoch": 2.13, "learning_rate": 2.5892323775388294e-05, "loss": 2.9719, "step": 15890 }, { "epoch": 2.13, "learning_rate": 2.5873655913978494e-05, "loss": 2.7048, "step": 15900 }, { "epoch": 2.13, "learning_rate": 2.58549880525687e-05, "loss": 2.9988, "step": 15910 }, { "epoch": 2.13, "learning_rate": 2.5836320191158904e-05, "loss": 2.8525, "step": 15920 }, { "epoch": 2.14, "learning_rate": 2.5817652329749104e-05, "loss": 2.7417, "step": 15930 }, { "epoch": 2.14, "learning_rate": 2.579898446833931e-05, "loss": 2.8101, "step": 15940 }, { "epoch": 2.14, "learning_rate": 2.578031660692951e-05, "loss": 2.4293, "step": 15950 }, { "epoch": 2.14, "learning_rate": 2.5761648745519713e-05, "loss": 2.7513, "step": 15960 }, { "epoch": 2.14, "learning_rate": 2.574298088410992e-05, "loss": 2.696, "step": 15970 }, { "epoch": 2.14, "learning_rate": 2.572431302270012e-05, "loss": 2.2097, "step": 15980 }, { "epoch": 2.14, "learning_rate": 2.5705645161290326e-05, "loss": 2.1147, "step": 15990 }, { "epoch": 2.14, "learning_rate": 2.5686977299880526e-05, "loss": 3.4384, "step": 16000 }, { "epoch": 2.14, "learning_rate": 2.566830943847073e-05, "loss": 2.9081, "step": 16010 }, { "epoch": 2.14, "learning_rate": 2.5649641577060936e-05, "loss": 2.7966, "step": 16020 }, { "epoch": 2.14, "learning_rate": 2.5630973715651136e-05, "loss": 2.865, "step": 16030 }, { "epoch": 2.14, "learning_rate": 2.5612305854241342e-05, "loss": 2.9875, "step": 16040 }, { "epoch": 2.14, "learning_rate": 2.5593637992831542e-05, "loss": 3.0181, "step": 16050 }, { "epoch": 2.14, "learning_rate": 2.5574970131421745e-05, "loss": 2.6113, "step": 16060 }, { "epoch": 2.14, "learning_rate": 2.555630227001195e-05, "loss": 2.5983, "step": 16070 }, { "epoch": 2.14, "learning_rate": 2.5537634408602152e-05, "loss": 2.6221, "step": 16080 }, { "epoch": 2.14, "learning_rate": 2.5518966547192352e-05, "loss": 3.1799, "step": 16090 }, { "epoch": 2.14, "learning_rate": 2.550029868578256e-05, "loss": 2.8745, "step": 16100 }, { "epoch": 2.14, "learning_rate": 2.5481630824372758e-05, "loss": 2.9698, "step": 16110 }, { "epoch": 2.14, "learning_rate": 2.5462962962962965e-05, "loss": 2.7704, "step": 16120 }, { "epoch": 2.14, "learning_rate": 2.5444295101553168e-05, "loss": 3.0358, "step": 16130 }, { "epoch": 2.14, "learning_rate": 2.5425627240143368e-05, "loss": 2.7857, "step": 16140 }, { "epoch": 2.14, "learning_rate": 2.5406959378733574e-05, "loss": 3.2799, "step": 16150 }, { "epoch": 2.14, "learning_rate": 2.5388291517323774e-05, "loss": 2.6568, "step": 16160 }, { "epoch": 2.14, "learning_rate": 2.536962365591398e-05, "loss": 3.0519, "step": 16170 }, { "epoch": 2.14, "learning_rate": 2.5350955794504184e-05, "loss": 2.9738, "step": 16180 }, { "epoch": 2.14, "learning_rate": 2.5332287933094384e-05, "loss": 3.0673, "step": 16190 }, { "epoch": 2.14, "learning_rate": 2.531362007168459e-05, "loss": 2.9453, "step": 16200 }, { "epoch": 2.14, "learning_rate": 2.529495221027479e-05, "loss": 2.6896, "step": 16210 }, { "epoch": 2.15, "learning_rate": 2.5276284348864994e-05, "loss": 2.7328, "step": 16220 }, { "epoch": 2.15, "learning_rate": 2.52576164874552e-05, "loss": 2.6248, "step": 16230 }, { "epoch": 2.15, "learning_rate": 2.52389486260454e-05, "loss": 2.7152, "step": 16240 }, { "epoch": 2.15, "learning_rate": 2.5220280764635607e-05, "loss": 2.9469, "step": 16250 }, { "epoch": 2.15, "learning_rate": 2.5201612903225806e-05, "loss": 2.6343, "step": 16260 }, { "epoch": 2.15, "learning_rate": 2.518294504181601e-05, "loss": 3.0206, "step": 16270 }, { "epoch": 2.15, "learning_rate": 2.5164277180406216e-05, "loss": 3.0976, "step": 16280 }, { "epoch": 2.15, "learning_rate": 2.5145609318996416e-05, "loss": 2.8687, "step": 16290 }, { "epoch": 2.15, "learning_rate": 2.5126941457586623e-05, "loss": 3.2842, "step": 16300 }, { "epoch": 2.15, "learning_rate": 2.5108273596176823e-05, "loss": 2.8738, "step": 16310 }, { "epoch": 2.15, "learning_rate": 2.5089605734767026e-05, "loss": 2.3145, "step": 16320 }, { "epoch": 2.15, "learning_rate": 2.5070937873357232e-05, "loss": 2.9295, "step": 16330 }, { "epoch": 2.15, "learning_rate": 2.5052270011947432e-05, "loss": 2.6285, "step": 16340 }, { "epoch": 2.15, "learning_rate": 2.503360215053764e-05, "loss": 2.7225, "step": 16350 }, { "epoch": 2.15, "learning_rate": 2.501493428912784e-05, "loss": 3.0596, "step": 16360 }, { "epoch": 2.15, "learning_rate": 2.4996266427718042e-05, "loss": 2.5452, "step": 16370 }, { "epoch": 2.15, "learning_rate": 2.4977598566308245e-05, "loss": 2.829, "step": 16380 }, { "epoch": 2.15, "learning_rate": 2.4958930704898448e-05, "loss": 3.0554, "step": 16390 }, { "epoch": 2.15, "learning_rate": 2.494026284348865e-05, "loss": 2.8526, "step": 16400 }, { "epoch": 2.15, "learning_rate": 2.4921594982078855e-05, "loss": 3.194, "step": 16410 }, { "epoch": 2.15, "learning_rate": 2.4902927120669055e-05, "loss": 2.9661, "step": 16420 }, { "epoch": 2.15, "learning_rate": 2.488425925925926e-05, "loss": 3.0343, "step": 16430 }, { "epoch": 2.15, "learning_rate": 2.4865591397849464e-05, "loss": 2.7143, "step": 16440 }, { "epoch": 2.15, "learning_rate": 2.4846923536439668e-05, "loss": 3.1782, "step": 16450 }, { "epoch": 2.15, "learning_rate": 2.482825567502987e-05, "loss": 2.697, "step": 16460 }, { "epoch": 2.15, "learning_rate": 2.480958781362007e-05, "loss": 2.4925, "step": 16470 }, { "epoch": 2.15, "learning_rate": 2.4790919952210277e-05, "loss": 2.6889, "step": 16480 }, { "epoch": 2.15, "learning_rate": 2.477225209080048e-05, "loss": 3.1437, "step": 16490 }, { "epoch": 2.15, "learning_rate": 2.4753584229390684e-05, "loss": 2.9968, "step": 16500 }, { "epoch": 2.15, "learning_rate": 2.4734916367980884e-05, "loss": 3.1598, "step": 16510 }, { "epoch": 2.16, "learning_rate": 2.4716248506571087e-05, "loss": 2.6676, "step": 16520 }, { "epoch": 2.16, "learning_rate": 2.4697580645161293e-05, "loss": 2.7759, "step": 16530 }, { "epoch": 2.16, "learning_rate": 2.4678912783751497e-05, "loss": 2.6843, "step": 16540 }, { "epoch": 2.16, "learning_rate": 2.4660244922341696e-05, "loss": 2.9387, "step": 16550 }, { "epoch": 2.16, "learning_rate": 2.46415770609319e-05, "loss": 3.394, "step": 16560 }, { "epoch": 2.16, "learning_rate": 2.4622909199522103e-05, "loss": 3.0735, "step": 16570 }, { "epoch": 2.16, "learning_rate": 2.4604241338112306e-05, "loss": 2.7541, "step": 16580 }, { "epoch": 2.16, "learning_rate": 2.4585573476702513e-05, "loss": 3.5003, "step": 16590 }, { "epoch": 2.16, "learning_rate": 2.4566905615292712e-05, "loss": 2.4582, "step": 16600 }, { "epoch": 2.16, "learning_rate": 2.4548237753882916e-05, "loss": 3.588, "step": 16610 }, { "epoch": 2.16, "learning_rate": 2.452956989247312e-05, "loss": 2.647, "step": 16620 }, { "epoch": 2.16, "learning_rate": 2.4510902031063322e-05, "loss": 3.2833, "step": 16630 }, { "epoch": 2.16, "learning_rate": 2.4492234169653525e-05, "loss": 2.7989, "step": 16640 }, { "epoch": 2.16, "learning_rate": 2.447356630824373e-05, "loss": 3.5054, "step": 16650 }, { "epoch": 2.16, "learning_rate": 2.4454898446833932e-05, "loss": 3.2047, "step": 16660 }, { "epoch": 2.16, "learning_rate": 2.4436230585424135e-05, "loss": 3.388, "step": 16670 }, { "epoch": 2.16, "learning_rate": 2.4417562724014338e-05, "loss": 2.9514, "step": 16680 }, { "epoch": 2.16, "learning_rate": 2.439889486260454e-05, "loss": 2.7971, "step": 16690 }, { "epoch": 2.16, "learning_rate": 2.4380227001194745e-05, "loss": 3.0533, "step": 16700 }, { "epoch": 2.16, "learning_rate": 2.4361559139784948e-05, "loss": 2.4706, "step": 16710 }, { "epoch": 2.16, "learning_rate": 2.434289127837515e-05, "loss": 3.0765, "step": 16720 }, { "epoch": 2.16, "learning_rate": 2.4324223416965354e-05, "loss": 2.7394, "step": 16730 }, { "epoch": 2.16, "learning_rate": 2.4305555555555558e-05, "loss": 2.3243, "step": 16740 }, { "epoch": 2.16, "learning_rate": 2.428688769414576e-05, "loss": 2.8647, "step": 16750 }, { "epoch": 2.16, "learning_rate": 2.4268219832735964e-05, "loss": 2.5507, "step": 16760 }, { "epoch": 2.16, "learning_rate": 2.4249551971326164e-05, "loss": 3.0999, "step": 16770 }, { "epoch": 2.16, "learning_rate": 2.4230884109916367e-05, "loss": 2.3194, "step": 16780 }, { "epoch": 2.16, "learning_rate": 2.4212216248506574e-05, "loss": 2.9616, "step": 16790 }, { "epoch": 2.16, "learning_rate": 2.4193548387096777e-05, "loss": 3.2428, "step": 16800 }, { "epoch": 2.16, "learning_rate": 2.417488052568698e-05, "loss": 3.1768, "step": 16810 }, { "epoch": 2.17, "learning_rate": 2.415621266427718e-05, "loss": 3.2242, "step": 16820 }, { "epoch": 2.17, "learning_rate": 2.4137544802867383e-05, "loss": 3.22, "step": 16830 }, { "epoch": 2.17, "learning_rate": 2.411887694145759e-05, "loss": 3.1305, "step": 16840 }, { "epoch": 2.17, "learning_rate": 2.4100209080047793e-05, "loss": 3.267, "step": 16850 }, { "epoch": 2.17, "learning_rate": 2.4081541218637993e-05, "loss": 3.2045, "step": 16860 }, { "epoch": 2.17, "learning_rate": 2.4062873357228196e-05, "loss": 3.5537, "step": 16870 }, { "epoch": 2.17, "learning_rate": 2.40442054958184e-05, "loss": 3.4825, "step": 16880 }, { "epoch": 2.17, "learning_rate": 2.4025537634408606e-05, "loss": 2.9965, "step": 16890 }, { "epoch": 2.17, "learning_rate": 2.4006869772998806e-05, "loss": 2.7778, "step": 16900 }, { "epoch": 2.17, "learning_rate": 2.398820191158901e-05, "loss": 2.718, "step": 16910 }, { "epoch": 2.17, "learning_rate": 2.3969534050179212e-05, "loss": 2.5592, "step": 16920 }, { "epoch": 2.17, "learning_rate": 2.3950866188769415e-05, "loss": 3.3026, "step": 16930 }, { "epoch": 2.17, "learning_rate": 2.393219832735962e-05, "loss": 2.8021, "step": 16940 }, { "epoch": 2.17, "learning_rate": 2.3913530465949822e-05, "loss": 2.7009, "step": 16950 }, { "epoch": 2.17, "learning_rate": 2.3894862604540025e-05, "loss": 2.8582, "step": 16960 }, { "epoch": 2.17, "learning_rate": 2.3876194743130228e-05, "loss": 2.7851, "step": 16970 }, { "epoch": 2.17, "learning_rate": 2.385752688172043e-05, "loss": 2.963, "step": 16980 }, { "epoch": 2.17, "learning_rate": 2.3838859020310635e-05, "loss": 2.8885, "step": 16990 }, { "epoch": 2.17, "learning_rate": 2.3820191158900838e-05, "loss": 3.014, "step": 17000 }, { "epoch": 2.17, "learning_rate": 2.380152329749104e-05, "loss": 2.5369, "step": 17010 }, { "epoch": 2.17, "learning_rate": 2.3782855436081244e-05, "loss": 3.0588, "step": 17020 }, { "epoch": 2.17, "learning_rate": 2.3764187574671444e-05, "loss": 2.8192, "step": 17030 }, { "epoch": 2.17, "learning_rate": 2.374551971326165e-05, "loss": 3.0529, "step": 17040 }, { "epoch": 2.17, "learning_rate": 2.3726851851851854e-05, "loss": 2.509, "step": 17050 }, { "epoch": 2.17, "learning_rate": 2.3708183990442057e-05, "loss": 3.4218, "step": 17060 }, { "epoch": 2.17, "learning_rate": 2.368951612903226e-05, "loss": 2.885, "step": 17070 }, { "epoch": 2.17, "learning_rate": 2.367084826762246e-05, "loss": 2.9661, "step": 17080 }, { "epoch": 2.17, "learning_rate": 2.3652180406212663e-05, "loss": 3.1489, "step": 17090 }, { "epoch": 2.17, "learning_rate": 2.363351254480287e-05, "loss": 2.8483, "step": 17100 }, { "epoch": 2.17, "learning_rate": 2.3614844683393073e-05, "loss": 2.6805, "step": 17110 }, { "epoch": 2.18, "learning_rate": 2.3596176821983273e-05, "loss": 2.9985, "step": 17120 }, { "epoch": 2.18, "learning_rate": 2.3577508960573476e-05, "loss": 2.93, "step": 17130 }, { "epoch": 2.18, "learning_rate": 2.355884109916368e-05, "loss": 2.9248, "step": 17140 }, { "epoch": 2.18, "learning_rate": 2.3540173237753886e-05, "loss": 2.4329, "step": 17150 }, { "epoch": 2.18, "learning_rate": 2.352150537634409e-05, "loss": 3.1536, "step": 17160 }, { "epoch": 2.18, "learning_rate": 2.350283751493429e-05, "loss": 2.9112, "step": 17170 }, { "epoch": 2.18, "learning_rate": 2.3484169653524492e-05, "loss": 3.0118, "step": 17180 }, { "epoch": 2.18, "learning_rate": 2.3465501792114696e-05, "loss": 3.2796, "step": 17190 }, { "epoch": 2.18, "learning_rate": 2.3446833930704902e-05, "loss": 2.8319, "step": 17200 }, { "epoch": 2.18, "learning_rate": 2.3428166069295102e-05, "loss": 2.808, "step": 17210 }, { "epoch": 2.18, "learning_rate": 2.3409498207885305e-05, "loss": 2.3681, "step": 17220 }, { "epoch": 2.18, "learning_rate": 2.339083034647551e-05, "loss": 3.2278, "step": 17230 }, { "epoch": 2.18, "learning_rate": 2.3372162485065712e-05, "loss": 2.7767, "step": 17240 }, { "epoch": 2.18, "learning_rate": 2.3353494623655915e-05, "loss": 2.6173, "step": 17250 }, { "epoch": 2.18, "learning_rate": 2.3334826762246118e-05, "loss": 2.9629, "step": 17260 }, { "epoch": 2.18, "learning_rate": 2.331615890083632e-05, "loss": 2.7156, "step": 17270 }, { "epoch": 2.18, "learning_rate": 2.3297491039426525e-05, "loss": 3.1659, "step": 17280 }, { "epoch": 2.18, "learning_rate": 2.3278823178016728e-05, "loss": 2.8566, "step": 17290 }, { "epoch": 2.18, "learning_rate": 2.326015531660693e-05, "loss": 2.57, "step": 17300 }, { "epoch": 2.18, "learning_rate": 2.3241487455197134e-05, "loss": 2.7788, "step": 17310 }, { "epoch": 2.18, "learning_rate": 2.3222819593787338e-05, "loss": 2.8175, "step": 17320 }, { "epoch": 2.18, "learning_rate": 2.320415173237754e-05, "loss": 2.9958, "step": 17330 }, { "epoch": 2.18, "learning_rate": 2.318548387096774e-05, "loss": 2.407, "step": 17340 }, { "epoch": 2.18, "learning_rate": 2.3166816009557947e-05, "loss": 2.981, "step": 17350 }, { "epoch": 2.18, "learning_rate": 2.314814814814815e-05, "loss": 2.9412, "step": 17360 }, { "epoch": 2.18, "learning_rate": 2.3129480286738354e-05, "loss": 3.145, "step": 17370 }, { "epoch": 2.18, "learning_rate": 2.3110812425328557e-05, "loss": 2.6311, "step": 17380 }, { "epoch": 2.18, "learning_rate": 2.3092144563918757e-05, "loss": 2.7061, "step": 17390 }, { "epoch": 2.18, "learning_rate": 2.307347670250896e-05, "loss": 2.4077, "step": 17400 }, { "epoch": 2.19, "learning_rate": 2.3054808841099166e-05, "loss": 2.5605, "step": 17410 }, { "epoch": 2.19, "learning_rate": 2.303614097968937e-05, "loss": 2.5439, "step": 17420 }, { "epoch": 2.19, "learning_rate": 2.301747311827957e-05, "loss": 3.0293, "step": 17430 }, { "epoch": 2.19, "learning_rate": 2.2998805256869773e-05, "loss": 2.3587, "step": 17440 }, { "epoch": 2.19, "learning_rate": 2.2980137395459976e-05, "loss": 2.7149, "step": 17450 }, { "epoch": 2.19, "learning_rate": 2.2961469534050183e-05, "loss": 2.7515, "step": 17460 }, { "epoch": 2.19, "learning_rate": 2.2942801672640382e-05, "loss": 2.4752, "step": 17470 }, { "epoch": 2.19, "learning_rate": 2.2924133811230586e-05, "loss": 2.8067, "step": 17480 }, { "epoch": 2.19, "learning_rate": 2.290546594982079e-05, "loss": 3.2142, "step": 17490 }, { "epoch": 2.19, "learning_rate": 2.2886798088410992e-05, "loss": 2.6557, "step": 17500 }, { "epoch": 2.19, "learning_rate": 2.28681302270012e-05, "loss": 2.9121, "step": 17510 }, { "epoch": 2.19, "learning_rate": 2.28494623655914e-05, "loss": 2.6181, "step": 17520 }, { "epoch": 2.19, "learning_rate": 2.2830794504181602e-05, "loss": 2.6288, "step": 17530 }, { "epoch": 2.19, "learning_rate": 2.2812126642771805e-05, "loss": 2.6474, "step": 17540 }, { "epoch": 2.19, "learning_rate": 2.2793458781362008e-05, "loss": 3.1204, "step": 17550 }, { "epoch": 2.19, "learning_rate": 2.277479091995221e-05, "loss": 2.8625, "step": 17560 }, { "epoch": 2.19, "learning_rate": 2.2756123058542415e-05, "loss": 2.8513, "step": 17570 }, { "epoch": 2.19, "learning_rate": 2.2737455197132618e-05, "loss": 2.9578, "step": 17580 }, { "epoch": 2.19, "learning_rate": 2.271878733572282e-05, "loss": 2.5751, "step": 17590 }, { "epoch": 2.19, "learning_rate": 2.270011947431302e-05, "loss": 2.5051, "step": 17600 }, { "epoch": 2.19, "learning_rate": 2.2681451612903227e-05, "loss": 3.1305, "step": 17610 }, { "epoch": 2.19, "learning_rate": 2.266278375149343e-05, "loss": 2.388, "step": 17620 }, { "epoch": 2.19, "learning_rate": 2.2644115890083634e-05, "loss": 2.592, "step": 17630 }, { "epoch": 2.19, "learning_rate": 2.2625448028673837e-05, "loss": 2.9774, "step": 17640 }, { "epoch": 2.19, "learning_rate": 2.2606780167264037e-05, "loss": 3.178, "step": 17650 }, { "epoch": 2.19, "learning_rate": 2.2588112305854244e-05, "loss": 2.5683, "step": 17660 }, { "epoch": 2.19, "learning_rate": 2.2569444444444447e-05, "loss": 2.7476, "step": 17670 }, { "epoch": 2.19, "learning_rate": 2.255077658303465e-05, "loss": 2.6905, "step": 17680 }, { "epoch": 2.19, "learning_rate": 2.253210872162485e-05, "loss": 2.5406, "step": 17690 }, { "epoch": 2.19, "learning_rate": 2.2513440860215053e-05, "loss": 2.5228, "step": 17700 }, { "epoch": 2.2, "learning_rate": 2.249477299880526e-05, "loss": 2.3931, "step": 17710 }, { "epoch": 2.2, "learning_rate": 2.2476105137395463e-05, "loss": 2.7231, "step": 17720 }, { "epoch": 2.2, "learning_rate": 2.2457437275985666e-05, "loss": 2.7678, "step": 17730 }, { "epoch": 2.2, "learning_rate": 2.2438769414575866e-05, "loss": 2.3632, "step": 17740 }, { "epoch": 2.2, "learning_rate": 2.242010155316607e-05, "loss": 2.8103, "step": 17750 }, { "epoch": 2.2, "learning_rate": 2.2401433691756272e-05, "loss": 2.5602, "step": 17760 }, { "epoch": 2.2, "learning_rate": 2.238276583034648e-05, "loss": 2.9518, "step": 17770 }, { "epoch": 2.2, "learning_rate": 2.236409796893668e-05, "loss": 2.6576, "step": 17780 }, { "epoch": 2.2, "learning_rate": 2.2345430107526882e-05, "loss": 2.5758, "step": 17790 }, { "epoch": 2.2, "learning_rate": 2.2326762246117085e-05, "loss": 2.5331, "step": 17800 }, { "epoch": 2.2, "learning_rate": 2.230809438470729e-05, "loss": 3.0794, "step": 17810 }, { "epoch": 2.2, "learning_rate": 2.2289426523297492e-05, "loss": 2.5879, "step": 17820 }, { "epoch": 2.2, "learning_rate": 2.2270758661887695e-05, "loss": 3.0335, "step": 17830 }, { "epoch": 2.2, "learning_rate": 2.2252090800477898e-05, "loss": 2.8501, "step": 17840 }, { "epoch": 2.2, "learning_rate": 2.22334229390681e-05, "loss": 2.6787, "step": 17850 }, { "epoch": 2.2, "eval_accuracy": 0.21925712225027047, "eval_loss": 2.939669609069824, "eval_runtime": 784.0992, "eval_samples_per_second": 3.537, "eval_steps_per_second": 1.769, "step": 17856 }, { "epoch": 3.0, "learning_rate": 2.2214755077658305e-05, "loss": 2.9471, "step": 17860 }, { "epoch": 3.0, "learning_rate": 2.2196087216248508e-05, "loss": 2.2457, "step": 17870 }, { "epoch": 3.0, "learning_rate": 2.217741935483871e-05, "loss": 2.5336, "step": 17880 }, { "epoch": 3.0, "learning_rate": 2.2158751493428914e-05, "loss": 2.8878, "step": 17890 }, { "epoch": 3.0, "learning_rate": 2.2140083632019117e-05, "loss": 2.707, "step": 17900 }, { "epoch": 3.0, "learning_rate": 2.2121415770609317e-05, "loss": 3.1906, "step": 17910 }, { "epoch": 3.0, "learning_rate": 2.2102747909199524e-05, "loss": 3.2297, "step": 17920 }, { "epoch": 3.0, "learning_rate": 2.2084080047789727e-05, "loss": 2.8249, "step": 17930 }, { "epoch": 3.0, "learning_rate": 2.206541218637993e-05, "loss": 2.3213, "step": 17940 }, { "epoch": 3.0, "learning_rate": 2.204674432497013e-05, "loss": 2.5766, "step": 17950 }, { "epoch": 3.0, "learning_rate": 2.2028076463560333e-05, "loss": 2.5443, "step": 17960 }, { "epoch": 3.0, "learning_rate": 2.200940860215054e-05, "loss": 2.7451, "step": 17970 }, { "epoch": 3.0, "learning_rate": 2.1990740740740743e-05, "loss": 2.1575, "step": 17980 }, { "epoch": 3.0, "learning_rate": 2.1972072879330946e-05, "loss": 2.3331, "step": 17990 }, { "epoch": 3.0, "learning_rate": 2.1953405017921146e-05, "loss": 2.7351, "step": 18000 }, { "epoch": 3.01, "learning_rate": 2.193473715651135e-05, "loss": 2.864, "step": 18010 }, { "epoch": 3.01, "learning_rate": 2.1916069295101556e-05, "loss": 2.7778, "step": 18020 }, { "epoch": 3.01, "learning_rate": 2.189740143369176e-05, "loss": 2.7513, "step": 18030 }, { "epoch": 3.01, "learning_rate": 2.187873357228196e-05, "loss": 2.3821, "step": 18040 }, { "epoch": 3.01, "learning_rate": 2.1860065710872162e-05, "loss": 3.2049, "step": 18050 }, { "epoch": 3.01, "learning_rate": 2.1841397849462366e-05, "loss": 2.8676, "step": 18060 }, { "epoch": 3.01, "learning_rate": 2.182272998805257e-05, "loss": 2.7496, "step": 18070 }, { "epoch": 3.01, "learning_rate": 2.1804062126642775e-05, "loss": 2.8662, "step": 18080 }, { "epoch": 3.01, "learning_rate": 2.1785394265232975e-05, "loss": 2.446, "step": 18090 }, { "epoch": 3.01, "learning_rate": 2.176672640382318e-05, "loss": 2.6361, "step": 18100 }, { "epoch": 3.01, "learning_rate": 2.174805854241338e-05, "loss": 2.6153, "step": 18110 }, { "epoch": 3.01, "learning_rate": 2.1729390681003585e-05, "loss": 2.5376, "step": 18120 }, { "epoch": 3.01, "learning_rate": 2.1710722819593788e-05, "loss": 2.4613, "step": 18130 }, { "epoch": 3.01, "learning_rate": 2.169205495818399e-05, "loss": 2.7581, "step": 18140 }, { "epoch": 3.01, "learning_rate": 2.1673387096774195e-05, "loss": 2.3273, "step": 18150 }, { "epoch": 3.01, "learning_rate": 2.1654719235364398e-05, "loss": 2.5223, "step": 18160 }, { "epoch": 3.01, "learning_rate": 2.16360513739546e-05, "loss": 2.7759, "step": 18170 }, { "epoch": 3.01, "learning_rate": 2.1617383512544804e-05, "loss": 2.0227, "step": 18180 }, { "epoch": 3.01, "learning_rate": 2.1598715651135007e-05, "loss": 2.3961, "step": 18190 }, { "epoch": 3.01, "learning_rate": 2.158004778972521e-05, "loss": 2.6126, "step": 18200 }, { "epoch": 3.01, "learning_rate": 2.1561379928315414e-05, "loss": 2.8436, "step": 18210 }, { "epoch": 3.01, "learning_rate": 2.1542712066905614e-05, "loss": 2.7485, "step": 18220 }, { "epoch": 3.01, "learning_rate": 2.152404420549582e-05, "loss": 2.6791, "step": 18230 }, { "epoch": 3.01, "learning_rate": 2.1505376344086024e-05, "loss": 2.3018, "step": 18240 }, { "epoch": 3.01, "learning_rate": 2.1486708482676227e-05, "loss": 2.5784, "step": 18250 }, { "epoch": 3.01, "learning_rate": 2.1468040621266427e-05, "loss": 2.6444, "step": 18260 }, { "epoch": 3.01, "learning_rate": 2.144937275985663e-05, "loss": 2.786, "step": 18270 }, { "epoch": 3.01, "learning_rate": 2.1430704898446836e-05, "loss": 2.0548, "step": 18280 }, { "epoch": 3.01, "learning_rate": 2.141203703703704e-05, "loss": 2.1212, "step": 18290 }, { "epoch": 3.01, "learning_rate": 2.1393369175627243e-05, "loss": 2.2541, "step": 18300 }, { "epoch": 3.02, "learning_rate": 2.1374701314217443e-05, "loss": 2.5681, "step": 18310 }, { "epoch": 3.02, "learning_rate": 2.1356033452807646e-05, "loss": 2.6514, "step": 18320 }, { "epoch": 3.02, "learning_rate": 2.1337365591397852e-05, "loss": 2.2708, "step": 18330 }, { "epoch": 3.02, "learning_rate": 2.1318697729988056e-05, "loss": 2.9859, "step": 18340 }, { "epoch": 3.02, "learning_rate": 2.1300029868578256e-05, "loss": 2.4121, "step": 18350 }, { "epoch": 3.02, "learning_rate": 2.128136200716846e-05, "loss": 2.8401, "step": 18360 }, { "epoch": 3.02, "learning_rate": 2.1262694145758662e-05, "loss": 2.0993, "step": 18370 }, { "epoch": 3.02, "learning_rate": 2.1244026284348865e-05, "loss": 2.3191, "step": 18380 }, { "epoch": 3.02, "learning_rate": 2.122535842293907e-05, "loss": 2.4641, "step": 18390 }, { "epoch": 3.02, "learning_rate": 2.120669056152927e-05, "loss": 2.6473, "step": 18400 }, { "epoch": 3.02, "learning_rate": 2.1188022700119475e-05, "loss": 2.4044, "step": 18410 }, { "epoch": 3.02, "learning_rate": 2.1169354838709678e-05, "loss": 2.5554, "step": 18420 }, { "epoch": 3.02, "learning_rate": 2.115068697729988e-05, "loss": 1.9765, "step": 18430 }, { "epoch": 3.02, "learning_rate": 2.1132019115890085e-05, "loss": 2.5409, "step": 18440 }, { "epoch": 3.02, "learning_rate": 2.1113351254480288e-05, "loss": 2.3166, "step": 18450 }, { "epoch": 3.02, "learning_rate": 2.109468339307049e-05, "loss": 2.5497, "step": 18460 }, { "epoch": 3.02, "learning_rate": 2.1076015531660694e-05, "loss": 2.9819, "step": 18470 }, { "epoch": 3.02, "learning_rate": 2.1057347670250897e-05, "loss": 2.4387, "step": 18480 }, { "epoch": 3.02, "learning_rate": 2.10386798088411e-05, "loss": 2.5872, "step": 18490 }, { "epoch": 3.02, "learning_rate": 2.1020011947431304e-05, "loss": 2.5181, "step": 18500 }, { "epoch": 3.02, "learning_rate": 2.1001344086021507e-05, "loss": 2.6028, "step": 18510 }, { "epoch": 3.02, "learning_rate": 2.0982676224611707e-05, "loss": 2.228, "step": 18520 }, { "epoch": 3.02, "learning_rate": 2.0964008363201913e-05, "loss": 2.2527, "step": 18530 }, { "epoch": 3.02, "learning_rate": 2.0945340501792117e-05, "loss": 2.223, "step": 18540 }, { "epoch": 3.02, "learning_rate": 2.092667264038232e-05, "loss": 2.6303, "step": 18550 }, { "epoch": 3.02, "learning_rate": 2.0908004778972523e-05, "loss": 2.6447, "step": 18560 }, { "epoch": 3.02, "learning_rate": 2.0889336917562723e-05, "loss": 2.9795, "step": 18570 }, { "epoch": 3.02, "learning_rate": 2.0870669056152926e-05, "loss": 2.63, "step": 18580 }, { "epoch": 3.02, "learning_rate": 2.0852001194743133e-05, "loss": 2.6006, "step": 18590 }, { "epoch": 3.02, "learning_rate": 2.0833333333333336e-05, "loss": 2.7452, "step": 18600 }, { "epoch": 3.03, "learning_rate": 2.0814665471923536e-05, "loss": 2.437, "step": 18610 }, { "epoch": 3.03, "learning_rate": 2.079599761051374e-05, "loss": 2.0574, "step": 18620 }, { "epoch": 3.03, "learning_rate": 2.0777329749103942e-05, "loss": 2.1847, "step": 18630 }, { "epoch": 3.03, "learning_rate": 2.075866188769415e-05, "loss": 2.5653, "step": 18640 }, { "epoch": 3.03, "learning_rate": 2.0739994026284352e-05, "loss": 2.3355, "step": 18650 }, { "epoch": 3.03, "learning_rate": 2.0721326164874552e-05, "loss": 3.0245, "step": 18660 }, { "epoch": 3.03, "learning_rate": 2.0702658303464755e-05, "loss": 2.5511, "step": 18670 }, { "epoch": 3.03, "learning_rate": 2.068399044205496e-05, "loss": 2.259, "step": 18680 }, { "epoch": 3.03, "learning_rate": 2.0665322580645165e-05, "loss": 2.6907, "step": 18690 }, { "epoch": 3.03, "learning_rate": 2.0646654719235365e-05, "loss": 2.5976, "step": 18700 }, { "epoch": 3.03, "learning_rate": 2.0627986857825568e-05, "loss": 2.8016, "step": 18710 }, { "epoch": 3.03, "learning_rate": 2.060931899641577e-05, "loss": 2.8278, "step": 18720 }, { "epoch": 3.03, "learning_rate": 2.0590651135005974e-05, "loss": 2.7738, "step": 18730 }, { "epoch": 3.03, "learning_rate": 2.0571983273596178e-05, "loss": 2.7854, "step": 18740 }, { "epoch": 3.03, "learning_rate": 2.055331541218638e-05, "loss": 2.0146, "step": 18750 }, { "epoch": 3.03, "learning_rate": 2.0534647550776584e-05, "loss": 2.6256, "step": 18760 }, { "epoch": 3.03, "learning_rate": 2.0515979689366787e-05, "loss": 2.4052, "step": 18770 }, { "epoch": 3.03, "learning_rate": 2.049731182795699e-05, "loss": 2.2158, "step": 18780 }, { "epoch": 3.03, "learning_rate": 2.0478643966547194e-05, "loss": 3.4843, "step": 18790 }, { "epoch": 3.03, "learning_rate": 2.0459976105137397e-05, "loss": 2.6808, "step": 18800 }, { "epoch": 3.03, "learning_rate": 2.04413082437276e-05, "loss": 1.9205, "step": 18810 }, { "epoch": 3.03, "learning_rate": 2.0422640382317803e-05, "loss": 2.1148, "step": 18820 }, { "epoch": 3.03, "learning_rate": 2.0403972520908003e-05, "loss": 2.8864, "step": 18830 }, { "epoch": 3.03, "learning_rate": 2.038530465949821e-05, "loss": 2.2261, "step": 18840 }, { "epoch": 3.03, "learning_rate": 2.0366636798088413e-05, "loss": 2.675, "step": 18850 }, { "epoch": 3.03, "learning_rate": 2.0347968936678616e-05, "loss": 2.539, "step": 18860 }, { "epoch": 3.03, "learning_rate": 2.032930107526882e-05, "loss": 2.1786, "step": 18870 }, { "epoch": 3.03, "learning_rate": 2.031063321385902e-05, "loss": 2.362, "step": 18880 }, { "epoch": 3.03, "learning_rate": 2.0291965352449223e-05, "loss": 2.2568, "step": 18890 }, { "epoch": 3.04, "learning_rate": 2.027329749103943e-05, "loss": 2.5926, "step": 18900 }, { "epoch": 3.04, "learning_rate": 2.0254629629629632e-05, "loss": 2.5037, "step": 18910 }, { "epoch": 3.04, "learning_rate": 2.0235961768219832e-05, "loss": 2.4759, "step": 18920 }, { "epoch": 3.04, "learning_rate": 2.0217293906810035e-05, "loss": 2.4399, "step": 18930 }, { "epoch": 3.04, "learning_rate": 2.019862604540024e-05, "loss": 2.7335, "step": 18940 }, { "epoch": 3.04, "learning_rate": 2.0179958183990445e-05, "loss": 1.801, "step": 18950 }, { "epoch": 3.04, "learning_rate": 2.0161290322580645e-05, "loss": 2.0234, "step": 18960 }, { "epoch": 3.04, "learning_rate": 2.014262246117085e-05, "loss": 2.5517, "step": 18970 }, { "epoch": 3.04, "learning_rate": 2.012395459976105e-05, "loss": 2.9225, "step": 18980 }, { "epoch": 3.04, "learning_rate": 2.0105286738351255e-05, "loss": 3.2185, "step": 18990 }, { "epoch": 3.04, "learning_rate": 2.008661887694146e-05, "loss": 2.3455, "step": 19000 }, { "epoch": 3.04, "learning_rate": 2.006795101553166e-05, "loss": 2.636, "step": 19010 }, { "epoch": 3.04, "learning_rate": 2.0049283154121864e-05, "loss": 3.0057, "step": 19020 }, { "epoch": 3.04, "learning_rate": 2.0030615292712068e-05, "loss": 2.7138, "step": 19030 }, { "epoch": 3.04, "learning_rate": 2.001194743130227e-05, "loss": 2.5631, "step": 19040 }, { "epoch": 3.04, "learning_rate": 1.9993279569892474e-05, "loss": 2.4569, "step": 19050 }, { "epoch": 3.04, "learning_rate": 1.9974611708482677e-05, "loss": 2.661, "step": 19060 }, { "epoch": 3.04, "learning_rate": 1.995594384707288e-05, "loss": 2.7164, "step": 19070 }, { "epoch": 3.04, "learning_rate": 1.9937275985663084e-05, "loss": 2.1074, "step": 19080 }, { "epoch": 3.04, "learning_rate": 1.9918608124253284e-05, "loss": 2.6809, "step": 19090 }, { "epoch": 3.04, "learning_rate": 1.989994026284349e-05, "loss": 2.5848, "step": 19100 }, { "epoch": 3.04, "learning_rate": 1.9881272401433693e-05, "loss": 2.6178, "step": 19110 }, { "epoch": 3.04, "learning_rate": 1.9862604540023897e-05, "loss": 2.085, "step": 19120 }, { "epoch": 3.04, "learning_rate": 1.98439366786141e-05, "loss": 2.6144, "step": 19130 }, { "epoch": 3.04, "learning_rate": 1.98252688172043e-05, "loss": 3.1553, "step": 19140 }, { "epoch": 3.04, "learning_rate": 1.9806600955794506e-05, "loss": 2.4955, "step": 19150 }, { "epoch": 3.04, "learning_rate": 1.978793309438471e-05, "loss": 2.6846, "step": 19160 }, { "epoch": 3.04, "learning_rate": 1.9769265232974913e-05, "loss": 2.4775, "step": 19170 }, { "epoch": 3.04, "learning_rate": 1.9750597371565113e-05, "loss": 2.3898, "step": 19180 }, { "epoch": 3.04, "learning_rate": 1.9731929510155316e-05, "loss": 2.5732, "step": 19190 }, { "epoch": 3.05, "learning_rate": 1.9713261648745522e-05, "loss": 2.215, "step": 19200 }, { "epoch": 3.05, "learning_rate": 1.9694593787335726e-05, "loss": 3.0474, "step": 19210 }, { "epoch": 3.05, "learning_rate": 1.967592592592593e-05, "loss": 2.8967, "step": 19220 }, { "epoch": 3.05, "learning_rate": 1.965725806451613e-05, "loss": 2.6113, "step": 19230 }, { "epoch": 3.05, "learning_rate": 1.9638590203106332e-05, "loss": 2.4531, "step": 19240 }, { "epoch": 3.05, "learning_rate": 1.9619922341696535e-05, "loss": 2.0877, "step": 19250 }, { "epoch": 3.05, "learning_rate": 1.9601254480286742e-05, "loss": 2.8603, "step": 19260 }, { "epoch": 3.05, "learning_rate": 1.958258661887694e-05, "loss": 2.2625, "step": 19270 }, { "epoch": 3.05, "learning_rate": 1.9563918757467145e-05, "loss": 2.5401, "step": 19280 }, { "epoch": 3.05, "learning_rate": 1.9545250896057348e-05, "loss": 2.8542, "step": 19290 }, { "epoch": 3.05, "learning_rate": 1.952658303464755e-05, "loss": 2.2006, "step": 19300 }, { "epoch": 3.05, "learning_rate": 1.9507915173237754e-05, "loss": 2.5843, "step": 19310 }, { "epoch": 3.05, "learning_rate": 1.9489247311827958e-05, "loss": 2.6987, "step": 19320 }, { "epoch": 3.05, "learning_rate": 1.947057945041816e-05, "loss": 2.4344, "step": 19330 }, { "epoch": 3.05, "learning_rate": 1.9451911589008364e-05, "loss": 2.3605, "step": 19340 }, { "epoch": 3.05, "learning_rate": 1.9433243727598567e-05, "loss": 2.1491, "step": 19350 }, { "epoch": 3.05, "learning_rate": 1.941457586618877e-05, "loss": 2.4875, "step": 19360 }, { "epoch": 3.05, "learning_rate": 1.9395908004778974e-05, "loss": 2.4786, "step": 19370 }, { "epoch": 3.05, "learning_rate": 1.9377240143369177e-05, "loss": 1.6828, "step": 19380 }, { "epoch": 3.05, "learning_rate": 1.935857228195938e-05, "loss": 1.8907, "step": 19390 }, { "epoch": 3.05, "learning_rate": 1.933990442054958e-05, "loss": 2.6151, "step": 19400 }, { "epoch": 3.05, "learning_rate": 1.9321236559139787e-05, "loss": 2.2892, "step": 19410 }, { "epoch": 3.05, "learning_rate": 1.930256869772999e-05, "loss": 2.9471, "step": 19420 }, { "epoch": 3.05, "learning_rate": 1.9283900836320193e-05, "loss": 2.4795, "step": 19430 }, { "epoch": 3.05, "learning_rate": 1.9265232974910393e-05, "loss": 1.971, "step": 19440 }, { "epoch": 3.05, "learning_rate": 1.9246565113500596e-05, "loss": 2.3115, "step": 19450 }, { "epoch": 3.05, "learning_rate": 1.9227897252090803e-05, "loss": 2.1893, "step": 19460 }, { "epoch": 3.05, "learning_rate": 1.9209229390681006e-05, "loss": 3.3262, "step": 19470 }, { "epoch": 3.05, "learning_rate": 1.919056152927121e-05, "loss": 2.5822, "step": 19480 }, { "epoch": 3.05, "learning_rate": 1.917189366786141e-05, "loss": 2.9456, "step": 19490 }, { "epoch": 3.06, "learning_rate": 1.9153225806451612e-05, "loss": 2.169, "step": 19500 }, { "epoch": 3.06, "learning_rate": 1.913455794504182e-05, "loss": 2.1284, "step": 19510 }, { "epoch": 3.06, "learning_rate": 1.9115890083632022e-05, "loss": 2.5261, "step": 19520 }, { "epoch": 3.06, "learning_rate": 1.9097222222222222e-05, "loss": 2.7204, "step": 19530 }, { "epoch": 3.06, "learning_rate": 1.9078554360812425e-05, "loss": 2.2162, "step": 19540 }, { "epoch": 3.06, "learning_rate": 1.9059886499402628e-05, "loss": 2.8995, "step": 19550 }, { "epoch": 3.06, "learning_rate": 1.904121863799283e-05, "loss": 2.0861, "step": 19560 }, { "epoch": 3.06, "learning_rate": 1.9022550776583038e-05, "loss": 2.6926, "step": 19570 }, { "epoch": 3.06, "learning_rate": 1.9003882915173238e-05, "loss": 2.4891, "step": 19580 }, { "epoch": 3.06, "learning_rate": 1.898521505376344e-05, "loss": 1.9507, "step": 19590 }, { "epoch": 3.06, "learning_rate": 1.8966547192353644e-05, "loss": 3.007, "step": 19600 }, { "epoch": 3.06, "learning_rate": 1.8947879330943848e-05, "loss": 1.6923, "step": 19610 }, { "epoch": 3.06, "learning_rate": 1.892921146953405e-05, "loss": 2.6424, "step": 19620 }, { "epoch": 3.06, "learning_rate": 1.8910543608124254e-05, "loss": 2.8237, "step": 19630 }, { "epoch": 3.06, "learning_rate": 1.8891875746714457e-05, "loss": 2.8638, "step": 19640 }, { "epoch": 3.06, "learning_rate": 1.887320788530466e-05, "loss": 2.6359, "step": 19650 }, { "epoch": 3.06, "learning_rate": 1.8854540023894864e-05, "loss": 1.9942, "step": 19660 }, { "epoch": 3.06, "learning_rate": 1.8835872162485067e-05, "loss": 2.0838, "step": 19670 }, { "epoch": 3.06, "learning_rate": 1.881720430107527e-05, "loss": 2.4944, "step": 19680 }, { "epoch": 3.06, "learning_rate": 1.8798536439665473e-05, "loss": 2.6965, "step": 19690 }, { "epoch": 3.06, "learning_rate": 1.8779868578255677e-05, "loss": 2.3801, "step": 19700 }, { "epoch": 3.06, "learning_rate": 1.8761200716845876e-05, "loss": 2.1479, "step": 19710 }, { "epoch": 3.06, "learning_rate": 1.8742532855436083e-05, "loss": 2.2529, "step": 19720 }, { "epoch": 3.06, "learning_rate": 1.8723864994026286e-05, "loss": 2.8314, "step": 19730 }, { "epoch": 3.06, "learning_rate": 1.870519713261649e-05, "loss": 2.5973, "step": 19740 }, { "epoch": 3.06, "learning_rate": 1.868652927120669e-05, "loss": 2.453, "step": 19750 }, { "epoch": 3.06, "learning_rate": 1.8667861409796893e-05, "loss": 2.7604, "step": 19760 }, { "epoch": 3.06, "learning_rate": 1.86491935483871e-05, "loss": 1.8178, "step": 19770 }, { "epoch": 3.06, "learning_rate": 1.8630525686977302e-05, "loss": 3.3395, "step": 19780 }, { "epoch": 3.06, "learning_rate": 1.8611857825567506e-05, "loss": 2.2357, "step": 19790 }, { "epoch": 3.07, "learning_rate": 1.8593189964157705e-05, "loss": 2.1135, "step": 19800 }, { "epoch": 3.07, "learning_rate": 1.857452210274791e-05, "loss": 2.5988, "step": 19810 }, { "epoch": 3.07, "learning_rate": 1.8555854241338115e-05, "loss": 2.7436, "step": 19820 }, { "epoch": 3.07, "learning_rate": 1.853718637992832e-05, "loss": 2.156, "step": 19830 }, { "epoch": 3.07, "learning_rate": 1.8518518518518518e-05, "loss": 2.286, "step": 19840 }, { "epoch": 3.07, "learning_rate": 1.849985065710872e-05, "loss": 2.1962, "step": 19850 }, { "epoch": 3.07, "learning_rate": 1.8481182795698925e-05, "loss": 2.2311, "step": 19860 }, { "epoch": 3.07, "learning_rate": 1.8462514934289128e-05, "loss": 2.1722, "step": 19870 }, { "epoch": 3.07, "learning_rate": 1.844384707287933e-05, "loss": 2.526, "step": 19880 }, { "epoch": 3.07, "learning_rate": 1.8425179211469534e-05, "loss": 2.1981, "step": 19890 }, { "epoch": 3.07, "learning_rate": 1.8406511350059738e-05, "loss": 2.2492, "step": 19900 }, { "epoch": 3.07, "learning_rate": 1.838784348864994e-05, "loss": 2.1934, "step": 19910 }, { "epoch": 3.07, "learning_rate": 1.8369175627240144e-05, "loss": 2.8237, "step": 19920 }, { "epoch": 3.07, "learning_rate": 1.8350507765830347e-05, "loss": 2.4443, "step": 19930 }, { "epoch": 3.07, "learning_rate": 1.833183990442055e-05, "loss": 1.8985, "step": 19940 }, { "epoch": 3.07, "learning_rate": 1.8313172043010754e-05, "loss": 1.9182, "step": 19950 }, { "epoch": 3.07, "learning_rate": 1.8294504181600957e-05, "loss": 3.0129, "step": 19960 }, { "epoch": 3.07, "learning_rate": 1.827583632019116e-05, "loss": 1.9888, "step": 19970 }, { "epoch": 3.07, "learning_rate": 1.8257168458781363e-05, "loss": 2.2065, "step": 19980 }, { "epoch": 3.07, "learning_rate": 1.8238500597371567e-05, "loss": 2.783, "step": 19990 }, { "epoch": 3.07, "learning_rate": 1.821983273596177e-05, "loss": 2.2629, "step": 20000 }, { "epoch": 3.07, "learning_rate": 1.820116487455197e-05, "loss": 2.4861, "step": 20010 }, { "epoch": 3.07, "learning_rate": 1.8182497013142176e-05, "loss": 1.7836, "step": 20020 }, { "epoch": 3.07, "learning_rate": 1.816382915173238e-05, "loss": 2.8726, "step": 20030 }, { "epoch": 3.07, "learning_rate": 1.8145161290322583e-05, "loss": 2.2979, "step": 20040 }, { "epoch": 3.07, "learning_rate": 1.8126493428912786e-05, "loss": 1.8698, "step": 20050 }, { "epoch": 3.07, "learning_rate": 1.8107825567502986e-05, "loss": 2.0086, "step": 20060 }, { "epoch": 3.07, "learning_rate": 1.808915770609319e-05, "loss": 2.0377, "step": 20070 }, { "epoch": 3.07, "learning_rate": 1.8070489844683396e-05, "loss": 2.0188, "step": 20080 }, { "epoch": 3.08, "learning_rate": 1.80518219832736e-05, "loss": 2.1468, "step": 20090 }, { "epoch": 3.08, "learning_rate": 1.80331541218638e-05, "loss": 2.6763, "step": 20100 }, { "epoch": 3.08, "learning_rate": 1.8014486260454002e-05, "loss": 2.4538, "step": 20110 }, { "epoch": 3.08, "learning_rate": 1.7995818399044205e-05, "loss": 2.8785, "step": 20120 }, { "epoch": 3.08, "learning_rate": 1.797715053763441e-05, "loss": 2.014, "step": 20130 }, { "epoch": 3.08, "learning_rate": 1.7958482676224615e-05, "loss": 3.096, "step": 20140 }, { "epoch": 3.08, "learning_rate": 1.7939814814814815e-05, "loss": 2.4784, "step": 20150 }, { "epoch": 3.08, "learning_rate": 1.7921146953405018e-05, "loss": 1.5835, "step": 20160 }, { "epoch": 3.08, "learning_rate": 1.790247909199522e-05, "loss": 2.1188, "step": 20170 }, { "epoch": 3.08, "learning_rate": 1.7883811230585428e-05, "loss": 2.8463, "step": 20180 }, { "epoch": 3.08, "learning_rate": 1.7865143369175628e-05, "loss": 2.1697, "step": 20190 }, { "epoch": 3.08, "learning_rate": 1.784647550776583e-05, "loss": 2.6546, "step": 20200 }, { "epoch": 3.08, "learning_rate": 1.7827807646356034e-05, "loss": 2.2187, "step": 20210 }, { "epoch": 3.08, "learning_rate": 1.7809139784946237e-05, "loss": 1.5922, "step": 20220 }, { "epoch": 3.08, "learning_rate": 1.779047192353644e-05, "loss": 2.4411, "step": 20230 }, { "epoch": 3.08, "learning_rate": 1.7771804062126644e-05, "loss": 2.0552, "step": 20240 }, { "epoch": 3.08, "learning_rate": 1.7753136200716847e-05, "loss": 2.3645, "step": 20250 }, { "epoch": 3.08, "learning_rate": 1.773446833930705e-05, "loss": 2.426, "step": 20260 }, { "epoch": 3.08, "learning_rate": 1.7715800477897253e-05, "loss": 2.0695, "step": 20270 }, { "epoch": 3.08, "learning_rate": 1.7697132616487457e-05, "loss": 3.0191, "step": 20280 }, { "epoch": 3.08, "learning_rate": 1.767846475507766e-05, "loss": 2.7107, "step": 20290 }, { "epoch": 3.08, "learning_rate": 1.7659796893667863e-05, "loss": 2.454, "step": 20300 }, { "epoch": 3.08, "learning_rate": 1.7641129032258066e-05, "loss": 2.2402, "step": 20310 }, { "epoch": 3.08, "learning_rate": 1.7622461170848266e-05, "loss": 2.0683, "step": 20320 }, { "epoch": 3.08, "learning_rate": 1.7603793309438473e-05, "loss": 2.2405, "step": 20330 }, { "epoch": 3.08, "learning_rate": 1.7585125448028676e-05, "loss": 2.3465, "step": 20340 }, { "epoch": 3.08, "learning_rate": 1.756645758661888e-05, "loss": 1.601, "step": 20350 }, { "epoch": 3.08, "learning_rate": 1.7547789725209082e-05, "loss": 2.009, "step": 20360 }, { "epoch": 3.08, "learning_rate": 1.7529121863799282e-05, "loss": 2.19, "step": 20370 }, { "epoch": 3.08, "learning_rate": 1.7510454002389485e-05, "loss": 2.1226, "step": 20380 }, { "epoch": 3.09, "learning_rate": 1.7491786140979692e-05, "loss": 1.8346, "step": 20390 }, { "epoch": 3.09, "learning_rate": 1.7473118279569895e-05, "loss": 2.1178, "step": 20400 }, { "epoch": 3.09, "learning_rate": 1.7454450418160095e-05, "loss": 2.1185, "step": 20410 }, { "epoch": 3.09, "learning_rate": 1.7435782556750298e-05, "loss": 1.8201, "step": 20420 }, { "epoch": 3.09, "learning_rate": 1.74171146953405e-05, "loss": 1.7933, "step": 20430 }, { "epoch": 3.09, "learning_rate": 1.7398446833930708e-05, "loss": 2.287, "step": 20440 }, { "epoch": 3.09, "learning_rate": 1.7379778972520908e-05, "loss": 2.2779, "step": 20450 }, { "epoch": 3.09, "learning_rate": 1.736111111111111e-05, "loss": 2.7157, "step": 20460 }, { "epoch": 3.09, "learning_rate": 1.7342443249701314e-05, "loss": 2.3053, "step": 20470 }, { "epoch": 3.09, "learning_rate": 1.7323775388291518e-05, "loss": 2.1541, "step": 20480 }, { "epoch": 3.09, "learning_rate": 1.7305107526881724e-05, "loss": 2.6438, "step": 20490 }, { "epoch": 3.09, "learning_rate": 1.7286439665471924e-05, "loss": 2.4277, "step": 20500 }, { "epoch": 3.09, "learning_rate": 1.7267771804062127e-05, "loss": 2.4426, "step": 20510 }, { "epoch": 3.09, "learning_rate": 1.724910394265233e-05, "loss": 2.0442, "step": 20520 }, { "epoch": 3.09, "learning_rate": 1.7230436081242534e-05, "loss": 2.3052, "step": 20530 }, { "epoch": 3.09, "learning_rate": 1.7211768219832737e-05, "loss": 1.9107, "step": 20540 }, { "epoch": 3.09, "learning_rate": 1.719310035842294e-05, "loss": 2.1553, "step": 20550 }, { "epoch": 3.09, "learning_rate": 1.7174432497013143e-05, "loss": 1.6635, "step": 20560 }, { "epoch": 3.09, "learning_rate": 1.7155764635603347e-05, "loss": 2.6077, "step": 20570 }, { "epoch": 3.09, "learning_rate": 1.7137096774193546e-05, "loss": 2.2606, "step": 20580 }, { "epoch": 3.09, "learning_rate": 1.7118428912783753e-05, "loss": 2.1144, "step": 20590 }, { "epoch": 3.09, "learning_rate": 1.7099761051373956e-05, "loss": 1.8549, "step": 20600 }, { "epoch": 3.09, "learning_rate": 1.708109318996416e-05, "loss": 1.9439, "step": 20610 }, { "epoch": 3.09, "learning_rate": 1.7062425328554363e-05, "loss": 2.2266, "step": 20620 }, { "epoch": 3.09, "learning_rate": 1.7043757467144562e-05, "loss": 2.4845, "step": 20630 }, { "epoch": 3.09, "learning_rate": 1.702508960573477e-05, "loss": 2.4241, "step": 20640 }, { "epoch": 3.09, "learning_rate": 1.7006421744324972e-05, "loss": 2.1277, "step": 20650 }, { "epoch": 3.09, "learning_rate": 1.6987753882915175e-05, "loss": 2.1097, "step": 20660 }, { "epoch": 3.09, "learning_rate": 1.6969086021505375e-05, "loss": 2.4681, "step": 20670 }, { "epoch": 3.09, "learning_rate": 1.695041816009558e-05, "loss": 2.4307, "step": 20680 }, { "epoch": 3.1, "learning_rate": 1.6931750298685782e-05, "loss": 2.2657, "step": 20690 }, { "epoch": 3.1, "learning_rate": 1.691308243727599e-05, "loss": 1.995, "step": 20700 }, { "epoch": 3.1, "learning_rate": 1.689441457586619e-05, "loss": 2.2158, "step": 20710 }, { "epoch": 3.1, "learning_rate": 1.687574671445639e-05, "loss": 2.3378, "step": 20720 }, { "epoch": 3.1, "learning_rate": 1.6857078853046595e-05, "loss": 2.9261, "step": 20730 }, { "epoch": 3.1, "learning_rate": 1.6838410991636798e-05, "loss": 2.3966, "step": 20740 }, { "epoch": 3.1, "learning_rate": 1.6819743130227004e-05, "loss": 2.4051, "step": 20750 }, { "epoch": 3.1, "learning_rate": 1.6801075268817204e-05, "loss": 1.825, "step": 20760 }, { "epoch": 3.1, "learning_rate": 1.6782407407407408e-05, "loss": 2.1746, "step": 20770 }, { "epoch": 3.1, "learning_rate": 1.676373954599761e-05, "loss": 2.2581, "step": 20780 }, { "epoch": 3.1, "learning_rate": 1.6745071684587814e-05, "loss": 1.8986, "step": 20790 }, { "epoch": 3.1, "learning_rate": 1.6726403823178017e-05, "loss": 2.5394, "step": 20800 }, { "epoch": 3.1, "learning_rate": 1.670773596176822e-05, "loss": 2.1355, "step": 20810 }, { "epoch": 3.1, "learning_rate": 1.6689068100358424e-05, "loss": 2.3145, "step": 20820 }, { "epoch": 3.1, "learning_rate": 1.6670400238948627e-05, "loss": 2.142, "step": 20830 }, { "epoch": 3.1, "learning_rate": 1.665173237753883e-05, "loss": 2.208, "step": 20840 }, { "epoch": 3.1, "learning_rate": 1.6633064516129033e-05, "loss": 2.5949, "step": 20850 }, { "epoch": 3.1, "learning_rate": 1.6614396654719236e-05, "loss": 1.8831, "step": 20860 }, { "epoch": 3.1, "learning_rate": 1.659572879330944e-05, "loss": 2.4052, "step": 20870 }, { "epoch": 3.1, "learning_rate": 1.6577060931899643e-05, "loss": 1.6154, "step": 20880 }, { "epoch": 3.1, "learning_rate": 1.6558393070489843e-05, "loss": 1.7053, "step": 20890 }, { "epoch": 3.1, "learning_rate": 1.653972520908005e-05, "loss": 2.789, "step": 20900 }, { "epoch": 3.1, "learning_rate": 1.6521057347670253e-05, "loss": 2.2677, "step": 20910 }, { "epoch": 3.1, "learning_rate": 1.6502389486260456e-05, "loss": 2.2517, "step": 20920 }, { "epoch": 3.1, "learning_rate": 1.6483721624850656e-05, "loss": 1.6777, "step": 20930 }, { "epoch": 3.1, "learning_rate": 1.646505376344086e-05, "loss": 2.1637, "step": 20940 }, { "epoch": 3.1, "learning_rate": 1.6446385902031065e-05, "loss": 2.2388, "step": 20950 }, { "epoch": 3.1, "learning_rate": 1.642771804062127e-05, "loss": 2.4399, "step": 20960 }, { "epoch": 3.1, "learning_rate": 1.6409050179211472e-05, "loss": 2.5724, "step": 20970 }, { "epoch": 3.1, "learning_rate": 1.6390382317801672e-05, "loss": 2.0501, "step": 20980 }, { "epoch": 3.11, "learning_rate": 1.6371714456391875e-05, "loss": 1.91, "step": 20990 }, { "epoch": 3.11, "learning_rate": 1.635304659498208e-05, "loss": 2.8361, "step": 21000 }, { "epoch": 3.11, "learning_rate": 1.6334378733572285e-05, "loss": 2.1849, "step": 21010 }, { "epoch": 3.11, "learning_rate": 1.6315710872162485e-05, "loss": 2.5023, "step": 21020 }, { "epoch": 3.11, "learning_rate": 1.6297043010752688e-05, "loss": 2.1698, "step": 21030 }, { "epoch": 3.11, "learning_rate": 1.627837514934289e-05, "loss": 1.9825, "step": 21040 }, { "epoch": 3.11, "learning_rate": 1.6259707287933094e-05, "loss": 2.3123, "step": 21050 }, { "epoch": 3.11, "learning_rate": 1.62410394265233e-05, "loss": 2.4455, "step": 21060 }, { "epoch": 3.11, "learning_rate": 1.62223715651135e-05, "loss": 2.3476, "step": 21070 }, { "epoch": 3.11, "learning_rate": 1.6203703703703704e-05, "loss": 1.8119, "step": 21080 }, { "epoch": 3.11, "learning_rate": 1.6185035842293907e-05, "loss": 2.1137, "step": 21090 }, { "epoch": 3.11, "learning_rate": 1.616636798088411e-05, "loss": 2.701, "step": 21100 }, { "epoch": 3.11, "learning_rate": 1.6147700119474314e-05, "loss": 2.2716, "step": 21110 }, { "epoch": 3.11, "learning_rate": 1.6129032258064517e-05, "loss": 2.1489, "step": 21120 }, { "epoch": 3.11, "learning_rate": 1.611036439665472e-05, "loss": 2.0791, "step": 21130 }, { "epoch": 3.11, "learning_rate": 1.6091696535244923e-05, "loss": 1.7618, "step": 21140 }, { "epoch": 3.11, "learning_rate": 1.6073028673835126e-05, "loss": 1.4273, "step": 21150 }, { "epoch": 3.11, "learning_rate": 1.605436081242533e-05, "loss": 2.1745, "step": 21160 }, { "epoch": 3.11, "learning_rate": 1.6035692951015533e-05, "loss": 2.2529, "step": 21170 }, { "epoch": 3.11, "learning_rate": 1.6017025089605736e-05, "loss": 2.2853, "step": 21180 }, { "epoch": 3.11, "learning_rate": 1.599835722819594e-05, "loss": 2.0975, "step": 21190 }, { "epoch": 3.11, "learning_rate": 1.597968936678614e-05, "loss": 1.9055, "step": 21200 }, { "epoch": 3.11, "learning_rate": 1.5961021505376346e-05, "loss": 1.9013, "step": 21210 }, { "epoch": 3.11, "learning_rate": 1.594235364396655e-05, "loss": 1.9079, "step": 21220 }, { "epoch": 3.11, "learning_rate": 1.5923685782556752e-05, "loss": 2.4706, "step": 21230 }, { "epoch": 3.11, "learning_rate": 1.5905017921146952e-05, "loss": 1.9529, "step": 21240 }, { "epoch": 3.11, "learning_rate": 1.5886350059737155e-05, "loss": 2.4121, "step": 21250 }, { "epoch": 3.11, "learning_rate": 1.5867682198327362e-05, "loss": 1.8514, "step": 21260 }, { "epoch": 3.11, "learning_rate": 1.5849014336917565e-05, "loss": 1.4366, "step": 21270 }, { "epoch": 3.12, "learning_rate": 1.5830346475507768e-05, "loss": 2.2499, "step": 21280 }, { "epoch": 3.12, "learning_rate": 1.5811678614097968e-05, "loss": 2.1375, "step": 21290 }, { "epoch": 3.12, "learning_rate": 1.579301075268817e-05, "loss": 2.0315, "step": 21300 }, { "epoch": 3.12, "learning_rate": 1.5774342891278378e-05, "loss": 1.8211, "step": 21310 }, { "epoch": 3.12, "learning_rate": 1.575567502986858e-05, "loss": 1.9708, "step": 21320 }, { "epoch": 3.12, "learning_rate": 1.573700716845878e-05, "loss": 2.2567, "step": 21330 }, { "epoch": 3.12, "learning_rate": 1.5718339307048984e-05, "loss": 2.1037, "step": 21340 }, { "epoch": 3.12, "learning_rate": 1.5699671445639187e-05, "loss": 2.0825, "step": 21350 }, { "epoch": 3.12, "learning_rate": 1.568100358422939e-05, "loss": 1.9985, "step": 21360 }, { "epoch": 3.12, "learning_rate": 1.5662335722819594e-05, "loss": 2.2336, "step": 21370 }, { "epoch": 3.12, "learning_rate": 1.5643667861409797e-05, "loss": 2.6434, "step": 21380 }, { "epoch": 3.12, "learning_rate": 1.5625e-05, "loss": 2.6117, "step": 21390 }, { "epoch": 3.12, "learning_rate": 1.5606332138590204e-05, "loss": 2.0422, "step": 21400 }, { "epoch": 3.12, "learning_rate": 1.5587664277180407e-05, "loss": 2.0718, "step": 21410 }, { "epoch": 3.12, "learning_rate": 1.556899641577061e-05, "loss": 2.2961, "step": 21420 }, { "epoch": 3.12, "learning_rate": 1.5550328554360813e-05, "loss": 1.9545, "step": 21430 }, { "epoch": 3.12, "learning_rate": 1.5531660692951016e-05, "loss": 2.1755, "step": 21440 }, { "epoch": 3.12, "learning_rate": 1.551299283154122e-05, "loss": 2.3679, "step": 21450 }, { "epoch": 3.12, "learning_rate": 1.5494324970131423e-05, "loss": 1.9657, "step": 21460 }, { "epoch": 3.12, "learning_rate": 1.5475657108721626e-05, "loss": 1.9233, "step": 21470 }, { "epoch": 3.12, "learning_rate": 1.545698924731183e-05, "loss": 1.9118, "step": 21480 }, { "epoch": 3.12, "learning_rate": 1.5438321385902033e-05, "loss": 2.0788, "step": 21490 }, { "epoch": 3.12, "learning_rate": 1.5419653524492232e-05, "loss": 2.4147, "step": 21500 }, { "epoch": 3.12, "learning_rate": 1.540098566308244e-05, "loss": 1.9939, "step": 21510 }, { "epoch": 3.12, "learning_rate": 1.5382317801672642e-05, "loss": 2.2315, "step": 21520 }, { "epoch": 3.12, "learning_rate": 1.5363649940262845e-05, "loss": 2.6876, "step": 21530 }, { "epoch": 3.12, "learning_rate": 1.534498207885305e-05, "loss": 1.9245, "step": 21540 }, { "epoch": 3.12, "learning_rate": 1.532631421744325e-05, "loss": 2.2156, "step": 21550 }, { "epoch": 3.12, "learning_rate": 1.530764635603345e-05, "loss": 2.2324, "step": 21560 }, { "epoch": 3.12, "learning_rate": 1.5288978494623658e-05, "loss": 2.0756, "step": 21570 }, { "epoch": 3.13, "learning_rate": 1.527031063321386e-05, "loss": 1.732, "step": 21580 }, { "epoch": 3.13, "learning_rate": 1.5251642771804061e-05, "loss": 1.9018, "step": 21590 }, { "epoch": 3.13, "learning_rate": 1.5232974910394265e-05, "loss": 1.8123, "step": 21600 }, { "epoch": 3.13, "learning_rate": 1.521430704898447e-05, "loss": 2.3712, "step": 21610 }, { "epoch": 3.13, "learning_rate": 1.5195639187574673e-05, "loss": 2.5202, "step": 21620 }, { "epoch": 3.13, "learning_rate": 1.5176971326164876e-05, "loss": 1.3138, "step": 21630 }, { "epoch": 3.13, "learning_rate": 1.5158303464755077e-05, "loss": 2.1082, "step": 21640 }, { "epoch": 3.13, "learning_rate": 1.513963560334528e-05, "loss": 2.1131, "step": 21650 }, { "epoch": 3.13, "learning_rate": 1.5120967741935486e-05, "loss": 2.0615, "step": 21660 }, { "epoch": 3.13, "learning_rate": 1.5102299880525689e-05, "loss": 2.5166, "step": 21670 }, { "epoch": 3.13, "learning_rate": 1.508363201911589e-05, "loss": 2.6684, "step": 21680 }, { "epoch": 3.13, "learning_rate": 1.5064964157706094e-05, "loss": 1.8802, "step": 21690 }, { "epoch": 3.13, "learning_rate": 1.5046296296296297e-05, "loss": 2.1948, "step": 21700 }, { "epoch": 3.13, "learning_rate": 1.5027628434886502e-05, "loss": 2.0427, "step": 21710 }, { "epoch": 3.13, "learning_rate": 1.5008960573476701e-05, "loss": 2.5439, "step": 21720 }, { "epoch": 3.13, "learning_rate": 1.4990292712066906e-05, "loss": 2.0657, "step": 21730 }, { "epoch": 3.13, "learning_rate": 1.497162485065711e-05, "loss": 2.1985, "step": 21740 }, { "epoch": 3.13, "learning_rate": 1.4952956989247313e-05, "loss": 1.9323, "step": 21750 }, { "epoch": 3.13, "learning_rate": 1.4934289127837516e-05, "loss": 2.0768, "step": 21760 }, { "epoch": 3.13, "learning_rate": 1.4915621266427718e-05, "loss": 1.8996, "step": 21770 }, { "epoch": 3.13, "learning_rate": 1.489695340501792e-05, "loss": 2.0418, "step": 21780 }, { "epoch": 3.13, "learning_rate": 1.4878285543608126e-05, "loss": 1.4946, "step": 21790 }, { "epoch": 3.13, "learning_rate": 1.4859617682198329e-05, "loss": 2.1865, "step": 21800 }, { "epoch": 3.13, "learning_rate": 1.484094982078853e-05, "loss": 2.0617, "step": 21810 }, { "epoch": 3.13, "learning_rate": 1.4822281959378734e-05, "loss": 1.8996, "step": 21820 }, { "epoch": 3.13, "learning_rate": 1.4803614097968937e-05, "loss": 1.8901, "step": 21830 }, { "epoch": 3.13, "learning_rate": 1.4784946236559142e-05, "loss": 2.4669, "step": 21840 }, { "epoch": 3.13, "learning_rate": 1.4766278375149345e-05, "loss": 2.5433, "step": 21850 }, { "epoch": 3.13, "learning_rate": 1.4747610513739547e-05, "loss": 1.7593, "step": 21860 }, { "epoch": 3.13, "learning_rate": 1.472894265232975e-05, "loss": 2.0914, "step": 21870 }, { "epoch": 3.14, "learning_rate": 1.4710274790919953e-05, "loss": 2.1832, "step": 21880 }, { "epoch": 3.14, "learning_rate": 1.4691606929510158e-05, "loss": 1.8268, "step": 21890 }, { "epoch": 3.14, "learning_rate": 1.4672939068100358e-05, "loss": 2.0511, "step": 21900 }, { "epoch": 3.14, "learning_rate": 1.4654271206690563e-05, "loss": 2.3323, "step": 21910 }, { "epoch": 3.14, "learning_rate": 1.4635603345280766e-05, "loss": 1.6998, "step": 21920 }, { "epoch": 3.14, "learning_rate": 1.4616935483870969e-05, "loss": 1.9985, "step": 21930 }, { "epoch": 3.14, "learning_rate": 1.459826762246117e-05, "loss": 1.7915, "step": 21940 }, { "epoch": 3.14, "learning_rate": 1.4579599761051374e-05, "loss": 1.7092, "step": 21950 }, { "epoch": 3.14, "learning_rate": 1.4560931899641577e-05, "loss": 1.9216, "step": 21960 }, { "epoch": 3.14, "learning_rate": 1.4542264038231782e-05, "loss": 1.9626, "step": 21970 }, { "epoch": 3.14, "learning_rate": 1.4523596176821985e-05, "loss": 2.1571, "step": 21980 }, { "epoch": 3.14, "learning_rate": 1.4504928315412187e-05, "loss": 2.0591, "step": 21990 }, { "epoch": 3.14, "learning_rate": 1.448626045400239e-05, "loss": 1.5167, "step": 22000 }, { "epoch": 3.14, "learning_rate": 1.4467592592592593e-05, "loss": 1.7718, "step": 22010 }, { "epoch": 3.14, "learning_rate": 1.4448924731182798e-05, "loss": 1.6548, "step": 22020 }, { "epoch": 3.14, "learning_rate": 1.4430256869772998e-05, "loss": 1.6625, "step": 22030 }, { "epoch": 3.14, "learning_rate": 1.4411589008363203e-05, "loss": 2.0432, "step": 22040 }, { "epoch": 3.14, "learning_rate": 1.4392921146953406e-05, "loss": 1.8862, "step": 22050 }, { "epoch": 3.14, "learning_rate": 1.437425328554361e-05, "loss": 1.5782, "step": 22060 }, { "epoch": 3.14, "learning_rate": 1.435558542413381e-05, "loss": 2.2837, "step": 22070 }, { "epoch": 3.14, "learning_rate": 1.4336917562724014e-05, "loss": 2.0321, "step": 22080 }, { "epoch": 3.14, "learning_rate": 1.4318249701314219e-05, "loss": 1.7268, "step": 22090 }, { "epoch": 3.14, "learning_rate": 1.4299581839904422e-05, "loss": 1.8699, "step": 22100 }, { "epoch": 3.14, "learning_rate": 1.4280913978494625e-05, "loss": 1.0975, "step": 22110 }, { "epoch": 3.14, "learning_rate": 1.4262246117084827e-05, "loss": 2.1251, "step": 22120 }, { "epoch": 3.14, "learning_rate": 1.424357825567503e-05, "loss": 1.2742, "step": 22130 }, { "epoch": 3.14, "learning_rate": 1.4224910394265233e-05, "loss": 2.1263, "step": 22140 }, { "epoch": 3.14, "learning_rate": 1.4206242532855438e-05, "loss": 2.5516, "step": 22150 }, { "epoch": 3.14, "learning_rate": 1.4187574671445638e-05, "loss": 1.5225, "step": 22160 }, { "epoch": 3.14, "learning_rate": 1.4168906810035843e-05, "loss": 2.2144, "step": 22170 }, { "epoch": 3.15, "learning_rate": 1.4150238948626046e-05, "loss": 2.056, "step": 22180 }, { "epoch": 3.15, "learning_rate": 1.413157108721625e-05, "loss": 1.5901, "step": 22190 }, { "epoch": 3.15, "learning_rate": 1.4112903225806454e-05, "loss": 1.9209, "step": 22200 }, { "epoch": 3.15, "learning_rate": 1.4094235364396654e-05, "loss": 1.8826, "step": 22210 }, { "epoch": 3.15, "learning_rate": 1.4075567502986859e-05, "loss": 1.7103, "step": 22220 }, { "epoch": 3.15, "learning_rate": 1.4056899641577062e-05, "loss": 2.4449, "step": 22230 }, { "epoch": 3.15, "learning_rate": 1.4038231780167265e-05, "loss": 1.8365, "step": 22240 }, { "epoch": 3.15, "learning_rate": 1.4019563918757467e-05, "loss": 1.8515, "step": 22250 }, { "epoch": 3.15, "learning_rate": 1.400089605734767e-05, "loss": 1.5734, "step": 22260 }, { "epoch": 3.15, "learning_rate": 1.3982228195937873e-05, "loss": 2.1358, "step": 22270 }, { "epoch": 3.15, "learning_rate": 1.3963560334528078e-05, "loss": 1.7191, "step": 22280 }, { "epoch": 3.15, "learning_rate": 1.3944892473118278e-05, "loss": 1.9424, "step": 22290 }, { "epoch": 3.15, "learning_rate": 1.3926224611708483e-05, "loss": 1.9864, "step": 22300 }, { "epoch": 3.15, "learning_rate": 1.3907556750298686e-05, "loss": 1.9016, "step": 22310 }, { "epoch": 3.15, "learning_rate": 1.388888888888889e-05, "loss": 1.4023, "step": 22320 }, { "epoch": 3.15, "learning_rate": 1.3870221027479094e-05, "loss": 1.9216, "step": 22330 }, { "epoch": 3.15, "learning_rate": 1.3851553166069294e-05, "loss": 2.1977, "step": 22340 }, { "epoch": 3.15, "learning_rate": 1.38328853046595e-05, "loss": 1.9146, "step": 22350 }, { "epoch": 3.15, "learning_rate": 1.3814217443249702e-05, "loss": 2.1622, "step": 22360 }, { "epoch": 3.15, "learning_rate": 1.3795549581839906e-05, "loss": 1.3267, "step": 22370 }, { "epoch": 3.15, "learning_rate": 1.3776881720430107e-05, "loss": 2.5444, "step": 22380 }, { "epoch": 3.15, "learning_rate": 1.375821385902031e-05, "loss": 2.5128, "step": 22390 }, { "epoch": 3.15, "learning_rate": 1.3739545997610515e-05, "loss": 2.335, "step": 22400 }, { "epoch": 3.15, "learning_rate": 1.3720878136200719e-05, "loss": 2.0817, "step": 22410 }, { "epoch": 3.15, "learning_rate": 1.370221027479092e-05, "loss": 2.0941, "step": 22420 }, { "epoch": 3.15, "learning_rate": 1.3683542413381123e-05, "loss": 1.984, "step": 22430 }, { "epoch": 3.15, "learning_rate": 1.3664874551971326e-05, "loss": 1.8656, "step": 22440 }, { "epoch": 3.15, "learning_rate": 1.364620669056153e-05, "loss": 1.5047, "step": 22450 }, { "epoch": 3.15, "learning_rate": 1.3627538829151735e-05, "loss": 2.2893, "step": 22460 }, { "epoch": 3.16, "learning_rate": 1.3608870967741934e-05, "loss": 1.9264, "step": 22470 }, { "epoch": 3.16, "learning_rate": 1.359020310633214e-05, "loss": 2.1947, "step": 22480 }, { "epoch": 3.16, "learning_rate": 1.3571535244922343e-05, "loss": 1.4356, "step": 22490 }, { "epoch": 3.16, "learning_rate": 1.3552867383512546e-05, "loss": 1.8751, "step": 22500 }, { "epoch": 3.16, "learning_rate": 1.3534199522102747e-05, "loss": 1.327, "step": 22510 }, { "epoch": 3.16, "learning_rate": 1.351553166069295e-05, "loss": 2.0069, "step": 22520 }, { "epoch": 3.16, "learning_rate": 1.3496863799283155e-05, "loss": 1.8235, "step": 22530 }, { "epoch": 3.16, "learning_rate": 1.3478195937873359e-05, "loss": 1.5983, "step": 22540 }, { "epoch": 3.16, "learning_rate": 1.3459528076463562e-05, "loss": 1.7298, "step": 22550 }, { "epoch": 3.16, "learning_rate": 1.3440860215053763e-05, "loss": 1.6556, "step": 22560 }, { "epoch": 3.16, "learning_rate": 1.3422192353643967e-05, "loss": 1.6897, "step": 22570 }, { "epoch": 3.16, "learning_rate": 1.3403524492234172e-05, "loss": 1.9386, "step": 22580 }, { "epoch": 3.16, "learning_rate": 1.3384856630824375e-05, "loss": 1.8859, "step": 22590 }, { "epoch": 3.16, "learning_rate": 1.3366188769414575e-05, "loss": 2.6273, "step": 22600 }, { "epoch": 3.16, "learning_rate": 1.334752090800478e-05, "loss": 2.4273, "step": 22610 }, { "epoch": 3.16, "learning_rate": 1.3328853046594983e-05, "loss": 2.3851, "step": 22620 }, { "epoch": 3.16, "learning_rate": 1.3310185185185186e-05, "loss": 2.0836, "step": 22630 }, { "epoch": 3.16, "learning_rate": 1.3291517323775387e-05, "loss": 1.6597, "step": 22640 }, { "epoch": 3.16, "learning_rate": 1.327284946236559e-05, "loss": 1.7808, "step": 22650 }, { "epoch": 3.16, "learning_rate": 1.3254181600955796e-05, "loss": 2.1817, "step": 22660 }, { "epoch": 3.16, "learning_rate": 1.3235513739545999e-05, "loss": 1.8611, "step": 22670 }, { "epoch": 3.16, "learning_rate": 1.3216845878136202e-05, "loss": 1.5431, "step": 22680 }, { "epoch": 3.16, "learning_rate": 1.3198178016726404e-05, "loss": 1.7751, "step": 22690 }, { "epoch": 3.16, "learning_rate": 1.3179510155316607e-05, "loss": 1.6953, "step": 22700 }, { "epoch": 3.16, "learning_rate": 1.3160842293906812e-05, "loss": 2.5849, "step": 22710 }, { "epoch": 3.16, "learning_rate": 1.3142174432497015e-05, "loss": 1.3372, "step": 22720 }, { "epoch": 3.16, "learning_rate": 1.3123506571087216e-05, "loss": 1.8606, "step": 22730 }, { "epoch": 3.16, "learning_rate": 1.310483870967742e-05, "loss": 2.0103, "step": 22740 }, { "epoch": 3.16, "learning_rate": 1.3086170848267623e-05, "loss": 2.4922, "step": 22750 }, { "epoch": 3.16, "learning_rate": 1.3067502986857826e-05, "loss": 2.1738, "step": 22760 }, { "epoch": 3.17, "learning_rate": 1.3048835125448031e-05, "loss": 1.9274, "step": 22770 }, { "epoch": 3.17, "learning_rate": 1.3030167264038231e-05, "loss": 1.8831, "step": 22780 }, { "epoch": 3.17, "learning_rate": 1.3011499402628436e-05, "loss": 1.5739, "step": 22790 }, { "epoch": 3.17, "learning_rate": 1.2992831541218639e-05, "loss": 1.7399, "step": 22800 }, { "epoch": 3.17, "learning_rate": 1.2974163679808842e-05, "loss": 2.4286, "step": 22810 }, { "epoch": 3.17, "learning_rate": 1.2955495818399044e-05, "loss": 1.7934, "step": 22820 }, { "epoch": 3.17, "learning_rate": 1.2936827956989247e-05, "loss": 2.3587, "step": 22830 }, { "epoch": 3.17, "learning_rate": 1.2918160095579452e-05, "loss": 1.6229, "step": 22840 }, { "epoch": 3.17, "learning_rate": 1.2899492234169655e-05, "loss": 1.9592, "step": 22850 }, { "epoch": 3.17, "learning_rate": 1.2880824372759857e-05, "loss": 2.2153, "step": 22860 }, { "epoch": 3.17, "learning_rate": 1.286215651135006e-05, "loss": 1.3733, "step": 22870 }, { "epoch": 3.17, "learning_rate": 1.2843488649940263e-05, "loss": 2.1415, "step": 22880 }, { "epoch": 3.17, "learning_rate": 1.2824820788530468e-05, "loss": 1.9484, "step": 22890 }, { "epoch": 3.17, "learning_rate": 1.2806152927120671e-05, "loss": 1.6596, "step": 22900 }, { "epoch": 3.17, "learning_rate": 1.2787485065710873e-05, "loss": 1.4818, "step": 22910 }, { "epoch": 3.17, "learning_rate": 1.2768817204301076e-05, "loss": 1.4102, "step": 22920 }, { "epoch": 3.17, "learning_rate": 1.275014934289128e-05, "loss": 1.5991, "step": 22930 }, { "epoch": 3.17, "learning_rate": 1.2731481481481482e-05, "loss": 1.7665, "step": 22940 }, { "epoch": 3.17, "learning_rate": 1.2712813620071684e-05, "loss": 1.5075, "step": 22950 }, { "epoch": 3.17, "learning_rate": 1.2694145758661887e-05, "loss": 1.748, "step": 22960 }, { "epoch": 3.17, "learning_rate": 1.2675477897252092e-05, "loss": 1.9612, "step": 22970 }, { "epoch": 3.17, "learning_rate": 1.2656810035842295e-05, "loss": 1.6859, "step": 22980 }, { "epoch": 3.17, "learning_rate": 1.2638142174432497e-05, "loss": 1.861, "step": 22990 }, { "epoch": 3.17, "learning_rate": 1.26194743130227e-05, "loss": 1.3929, "step": 23000 }, { "epoch": 3.17, "learning_rate": 1.2600806451612903e-05, "loss": 1.975, "step": 23010 }, { "epoch": 3.17, "learning_rate": 1.2582138590203108e-05, "loss": 1.1081, "step": 23020 }, { "epoch": 3.17, "learning_rate": 1.2563470728793311e-05, "loss": 1.3361, "step": 23030 }, { "epoch": 3.17, "learning_rate": 1.2544802867383513e-05, "loss": 1.865, "step": 23040 }, { "epoch": 3.17, "learning_rate": 1.2526135005973716e-05, "loss": 1.4983, "step": 23050 }, { "epoch": 3.17, "learning_rate": 1.250746714456392e-05, "loss": 2.1742, "step": 23060 }, { "epoch": 3.18, "learning_rate": 1.2488799283154123e-05, "loss": 1.7182, "step": 23070 }, { "epoch": 3.18, "learning_rate": 1.2470131421744326e-05, "loss": 1.6553, "step": 23080 }, { "epoch": 3.18, "learning_rate": 1.2451463560334527e-05, "loss": 1.7373, "step": 23090 }, { "epoch": 3.18, "learning_rate": 1.2432795698924732e-05, "loss": 1.5766, "step": 23100 }, { "epoch": 3.18, "learning_rate": 1.2414127837514935e-05, "loss": 1.4831, "step": 23110 }, { "epoch": 3.18, "learning_rate": 1.2395459976105139e-05, "loss": 2.0497, "step": 23120 }, { "epoch": 3.18, "learning_rate": 1.2376792114695342e-05, "loss": 1.8571, "step": 23130 }, { "epoch": 3.18, "learning_rate": 1.2358124253285543e-05, "loss": 1.3844, "step": 23140 }, { "epoch": 3.18, "learning_rate": 1.2339456391875748e-05, "loss": 1.8604, "step": 23150 }, { "epoch": 3.18, "learning_rate": 1.232078853046595e-05, "loss": 1.8055, "step": 23160 }, { "epoch": 3.18, "learning_rate": 1.2302120669056153e-05, "loss": 1.7484, "step": 23170 }, { "epoch": 3.18, "learning_rate": 1.2283452807646356e-05, "loss": 1.8736, "step": 23180 }, { "epoch": 3.18, "learning_rate": 1.226478494623656e-05, "loss": 1.9731, "step": 23190 }, { "epoch": 3.18, "learning_rate": 1.2246117084826763e-05, "loss": 2.0633, "step": 23200 }, { "epoch": 3.18, "learning_rate": 1.2227449223416966e-05, "loss": 1.8474, "step": 23210 }, { "epoch": 3.18, "learning_rate": 1.2208781362007169e-05, "loss": 2.5302, "step": 23220 }, { "epoch": 3.18, "learning_rate": 1.2190113500597372e-05, "loss": 1.6165, "step": 23230 }, { "epoch": 3.18, "learning_rate": 1.2171445639187576e-05, "loss": 1.6406, "step": 23240 }, { "epoch": 3.18, "learning_rate": 1.2152777777777779e-05, "loss": 2.5011, "step": 23250 }, { "epoch": 3.18, "learning_rate": 1.2134109916367982e-05, "loss": 1.5273, "step": 23260 }, { "epoch": 3.18, "learning_rate": 1.2115442054958184e-05, "loss": 2.3831, "step": 23270 }, { "epoch": 3.18, "learning_rate": 1.2096774193548388e-05, "loss": 2.2989, "step": 23280 }, { "epoch": 3.18, "learning_rate": 1.207810633213859e-05, "loss": 2.1644, "step": 23290 }, { "epoch": 3.18, "learning_rate": 1.2059438470728795e-05, "loss": 1.7813, "step": 23300 }, { "epoch": 3.18, "learning_rate": 1.2040770609318996e-05, "loss": 1.9006, "step": 23310 }, { "epoch": 3.18, "learning_rate": 1.20221027479092e-05, "loss": 1.8631, "step": 23320 }, { "epoch": 3.18, "learning_rate": 1.2003434886499403e-05, "loss": 1.8753, "step": 23330 }, { "epoch": 3.18, "learning_rate": 1.1984767025089606e-05, "loss": 2.1294, "step": 23340 }, { "epoch": 3.18, "learning_rate": 1.196609916367981e-05, "loss": 1.9498, "step": 23350 }, { "epoch": 3.18, "learning_rate": 1.1947431302270013e-05, "loss": 1.9288, "step": 23360 }, { "epoch": 3.19, "learning_rate": 1.1928763440860216e-05, "loss": 1.7415, "step": 23370 }, { "epoch": 3.19, "learning_rate": 1.1910095579450419e-05, "loss": 1.4501, "step": 23380 }, { "epoch": 3.19, "learning_rate": 1.1891427718040622e-05, "loss": 1.5426, "step": 23390 }, { "epoch": 3.19, "learning_rate": 1.1872759856630825e-05, "loss": 1.7795, "step": 23400 }, { "epoch": 3.19, "learning_rate": 1.1854091995221029e-05, "loss": 1.7883, "step": 23410 }, { "epoch": 3.19, "learning_rate": 1.183542413381123e-05, "loss": 2.295, "step": 23420 }, { "epoch": 3.19, "learning_rate": 1.1816756272401435e-05, "loss": 2.2004, "step": 23430 }, { "epoch": 3.19, "learning_rate": 1.1798088410991637e-05, "loss": 1.3605, "step": 23440 }, { "epoch": 3.19, "learning_rate": 1.177942054958184e-05, "loss": 1.99, "step": 23450 }, { "epoch": 3.19, "learning_rate": 1.1760752688172045e-05, "loss": 2.1217, "step": 23460 }, { "epoch": 3.19, "learning_rate": 1.1742084826762246e-05, "loss": 1.4244, "step": 23470 }, { "epoch": 3.19, "learning_rate": 1.1723416965352451e-05, "loss": 2.0407, "step": 23480 }, { "epoch": 3.19, "learning_rate": 1.1704749103942653e-05, "loss": 1.5053, "step": 23490 }, { "epoch": 3.19, "learning_rate": 1.1686081242532856e-05, "loss": 1.397, "step": 23500 }, { "epoch": 3.19, "learning_rate": 1.1667413381123059e-05, "loss": 1.5153, "step": 23510 }, { "epoch": 3.19, "learning_rate": 1.1648745519713262e-05, "loss": 1.7957, "step": 23520 }, { "epoch": 3.19, "learning_rate": 1.1630077658303466e-05, "loss": 1.161, "step": 23530 }, { "epoch": 3.19, "learning_rate": 1.1611409796893669e-05, "loss": 2.2998, "step": 23540 }, { "epoch": 3.19, "learning_rate": 1.159274193548387e-05, "loss": 2.0918, "step": 23550 }, { "epoch": 3.19, "learning_rate": 1.1574074074074075e-05, "loss": 1.1215, "step": 23560 }, { "epoch": 3.19, "learning_rate": 1.1555406212664278e-05, "loss": 1.3754, "step": 23570 }, { "epoch": 3.19, "learning_rate": 1.153673835125448e-05, "loss": 1.7389, "step": 23580 }, { "epoch": 3.19, "learning_rate": 1.1518070489844685e-05, "loss": 1.2926, "step": 23590 }, { "epoch": 3.19, "learning_rate": 1.1499402628434886e-05, "loss": 1.9691, "step": 23600 }, { "epoch": 3.19, "learning_rate": 1.1480734767025091e-05, "loss": 2.0251, "step": 23610 }, { "epoch": 3.19, "learning_rate": 1.1462066905615293e-05, "loss": 1.7244, "step": 23620 }, { "epoch": 3.19, "learning_rate": 1.1443399044205496e-05, "loss": 1.3835, "step": 23630 }, { "epoch": 3.19, "learning_rate": 1.14247311827957e-05, "loss": 1.313, "step": 23640 }, { "epoch": 3.19, "learning_rate": 1.1406063321385902e-05, "loss": 2.0762, "step": 23650 }, { "epoch": 3.2, "learning_rate": 1.1387395459976106e-05, "loss": 1.5073, "step": 23660 }, { "epoch": 3.2, "learning_rate": 1.1368727598566309e-05, "loss": 2.1956, "step": 23670 }, { "epoch": 3.2, "learning_rate": 1.135005973715651e-05, "loss": 1.5682, "step": 23680 }, { "epoch": 3.2, "learning_rate": 1.1331391875746715e-05, "loss": 2.0843, "step": 23690 }, { "epoch": 3.2, "learning_rate": 1.1312724014336919e-05, "loss": 1.5406, "step": 23700 }, { "epoch": 3.2, "learning_rate": 1.1294056152927122e-05, "loss": 1.2961, "step": 23710 }, { "epoch": 3.2, "learning_rate": 1.1275388291517325e-05, "loss": 1.5602, "step": 23720 }, { "epoch": 3.2, "learning_rate": 1.1256720430107527e-05, "loss": 1.7592, "step": 23730 }, { "epoch": 3.2, "learning_rate": 1.1238052568697731e-05, "loss": 1.885, "step": 23740 }, { "epoch": 3.2, "learning_rate": 1.1219384707287933e-05, "loss": 0.9263, "step": 23750 }, { "epoch": 3.2, "learning_rate": 1.1200716845878136e-05, "loss": 1.4397, "step": 23760 }, { "epoch": 3.2, "learning_rate": 1.118204898446834e-05, "loss": 1.5965, "step": 23770 }, { "epoch": 3.2, "learning_rate": 1.1163381123058543e-05, "loss": 1.6573, "step": 23780 }, { "epoch": 3.2, "learning_rate": 1.1144713261648746e-05, "loss": 1.6953, "step": 23790 }, { "epoch": 3.2, "learning_rate": 1.1126045400238949e-05, "loss": 1.722, "step": 23800 }, { "epoch": 3.2, "eval_accuracy": 0.42408943382618103, "eval_loss": 2.0974302291870117, "eval_runtime": 959.4358, "eval_samples_per_second": 2.89, "eval_steps_per_second": 1.446, "step": 23808 }, { "epoch": 4.0, "learning_rate": 1.1107377538829152e-05, "loss": 2.056, "step": 23810 }, { "epoch": 4.0, "learning_rate": 1.1088709677419356e-05, "loss": 1.4446, "step": 23820 }, { "epoch": 4.0, "learning_rate": 1.1070041816009559e-05, "loss": 1.599, "step": 23830 }, { "epoch": 4.0, "learning_rate": 1.1051373954599762e-05, "loss": 1.3334, "step": 23840 }, { "epoch": 4.0, "learning_rate": 1.1032706093189965e-05, "loss": 2.4581, "step": 23850 }, { "epoch": 4.0, "learning_rate": 1.1014038231780167e-05, "loss": 1.5139, "step": 23860 }, { "epoch": 4.0, "learning_rate": 1.0995370370370372e-05, "loss": 1.4778, "step": 23870 }, { "epoch": 4.0, "learning_rate": 1.0976702508960573e-05, "loss": 1.5977, "step": 23880 }, { "epoch": 4.0, "learning_rate": 1.0958034647550778e-05, "loss": 1.79, "step": 23890 }, { "epoch": 4.0, "learning_rate": 1.093936678614098e-05, "loss": 2.1424, "step": 23900 }, { "epoch": 4.0, "learning_rate": 1.0920698924731183e-05, "loss": 1.7417, "step": 23910 }, { "epoch": 4.0, "learning_rate": 1.0902031063321388e-05, "loss": 1.7907, "step": 23920 }, { "epoch": 4.0, "learning_rate": 1.088336320191159e-05, "loss": 2.0001, "step": 23930 }, { "epoch": 4.0, "learning_rate": 1.0864695340501792e-05, "loss": 1.7665, "step": 23940 }, { "epoch": 4.0, "learning_rate": 1.0846027479091996e-05, "loss": 1.642, "step": 23950 }, { "epoch": 4.01, "learning_rate": 1.0827359617682199e-05, "loss": 1.1917, "step": 23960 }, { "epoch": 4.01, "learning_rate": 1.0808691756272402e-05, "loss": 1.9484, "step": 23970 }, { "epoch": 4.01, "learning_rate": 1.0790023894862605e-05, "loss": 1.8528, "step": 23980 }, { "epoch": 4.01, "learning_rate": 1.0771356033452807e-05, "loss": 1.6025, "step": 23990 }, { "epoch": 4.01, "learning_rate": 1.0752688172043012e-05, "loss": 1.4363, "step": 24000 }, { "epoch": 4.01, "learning_rate": 1.0734020310633213e-05, "loss": 1.7248, "step": 24010 }, { "epoch": 4.01, "learning_rate": 1.0715352449223418e-05, "loss": 2.3533, "step": 24020 }, { "epoch": 4.01, "learning_rate": 1.0696684587813621e-05, "loss": 1.8398, "step": 24030 }, { "epoch": 4.01, "learning_rate": 1.0678016726403823e-05, "loss": 1.6324, "step": 24040 }, { "epoch": 4.01, "learning_rate": 1.0659348864994028e-05, "loss": 1.6265, "step": 24050 }, { "epoch": 4.01, "learning_rate": 1.064068100358423e-05, "loss": 1.9436, "step": 24060 }, { "epoch": 4.01, "learning_rate": 1.0622013142174433e-05, "loss": 1.6452, "step": 24070 }, { "epoch": 4.01, "learning_rate": 1.0603345280764636e-05, "loss": 1.9292, "step": 24080 }, { "epoch": 4.01, "learning_rate": 1.0584677419354839e-05, "loss": 1.0717, "step": 24090 }, { "epoch": 4.01, "learning_rate": 1.0566009557945042e-05, "loss": 1.4639, "step": 24100 }, { "epoch": 4.01, "learning_rate": 1.0547341696535245e-05, "loss": 1.789, "step": 24110 }, { "epoch": 4.01, "learning_rate": 1.0528673835125449e-05, "loss": 1.4203, "step": 24120 }, { "epoch": 4.01, "learning_rate": 1.0510005973715652e-05, "loss": 1.6756, "step": 24130 }, { "epoch": 4.01, "learning_rate": 1.0491338112305853e-05, "loss": 1.8001, "step": 24140 }, { "epoch": 4.01, "learning_rate": 1.0472670250896058e-05, "loss": 1.6158, "step": 24150 }, { "epoch": 4.01, "learning_rate": 1.0454002389486262e-05, "loss": 1.6266, "step": 24160 }, { "epoch": 4.01, "learning_rate": 1.0435334528076463e-05, "loss": 1.575, "step": 24170 }, { "epoch": 4.01, "learning_rate": 1.0416666666666668e-05, "loss": 1.6014, "step": 24180 }, { "epoch": 4.01, "learning_rate": 1.039799880525687e-05, "loss": 1.6461, "step": 24190 }, { "epoch": 4.01, "learning_rate": 1.0379330943847074e-05, "loss": 2.0113, "step": 24200 }, { "epoch": 4.01, "learning_rate": 1.0360663082437276e-05, "loss": 1.7792, "step": 24210 }, { "epoch": 4.01, "learning_rate": 1.034199522102748e-05, "loss": 1.2177, "step": 24220 }, { "epoch": 4.01, "learning_rate": 1.0323327359617682e-05, "loss": 1.3584, "step": 24230 }, { "epoch": 4.01, "learning_rate": 1.0304659498207886e-05, "loss": 1.8425, "step": 24240 }, { "epoch": 4.01, "learning_rate": 1.0285991636798089e-05, "loss": 1.86, "step": 24250 }, { "epoch": 4.02, "learning_rate": 1.0267323775388292e-05, "loss": 1.7274, "step": 24260 }, { "epoch": 4.02, "learning_rate": 1.0248655913978495e-05, "loss": 1.6669, "step": 24270 }, { "epoch": 4.02, "learning_rate": 1.0229988052568699e-05, "loss": 1.3069, "step": 24280 }, { "epoch": 4.02, "learning_rate": 1.0211320191158902e-05, "loss": 1.6819, "step": 24290 }, { "epoch": 4.02, "learning_rate": 1.0192652329749105e-05, "loss": 2.055, "step": 24300 }, { "epoch": 4.02, "learning_rate": 1.0173984468339308e-05, "loss": 1.8674, "step": 24310 }, { "epoch": 4.02, "learning_rate": 1.015531660692951e-05, "loss": 1.6398, "step": 24320 }, { "epoch": 4.02, "learning_rate": 1.0136648745519715e-05, "loss": 1.315, "step": 24330 }, { "epoch": 4.02, "learning_rate": 1.0117980884109916e-05, "loss": 1.781, "step": 24340 }, { "epoch": 4.02, "learning_rate": 1.009931302270012e-05, "loss": 1.3848, "step": 24350 }, { "epoch": 4.02, "learning_rate": 1.0080645161290323e-05, "loss": 1.4403, "step": 24360 }, { "epoch": 4.02, "learning_rate": 1.0061977299880526e-05, "loss": 2.067, "step": 24370 }, { "epoch": 4.02, "learning_rate": 1.004330943847073e-05, "loss": 1.1659, "step": 24380 }, { "epoch": 4.02, "learning_rate": 1.0024641577060932e-05, "loss": 1.1623, "step": 24390 }, { "epoch": 4.02, "learning_rate": 1.0005973715651135e-05, "loss": 1.1522, "step": 24400 }, { "epoch": 4.02, "learning_rate": 9.987305854241339e-06, "loss": 1.7553, "step": 24410 }, { "epoch": 4.02, "learning_rate": 9.968637992831542e-06, "loss": 1.0157, "step": 24420 }, { "epoch": 4.02, "learning_rate": 9.949970131421745e-06, "loss": 1.8805, "step": 24430 }, { "epoch": 4.02, "learning_rate": 9.931302270011948e-06, "loss": 1.7155, "step": 24440 }, { "epoch": 4.02, "learning_rate": 9.91263440860215e-06, "loss": 1.4454, "step": 24450 }, { "epoch": 4.02, "learning_rate": 9.893966547192355e-06, "loss": 1.3156, "step": 24460 }, { "epoch": 4.02, "learning_rate": 9.875298685782556e-06, "loss": 1.5967, "step": 24470 }, { "epoch": 4.02, "learning_rate": 9.856630824372761e-06, "loss": 1.9421, "step": 24480 }, { "epoch": 4.02, "learning_rate": 9.837962962962964e-06, "loss": 2.195, "step": 24490 }, { "epoch": 4.02, "learning_rate": 9.819295101553166e-06, "loss": 1.5071, "step": 24500 }, { "epoch": 4.02, "learning_rate": 9.800627240143371e-06, "loss": 1.8806, "step": 24510 }, { "epoch": 4.02, "learning_rate": 9.781959378733572e-06, "loss": 1.3441, "step": 24520 }, { "epoch": 4.02, "learning_rate": 9.763291517323776e-06, "loss": 1.7605, "step": 24530 }, { "epoch": 4.02, "learning_rate": 9.744623655913979e-06, "loss": 1.2259, "step": 24540 }, { "epoch": 4.02, "learning_rate": 9.725955794504182e-06, "loss": 1.4053, "step": 24550 }, { "epoch": 4.03, "learning_rate": 9.707287933094385e-06, "loss": 1.25, "step": 24560 }, { "epoch": 4.03, "learning_rate": 9.688620071684588e-06, "loss": 1.3208, "step": 24570 }, { "epoch": 4.03, "learning_rate": 9.66995221027479e-06, "loss": 1.3093, "step": 24580 }, { "epoch": 4.03, "learning_rate": 9.651284348864995e-06, "loss": 1.5853, "step": 24590 }, { "epoch": 4.03, "learning_rate": 9.632616487455196e-06, "loss": 1.4985, "step": 24600 }, { "epoch": 4.03, "learning_rate": 9.613948626045401e-06, "loss": 1.4555, "step": 24610 }, { "epoch": 4.03, "learning_rate": 9.595280764635605e-06, "loss": 1.7654, "step": 24620 }, { "epoch": 4.03, "learning_rate": 9.576612903225806e-06, "loss": 0.9561, "step": 24630 }, { "epoch": 4.03, "learning_rate": 9.557945041816011e-06, "loss": 1.5183, "step": 24640 }, { "epoch": 4.03, "learning_rate": 9.539277180406213e-06, "loss": 1.1956, "step": 24650 }, { "epoch": 4.03, "learning_rate": 9.520609318996416e-06, "loss": 1.2296, "step": 24660 }, { "epoch": 4.03, "learning_rate": 9.501941457586619e-06, "loss": 1.0996, "step": 24670 }, { "epoch": 4.03, "learning_rate": 9.483273596176822e-06, "loss": 1.5512, "step": 24680 }, { "epoch": 4.03, "learning_rate": 9.464605734767025e-06, "loss": 1.1916, "step": 24690 }, { "epoch": 4.03, "learning_rate": 9.445937873357229e-06, "loss": 1.1623, "step": 24700 }, { "epoch": 4.03, "learning_rate": 9.427270011947432e-06, "loss": 1.5246, "step": 24710 }, { "epoch": 4.03, "learning_rate": 9.408602150537635e-06, "loss": 1.9619, "step": 24720 }, { "epoch": 4.03, "learning_rate": 9.389934289127838e-06, "loss": 1.6799, "step": 24730 }, { "epoch": 4.03, "learning_rate": 9.371266427718042e-06, "loss": 1.5729, "step": 24740 }, { "epoch": 4.03, "learning_rate": 9.352598566308245e-06, "loss": 1.5295, "step": 24750 }, { "epoch": 4.03, "learning_rate": 9.333930704898446e-06, "loss": 1.5199, "step": 24760 }, { "epoch": 4.03, "learning_rate": 9.315262843488651e-06, "loss": 1.2237, "step": 24770 }, { "epoch": 4.03, "learning_rate": 9.296594982078853e-06, "loss": 1.5316, "step": 24780 }, { "epoch": 4.03, "learning_rate": 9.277927120669058e-06, "loss": 1.1651, "step": 24790 }, { "epoch": 4.03, "learning_rate": 9.259259259259259e-06, "loss": 0.9255, "step": 24800 }, { "epoch": 4.03, "learning_rate": 9.240591397849462e-06, "loss": 1.4782, "step": 24810 }, { "epoch": 4.03, "learning_rate": 9.221923536439666e-06, "loss": 1.4225, "step": 24820 }, { "epoch": 4.03, "learning_rate": 9.203255675029869e-06, "loss": 1.3602, "step": 24830 }, { "epoch": 4.03, "learning_rate": 9.184587813620072e-06, "loss": 1.4976, "step": 24840 }, { "epoch": 4.04, "learning_rate": 9.165919952210275e-06, "loss": 1.7712, "step": 24850 }, { "epoch": 4.04, "learning_rate": 9.147252090800478e-06, "loss": 1.3732, "step": 24860 }, { "epoch": 4.04, "learning_rate": 9.128584229390682e-06, "loss": 1.6067, "step": 24870 }, { "epoch": 4.04, "learning_rate": 9.109916367980885e-06, "loss": 2.2038, "step": 24880 }, { "epoch": 4.04, "learning_rate": 9.091248506571088e-06, "loss": 1.4748, "step": 24890 }, { "epoch": 4.04, "learning_rate": 9.072580645161291e-06, "loss": 1.561, "step": 24900 }, { "epoch": 4.04, "learning_rate": 9.053912783751493e-06, "loss": 1.0526, "step": 24910 }, { "epoch": 4.04, "learning_rate": 9.035244922341698e-06, "loss": 1.3474, "step": 24920 }, { "epoch": 4.04, "learning_rate": 9.0165770609319e-06, "loss": 1.2918, "step": 24930 }, { "epoch": 4.04, "learning_rate": 8.997909199522103e-06, "loss": 1.3391, "step": 24940 }, { "epoch": 4.04, "learning_rate": 8.979241338112307e-06, "loss": 1.753, "step": 24950 }, { "epoch": 4.04, "learning_rate": 8.960573476702509e-06, "loss": 2.1419, "step": 24960 }, { "epoch": 4.04, "learning_rate": 8.941905615292714e-06, "loss": 1.6029, "step": 24970 }, { "epoch": 4.04, "learning_rate": 8.923237753882915e-06, "loss": 1.6535, "step": 24980 }, { "epoch": 4.04, "learning_rate": 8.904569892473119e-06, "loss": 1.1886, "step": 24990 }, { "epoch": 4.04, "learning_rate": 8.885902031063322e-06, "loss": 1.4978, "step": 25000 }, { "epoch": 4.04, "learning_rate": 8.867234169653525e-06, "loss": 1.8418, "step": 25010 }, { "epoch": 4.04, "learning_rate": 8.848566308243728e-06, "loss": 1.1799, "step": 25020 }, { "epoch": 4.04, "learning_rate": 8.829898446833931e-06, "loss": 1.7685, "step": 25030 }, { "epoch": 4.04, "learning_rate": 8.811230585424133e-06, "loss": 1.6405, "step": 25040 }, { "epoch": 4.04, "learning_rate": 8.792562724014338e-06, "loss": 0.8644, "step": 25050 }, { "epoch": 4.04, "learning_rate": 8.773894862604541e-06, "loss": 1.4913, "step": 25060 }, { "epoch": 4.04, "learning_rate": 8.755227001194743e-06, "loss": 1.3354, "step": 25070 }, { "epoch": 4.04, "learning_rate": 8.736559139784948e-06, "loss": 1.2122, "step": 25080 }, { "epoch": 4.04, "learning_rate": 8.717891278375149e-06, "loss": 2.0048, "step": 25090 }, { "epoch": 4.04, "learning_rate": 8.699223416965354e-06, "loss": 0.9122, "step": 25100 }, { "epoch": 4.04, "learning_rate": 8.680555555555556e-06, "loss": 1.2963, "step": 25110 }, { "epoch": 4.04, "learning_rate": 8.661887694145759e-06, "loss": 1.8554, "step": 25120 }, { "epoch": 4.04, "learning_rate": 8.643219832735962e-06, "loss": 1.5882, "step": 25130 }, { "epoch": 4.04, "learning_rate": 8.624551971326165e-06, "loss": 1.2758, "step": 25140 }, { "epoch": 4.05, "learning_rate": 8.605884109916368e-06, "loss": 1.4045, "step": 25150 }, { "epoch": 4.05, "learning_rate": 8.587216248506572e-06, "loss": 1.4268, "step": 25160 }, { "epoch": 4.05, "learning_rate": 8.568548387096773e-06, "loss": 1.6635, "step": 25170 }, { "epoch": 4.05, "learning_rate": 8.549880525686978e-06, "loss": 1.1848, "step": 25180 }, { "epoch": 4.05, "learning_rate": 8.531212664277181e-06, "loss": 1.7076, "step": 25190 }, { "epoch": 4.05, "learning_rate": 8.512544802867385e-06, "loss": 1.6546, "step": 25200 }, { "epoch": 4.05, "learning_rate": 8.493876941457588e-06, "loss": 1.4574, "step": 25210 }, { "epoch": 4.05, "learning_rate": 8.47520908004779e-06, "loss": 1.4794, "step": 25220 }, { "epoch": 4.05, "learning_rate": 8.456541218637994e-06, "loss": 1.8868, "step": 25230 }, { "epoch": 4.05, "learning_rate": 8.437873357228196e-06, "loss": 1.1603, "step": 25240 }, { "epoch": 4.05, "learning_rate": 8.419205495818399e-06, "loss": 2.0306, "step": 25250 }, { "epoch": 4.05, "learning_rate": 8.400537634408602e-06, "loss": 1.2349, "step": 25260 }, { "epoch": 4.05, "learning_rate": 8.381869772998805e-06, "loss": 0.9573, "step": 25270 }, { "epoch": 4.05, "learning_rate": 8.363201911589009e-06, "loss": 1.6363, "step": 25280 }, { "epoch": 4.05, "learning_rate": 8.344534050179212e-06, "loss": 1.1422, "step": 25290 }, { "epoch": 4.05, "learning_rate": 8.325866188769415e-06, "loss": 0.9977, "step": 25300 }, { "epoch": 4.05, "learning_rate": 8.307198327359618e-06, "loss": 1.7289, "step": 25310 }, { "epoch": 4.05, "learning_rate": 8.288530465949821e-06, "loss": 1.3954, "step": 25320 }, { "epoch": 4.05, "learning_rate": 8.269862604540025e-06, "loss": 1.71, "step": 25330 }, { "epoch": 4.05, "learning_rate": 8.251194743130228e-06, "loss": 1.3419, "step": 25340 }, { "epoch": 4.05, "learning_rate": 8.23252688172043e-06, "loss": 1.4473, "step": 25350 }, { "epoch": 4.05, "learning_rate": 8.213859020310634e-06, "loss": 1.5981, "step": 25360 }, { "epoch": 4.05, "learning_rate": 8.195191158900836e-06, "loss": 1.2856, "step": 25370 }, { "epoch": 4.05, "learning_rate": 8.17652329749104e-06, "loss": 1.7785, "step": 25380 }, { "epoch": 4.05, "learning_rate": 8.157855436081242e-06, "loss": 0.7809, "step": 25390 }, { "epoch": 4.05, "learning_rate": 8.139187574671446e-06, "loss": 1.7897, "step": 25400 }, { "epoch": 4.05, "learning_rate": 8.12051971326165e-06, "loss": 1.6203, "step": 25410 }, { "epoch": 4.05, "learning_rate": 8.101851851851852e-06, "loss": 1.769, "step": 25420 }, { "epoch": 4.05, "learning_rate": 8.083183990442055e-06, "loss": 1.7195, "step": 25430 }, { "epoch": 4.05, "learning_rate": 8.064516129032258e-06, "loss": 0.6735, "step": 25440 }, { "epoch": 4.06, "learning_rate": 8.045848267622462e-06, "loss": 1.1943, "step": 25450 }, { "epoch": 4.06, "learning_rate": 8.027180406212665e-06, "loss": 1.8344, "step": 25460 }, { "epoch": 4.06, "learning_rate": 8.008512544802868e-06, "loss": 1.5848, "step": 25470 }, { "epoch": 4.06, "learning_rate": 7.98984468339307e-06, "loss": 1.4308, "step": 25480 }, { "epoch": 4.06, "learning_rate": 7.971176821983274e-06, "loss": 1.7607, "step": 25490 }, { "epoch": 4.06, "learning_rate": 7.952508960573476e-06, "loss": 1.5066, "step": 25500 }, { "epoch": 4.06, "learning_rate": 7.933841099163681e-06, "loss": 1.4067, "step": 25510 }, { "epoch": 4.06, "learning_rate": 7.915173237753884e-06, "loss": 1.1646, "step": 25520 }, { "epoch": 4.06, "learning_rate": 7.896505376344086e-06, "loss": 1.4122, "step": 25530 }, { "epoch": 4.06, "learning_rate": 7.87783751493429e-06, "loss": 1.4635, "step": 25540 }, { "epoch": 4.06, "learning_rate": 7.859169653524492e-06, "loss": 1.8227, "step": 25550 }, { "epoch": 4.06, "learning_rate": 7.840501792114695e-06, "loss": 1.8722, "step": 25560 }, { "epoch": 4.06, "learning_rate": 7.821833930704899e-06, "loss": 1.0591, "step": 25570 }, { "epoch": 4.06, "learning_rate": 7.803166069295102e-06, "loss": 1.4113, "step": 25580 }, { "epoch": 4.06, "learning_rate": 7.784498207885305e-06, "loss": 1.2042, "step": 25590 }, { "epoch": 4.06, "learning_rate": 7.765830346475508e-06, "loss": 1.7254, "step": 25600 }, { "epoch": 4.06, "learning_rate": 7.747162485065711e-06, "loss": 1.7568, "step": 25610 }, { "epoch": 4.06, "learning_rate": 7.728494623655915e-06, "loss": 1.0329, "step": 25620 }, { "epoch": 4.06, "learning_rate": 7.709826762246116e-06, "loss": 1.2179, "step": 25630 }, { "epoch": 4.06, "learning_rate": 7.691158900836321e-06, "loss": 1.3515, "step": 25640 }, { "epoch": 4.06, "learning_rate": 7.672491039426524e-06, "loss": 2.3152, "step": 25650 }, { "epoch": 4.06, "learning_rate": 7.653823178016726e-06, "loss": 1.1229, "step": 25660 }, { "epoch": 4.06, "learning_rate": 7.63515531660693e-06, "loss": 1.4869, "step": 25670 }, { "epoch": 4.06, "learning_rate": 7.616487455197132e-06, "loss": 1.5393, "step": 25680 }, { "epoch": 4.06, "learning_rate": 7.597819593787336e-06, "loss": 1.8928, "step": 25690 }, { "epoch": 4.06, "learning_rate": 7.579151732377539e-06, "loss": 1.2179, "step": 25700 }, { "epoch": 4.06, "learning_rate": 7.560483870967743e-06, "loss": 1.1297, "step": 25710 }, { "epoch": 4.06, "learning_rate": 7.541816009557945e-06, "loss": 1.7601, "step": 25720 }, { "epoch": 4.06, "learning_rate": 7.523148148148148e-06, "loss": 1.5369, "step": 25730 }, { "epoch": 4.06, "learning_rate": 7.504480286738351e-06, "loss": 1.0863, "step": 25740 }, { "epoch": 4.07, "learning_rate": 7.485812425328555e-06, "loss": 1.9207, "step": 25750 }, { "epoch": 4.07, "learning_rate": 7.467144563918758e-06, "loss": 2.0301, "step": 25760 }, { "epoch": 4.07, "learning_rate": 7.44847670250896e-06, "loss": 0.9577, "step": 25770 }, { "epoch": 4.07, "learning_rate": 7.4298088410991645e-06, "loss": 1.0539, "step": 25780 }, { "epoch": 4.07, "learning_rate": 7.411140979689367e-06, "loss": 1.7959, "step": 25790 }, { "epoch": 4.07, "learning_rate": 7.392473118279571e-06, "loss": 0.97, "step": 25800 }, { "epoch": 4.07, "learning_rate": 7.373805256869773e-06, "loss": 1.7899, "step": 25810 }, { "epoch": 4.07, "learning_rate": 7.3551373954599765e-06, "loss": 1.5984, "step": 25820 }, { "epoch": 4.07, "learning_rate": 7.336469534050179e-06, "loss": 1.5833, "step": 25830 }, { "epoch": 4.07, "learning_rate": 7.317801672640383e-06, "loss": 1.4231, "step": 25840 }, { "epoch": 4.07, "learning_rate": 7.299133811230585e-06, "loss": 1.0482, "step": 25850 }, { "epoch": 4.07, "learning_rate": 7.2804659498207885e-06, "loss": 1.6302, "step": 25860 }, { "epoch": 4.07, "learning_rate": 7.261798088410993e-06, "loss": 1.7256, "step": 25870 }, { "epoch": 4.07, "learning_rate": 7.243130227001195e-06, "loss": 1.4821, "step": 25880 }, { "epoch": 4.07, "learning_rate": 7.224462365591399e-06, "loss": 1.3046, "step": 25890 }, { "epoch": 4.07, "learning_rate": 7.205794504181601e-06, "loss": 1.7332, "step": 25900 }, { "epoch": 4.07, "learning_rate": 7.187126642771805e-06, "loss": 2.1257, "step": 25910 }, { "epoch": 4.07, "learning_rate": 7.168458781362007e-06, "loss": 1.3405, "step": 25920 }, { "epoch": 4.07, "learning_rate": 7.149790919952211e-06, "loss": 1.0742, "step": 25930 }, { "epoch": 4.07, "learning_rate": 7.1311230585424134e-06, "loss": 1.6651, "step": 25940 }, { "epoch": 4.07, "learning_rate": 7.112455197132617e-06, "loss": 1.8454, "step": 25950 }, { "epoch": 4.07, "learning_rate": 7.093787335722819e-06, "loss": 1.6758, "step": 25960 }, { "epoch": 4.07, "learning_rate": 7.075119474313023e-06, "loss": 1.3766, "step": 25970 }, { "epoch": 4.07, "learning_rate": 7.056451612903227e-06, "loss": 1.4214, "step": 25980 }, { "epoch": 4.07, "learning_rate": 7.0377837514934295e-06, "loss": 1.1765, "step": 25990 }, { "epoch": 4.07, "learning_rate": 7.019115890083633e-06, "loss": 1.2647, "step": 26000 }, { "epoch": 4.07, "learning_rate": 7.000448028673835e-06, "loss": 1.9995, "step": 26010 }, { "epoch": 4.07, "learning_rate": 6.981780167264039e-06, "loss": 1.6961, "step": 26020 }, { "epoch": 4.07, "learning_rate": 6.9631123058542416e-06, "loss": 1.664, "step": 26030 }, { "epoch": 4.08, "learning_rate": 6.944444444444445e-06, "loss": 1.2451, "step": 26040 }, { "epoch": 4.08, "learning_rate": 6.925776583034647e-06, "loss": 1.3411, "step": 26050 }, { "epoch": 4.08, "learning_rate": 6.907108721624851e-06, "loss": 1.7018, "step": 26060 }, { "epoch": 4.08, "learning_rate": 6.888440860215054e-06, "loss": 0.9882, "step": 26070 }, { "epoch": 4.08, "learning_rate": 6.869772998805258e-06, "loss": 1.1228, "step": 26080 }, { "epoch": 4.08, "learning_rate": 6.85110513739546e-06, "loss": 1.8301, "step": 26090 }, { "epoch": 4.08, "learning_rate": 6.832437275985663e-06, "loss": 1.1561, "step": 26100 }, { "epoch": 4.08, "learning_rate": 6.813769414575867e-06, "loss": 1.4249, "step": 26110 }, { "epoch": 4.08, "learning_rate": 6.79510155316607e-06, "loss": 1.3765, "step": 26120 }, { "epoch": 4.08, "learning_rate": 6.776433691756273e-06, "loss": 1.2172, "step": 26130 }, { "epoch": 4.08, "learning_rate": 6.757765830346475e-06, "loss": 1.0942, "step": 26140 }, { "epoch": 4.08, "learning_rate": 6.739097968936679e-06, "loss": 1.2836, "step": 26150 }, { "epoch": 4.08, "learning_rate": 6.720430107526882e-06, "loss": 1.0587, "step": 26160 }, { "epoch": 4.08, "learning_rate": 6.701762246117086e-06, "loss": 1.5013, "step": 26170 }, { "epoch": 4.08, "learning_rate": 6.683094384707287e-06, "loss": 1.2316, "step": 26180 }, { "epoch": 4.08, "learning_rate": 6.664426523297491e-06, "loss": 1.7358, "step": 26190 }, { "epoch": 4.08, "learning_rate": 6.645758661887694e-06, "loss": 1.0172, "step": 26200 }, { "epoch": 4.08, "learning_rate": 6.627090800477898e-06, "loss": 1.1399, "step": 26210 }, { "epoch": 4.08, "learning_rate": 6.608422939068101e-06, "loss": 1.7148, "step": 26220 }, { "epoch": 4.08, "learning_rate": 6.589755077658303e-06, "loss": 1.5614, "step": 26230 }, { "epoch": 4.08, "learning_rate": 6.5710872162485075e-06, "loss": 1.435, "step": 26240 }, { "epoch": 4.08, "learning_rate": 6.55241935483871e-06, "loss": 1.0931, "step": 26250 }, { "epoch": 4.08, "learning_rate": 6.533751493428913e-06, "loss": 1.3666, "step": 26260 }, { "epoch": 4.08, "learning_rate": 6.5150836320191154e-06, "loss": 1.2352, "step": 26270 }, { "epoch": 4.08, "learning_rate": 6.4964157706093195e-06, "loss": 1.1093, "step": 26280 }, { "epoch": 4.08, "learning_rate": 6.477747909199522e-06, "loss": 1.3381, "step": 26290 }, { "epoch": 4.08, "learning_rate": 6.459080047789726e-06, "loss": 1.0557, "step": 26300 }, { "epoch": 4.08, "learning_rate": 6.440412186379928e-06, "loss": 1.5795, "step": 26310 }, { "epoch": 4.08, "learning_rate": 6.4217443249701315e-06, "loss": 1.3957, "step": 26320 }, { "epoch": 4.08, "learning_rate": 6.403076463560336e-06, "loss": 1.4447, "step": 26330 }, { "epoch": 4.09, "learning_rate": 6.384408602150538e-06, "loss": 1.5413, "step": 26340 }, { "epoch": 4.09, "learning_rate": 6.365740740740741e-06, "loss": 0.8564, "step": 26350 }, { "epoch": 4.09, "learning_rate": 6.3470728793309436e-06, "loss": 1.5251, "step": 26360 }, { "epoch": 4.09, "learning_rate": 6.328405017921148e-06, "loss": 1.4446, "step": 26370 }, { "epoch": 4.09, "learning_rate": 6.30973715651135e-06, "loss": 1.0587, "step": 26380 }, { "epoch": 4.09, "learning_rate": 6.291069295101554e-06, "loss": 1.7043, "step": 26390 }, { "epoch": 4.09, "learning_rate": 6.2724014336917564e-06, "loss": 1.323, "step": 26400 }, { "epoch": 4.09, "learning_rate": 6.25373357228196e-06, "loss": 1.2718, "step": 26410 }, { "epoch": 4.09, "learning_rate": 6.235065710872163e-06, "loss": 1.6291, "step": 26420 }, { "epoch": 4.09, "learning_rate": 6.216397849462366e-06, "loss": 1.0402, "step": 26430 }, { "epoch": 4.09, "learning_rate": 6.197729988052569e-06, "loss": 1.1328, "step": 26440 }, { "epoch": 4.09, "learning_rate": 6.179062126642772e-06, "loss": 1.192, "step": 26450 }, { "epoch": 4.09, "learning_rate": 6.160394265232975e-06, "loss": 0.8817, "step": 26460 }, { "epoch": 4.09, "learning_rate": 6.141726403823178e-06, "loss": 0.8596, "step": 26470 }, { "epoch": 4.09, "learning_rate": 6.123058542413381e-06, "loss": 1.6592, "step": 26480 }, { "epoch": 4.09, "learning_rate": 6.1043906810035846e-06, "loss": 1.9876, "step": 26490 }, { "epoch": 4.09, "learning_rate": 6.085722819593788e-06, "loss": 1.515, "step": 26500 }, { "epoch": 4.09, "learning_rate": 6.067054958183991e-06, "loss": 1.2755, "step": 26510 }, { "epoch": 4.09, "learning_rate": 6.048387096774194e-06, "loss": 1.4755, "step": 26520 }, { "epoch": 4.09, "learning_rate": 6.0297192353643974e-06, "loss": 0.957, "step": 26530 }, { "epoch": 4.09, "learning_rate": 6.0110513739546e-06, "loss": 0.7414, "step": 26540 }, { "epoch": 4.09, "learning_rate": 5.992383512544803e-06, "loss": 0.8125, "step": 26550 }, { "epoch": 4.09, "learning_rate": 5.973715651135006e-06, "loss": 1.7498, "step": 26560 }, { "epoch": 4.09, "learning_rate": 5.9550477897252095e-06, "loss": 1.2346, "step": 26570 }, { "epoch": 4.09, "learning_rate": 5.936379928315413e-06, "loss": 1.1029, "step": 26580 }, { "epoch": 4.09, "learning_rate": 5.917712066905615e-06, "loss": 1.4347, "step": 26590 }, { "epoch": 4.09, "learning_rate": 5.899044205495818e-06, "loss": 1.255, "step": 26600 }, { "epoch": 4.09, "learning_rate": 5.880376344086022e-06, "loss": 1.24, "step": 26610 }, { "epoch": 4.09, "learning_rate": 5.8617084826762256e-06, "loss": 0.944, "step": 26620 }, { "epoch": 4.09, "learning_rate": 5.843040621266428e-06, "loss": 0.7633, "step": 26630 }, { "epoch": 4.1, "learning_rate": 5.824372759856631e-06, "loss": 1.4528, "step": 26640 }, { "epoch": 4.1, "learning_rate": 5.805704898446834e-06, "loss": 1.6512, "step": 26650 }, { "epoch": 4.1, "learning_rate": 5.787037037037038e-06, "loss": 1.4074, "step": 26660 }, { "epoch": 4.1, "learning_rate": 5.76836917562724e-06, "loss": 1.371, "step": 26670 }, { "epoch": 4.1, "learning_rate": 5.749701314217443e-06, "loss": 1.2035, "step": 26680 }, { "epoch": 4.1, "learning_rate": 5.731033452807646e-06, "loss": 1.0008, "step": 26690 }, { "epoch": 4.1, "learning_rate": 5.71236559139785e-06, "loss": 1.2706, "step": 26700 }, { "epoch": 4.1, "learning_rate": 5.693697729988053e-06, "loss": 1.4189, "step": 26710 }, { "epoch": 4.1, "learning_rate": 5.675029868578255e-06, "loss": 1.8266, "step": 26720 }, { "epoch": 4.1, "learning_rate": 5.656362007168459e-06, "loss": 1.08, "step": 26730 }, { "epoch": 4.1, "learning_rate": 5.6376941457586625e-06, "loss": 1.5767, "step": 26740 }, { "epoch": 4.1, "learning_rate": 5.619026284348866e-06, "loss": 1.2904, "step": 26750 }, { "epoch": 4.1, "learning_rate": 5.600358422939068e-06, "loss": 1.2904, "step": 26760 }, { "epoch": 4.1, "learning_rate": 5.581690561529271e-06, "loss": 1.4163, "step": 26770 }, { "epoch": 4.1, "learning_rate": 5.5630227001194745e-06, "loss": 1.212, "step": 26780 }, { "epoch": 4.1, "learning_rate": 5.544354838709678e-06, "loss": 1.3449, "step": 26790 }, { "epoch": 4.1, "learning_rate": 5.525686977299881e-06, "loss": 1.3649, "step": 26800 }, { "epoch": 4.1, "learning_rate": 5.507019115890083e-06, "loss": 1.4084, "step": 26810 }, { "epoch": 4.1, "learning_rate": 5.4883512544802866e-06, "loss": 1.4619, "step": 26820 }, { "epoch": 4.1, "learning_rate": 5.46968339307049e-06, "loss": 1.2643, "step": 26830 }, { "epoch": 4.1, "learning_rate": 5.451015531660694e-06, "loss": 1.0674, "step": 26840 }, { "epoch": 4.1, "learning_rate": 5.432347670250896e-06, "loss": 1.2631, "step": 26850 }, { "epoch": 4.1, "learning_rate": 5.4136798088410994e-06, "loss": 1.1677, "step": 26860 }, { "epoch": 4.1, "learning_rate": 5.395011947431303e-06, "loss": 1.6988, "step": 26870 }, { "epoch": 4.1, "learning_rate": 5.376344086021506e-06, "loss": 1.7663, "step": 26880 }, { "epoch": 4.1, "learning_rate": 5.357676224611709e-06, "loss": 1.4654, "step": 26890 }, { "epoch": 4.1, "learning_rate": 5.3390083632019115e-06, "loss": 1.0302, "step": 26900 }, { "epoch": 4.1, "learning_rate": 5.320340501792115e-06, "loss": 1.2625, "step": 26910 }, { "epoch": 4.1, "learning_rate": 5.301672640382318e-06, "loss": 1.3896, "step": 26920 }, { "epoch": 4.1, "learning_rate": 5.283004778972521e-06, "loss": 1.2841, "step": 26930 }, { "epoch": 4.11, "learning_rate": 5.264336917562724e-06, "loss": 1.6893, "step": 26940 }, { "epoch": 4.11, "learning_rate": 5.245669056152927e-06, "loss": 0.7325, "step": 26950 }, { "epoch": 4.11, "learning_rate": 5.227001194743131e-06, "loss": 1.4401, "step": 26960 }, { "epoch": 4.11, "learning_rate": 5.208333333333334e-06, "loss": 0.9587, "step": 26970 }, { "epoch": 4.11, "learning_rate": 5.189665471923537e-06, "loss": 1.2609, "step": 26980 }, { "epoch": 4.11, "learning_rate": 5.17099761051374e-06, "loss": 1.6398, "step": 26990 }, { "epoch": 4.11, "learning_rate": 5.152329749103943e-06, "loss": 1.1614, "step": 27000 }, { "epoch": 4.11, "learning_rate": 5.133661887694146e-06, "loss": 2.1873, "step": 27010 }, { "epoch": 4.11, "learning_rate": 5.114994026284349e-06, "loss": 1.1867, "step": 27020 }, { "epoch": 4.11, "learning_rate": 5.0963261648745525e-06, "loss": 1.0342, "step": 27030 }, { "epoch": 4.11, "learning_rate": 5.077658303464755e-06, "loss": 1.6497, "step": 27040 }, { "epoch": 4.11, "learning_rate": 5.058990442054958e-06, "loss": 1.2071, "step": 27050 }, { "epoch": 4.11, "learning_rate": 5.040322580645161e-06, "loss": 1.4087, "step": 27060 }, { "epoch": 4.11, "learning_rate": 5.021654719235365e-06, "loss": 1.168, "step": 27070 }, { "epoch": 4.11, "learning_rate": 5.002986857825568e-06, "loss": 1.5651, "step": 27080 }, { "epoch": 4.11, "learning_rate": 4.984318996415771e-06, "loss": 1.1643, "step": 27090 }, { "epoch": 4.11, "learning_rate": 4.965651135005974e-06, "loss": 1.0717, "step": 27100 }, { "epoch": 4.11, "learning_rate": 4.946983273596177e-06, "loss": 0.9171, "step": 27110 }, { "epoch": 4.11, "learning_rate": 4.928315412186381e-06, "loss": 1.1478, "step": 27120 }, { "epoch": 4.11, "learning_rate": 4.909647550776583e-06, "loss": 0.8618, "step": 27130 }, { "epoch": 4.11, "learning_rate": 4.890979689366786e-06, "loss": 1.0755, "step": 27140 }, { "epoch": 4.11, "learning_rate": 4.872311827956989e-06, "loss": 1.2589, "step": 27150 }, { "epoch": 4.11, "learning_rate": 4.853643966547193e-06, "loss": 1.2669, "step": 27160 }, { "epoch": 4.11, "learning_rate": 4.834976105137395e-06, "loss": 0.803, "step": 27170 }, { "epoch": 4.11, "learning_rate": 4.816308243727598e-06, "loss": 1.3754, "step": 27180 }, { "epoch": 4.11, "learning_rate": 4.797640382317802e-06, "loss": 1.0802, "step": 27190 }, { "epoch": 4.11, "learning_rate": 4.7789725209080055e-06, "loss": 1.5899, "step": 27200 }, { "epoch": 4.11, "learning_rate": 4.760304659498208e-06, "loss": 1.4347, "step": 27210 }, { "epoch": 4.11, "learning_rate": 4.741636798088411e-06, "loss": 1.2137, "step": 27220 }, { "epoch": 4.11, "learning_rate": 4.722968936678614e-06, "loss": 0.9562, "step": 27230 }, { "epoch": 4.12, "learning_rate": 4.7043010752688175e-06, "loss": 1.9109, "step": 27240 }, { "epoch": 4.12, "learning_rate": 4.685633213859021e-06, "loss": 1.3119, "step": 27250 }, { "epoch": 4.12, "learning_rate": 4.666965352449223e-06, "loss": 0.8017, "step": 27260 }, { "epoch": 4.12, "learning_rate": 4.648297491039426e-06, "loss": 0.9947, "step": 27270 }, { "epoch": 4.12, "learning_rate": 4.6296296296296296e-06, "loss": 0.8371, "step": 27280 }, { "epoch": 4.12, "learning_rate": 4.610961768219833e-06, "loss": 1.1533, "step": 27290 }, { "epoch": 4.12, "learning_rate": 4.592293906810036e-06, "loss": 1.4851, "step": 27300 }, { "epoch": 4.12, "learning_rate": 4.573626045400239e-06, "loss": 1.5737, "step": 27310 }, { "epoch": 4.12, "learning_rate": 4.5549581839904424e-06, "loss": 1.1396, "step": 27320 }, { "epoch": 4.12, "learning_rate": 4.536290322580646e-06, "loss": 1.1578, "step": 27330 }, { "epoch": 4.12, "learning_rate": 4.517622461170849e-06, "loss": 1.097, "step": 27340 }, { "epoch": 4.12, "learning_rate": 4.498954599761051e-06, "loss": 1.5931, "step": 27350 }, { "epoch": 4.12, "learning_rate": 4.4802867383512545e-06, "loss": 1.1666, "step": 27360 }, { "epoch": 4.12, "learning_rate": 4.461618876941458e-06, "loss": 0.9427, "step": 27370 }, { "epoch": 4.12, "learning_rate": 4.442951015531661e-06, "loss": 1.8824, "step": 27380 }, { "epoch": 4.12, "learning_rate": 4.424283154121864e-06, "loss": 0.8832, "step": 27390 }, { "epoch": 4.12, "learning_rate": 4.4056152927120665e-06, "loss": 1.2699, "step": 27400 }, { "epoch": 4.12, "learning_rate": 4.3869474313022706e-06, "loss": 1.0975, "step": 27410 }, { "epoch": 4.12, "learning_rate": 4.368279569892474e-06, "loss": 1.2642, "step": 27420 }, { "epoch": 4.12, "learning_rate": 4.349611708482677e-06, "loss": 1.5067, "step": 27430 }, { "epoch": 4.12, "learning_rate": 4.330943847072879e-06, "loss": 1.6365, "step": 27440 }, { "epoch": 4.12, "learning_rate": 4.312275985663083e-06, "loss": 1.2913, "step": 27450 }, { "epoch": 4.12, "learning_rate": 4.293608124253286e-06, "loss": 0.7613, "step": 27460 }, { "epoch": 4.12, "learning_rate": 4.274940262843489e-06, "loss": 1.0755, "step": 27470 }, { "epoch": 4.12, "learning_rate": 4.256272401433692e-06, "loss": 1.1365, "step": 27480 }, { "epoch": 4.12, "learning_rate": 4.237604540023895e-06, "loss": 0.9249, "step": 27490 }, { "epoch": 4.12, "learning_rate": 4.218936678614098e-06, "loss": 1.2179, "step": 27500 }, { "epoch": 4.12, "learning_rate": 4.200268817204301e-06, "loss": 1.58, "step": 27510 }, { "epoch": 4.12, "learning_rate": 4.181600955794504e-06, "loss": 1.2847, "step": 27520 }, { "epoch": 4.13, "learning_rate": 4.1629330943847075e-06, "loss": 1.3193, "step": 27530 }, { "epoch": 4.13, "learning_rate": 4.144265232974911e-06, "loss": 1.4941, "step": 27540 }, { "epoch": 4.13, "learning_rate": 4.125597371565114e-06, "loss": 1.3101, "step": 27550 }, { "epoch": 4.13, "learning_rate": 4.106929510155317e-06, "loss": 1.3672, "step": 27560 }, { "epoch": 4.13, "learning_rate": 4.08826164874552e-06, "loss": 1.3266, "step": 27570 }, { "epoch": 4.13, "learning_rate": 4.069593787335723e-06, "loss": 1.1635, "step": 27580 }, { "epoch": 4.13, "learning_rate": 4.050925925925926e-06, "loss": 1.0761, "step": 27590 }, { "epoch": 4.13, "learning_rate": 4.032258064516129e-06, "loss": 1.5363, "step": 27600 }, { "epoch": 4.13, "learning_rate": 4.013590203106332e-06, "loss": 1.1535, "step": 27610 }, { "epoch": 4.13, "learning_rate": 3.994922341696535e-06, "loss": 1.0786, "step": 27620 }, { "epoch": 4.13, "learning_rate": 3.976254480286738e-06, "loss": 1.505, "step": 27630 }, { "epoch": 4.13, "learning_rate": 3.957586618876942e-06, "loss": 1.0203, "step": 27640 }, { "epoch": 4.13, "learning_rate": 3.938918757467145e-06, "loss": 1.2253, "step": 27650 }, { "epoch": 4.13, "learning_rate": 3.920250896057348e-06, "loss": 1.0077, "step": 27660 }, { "epoch": 4.13, "learning_rate": 3.901583034647551e-06, "loss": 1.4607, "step": 27670 }, { "epoch": 4.13, "learning_rate": 3.882915173237754e-06, "loss": 0.9503, "step": 27680 }, { "epoch": 4.13, "learning_rate": 3.864247311827957e-06, "loss": 1.5217, "step": 27690 }, { "epoch": 4.13, "learning_rate": 3.8455794504181605e-06, "loss": 0.8979, "step": 27700 }, { "epoch": 4.13, "learning_rate": 3.826911589008363e-06, "loss": 1.3058, "step": 27710 }, { "epoch": 4.13, "learning_rate": 3.808243727598566e-06, "loss": 1.4802, "step": 27720 }, { "epoch": 4.13, "learning_rate": 3.7895758661887694e-06, "loss": 1.8119, "step": 27730 }, { "epoch": 4.13, "learning_rate": 3.7709080047789726e-06, "loss": 1.1756, "step": 27740 }, { "epoch": 4.13, "learning_rate": 3.7522401433691754e-06, "loss": 0.8481, "step": 27750 }, { "epoch": 4.13, "learning_rate": 3.733572281959379e-06, "loss": 0.7704, "step": 27760 }, { "epoch": 4.13, "learning_rate": 3.7149044205495822e-06, "loss": 1.2153, "step": 27770 }, { "epoch": 4.13, "learning_rate": 3.6962365591397855e-06, "loss": 1.7107, "step": 27780 }, { "epoch": 4.13, "learning_rate": 3.6775686977299882e-06, "loss": 0.9795, "step": 27790 }, { "epoch": 4.13, "learning_rate": 3.6589008363201915e-06, "loss": 0.9315, "step": 27800 }, { "epoch": 4.13, "learning_rate": 3.6402329749103943e-06, "loss": 0.9131, "step": 27810 }, { "epoch": 4.13, "learning_rate": 3.6215651135005975e-06, "loss": 1.089, "step": 27820 }, { "epoch": 4.14, "learning_rate": 3.6028972520908007e-06, "loss": 0.9854, "step": 27830 }, { "epoch": 4.14, "learning_rate": 3.5842293906810035e-06, "loss": 1.6351, "step": 27840 }, { "epoch": 4.14, "learning_rate": 3.5655615292712067e-06, "loss": 1.2948, "step": 27850 }, { "epoch": 4.14, "learning_rate": 3.5468936678614095e-06, "loss": 0.6349, "step": 27860 }, { "epoch": 4.14, "learning_rate": 3.5282258064516136e-06, "loss": 1.1524, "step": 27870 }, { "epoch": 4.14, "learning_rate": 3.5095579450418164e-06, "loss": 1.1165, "step": 27880 }, { "epoch": 4.14, "learning_rate": 3.4908900836320196e-06, "loss": 1.3693, "step": 27890 }, { "epoch": 4.14, "learning_rate": 3.4722222222222224e-06, "loss": 0.8425, "step": 27900 }, { "epoch": 4.14, "learning_rate": 3.4535543608124256e-06, "loss": 1.3458, "step": 27910 }, { "epoch": 4.14, "learning_rate": 3.434886499402629e-06, "loss": 1.0211, "step": 27920 }, { "epoch": 4.14, "learning_rate": 3.4162186379928316e-06, "loss": 1.171, "step": 27930 }, { "epoch": 4.14, "learning_rate": 3.397550776583035e-06, "loss": 0.983, "step": 27940 }, { "epoch": 4.14, "learning_rate": 3.3788829151732376e-06, "loss": 1.3137, "step": 27950 }, { "epoch": 4.14, "learning_rate": 3.360215053763441e-06, "loss": 1.1717, "step": 27960 }, { "epoch": 4.14, "learning_rate": 3.3415471923536437e-06, "loss": 1.2904, "step": 27970 }, { "epoch": 4.14, "learning_rate": 3.322879330943847e-06, "loss": 1.6562, "step": 27980 }, { "epoch": 4.14, "learning_rate": 3.3042114695340505e-06, "loss": 1.042, "step": 27990 }, { "epoch": 4.14, "learning_rate": 3.2855436081242537e-06, "loss": 1.0269, "step": 28000 }, { "epoch": 4.14, "learning_rate": 3.2668757467144565e-06, "loss": 0.7511, "step": 28010 }, { "epoch": 4.14, "learning_rate": 3.2482078853046597e-06, "loss": 1.4292, "step": 28020 }, { "epoch": 4.14, "learning_rate": 3.229540023894863e-06, "loss": 1.2028, "step": 28030 }, { "epoch": 4.14, "learning_rate": 3.2108721624850658e-06, "loss": 1.3415, "step": 28040 }, { "epoch": 4.14, "learning_rate": 3.192204301075269e-06, "loss": 1.2112, "step": 28050 }, { "epoch": 4.14, "learning_rate": 3.1735364396654718e-06, "loss": 1.0659, "step": 28060 }, { "epoch": 4.14, "learning_rate": 3.154868578255675e-06, "loss": 1.0592, "step": 28070 }, { "epoch": 4.14, "learning_rate": 3.1362007168458782e-06, "loss": 1.5721, "step": 28080 }, { "epoch": 4.14, "learning_rate": 3.1175328554360814e-06, "loss": 1.2488, "step": 28090 }, { "epoch": 4.14, "learning_rate": 3.0988649940262847e-06, "loss": 1.2523, "step": 28100 }, { "epoch": 4.14, "learning_rate": 3.0801971326164875e-06, "loss": 2.0374, "step": 28110 }, { "epoch": 4.14, "learning_rate": 3.0615292712066907e-06, "loss": 1.4842, "step": 28120 }, { "epoch": 4.15, "learning_rate": 3.042861409796894e-06, "loss": 0.9243, "step": 28130 }, { "epoch": 4.15, "learning_rate": 3.024193548387097e-06, "loss": 2.0135, "step": 28140 }, { "epoch": 4.15, "learning_rate": 3.0055256869773e-06, "loss": 1.0956, "step": 28150 }, { "epoch": 4.15, "learning_rate": 2.986857825567503e-06, "loss": 1.3584, "step": 28160 }, { "epoch": 4.15, "learning_rate": 2.9681899641577063e-06, "loss": 1.3753, "step": 28170 }, { "epoch": 4.15, "learning_rate": 2.949522102747909e-06, "loss": 0.6571, "step": 28180 }, { "epoch": 4.15, "learning_rate": 2.9308542413381128e-06, "loss": 1.5899, "step": 28190 }, { "epoch": 4.15, "learning_rate": 2.9121863799283156e-06, "loss": 1.4262, "step": 28200 }, { "epoch": 4.15, "learning_rate": 2.893518518518519e-06, "loss": 0.8002, "step": 28210 }, { "epoch": 4.15, "learning_rate": 2.8748506571087216e-06, "loss": 1.5667, "step": 28220 }, { "epoch": 4.15, "learning_rate": 2.856182795698925e-06, "loss": 1.3142, "step": 28230 }, { "epoch": 4.15, "learning_rate": 2.8375149342891276e-06, "loss": 1.4252, "step": 28240 }, { "epoch": 4.15, "learning_rate": 2.8188470728793313e-06, "loss": 0.8309, "step": 28250 }, { "epoch": 4.15, "learning_rate": 2.800179211469534e-06, "loss": 1.2724, "step": 28260 }, { "epoch": 4.15, "learning_rate": 2.7815113500597373e-06, "loss": 1.3443, "step": 28270 }, { "epoch": 4.15, "learning_rate": 2.7628434886499405e-06, "loss": 0.7029, "step": 28280 }, { "epoch": 4.15, "learning_rate": 2.7441756272401433e-06, "loss": 0.896, "step": 28290 }, { "epoch": 4.15, "learning_rate": 2.725507765830347e-06, "loss": 1.3636, "step": 28300 }, { "epoch": 4.15, "learning_rate": 2.7068399044205497e-06, "loss": 1.1022, "step": 28310 }, { "epoch": 4.15, "learning_rate": 2.688172043010753e-06, "loss": 0.9287, "step": 28320 }, { "epoch": 4.15, "learning_rate": 2.6695041816009557e-06, "loss": 0.9124, "step": 28330 }, { "epoch": 4.15, "learning_rate": 2.650836320191159e-06, "loss": 1.3033, "step": 28340 }, { "epoch": 4.15, "learning_rate": 2.632168458781362e-06, "loss": 0.9654, "step": 28350 }, { "epoch": 4.15, "learning_rate": 2.6135005973715654e-06, "loss": 0.7993, "step": 28360 }, { "epoch": 4.15, "learning_rate": 2.5948327359617686e-06, "loss": 1.0669, "step": 28370 }, { "epoch": 4.15, "learning_rate": 2.5761648745519714e-06, "loss": 0.9279, "step": 28380 }, { "epoch": 4.15, "learning_rate": 2.5574970131421746e-06, "loss": 0.8121, "step": 28390 }, { "epoch": 4.15, "learning_rate": 2.5388291517323774e-06, "loss": 1.3864, "step": 28400 }, { "epoch": 4.15, "learning_rate": 2.5201612903225806e-06, "loss": 1.1031, "step": 28410 }, { "epoch": 4.15, "learning_rate": 2.501493428912784e-06, "loss": 1.757, "step": 28420 }, { "epoch": 4.16, "learning_rate": 2.482825567502987e-06, "loss": 1.3906, "step": 28430 }, { "epoch": 4.16, "learning_rate": 2.4641577060931903e-06, "loss": 1.6209, "step": 28440 }, { "epoch": 4.16, "learning_rate": 2.445489844683393e-06, "loss": 0.8543, "step": 28450 }, { "epoch": 4.16, "learning_rate": 2.4268219832735963e-06, "loss": 1.7187, "step": 28460 }, { "epoch": 4.16, "learning_rate": 2.408154121863799e-06, "loss": 1.11, "step": 28470 }, { "epoch": 4.16, "learning_rate": 2.3894862604540028e-06, "loss": 0.8351, "step": 28480 }, { "epoch": 4.16, "learning_rate": 2.3708183990442056e-06, "loss": 1.3873, "step": 28490 }, { "epoch": 4.16, "learning_rate": 2.3521505376344088e-06, "loss": 1.4568, "step": 28500 }, { "epoch": 4.16, "learning_rate": 2.3334826762246116e-06, "loss": 1.1747, "step": 28510 }, { "epoch": 4.16, "learning_rate": 2.3148148148148148e-06, "loss": 0.8766, "step": 28520 }, { "epoch": 4.16, "learning_rate": 2.296146953405018e-06, "loss": 1.2374, "step": 28530 }, { "epoch": 4.16, "learning_rate": 2.2774790919952212e-06, "loss": 1.0945, "step": 28540 }, { "epoch": 4.16, "learning_rate": 2.2588112305854244e-06, "loss": 1.3041, "step": 28550 }, { "epoch": 4.16, "learning_rate": 2.2401433691756272e-06, "loss": 0.8916, "step": 28560 }, { "epoch": 4.16, "learning_rate": 2.2214755077658305e-06, "loss": 1.536, "step": 28570 }, { "epoch": 4.16, "learning_rate": 2.2028076463560333e-06, "loss": 1.2402, "step": 28580 }, { "epoch": 4.16, "learning_rate": 2.184139784946237e-06, "loss": 1.9305, "step": 28590 }, { "epoch": 4.16, "learning_rate": 2.1654719235364397e-06, "loss": 1.0564, "step": 28600 }, { "epoch": 4.16, "learning_rate": 2.146804062126643e-06, "loss": 0.7555, "step": 28610 }, { "epoch": 4.16, "learning_rate": 2.128136200716846e-06, "loss": 0.9128, "step": 28620 }, { "epoch": 4.16, "learning_rate": 2.109468339307049e-06, "loss": 1.0006, "step": 28630 }, { "epoch": 4.16, "learning_rate": 2.090800477897252e-06, "loss": 1.155, "step": 28640 }, { "epoch": 4.16, "learning_rate": 2.0721326164874554e-06, "loss": 0.6499, "step": 28650 }, { "epoch": 4.16, "learning_rate": 2.0534647550776586e-06, "loss": 1.4456, "step": 28660 }, { "epoch": 4.16, "learning_rate": 2.0347968936678614e-06, "loss": 1.2338, "step": 28670 }, { "epoch": 4.16, "learning_rate": 2.0161290322580646e-06, "loss": 1.381, "step": 28680 }, { "epoch": 4.16, "learning_rate": 1.9974611708482674e-06, "loss": 1.1661, "step": 28690 }, { "epoch": 4.16, "learning_rate": 1.978793309438471e-06, "loss": 1.2045, "step": 28700 }, { "epoch": 4.16, "learning_rate": 1.960125448028674e-06, "loss": 1.6113, "step": 28710 }, { "epoch": 4.17, "learning_rate": 1.941457586618877e-06, "loss": 1.5231, "step": 28720 }, { "epoch": 4.17, "learning_rate": 1.9227897252090803e-06, "loss": 0.5758, "step": 28730 }, { "epoch": 4.17, "learning_rate": 1.904121863799283e-06, "loss": 1.9556, "step": 28740 }, { "epoch": 4.17, "learning_rate": 1.8854540023894863e-06, "loss": 1.4313, "step": 28750 }, { "epoch": 4.17, "learning_rate": 1.8667861409796895e-06, "loss": 0.9415, "step": 28760 }, { "epoch": 4.17, "learning_rate": 1.8481182795698927e-06, "loss": 1.3642, "step": 28770 }, { "epoch": 4.17, "learning_rate": 1.8294504181600957e-06, "loss": 1.4861, "step": 28780 }, { "epoch": 4.17, "learning_rate": 1.8107825567502987e-06, "loss": 1.6615, "step": 28790 }, { "epoch": 4.17, "learning_rate": 1.7921146953405017e-06, "loss": 1.053, "step": 28800 }, { "epoch": 4.17, "learning_rate": 1.7734468339307048e-06, "loss": 1.4023, "step": 28810 }, { "epoch": 4.17, "learning_rate": 1.7547789725209082e-06, "loss": 1.3273, "step": 28820 }, { "epoch": 4.17, "learning_rate": 1.7361111111111112e-06, "loss": 1.3633, "step": 28830 }, { "epoch": 4.17, "learning_rate": 1.7174432497013144e-06, "loss": 0.8788, "step": 28840 }, { "epoch": 4.17, "learning_rate": 1.6987753882915174e-06, "loss": 1.1909, "step": 28850 }, { "epoch": 4.17, "learning_rate": 1.6801075268817204e-06, "loss": 1.6415, "step": 28860 }, { "epoch": 4.17, "learning_rate": 1.6614396654719234e-06, "loss": 1.0557, "step": 28870 }, { "epoch": 4.17, "learning_rate": 1.6427718040621269e-06, "loss": 1.4031, "step": 28880 }, { "epoch": 4.17, "learning_rate": 1.6241039426523299e-06, "loss": 1.3578, "step": 28890 }, { "epoch": 4.17, "learning_rate": 1.6054360812425329e-06, "loss": 1.0865, "step": 28900 }, { "epoch": 4.17, "learning_rate": 1.5867682198327359e-06, "loss": 1.6102, "step": 28910 }, { "epoch": 4.17, "learning_rate": 1.5681003584229391e-06, "loss": 1.3342, "step": 28920 }, { "epoch": 4.17, "learning_rate": 1.5494324970131423e-06, "loss": 1.4535, "step": 28930 }, { "epoch": 4.17, "learning_rate": 1.5307646356033453e-06, "loss": 1.2964, "step": 28940 }, { "epoch": 4.17, "learning_rate": 1.5120967741935486e-06, "loss": 1.024, "step": 28950 }, { "epoch": 4.17, "learning_rate": 1.4934289127837516e-06, "loss": 1.2547, "step": 28960 }, { "epoch": 4.17, "learning_rate": 1.4747610513739546e-06, "loss": 0.9464, "step": 28970 }, { "epoch": 4.17, "learning_rate": 1.4560931899641578e-06, "loss": 1.4012, "step": 28980 }, { "epoch": 4.17, "learning_rate": 1.4374253285543608e-06, "loss": 1.0043, "step": 28990 }, { "epoch": 4.17, "learning_rate": 1.4187574671445638e-06, "loss": 1.5499, "step": 29000 }, { "epoch": 4.17, "learning_rate": 1.400089605734767e-06, "loss": 1.259, "step": 29010 }, { "epoch": 4.18, "learning_rate": 1.3814217443249702e-06, "loss": 1.0037, "step": 29020 }, { "epoch": 4.18, "learning_rate": 1.3627538829151735e-06, "loss": 1.479, "step": 29030 }, { "epoch": 4.18, "learning_rate": 1.3440860215053765e-06, "loss": 0.5973, "step": 29040 }, { "epoch": 4.18, "learning_rate": 1.3254181600955795e-06, "loss": 1.807, "step": 29050 }, { "epoch": 4.18, "learning_rate": 1.3067502986857827e-06, "loss": 1.0697, "step": 29060 }, { "epoch": 4.18, "learning_rate": 1.2880824372759857e-06, "loss": 0.6813, "step": 29070 }, { "epoch": 4.18, "learning_rate": 1.2694145758661887e-06, "loss": 1.3963, "step": 29080 }, { "epoch": 4.18, "learning_rate": 1.250746714456392e-06, "loss": 1.3437, "step": 29090 }, { "epoch": 4.18, "learning_rate": 1.2320788530465952e-06, "loss": 1.2347, "step": 29100 }, { "epoch": 4.18, "learning_rate": 1.2134109916367982e-06, "loss": 0.8104, "step": 29110 }, { "epoch": 4.18, "learning_rate": 1.1947431302270014e-06, "loss": 0.9531, "step": 29120 }, { "epoch": 4.18, "learning_rate": 1.1760752688172044e-06, "loss": 1.2772, "step": 29130 }, { "epoch": 4.18, "learning_rate": 1.1574074074074074e-06, "loss": 1.1648, "step": 29140 }, { "epoch": 4.18, "learning_rate": 1.1387395459976106e-06, "loss": 0.8311, "step": 29150 }, { "epoch": 4.18, "learning_rate": 1.1200716845878136e-06, "loss": 0.5579, "step": 29160 }, { "epoch": 4.18, "learning_rate": 1.1014038231780166e-06, "loss": 1.3376, "step": 29170 }, { "epoch": 4.18, "learning_rate": 1.0827359617682198e-06, "loss": 0.3455, "step": 29180 }, { "epoch": 4.18, "learning_rate": 1.064068100358423e-06, "loss": 1.3899, "step": 29190 }, { "epoch": 4.18, "learning_rate": 1.045400238948626e-06, "loss": 1.1563, "step": 29200 }, { "epoch": 4.18, "learning_rate": 1.0267323775388293e-06, "loss": 0.8992, "step": 29210 }, { "epoch": 4.18, "learning_rate": 1.0080645161290323e-06, "loss": 1.2932, "step": 29220 }, { "epoch": 4.18, "learning_rate": 9.893966547192355e-07, "loss": 1.4952, "step": 29230 }, { "epoch": 4.18, "learning_rate": 9.707287933094385e-07, "loss": 0.8295, "step": 29240 }, { "epoch": 4.18, "learning_rate": 9.520609318996415e-07, "loss": 1.2467, "step": 29250 }, { "epoch": 4.18, "learning_rate": 9.333930704898448e-07, "loss": 0.6587, "step": 29260 }, { "epoch": 4.18, "learning_rate": 9.147252090800479e-07, "loss": 0.9228, "step": 29270 }, { "epoch": 4.18, "learning_rate": 8.960573476702509e-07, "loss": 0.8199, "step": 29280 }, { "epoch": 4.18, "learning_rate": 8.773894862604541e-07, "loss": 1.422, "step": 29290 }, { "epoch": 4.18, "learning_rate": 8.587216248506572e-07, "loss": 1.3199, "step": 29300 }, { "epoch": 4.18, "learning_rate": 8.400537634408602e-07, "loss": 1.1563, "step": 29310 }, { "epoch": 4.19, "learning_rate": 8.213859020310634e-07, "loss": 1.3217, "step": 29320 }, { "epoch": 4.19, "learning_rate": 8.027180406212664e-07, "loss": 1.5251, "step": 29330 }, { "epoch": 4.19, "learning_rate": 7.840501792114696e-07, "loss": 0.9194, "step": 29340 }, { "epoch": 4.19, "learning_rate": 7.653823178016727e-07, "loss": 0.9451, "step": 29350 }, { "epoch": 4.19, "learning_rate": 7.467144563918758e-07, "loss": 1.4432, "step": 29360 }, { "epoch": 4.19, "learning_rate": 7.280465949820789e-07, "loss": 1.1293, "step": 29370 }, { "epoch": 4.19, "learning_rate": 7.093787335722819e-07, "loss": 1.1054, "step": 29380 }, { "epoch": 4.19, "learning_rate": 6.907108721624851e-07, "loss": 1.1609, "step": 29390 }, { "epoch": 4.19, "learning_rate": 6.720430107526882e-07, "loss": 1.0319, "step": 29400 }, { "epoch": 4.19, "learning_rate": 6.533751493428913e-07, "loss": 1.522, "step": 29410 }, { "epoch": 4.19, "learning_rate": 6.347072879330944e-07, "loss": 1.1448, "step": 29420 }, { "epoch": 4.19, "learning_rate": 6.160394265232976e-07, "loss": 1.4926, "step": 29430 }, { "epoch": 4.19, "learning_rate": 5.973715651135007e-07, "loss": 0.7732, "step": 29440 }, { "epoch": 4.19, "learning_rate": 5.787037037037037e-07, "loss": 1.3816, "step": 29450 }, { "epoch": 4.19, "learning_rate": 5.600358422939068e-07, "loss": 1.269, "step": 29460 }, { "epoch": 4.19, "learning_rate": 5.413679808841099e-07, "loss": 1.2545, "step": 29470 }, { "epoch": 4.19, "learning_rate": 5.22700119474313e-07, "loss": 0.582, "step": 29480 }, { "epoch": 4.19, "learning_rate": 5.040322580645161e-07, "loss": 1.2124, "step": 29490 }, { "epoch": 4.19, "learning_rate": 4.853643966547193e-07, "loss": 1.1213, "step": 29500 }, { "epoch": 4.19, "learning_rate": 4.666965352449224e-07, "loss": 1.1458, "step": 29510 }, { "epoch": 4.19, "learning_rate": 4.4802867383512544e-07, "loss": 1.2749, "step": 29520 }, { "epoch": 4.19, "learning_rate": 4.293608124253286e-07, "loss": 1.401, "step": 29530 }, { "epoch": 4.19, "learning_rate": 4.106929510155317e-07, "loss": 1.4359, "step": 29540 }, { "epoch": 4.19, "learning_rate": 3.920250896057348e-07, "loss": 1.4529, "step": 29550 }, { "epoch": 4.19, "learning_rate": 3.733572281959379e-07, "loss": 1.2089, "step": 29560 }, { "epoch": 4.19, "learning_rate": 3.5468936678614095e-07, "loss": 1.0361, "step": 29570 }, { "epoch": 4.19, "learning_rate": 3.360215053763441e-07, "loss": 1.4934, "step": 29580 }, { "epoch": 4.19, "learning_rate": 3.173536439665472e-07, "loss": 0.7505, "step": 29590 }, { "epoch": 4.19, "learning_rate": 2.9868578255675034e-07, "loss": 0.9472, "step": 29600 }, { "epoch": 4.19, "learning_rate": 2.800179211469534e-07, "loss": 1.4226, "step": 29610 }, { "epoch": 4.2, "learning_rate": 2.613500597371565e-07, "loss": 1.1832, "step": 29620 }, { "epoch": 4.2, "learning_rate": 2.4268219832735963e-07, "loss": 1.026, "step": 29630 }, { "epoch": 4.2, "learning_rate": 2.2401433691756272e-07, "loss": 1.2389, "step": 29640 }, { "epoch": 4.2, "learning_rate": 2.0534647550776586e-07, "loss": 1.3421, "step": 29650 }, { "epoch": 4.2, "learning_rate": 1.8667861409796895e-07, "loss": 1.2626, "step": 29660 }, { "epoch": 4.2, "learning_rate": 1.6801075268817206e-07, "loss": 0.9274, "step": 29670 }, { "epoch": 4.2, "learning_rate": 1.4934289127837517e-07, "loss": 1.341, "step": 29680 }, { "epoch": 4.2, "learning_rate": 1.3067502986857826e-07, "loss": 1.5807, "step": 29690 }, { "epoch": 4.2, "learning_rate": 1.1200716845878136e-07, "loss": 1.1787, "step": 29700 }, { "epoch": 4.2, "learning_rate": 9.333930704898447e-08, "loss": 1.0598, "step": 29710 }, { "epoch": 4.2, "learning_rate": 7.467144563918759e-08, "loss": 0.5426, "step": 29720 }, { "epoch": 4.2, "learning_rate": 5.600358422939068e-08, "loss": 1.1563, "step": 29730 }, { "epoch": 4.2, "learning_rate": 3.733572281959379e-08, "loss": 0.8868, "step": 29740 }, { "epoch": 4.2, "learning_rate": 1.8667861409796897e-08, "loss": 1.2178, "step": 29750 }, { "epoch": 4.2, "learning_rate": 0.0, "loss": 0.6968, "step": 29760 }, { "epoch": 4.2, "eval_accuracy": 0.5362423368193292, "eval_loss": 1.7134848833084106, "eval_runtime": 957.6543, "eval_samples_per_second": 2.896, "eval_steps_per_second": 1.448, "step": 29760 }, { "epoch": 4.2, "step": 29760, "total_flos": 7.424645769153479e+19, "train_loss": 2.989056267741547, "train_runtime": 377379.9366, "train_samples_per_second": 0.158, "train_steps_per_second": 0.079 }, { "epoch": 4.2, "eval_accuracy": 0.5587426326129666, "eval_loss": 1.650649070739746, "eval_runtime": 862.3611, "eval_samples_per_second": 2.951, "eval_steps_per_second": 1.476, "step": 29760 }, { "epoch": 4.2, "eval_accuracy": 0.5587426326129666, "eval_loss": 1.650649070739746, "eval_runtime": 863.5093, "eval_samples_per_second": 2.947, "eval_steps_per_second": 1.474, "step": 29760 } ], "logging_steps": 10, "max_steps": 29760, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 7.424645769153479e+19, "trial_name": null, "trial_params": null }