{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "learning_rate": 3.0303030303030305e-06, "loss": 1.7297, "step": 10 }, { "epoch": 0.45, "learning_rate": 6.060606060606061e-06, "loss": 0.9422, "step": 20 }, { "epoch": 0.68, "learning_rate": 9.090909090909091e-06, "loss": 0.7053, "step": 30 }, { "epoch": 0.91, "learning_rate": 1.2121212121212122e-05, "loss": 0.6722, "step": 40 }, { "epoch": 1.14, "learning_rate": 1.5151515151515153e-05, "loss": 0.6335, "step": 50 }, { "epoch": 1.36, "learning_rate": 1.8181818181818182e-05, "loss": 0.6203, "step": 60 }, { "epoch": 1.59, "learning_rate": 1.999982661974106e-05, "loss": 0.6116, "step": 70 }, { "epoch": 1.82, "learning_rate": 1.9997876160873005e-05, "loss": 0.6105, "step": 80 }, { "epoch": 2.05, "learning_rate": 1.9993758941929738e-05, "loss": 0.5888, "step": 90 }, { "epoch": 2.27, "learning_rate": 1.9987475855203325e-05, "loss": 0.5663, "step": 100 }, { "epoch": 2.5, "learning_rate": 1.997902826237712e-05, "loss": 0.5533, "step": 110 }, { "epoch": 2.73, "learning_rate": 1.9968417994230628e-05, "loss": 0.5662, "step": 120 }, { "epoch": 2.95, "learning_rate": 1.9955647350242765e-05, "loss": 0.5717, "step": 130 }, { "epoch": 3.18, "learning_rate": 1.99407190980935e-05, "loss": 0.5193, "step": 140 }, { "epoch": 3.41, "learning_rate": 1.9923636473064024e-05, "loss": 0.4847, "step": 150 }, { "epoch": 3.64, "learning_rate": 1.990440317733561e-05, "loss": 0.4733, "step": 160 }, { "epoch": 3.86, "learning_rate": 1.9883023379187267e-05, "loss": 0.4801, "step": 170 }, { "epoch": 4.09, "learning_rate": 1.9859501712092374e-05, "loss": 0.4299, "step": 180 }, { "epoch": 4.32, "learning_rate": 1.9833843273714506e-05, "loss": 0.3846, "step": 190 }, { "epoch": 4.55, "learning_rate": 1.9806053624802667e-05, "loss": 0.3733, "step": 200 }, { "epoch": 4.77, "learning_rate": 1.9776138787986137e-05, "loss": 0.3646, "step": 210 }, { "epoch": 5.0, "learning_rate": 1.9744105246469264e-05, "loss": 0.4011, "step": 220 }, { "epoch": 5.23, "learning_rate": 1.9709959942626366e-05, "loss": 0.3151, "step": 230 }, { "epoch": 5.45, "learning_rate": 1.9673710276497207e-05, "loss": 0.2784, "step": 240 }, { "epoch": 5.68, "learning_rate": 1.9635364104183236e-05, "loss": 0.3248, "step": 250 }, { "epoch": 5.91, "learning_rate": 1.9594929736144978e-05, "loss": 0.3083, "step": 260 }, { "epoch": 6.14, "learning_rate": 1.955241593540098e-05, "loss": 0.2681, "step": 270 }, { "epoch": 6.36, "learning_rate": 1.9507831915628706e-05, "loss": 0.2364, "step": 280 }, { "epoch": 6.59, "learning_rate": 1.9461187339167673e-05, "loss": 0.2565, "step": 290 }, { "epoch": 6.82, "learning_rate": 1.9412492314925453e-05, "loss": 0.2661, "step": 300 }, { "epoch": 7.05, "learning_rate": 1.9361757396186834e-05, "loss": 0.2562, "step": 310 }, { "epoch": 7.27, "learning_rate": 1.9308993578326688e-05, "loss": 0.2189, "step": 320 }, { "epoch": 7.5, "learning_rate": 1.9254212296427043e-05, "loss": 0.1989, "step": 330 }, { "epoch": 7.73, "learning_rate": 1.9197425422798832e-05, "loss": 0.2321, "step": 340 }, { "epoch": 7.95, "learning_rate": 1.9138645264408917e-05, "loss": 0.229, "step": 350 }, { "epoch": 8.18, "learning_rate": 1.9077884560212885e-05, "loss": 0.1959, "step": 360 }, { "epoch": 8.41, "learning_rate": 1.901515647839425e-05, "loss": 0.1936, "step": 370 }, { "epoch": 8.64, "learning_rate": 1.8950474613510605e-05, "loss": 0.1907, "step": 380 }, { "epoch": 8.86, "learning_rate": 1.8883852983547383e-05, "loss": 0.1959, "step": 390 }, { "epoch": 9.09, "learning_rate": 1.8815306026879854e-05, "loss": 0.1852, "step": 400 }, { "epoch": 9.32, "learning_rate": 1.8744848599144027e-05, "loss": 0.1823, "step": 410 }, { "epoch": 9.55, "learning_rate": 1.8672495970017067e-05, "loss": 0.154, "step": 420 }, { "epoch": 9.77, "learning_rate": 1.859826381990806e-05, "loss": 0.1764, "step": 430 }, { "epoch": 10.0, "learning_rate": 1.8522168236559693e-05, "loss": 0.1748, "step": 440 }, { "epoch": 10.23, "learning_rate": 1.8444225711561713e-05, "loss": 0.139, "step": 450 }, { "epoch": 10.45, "learning_rate": 1.83644531367768e-05, "loss": 0.1648, "step": 460 }, { "epoch": 10.68, "learning_rate": 1.8282867800679774e-05, "loss": 0.1677, "step": 470 }, { "epoch": 10.91, "learning_rate": 1.8199487384610758e-05, "loss": 0.1728, "step": 480 }, { "epoch": 11.14, "learning_rate": 1.8114329958943272e-05, "loss": 0.1436, "step": 490 }, { "epoch": 11.36, "learning_rate": 1.8027413979167968e-05, "loss": 0.1336, "step": 500 }, { "epoch": 11.59, "learning_rate": 1.7938758281892928e-05, "loss": 0.1426, "step": 510 }, { "epoch": 11.82, "learning_rate": 1.7848382080761343e-05, "loss": 0.1725, "step": 520 }, { "epoch": 12.05, "learning_rate": 1.775630496228752e-05, "loss": 0.1362, "step": 530 }, { "epoch": 12.27, "learning_rate": 1.766254688161202e-05, "loss": 0.1287, "step": 540 }, { "epoch": 12.5, "learning_rate": 1.7567128158176955e-05, "loss": 0.1219, "step": 550 }, { "epoch": 12.73, "learning_rate": 1.7470069471322325e-05, "loss": 0.1456, "step": 560 }, { "epoch": 12.95, "learning_rate": 1.737139185580434e-05, "loss": 0.1376, "step": 570 }, { "epoch": 13.18, "learning_rate": 1.7271116697236717e-05, "loss": 0.1209, "step": 580 }, { "epoch": 13.41, "learning_rate": 1.7169265727455964e-05, "loss": 0.1316, "step": 590 }, { "epoch": 13.64, "learning_rate": 1.7065861019811598e-05, "loss": 0.1296, "step": 600 }, { "epoch": 13.86, "learning_rate": 1.6960924984382373e-05, "loss": 0.1359, "step": 610 }, { "epoch": 14.09, "learning_rate": 1.685448036311951e-05, "loss": 0.1123, "step": 620 }, { "epoch": 14.32, "learning_rate": 1.6746550224918032e-05, "loss": 0.1227, "step": 630 }, { "epoch": 14.55, "learning_rate": 1.663715796061722e-05, "loss": 0.1282, "step": 640 }, { "epoch": 14.77, "learning_rate": 1.6526327277931278e-05, "loss": 0.1055, "step": 650 }, { "epoch": 15.0, "learning_rate": 1.6414082196311402e-05, "loss": 0.1288, "step": 660 }, { "epoch": 15.23, "learning_rate": 1.630044704174018e-05, "loss": 0.1145, "step": 670 }, { "epoch": 15.45, "learning_rate": 1.6185446441459652e-05, "loss": 0.1158, "step": 680 }, { "epoch": 15.68, "learning_rate": 1.6069105318634024e-05, "loss": 0.1119, "step": 690 }, { "epoch": 15.91, "learning_rate": 1.5951448886948286e-05, "loss": 0.1119, "step": 700 }, { "epoch": 16.14, "learning_rate": 1.5832502645143837e-05, "loss": 0.0981, "step": 710 }, { "epoch": 16.36, "learning_rate": 1.571229237149235e-05, "loss": 0.1142, "step": 720 }, { "epoch": 16.59, "learning_rate": 1.5590844118209055e-05, "loss": 0.1054, "step": 730 }, { "epoch": 16.82, "learning_rate": 1.5468184205806646e-05, "loss": 0.1012, "step": 740 }, { "epoch": 17.05, "learning_rate": 1.534433921739105e-05, "loss": 0.1047, "step": 750 }, { "epoch": 17.27, "learning_rate": 1.5219335992900282e-05, "loss": 0.0992, "step": 760 }, { "epoch": 17.5, "learning_rate": 1.5093201623287631e-05, "loss": 0.1063, "step": 770 }, { "epoch": 17.73, "learning_rate": 1.4965963444650465e-05, "loss": 0.0863, "step": 780 }, { "epoch": 17.95, "learning_rate": 1.4837649032305885e-05, "loss": 0.0949, "step": 790 }, { "epoch": 18.18, "learning_rate": 1.4708286194814565e-05, "loss": 0.0829, "step": 800 }, { "epoch": 18.41, "learning_rate": 1.4577902967953995e-05, "loss": 0.0892, "step": 810 }, { "epoch": 18.64, "learning_rate": 1.4446527608642538e-05, "loss": 0.0972, "step": 820 }, { "epoch": 18.86, "learning_rate": 1.4314188588815514e-05, "loss": 0.0848, "step": 830 }, { "epoch": 19.09, "learning_rate": 1.4180914589254716e-05, "loss": 0.0849, "step": 840 }, { "epoch": 19.32, "learning_rate": 1.4046734493372646e-05, "loss": 0.0877, "step": 850 }, { "epoch": 19.55, "learning_rate": 1.3911677380952853e-05, "loss": 0.0877, "step": 860 }, { "epoch": 19.77, "learning_rate": 1.3775772521847683e-05, "loss": 0.0754, "step": 870 }, { "epoch": 20.0, "learning_rate": 1.3639049369634878e-05, "loss": 0.0901, "step": 880 }, { "epoch": 20.23, "learning_rate": 1.3501537555234323e-05, "loss": 0.0826, "step": 890 }, { "epoch": 20.45, "learning_rate": 1.3363266880486388e-05, "loss": 0.0858, "step": 900 }, { "epoch": 20.68, "learning_rate": 1.3224267311693186e-05, "loss": 0.0791, "step": 910 }, { "epoch": 20.91, "learning_rate": 1.308456897312425e-05, "loss": 0.0756, "step": 920 }, { "epoch": 21.14, "learning_rate": 1.2944202140487905e-05, "loss": 0.0683, "step": 930 }, { "epoch": 21.36, "learning_rate": 1.2803197234369878e-05, "loss": 0.0806, "step": 940 }, { "epoch": 21.59, "learning_rate": 1.2661584813640485e-05, "loss": 0.0841, "step": 950 }, { "epoch": 21.82, "learning_rate": 1.2519395568831839e-05, "loss": 0.0917, "step": 960 }, { "epoch": 22.05, "learning_rate": 1.2376660315486535e-05, "loss": 0.0708, "step": 970 }, { "epoch": 22.27, "learning_rate": 1.223340998747927e-05, "loss": 0.0768, "step": 980 }, { "epoch": 22.5, "learning_rate": 1.2089675630312755e-05, "loss": 0.0834, "step": 990 }, { "epoch": 22.73, "learning_rate": 1.1945488394389479e-05, "loss": 0.0769, "step": 1000 }, { "epoch": 22.95, "learning_rate": 1.1800879528260761e-05, "loss": 0.0751, "step": 1010 }, { "epoch": 23.18, "learning_rate": 1.1655880371854454e-05, "loss": 0.08, "step": 1020 }, { "epoch": 23.41, "learning_rate": 1.1510522349682922e-05, "loss": 0.0603, "step": 1030 }, { "epoch": 23.64, "learning_rate": 1.1364836964032658e-05, "loss": 0.0685, "step": 1040 }, { "epoch": 23.86, "learning_rate": 1.1218855788137016e-05, "loss": 0.0735, "step": 1050 }, { "epoch": 24.09, "learning_rate": 1.107261045933363e-05, "loss": 0.0615, "step": 1060 }, { "epoch": 24.32, "learning_rate": 1.092613267220788e-05, "loss": 0.0678, "step": 1070 }, { "epoch": 24.55, "learning_rate": 1.0779454171723994e-05, "loss": 0.0687, "step": 1080 }, { "epoch": 24.77, "learning_rate": 1.0632606746345203e-05, "loss": 0.0669, "step": 1090 }, { "epoch": 25.0, "learning_rate": 1.0485622221144485e-05, "loss": 0.071, "step": 1100 }, { "epoch": 25.23, "learning_rate": 1.0338532450907373e-05, "loss": 0.0708, "step": 1110 }, { "epoch": 25.45, "learning_rate": 1.0191369313228319e-05, "loss": 0.0677, "step": 1120 }, { "epoch": 25.68, "learning_rate": 1.0044164701602111e-05, "loss": 0.0738, "step": 1130 }, { "epoch": 25.91, "learning_rate": 9.896950518511863e-06, "loss": 0.0753, "step": 1140 }, { "epoch": 26.14, "learning_rate": 9.749758668515027e-06, "loss": 0.0689, "step": 1150 }, { "epoch": 26.36, "learning_rate": 9.602621051328998e-06, "loss": 0.0688, "step": 1160 }, { "epoch": 26.59, "learning_rate": 9.455569554917701e-06, "loss": 0.0781, "step": 1170 }, { "epoch": 26.82, "learning_rate": 9.308636048580813e-06, "loss": 0.0635, "step": 1180 }, { "epoch": 27.05, "learning_rate": 9.161852376046953e-06, "loss": 0.068, "step": 1190 }, { "epoch": 27.27, "learning_rate": 9.015250348572452e-06, "loss": 0.0602, "step": 1200 }, { "epoch": 27.5, "learning_rate": 8.868861738047158e-06, "loss": 0.0502, "step": 1210 }, { "epoch": 27.73, "learning_rate": 8.72271827010876e-06, "loss": 0.0531, "step": 1220 }, { "epoch": 27.95, "learning_rate": 8.576851617267151e-06, "loss": 0.0613, "step": 1230 }, { "epoch": 28.18, "learning_rate": 8.431293392040283e-06, "loss": 0.0636, "step": 1240 }, { "epoch": 28.41, "learning_rate": 8.286075140103058e-06, "loss": 0.0618, "step": 1250 }, { "epoch": 28.64, "learning_rate": 8.141228333450673e-06, "loss": 0.0652, "step": 1260 }, { "epoch": 28.86, "learning_rate": 7.99678436357794e-06, "loss": 0.0742, "step": 1270 }, { "epoch": 29.09, "learning_rate": 7.852774534676073e-06, "loss": 0.0529, "step": 1280 }, { "epoch": 29.32, "learning_rate": 7.709230056848356e-06, "loss": 0.0441, "step": 1290 }, { "epoch": 29.55, "learning_rate": 7.5661820393462605e-06, "loss": 0.0575, "step": 1300 }, { "epoch": 29.77, "learning_rate": 7.423661483827357e-06, "loss": 0.0552, "step": 1310 }, { "epoch": 30.0, "learning_rate": 7.2816992776365714e-06, "loss": 0.0518, "step": 1320 }, { "epoch": 30.23, "learning_rate": 7.1403261871122466e-06, "loss": 0.0631, "step": 1330 }, { "epoch": 30.45, "learning_rate": 6.999572850918357e-06, "loss": 0.0496, "step": 1340 }, { "epoch": 30.68, "learning_rate": 6.859469773404471e-06, "loss": 0.0575, "step": 1350 }, { "epoch": 30.91, "learning_rate": 6.720047317994775e-06, "loss": 0.0547, "step": 1360 }, { "epoch": 31.14, "learning_rate": 6.581335700607632e-06, "loss": 0.0537, "step": 1370 }, { "epoch": 31.36, "learning_rate": 6.443364983107156e-06, "loss": 0.0531, "step": 1380 }, { "epoch": 31.59, "learning_rate": 6.306165066788121e-06, "loss": 0.0525, "step": 1390 }, { "epoch": 31.82, "learning_rate": 6.169765685895703e-06, "loss": 0.0512, "step": 1400 }, { "epoch": 32.05, "learning_rate": 6.034196401181414e-06, "loss": 0.0506, "step": 1410 }, { "epoch": 32.27, "learning_rate": 5.899486593496625e-06, "loss": 0.0472, "step": 1420 }, { "epoch": 32.5, "learning_rate": 5.765665457425102e-06, "loss": 0.0538, "step": 1430 }, { "epoch": 32.73, "learning_rate": 5.6327619949558806e-06, "loss": 0.0565, "step": 1440 }, { "epoch": 32.95, "learning_rate": 5.500805009197916e-06, "loss": 0.042, "step": 1450 }, { "epoch": 33.18, "learning_rate": 5.369823098137803e-06, "loss": 0.04, "step": 1460 }, { "epoch": 33.41, "learning_rate": 5.23984464844195e-06, "loss": 0.0512, "step": 1470 }, { "epoch": 33.64, "learning_rate": 5.1108978293045915e-06, "loss": 0.0399, "step": 1480 }, { "epoch": 33.86, "learning_rate": 4.983010586342876e-06, "loss": 0.0493, "step": 1490 }, { "epoch": 34.09, "learning_rate": 4.856210635540452e-06, "loss": 0.0454, "step": 1500 }, { "epoch": 34.32, "learning_rate": 4.730525457240796e-06, "loss": 0.0414, "step": 1510 }, { "epoch": 34.55, "learning_rate": 4.605982290191623e-06, "loss": 0.045, "step": 1520 }, { "epoch": 34.77, "learning_rate": 4.482608125641633e-06, "loss": 0.0505, "step": 1530 }, { "epoch": 35.0, "learning_rate": 4.360429701490935e-06, "loss": 0.0483, "step": 1540 }, { "epoch": 35.23, "learning_rate": 4.239473496496345e-06, "loss": 0.0458, "step": 1550 }, { "epoch": 35.45, "learning_rate": 4.119765724532843e-06, "loss": 0.0391, "step": 1560 }, { "epoch": 35.68, "learning_rate": 4.001332328912475e-06, "loss": 0.0434, "step": 1570 }, { "epoch": 35.91, "learning_rate": 3.884198976761846e-06, "loss": 0.0516, "step": 1580 }, { "epoch": 36.14, "learning_rate": 3.7683910534594957e-06, "loss": 0.0438, "step": 1590 }, { "epoch": 36.36, "learning_rate": 3.6539336571343177e-06, "loss": 0.0424, "step": 1600 }, { "epoch": 36.59, "learning_rate": 3.540851593226261e-06, "loss": 0.0468, "step": 1610 }, { "epoch": 36.82, "learning_rate": 3.429169369110422e-06, "loss": 0.0413, "step": 1620 }, { "epoch": 37.05, "learning_rate": 3.3189111887857773e-06, "loss": 0.0479, "step": 1630 }, { "epoch": 37.27, "learning_rate": 3.2101009476296306e-06, "loss": 0.0402, "step": 1640 }, { "epoch": 37.5, "learning_rate": 3.1027622272189572e-06, "loss": 0.0428, "step": 1650 }, { "epoch": 37.73, "learning_rate": 2.996918290219769e-06, "loss": 0.0394, "step": 1660 }, { "epoch": 37.95, "learning_rate": 2.8925920753455625e-06, "loss": 0.0359, "step": 1670 }, { "epoch": 38.18, "learning_rate": 2.7898061923860153e-06, "loss": 0.0404, "step": 1680 }, { "epoch": 38.41, "learning_rate": 2.688582917306938e-06, "loss": 0.0439, "step": 1690 }, { "epoch": 38.64, "learning_rate": 2.5889441874225676e-06, "loss": 0.0396, "step": 1700 }, { "epoch": 38.86, "learning_rate": 2.4909115966412957e-06, "loss": 0.0462, "step": 1710 }, { "epoch": 39.09, "learning_rate": 2.3945063907857647e-06, "loss": 0.0466, "step": 1720 }, { "epoch": 39.32, "learning_rate": 2.299749462988451e-06, "loss": 0.031, "step": 1730 }, { "epoch": 39.55, "learning_rate": 2.2066613491636568e-06, "loss": 0.0409, "step": 1740 }, { "epoch": 39.77, "learning_rate": 2.115262223556912e-06, "loss": 0.0449, "step": 1750 }, { "epoch": 40.0, "learning_rate": 2.025571894372794e-06, "loss": 0.0373, "step": 1760 }, { "epoch": 40.23, "learning_rate": 1.9376097994820286e-06, "loss": 0.0422, "step": 1770 }, { "epoch": 40.45, "learning_rate": 1.8513950022088966e-06, "loss": 0.0508, "step": 1780 }, { "epoch": 40.68, "learning_rate": 1.7669461871997817e-06, "loss": 0.0424, "step": 1790 }, { "epoch": 40.91, "learning_rate": 1.6842816563737875e-06, "loss": 0.0403, "step": 1800 }, { "epoch": 41.14, "learning_rate": 1.603419324956328e-06, "loss": 0.0419, "step": 1810 }, { "epoch": 41.36, "learning_rate": 1.5243767175964818e-06, "loss": 0.0334, "step": 1820 }, { "epoch": 41.59, "learning_rate": 1.4471709645690336e-06, "loss": 0.0304, "step": 1830 }, { "epoch": 41.82, "learning_rate": 1.3718187980619557e-06, "loss": 0.0413, "step": 1840 }, { "epoch": 42.05, "learning_rate": 1.298336548550172e-06, "loss": 0.0423, "step": 1850 }, { "epoch": 42.27, "learning_rate": 1.226740141256395e-06, "loss": 0.0378, "step": 1860 }, { "epoch": 42.5, "learning_rate": 1.1570450926997657e-06, "loss": 0.0316, "step": 1870 }, { "epoch": 42.73, "learning_rate": 1.0892665073330932e-06, "loss": 0.0366, "step": 1880 }, { "epoch": 42.95, "learning_rate": 1.023419074269384e-06, "loss": 0.0335, "step": 1890 }, { "epoch": 43.18, "learning_rate": 9.595170640983786e-07, "loss": 0.0352, "step": 1900 }, { "epoch": 43.41, "learning_rate": 8.975743257938186e-07, "loss": 0.0399, "step": 1910 }, { "epoch": 43.64, "learning_rate": 8.37604283712048e-07, "loss": 0.0344, "step": 1920 }, { "epoch": 43.86, "learning_rate": 7.796199346826727e-07, "loss": 0.04, "step": 1930 }, { "epoch": 44.09, "learning_rate": 7.236338451918634e-07, "loss": 0.0355, "step": 1940 }, { "epoch": 44.32, "learning_rate": 6.696581486589071e-07, "loss": 0.0363, "step": 1950 }, { "epoch": 44.55, "learning_rate": 6.177045428066397e-07, "loss": 0.0266, "step": 1960 }, { "epoch": 44.77, "learning_rate": 5.677842871262895e-07, "loss": 0.0303, "step": 1970 }, { "epoch": 45.0, "learning_rate": 5.199082004372958e-07, "loss": 0.0318, "step": 1980 }, { "epoch": 45.23, "learning_rate": 4.7408665854263067e-07, "loss": 0.0297, "step": 1990 }, { "epoch": 45.45, "learning_rate": 4.3032959198013646e-07, "loss": 0.0346, "step": 2000 }, { "epoch": 45.68, "learning_rate": 3.8864648387036074e-07, "loss": 0.0359, "step": 2010 }, { "epoch": 45.91, "learning_rate": 3.490463678613487e-07, "loss": 0.0348, "step": 2020 }, { "epoch": 46.14, "learning_rate": 3.1153782617086126e-07, "loss": 0.0387, "step": 2030 }, { "epoch": 46.36, "learning_rate": 2.761289877264139e-07, "loss": 0.0304, "step": 2040 }, { "epoch": 46.59, "learning_rate": 2.4282752640355846e-07, "loss": 0.0337, "step": 2050 }, { "epoch": 46.82, "learning_rate": 2.1164065936278732e-07, "loss": 0.0432, "step": 2060 }, { "epoch": 47.05, "learning_rate": 1.8257514548541632e-07, "loss": 0.0298, "step": 2070 }, { "epoch": 47.27, "learning_rate": 1.5563728390878496e-07, "loss": 0.0322, "step": 2080 }, { "epoch": 47.5, "learning_rate": 1.30832912661093e-07, "loss": 0.0305, "step": 2090 }, { "epoch": 47.73, "learning_rate": 1.0816740739617471e-07, "loss": 0.0333, "step": 2100 }, { "epoch": 47.95, "learning_rate": 8.764568022847198e-08, "loss": 0.0293, "step": 2110 }, { "epoch": 48.18, "learning_rate": 6.92721786684769e-08, "loss": 0.0428, "step": 2120 }, { "epoch": 48.41, "learning_rate": 5.305088465885067e-08, "loss": 0.0363, "step": 2130 }, { "epoch": 48.64, "learning_rate": 3.898531371145597e-08, "loss": 0.0329, "step": 2140 }, { "epoch": 48.86, "learning_rate": 2.7078514145459924e-08, "loss": 0.0308, "step": 2150 }, { "epoch": 49.09, "learning_rate": 1.7333066426706845e-08, "loss": 0.0318, "step": 2160 }, { "epoch": 49.32, "learning_rate": 9.75108260846569e-09, "loss": 0.03, "step": 2170 }, { "epoch": 49.55, "learning_rate": 4.334205873705033e-09, "loss": 0.0302, "step": 2180 }, { "epoch": 49.77, "learning_rate": 1.0836101789768462e-09, "loss": 0.0264, "step": 2190 }, { "epoch": 50.0, "learning_rate": 0.0, "loss": 0.0382, "step": 2200 }, { "epoch": 50.0, "step": 2200, "total_flos": 3.745171436910674e+17, "train_loss": 0.13681490471417254, "train_runtime": 4969.2412, "train_samples_per_second": 13.865, "train_steps_per_second": 0.443 } ], "max_steps": 2200, "num_train_epochs": 50, "total_flos": 3.745171436910674e+17, "trial_name": null, "trial_params": null }