{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.398950131233596, "eval_steps": 200000, "global_step": 160000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.4997000599880024e-06, "loss": 8.6813, "step": 200 }, { "epoch": 0.02, "learning_rate": 2.999400119976005e-06, "loss": 8.0951, "step": 400 }, { "epoch": 0.03, "learning_rate": 4.499100179964007e-06, "loss": 7.7394, "step": 600 }, { "epoch": 0.04, "learning_rate": 5.99880023995201e-06, "loss": 7.4477, "step": 800 }, { "epoch": 0.05, "learning_rate": 7.4985002999400115e-06, "loss": 7.2516, "step": 1000 }, { "epoch": 0.06, "learning_rate": 8.998200359928014e-06, "loss": 7.1331, "step": 1200 }, { "epoch": 0.07, "learning_rate": 1.0497900419916016e-05, "loss": 7.0447, "step": 1400 }, { "epoch": 0.08, "learning_rate": 1.199760047990402e-05, "loss": 6.9683, "step": 1600 }, { "epoch": 0.09, "learning_rate": 1.3497300539892021e-05, "loss": 6.9037, "step": 1800 }, { "epoch": 0.1, "learning_rate": 1.4997000599880023e-05, "loss": 6.8472, "step": 2000 }, { "epoch": 0.12, "learning_rate": 1.6496700659868028e-05, "loss": 6.7907, "step": 2200 }, { "epoch": 0.13, "learning_rate": 1.799640071985603e-05, "loss": 6.7407, "step": 2400 }, { "epoch": 0.14, "learning_rate": 1.9496100779844032e-05, "loss": 6.7036, "step": 2600 }, { "epoch": 0.15, "learning_rate": 2.0995800839832032e-05, "loss": 6.6485, "step": 2800 }, { "epoch": 0.16, "learning_rate": 2.249550089982004e-05, "loss": 6.6153, "step": 3000 }, { "epoch": 0.17, "learning_rate": 2.399520095980804e-05, "loss": 6.5826, "step": 3200 }, { "epoch": 0.18, "learning_rate": 2.5494901019796042e-05, "loss": 6.553, "step": 3400 }, { "epoch": 0.19, "learning_rate": 2.6994601079784043e-05, "loss": 6.5222, "step": 3600 }, { "epoch": 0.2, "learning_rate": 2.8494301139772046e-05, "loss": 6.4979, "step": 3800 }, { "epoch": 0.21, "learning_rate": 2.9994001199760046e-05, "loss": 6.4695, "step": 4000 }, { "epoch": 0.22, "learning_rate": 3.1493701259748056e-05, "loss": 6.4505, "step": 4200 }, { "epoch": 0.23, "learning_rate": 3.2993401319736057e-05, "loss": 6.4254, "step": 4400 }, { "epoch": 0.24, "learning_rate": 3.449310137972406e-05, "loss": 6.412, "step": 4600 }, { "epoch": 0.25, "learning_rate": 3.599280143971206e-05, "loss": 6.3885, "step": 4800 }, { "epoch": 0.26, "learning_rate": 3.7492501499700064e-05, "loss": 6.3815, "step": 5000 }, { "epoch": 0.27, "learning_rate": 3.8992201559688064e-05, "loss": 6.3623, "step": 5200 }, { "epoch": 0.28, "learning_rate": 4.0491901619676064e-05, "loss": 6.3464, "step": 5400 }, { "epoch": 0.29, "learning_rate": 4.1991601679664064e-05, "loss": 6.3281, "step": 5600 }, { "epoch": 0.3, "learning_rate": 4.349130173965207e-05, "loss": 6.3324, "step": 5800 }, { "epoch": 0.31, "learning_rate": 4.499100179964008e-05, "loss": 6.3128, "step": 6000 }, { "epoch": 0.33, "learning_rate": 4.649070185962808e-05, "loss": 6.3033, "step": 6200 }, { "epoch": 0.34, "learning_rate": 4.799040191961608e-05, "loss": 6.3015, "step": 6400 }, { "epoch": 0.35, "learning_rate": 4.949010197960408e-05, "loss": 6.2881, "step": 6600 }, { "epoch": 0.36, "learning_rate": 4.996409765438009e-05, "loss": 6.2728, "step": 6800 }, { "epoch": 0.37, "learning_rate": 4.990970016101658e-05, "loss": 6.2617, "step": 7000 }, { "epoch": 0.38, "learning_rate": 4.9855302667653076e-05, "loss": 6.2561, "step": 7200 }, { "epoch": 0.39, "learning_rate": 4.980090517428957e-05, "loss": 6.2531, "step": 7400 }, { "epoch": 0.4, "learning_rate": 4.974650768092606e-05, "loss": 6.2222, "step": 7600 }, { "epoch": 0.41, "learning_rate": 4.9692110187562557e-05, "loss": 6.2062, "step": 7800 }, { "epoch": 0.42, "learning_rate": 4.963771269419905e-05, "loss": 6.1925, "step": 8000 }, { "epoch": 0.43, "learning_rate": 4.958331520083555e-05, "loss": 6.1704, "step": 8200 }, { "epoch": 0.44, "learning_rate": 4.952918969493886e-05, "loss": 6.1479, "step": 8400 }, { "epoch": 0.45, "learning_rate": 4.947479220157536e-05, "loss": 6.1375, "step": 8600 }, { "epoch": 0.46, "learning_rate": 4.9420394708211846e-05, "loss": 6.1155, "step": 8800 }, { "epoch": 0.47, "learning_rate": 4.936599721484834e-05, "loss": 6.0921, "step": 9000 }, { "epoch": 0.48, "learning_rate": 4.931159972148484e-05, "loss": 6.0671, "step": 9200 }, { "epoch": 0.49, "learning_rate": 4.9257202228121326e-05, "loss": 6.0437, "step": 9400 }, { "epoch": 0.5, "learning_rate": 4.920280473475782e-05, "loss": 6.0032, "step": 9600 }, { "epoch": 0.51, "learning_rate": 4.914840724139432e-05, "loss": 5.9209, "step": 9800 }, { "epoch": 0.52, "learning_rate": 4.9094009748030814e-05, "loss": 5.8316, "step": 10000 }, { "epoch": 0.54, "learning_rate": 4.90396122546673e-05, "loss": 5.7568, "step": 10200 }, { "epoch": 0.55, "learning_rate": 4.89852147613038e-05, "loss": 5.6574, "step": 10400 }, { "epoch": 0.56, "learning_rate": 4.893108925540711e-05, "loss": 5.5117, "step": 10600 }, { "epoch": 0.57, "learning_rate": 4.887669176204361e-05, "loss": 5.3986, "step": 10800 }, { "epoch": 0.58, "learning_rate": 4.88222942686801e-05, "loss": 5.2336, "step": 11000 }, { "epoch": 0.59, "learning_rate": 4.87678967753166e-05, "loss": 5.0519, "step": 11200 }, { "epoch": 0.6, "learning_rate": 4.871349928195309e-05, "loss": 4.9005, "step": 11400 }, { "epoch": 0.61, "learning_rate": 4.8659101788589584e-05, "loss": 4.769, "step": 11600 }, { "epoch": 0.62, "learning_rate": 4.860470429522608e-05, "loss": 4.6484, "step": 11800 }, { "epoch": 0.63, "learning_rate": 4.855030680186257e-05, "loss": 4.5375, "step": 12000 }, { "epoch": 0.64, "learning_rate": 4.8495909308499065e-05, "loss": 4.4369, "step": 12200 }, { "epoch": 0.65, "learning_rate": 4.844151181513556e-05, "loss": 4.3437, "step": 12400 }, { "epoch": 0.66, "learning_rate": 4.8387114321772056e-05, "loss": 4.267, "step": 12600 }, { "epoch": 0.67, "learning_rate": 4.8332716828408545e-05, "loss": 4.161, "step": 12800 }, { "epoch": 0.68, "learning_rate": 4.827831933504504e-05, "loss": 4.0868, "step": 13000 }, { "epoch": 0.69, "learning_rate": 4.822392184168154e-05, "loss": 4.0029, "step": 13200 }, { "epoch": 0.7, "learning_rate": 4.8169524348318026e-05, "loss": 3.9486, "step": 13400 }, { "epoch": 0.71, "learning_rate": 4.811512685495453e-05, "loss": 3.8743, "step": 13600 }, { "epoch": 0.72, "learning_rate": 4.8060729361591025e-05, "loss": 3.8206, "step": 13800 }, { "epoch": 0.73, "learning_rate": 4.8006331868227514e-05, "loss": 3.7676, "step": 14000 }, { "epoch": 0.75, "learning_rate": 4.795193437486401e-05, "loss": 3.7225, "step": 14200 }, { "epoch": 0.76, "learning_rate": 4.7897536881500505e-05, "loss": 3.6837, "step": 14400 }, { "epoch": 0.77, "learning_rate": 4.784341137560381e-05, "loss": 3.6421, "step": 14600 }, { "epoch": 0.78, "learning_rate": 4.778901388224031e-05, "loss": 3.6167, "step": 14800 }, { "epoch": 0.79, "learning_rate": 4.77346163888768e-05, "loss": 3.5802, "step": 15000 }, { "epoch": 0.8, "learning_rate": 4.76802188955133e-05, "loss": 3.5469, "step": 15200 }, { "epoch": 0.81, "learning_rate": 4.762582140214979e-05, "loss": 3.5208, "step": 15400 }, { "epoch": 0.82, "learning_rate": 4.7571423908786284e-05, "loss": 3.494, "step": 15600 }, { "epoch": 0.83, "learning_rate": 4.7517298402889596e-05, "loss": 3.4647, "step": 15800 }, { "epoch": 0.84, "learning_rate": 4.746290090952609e-05, "loss": 3.4417, "step": 16000 }, { "epoch": 0.85, "learning_rate": 4.740850341616259e-05, "loss": 3.4267, "step": 16200 }, { "epoch": 0.86, "learning_rate": 4.735410592279908e-05, "loss": 3.3992, "step": 16400 }, { "epoch": 0.87, "learning_rate": 4.729970842943557e-05, "loss": 3.3831, "step": 16600 }, { "epoch": 0.88, "learning_rate": 4.724531093607207e-05, "loss": 3.3647, "step": 16800 }, { "epoch": 0.89, "learning_rate": 4.7190913442708564e-05, "loss": 3.3377, "step": 17000 }, { "epoch": 0.9, "learning_rate": 4.7136515949345054e-05, "loss": 3.3197, "step": 17200 }, { "epoch": 0.91, "learning_rate": 4.708211845598155e-05, "loss": 3.2985, "step": 17400 }, { "epoch": 0.92, "learning_rate": 4.7027720962618045e-05, "loss": 3.287, "step": 17600 }, { "epoch": 0.93, "learning_rate": 4.697332346925454e-05, "loss": 3.2748, "step": 17800 }, { "epoch": 0.94, "learning_rate": 4.691892597589103e-05, "loss": 3.2557, "step": 18000 }, { "epoch": 0.96, "learning_rate": 4.6864528482527526e-05, "loss": 3.2419, "step": 18200 }, { "epoch": 0.97, "learning_rate": 4.681013098916402e-05, "loss": 3.2286, "step": 18400 }, { "epoch": 0.98, "learning_rate": 4.675573349580051e-05, "loss": 3.2102, "step": 18600 }, { "epoch": 0.99, "learning_rate": 4.670133600243701e-05, "loss": 3.1987, "step": 18800 }, { "epoch": 1.0, "learning_rate": 4.664693850907351e-05, "loss": 3.1854, "step": 19000 }, { "epoch": 1.01, "learning_rate": 4.659254101571e-05, "loss": 3.1682, "step": 19200 }, { "epoch": 1.02, "learning_rate": 4.6538143522346494e-05, "loss": 3.1562, "step": 19400 }, { "epoch": 1.03, "learning_rate": 4.648374602898299e-05, "loss": 3.1366, "step": 19600 }, { "epoch": 1.04, "learning_rate": 4.6429620523086296e-05, "loss": 3.1273, "step": 19800 }, { "epoch": 1.05, "learning_rate": 4.637522302972279e-05, "loss": 3.1139, "step": 20000 }, { "epoch": 1.06, "learning_rate": 4.632082553635929e-05, "loss": 3.1045, "step": 20200 }, { "epoch": 1.07, "learning_rate": 4.6266428042995777e-05, "loss": 3.0962, "step": 20400 }, { "epoch": 1.08, "learning_rate": 4.6212302537099096e-05, "loss": 3.0913, "step": 20600 }, { "epoch": 1.09, "learning_rate": 4.615790504373559e-05, "loss": 3.0805, "step": 20800 }, { "epoch": 1.1, "learning_rate": 4.610350755037208e-05, "loss": 3.0662, "step": 21000 }, { "epoch": 1.11, "learning_rate": 4.604911005700858e-05, "loss": 3.0485, "step": 21200 }, { "epoch": 1.12, "learning_rate": 4.599471256364507e-05, "loss": 3.0438, "step": 21400 }, { "epoch": 1.13, "learning_rate": 4.594031507028156e-05, "loss": 3.0368, "step": 21600 }, { "epoch": 1.14, "learning_rate": 4.588591757691806e-05, "loss": 3.0248, "step": 21800 }, { "epoch": 1.15, "learning_rate": 4.583152008355455e-05, "loss": 3.0124, "step": 22000 }, { "epoch": 1.17, "learning_rate": 4.577712259019105e-05, "loss": 3.0025, "step": 22200 }, { "epoch": 1.18, "learning_rate": 4.572272509682754e-05, "loss": 2.9902, "step": 22400 }, { "epoch": 1.19, "learning_rate": 4.5668327603464034e-05, "loss": 2.9838, "step": 22600 }, { "epoch": 1.2, "learning_rate": 4.561393011010053e-05, "loss": 2.9701, "step": 22800 }, { "epoch": 1.21, "learning_rate": 4.555953261673702e-05, "loss": 2.9594, "step": 23000 }, { "epoch": 1.22, "learning_rate": 4.5505135123373515e-05, "loss": 2.9549, "step": 23200 }, { "epoch": 1.23, "learning_rate": 4.545073763001001e-05, "loss": 2.9462, "step": 23400 }, { "epoch": 1.24, "learning_rate": 4.5396340136646506e-05, "loss": 2.9471, "step": 23600 }, { "epoch": 1.25, "learning_rate": 4.5341942643282996e-05, "loss": 2.9271, "step": 23800 }, { "epoch": 1.26, "learning_rate": 4.5287817137386315e-05, "loss": 2.9241, "step": 24000 }, { "epoch": 1.27, "learning_rate": 4.5233419644022804e-05, "loss": 2.9156, "step": 24200 }, { "epoch": 1.28, "learning_rate": 4.51790221506593e-05, "loss": 2.9079, "step": 24400 }, { "epoch": 1.29, "learning_rate": 4.5124624657295796e-05, "loss": 2.898, "step": 24600 }, { "epoch": 1.3, "learning_rate": 4.507022716393229e-05, "loss": 2.8902, "step": 24800 }, { "epoch": 1.31, "learning_rate": 4.501582967056878e-05, "loss": 2.8921, "step": 25000 }, { "epoch": 1.32, "learning_rate": 4.4961432177205276e-05, "loss": 2.8749, "step": 25200 }, { "epoch": 1.33, "learning_rate": 4.490703468384177e-05, "loss": 2.8724, "step": 25400 }, { "epoch": 1.34, "learning_rate": 4.485263719047826e-05, "loss": 2.8649, "step": 25600 }, { "epoch": 1.35, "learning_rate": 4.479823969711476e-05, "loss": 2.8586, "step": 25800 }, { "epoch": 1.36, "learning_rate": 4.474384220375125e-05, "loss": 2.8421, "step": 26000 }, { "epoch": 1.38, "learning_rate": 4.468944471038775e-05, "loss": 2.843, "step": 26200 }, { "epoch": 1.39, "learning_rate": 4.463504721702424e-05, "loss": 2.8397, "step": 26400 }, { "epoch": 1.4, "learning_rate": 4.4580649723660734e-05, "loss": 2.8311, "step": 26600 }, { "epoch": 1.41, "learning_rate": 4.452625223029723e-05, "loss": 2.8143, "step": 26800 }, { "epoch": 1.42, "learning_rate": 4.4471854736933725e-05, "loss": 2.8106, "step": 27000 }, { "epoch": 1.43, "learning_rate": 4.441745724357022e-05, "loss": 2.8199, "step": 27200 }, { "epoch": 1.44, "learning_rate": 4.436305975020672e-05, "loss": 2.8039, "step": 27400 }, { "epoch": 1.45, "learning_rate": 4.4308662256843206e-05, "loss": 2.7975, "step": 27600 }, { "epoch": 1.46, "learning_rate": 4.425453675094652e-05, "loss": 2.7903, "step": 27800 }, { "epoch": 1.47, "learning_rate": 4.4200139257583015e-05, "loss": 2.7907, "step": 28000 }, { "epoch": 1.48, "learning_rate": 4.4145741764219504e-05, "loss": 2.7836, "step": 28200 }, { "epoch": 1.49, "learning_rate": 4.4091344270856e-05, "loss": 2.7825, "step": 28400 }, { "epoch": 1.5, "learning_rate": 4.4036946777492495e-05, "loss": 2.765, "step": 28600 }, { "epoch": 1.51, "learning_rate": 4.3982549284128984e-05, "loss": 2.773, "step": 28800 }, { "epoch": 1.52, "learning_rate": 4.392815179076548e-05, "loss": 2.7608, "step": 29000 }, { "epoch": 1.53, "learning_rate": 4.3873754297401976e-05, "loss": 2.76, "step": 29200 }, { "epoch": 1.54, "learning_rate": 4.381935680403847e-05, "loss": 2.7613, "step": 29400 }, { "epoch": 1.55, "learning_rate": 4.376495931067497e-05, "loss": 2.7319, "step": 29600 }, { "epoch": 1.56, "learning_rate": 4.3710561817311464e-05, "loss": 2.7377, "step": 29800 }, { "epoch": 1.57, "learning_rate": 4.365616432394796e-05, "loss": 2.736, "step": 30000 }, { "epoch": 1.59, "learning_rate": 4.360176683058445e-05, "loss": 2.7348, "step": 30200 }, { "epoch": 1.6, "learning_rate": 4.3547369337220944e-05, "loss": 2.7285, "step": 30400 }, { "epoch": 1.61, "learning_rate": 4.349324383132426e-05, "loss": 2.7299, "step": 30600 }, { "epoch": 1.62, "learning_rate": 4.3438846337960746e-05, "loss": 2.7208, "step": 30800 }, { "epoch": 1.63, "learning_rate": 4.338444884459724e-05, "loss": 2.7115, "step": 31000 }, { "epoch": 1.64, "learning_rate": 4.333005135123374e-05, "loss": 2.7033, "step": 31200 }, { "epoch": 1.65, "learning_rate": 4.327565385787023e-05, "loss": 2.6996, "step": 31400 }, { "epoch": 1.66, "learning_rate": 4.322125636450672e-05, "loss": 2.6925, "step": 31600 }, { "epoch": 1.67, "learning_rate": 4.316685887114322e-05, "loss": 2.6896, "step": 31800 }, { "epoch": 1.68, "learning_rate": 4.3112461377779714e-05, "loss": 2.6846, "step": 32000 }, { "epoch": 1.69, "learning_rate": 4.30580638844162e-05, "loss": 2.6848, "step": 32200 }, { "epoch": 1.7, "learning_rate": 4.3003666391052706e-05, "loss": 2.6764, "step": 32400 }, { "epoch": 1.71, "learning_rate": 4.29492688976892e-05, "loss": 2.6846, "step": 32600 }, { "epoch": 1.72, "learning_rate": 4.289487140432569e-05, "loss": 2.6742, "step": 32800 }, { "epoch": 1.73, "learning_rate": 4.2840745898429003e-05, "loss": 2.6667, "step": 33000 }, { "epoch": 1.74, "learning_rate": 4.27863484050655e-05, "loss": 2.6632, "step": 33200 }, { "epoch": 1.75, "learning_rate": 4.273195091170199e-05, "loss": 2.6593, "step": 33400 }, { "epoch": 1.76, "learning_rate": 4.2677553418338484e-05, "loss": 2.6568, "step": 33600 }, { "epoch": 1.77, "learning_rate": 4.262315592497498e-05, "loss": 2.6514, "step": 33800 }, { "epoch": 1.78, "learning_rate": 4.256875843161147e-05, "loss": 2.648, "step": 34000 }, { "epoch": 1.8, "learning_rate": 4.2514360938247965e-05, "loss": 2.6362, "step": 34200 }, { "epoch": 1.81, "learning_rate": 4.245996344488446e-05, "loss": 2.6468, "step": 34400 }, { "epoch": 1.82, "learning_rate": 4.240556595152096e-05, "loss": 2.6268, "step": 34600 }, { "epoch": 1.83, "learning_rate": 4.2351168458157446e-05, "loss": 2.622, "step": 34800 }, { "epoch": 1.84, "learning_rate": 4.2297042952260765e-05, "loss": 2.6178, "step": 35000 }, { "epoch": 1.85, "learning_rate": 4.2242645458897254e-05, "loss": 2.6212, "step": 35200 }, { "epoch": 1.86, "learning_rate": 4.218824796553375e-05, "loss": 2.6161, "step": 35400 }, { "epoch": 1.87, "learning_rate": 4.2133850472170246e-05, "loss": 2.6139, "step": 35600 }, { "epoch": 1.88, "learning_rate": 4.2079452978806735e-05, "loss": 2.6165, "step": 35800 }, { "epoch": 1.89, "learning_rate": 4.202505548544323e-05, "loss": 2.6044, "step": 36000 }, { "epoch": 1.9, "learning_rate": 4.1970657992079727e-05, "loss": 2.6075, "step": 36200 }, { "epoch": 1.91, "learning_rate": 4.191626049871622e-05, "loss": 2.604, "step": 36400 }, { "epoch": 1.92, "learning_rate": 4.186186300535271e-05, "loss": 2.6026, "step": 36600 }, { "epoch": 1.93, "learning_rate": 4.180746551198921e-05, "loss": 2.6024, "step": 36800 }, { "epoch": 1.94, "learning_rate": 4.175334000609252e-05, "loss": 2.5888, "step": 37000 }, { "epoch": 1.95, "learning_rate": 4.1698942512729016e-05, "loss": 2.5914, "step": 37200 }, { "epoch": 1.96, "learning_rate": 4.164454501936551e-05, "loss": 2.592, "step": 37400 }, { "epoch": 1.97, "learning_rate": 4.159014752600201e-05, "loss": 2.5822, "step": 37600 }, { "epoch": 1.98, "learning_rate": 4.1535750032638496e-05, "loss": 2.5845, "step": 37800 }, { "epoch": 1.99, "learning_rate": 4.148135253927499e-05, "loss": 2.5731, "step": 38000 }, { "epoch": 2.01, "learning_rate": 4.142695504591149e-05, "loss": 2.5695, "step": 38200 }, { "epoch": 2.02, "learning_rate": 4.137255755254798e-05, "loss": 2.5682, "step": 38400 }, { "epoch": 2.03, "learning_rate": 4.131816005918447e-05, "loss": 2.5654, "step": 38600 }, { "epoch": 2.04, "learning_rate": 4.126376256582097e-05, "loss": 2.5641, "step": 38800 }, { "epoch": 2.05, "learning_rate": 4.1209365072457465e-05, "loss": 2.554, "step": 39000 }, { "epoch": 2.06, "learning_rate": 4.115523956656078e-05, "loss": 2.5569, "step": 39200 }, { "epoch": 2.07, "learning_rate": 4.110084207319727e-05, "loss": 2.5503, "step": 39400 }, { "epoch": 2.08, "learning_rate": 4.104644457983376e-05, "loss": 2.5554, "step": 39600 }, { "epoch": 2.09, "learning_rate": 4.099204708647026e-05, "loss": 2.552, "step": 39800 }, { "epoch": 2.1, "learning_rate": 4.0937649593106754e-05, "loss": 2.5564, "step": 40000 }, { "epoch": 2.11, "learning_rate": 4.088325209974325e-05, "loss": 2.5373, "step": 40200 }, { "epoch": 2.12, "learning_rate": 4.082885460637974e-05, "loss": 2.5377, "step": 40400 }, { "epoch": 2.13, "learning_rate": 4.0774457113016235e-05, "loss": 2.5404, "step": 40600 }, { "epoch": 2.14, "learning_rate": 4.072005961965273e-05, "loss": 2.5369, "step": 40800 }, { "epoch": 2.15, "learning_rate": 4.066566212628922e-05, "loss": 2.5352, "step": 41000 }, { "epoch": 2.16, "learning_rate": 4.0611808607859356e-05, "loss": 2.5284, "step": 41200 }, { "epoch": 2.17, "learning_rate": 4.0557411114495845e-05, "loss": 2.5308, "step": 41400 }, { "epoch": 2.18, "learning_rate": 4.050301362113234e-05, "loss": 2.5202, "step": 41600 }, { "epoch": 2.19, "learning_rate": 4.0448616127768836e-05, "loss": 2.5199, "step": 41800 }, { "epoch": 2.2, "learning_rate": 4.0394218634405325e-05, "loss": 2.5074, "step": 42000 }, { "epoch": 2.22, "learning_rate": 4.033982114104182e-05, "loss": 2.5086, "step": 42200 }, { "epoch": 2.23, "learning_rate": 4.028542364767832e-05, "loss": 2.5125, "step": 42400 }, { "epoch": 2.24, "learning_rate": 4.023102615431481e-05, "loss": 2.5082, "step": 42600 }, { "epoch": 2.25, "learning_rate": 4.01766286609513e-05, "loss": 2.4999, "step": 42800 }, { "epoch": 2.26, "learning_rate": 4.01222311675878e-05, "loss": 2.5073, "step": 43000 }, { "epoch": 2.27, "learning_rate": 4.0067833674224294e-05, "loss": 2.4998, "step": 43200 }, { "epoch": 2.28, "learning_rate": 4.001343618086078e-05, "loss": 2.4994, "step": 43400 }, { "epoch": 2.29, "learning_rate": 3.995903868749728e-05, "loss": 2.4952, "step": 43600 }, { "epoch": 2.3, "learning_rate": 3.9904641194133775e-05, "loss": 2.4914, "step": 43800 }, { "epoch": 2.31, "learning_rate": 3.985024370077027e-05, "loss": 2.4919, "step": 44000 }, { "epoch": 2.32, "learning_rate": 3.9795846207406766e-05, "loss": 2.4884, "step": 44200 }, { "epoch": 2.33, "learning_rate": 3.974144871404326e-05, "loss": 2.4886, "step": 44400 }, { "epoch": 2.34, "learning_rate": 3.968705122067976e-05, "loss": 2.4902, "step": 44600 }, { "epoch": 2.35, "learning_rate": 3.963265372731625e-05, "loss": 2.4784, "step": 44800 }, { "epoch": 2.36, "learning_rate": 3.957825623395274e-05, "loss": 2.479, "step": 45000 }, { "epoch": 2.37, "learning_rate": 3.9524130728056055e-05, "loss": 2.463, "step": 45200 }, { "epoch": 2.38, "learning_rate": 3.9469733234692544e-05, "loss": 2.4778, "step": 45400 }, { "epoch": 2.39, "learning_rate": 3.941533574132904e-05, "loss": 2.4758, "step": 45600 }, { "epoch": 2.4, "learning_rate": 3.9360938247965536e-05, "loss": 2.4627, "step": 45800 }, { "epoch": 2.41, "learning_rate": 3.9306540754602025e-05, "loss": 2.456, "step": 46000 }, { "epoch": 2.43, "learning_rate": 3.925214326123852e-05, "loss": 2.4624, "step": 46200 }, { "epoch": 2.44, "learning_rate": 3.919774576787502e-05, "loss": 2.4643, "step": 46400 }, { "epoch": 2.45, "learning_rate": 3.914334827451151e-05, "loss": 2.466, "step": 46600 }, { "epoch": 2.46, "learning_rate": 3.908895078114801e-05, "loss": 2.4556, "step": 46800 }, { "epoch": 2.47, "learning_rate": 3.9034553287784504e-05, "loss": 2.4566, "step": 47000 }, { "epoch": 2.48, "learning_rate": 3.898042778188781e-05, "loss": 2.4527, "step": 47200 }, { "epoch": 2.49, "learning_rate": 3.8926030288524306e-05, "loss": 2.452, "step": 47400 }, { "epoch": 2.5, "learning_rate": 3.88716327951608e-05, "loss": 2.439, "step": 47600 }, { "epoch": 2.51, "learning_rate": 3.88172353017973e-05, "loss": 2.4507, "step": 47800 }, { "epoch": 2.52, "learning_rate": 3.876283780843379e-05, "loss": 2.4393, "step": 48000 }, { "epoch": 2.53, "learning_rate": 3.870844031507028e-05, "loss": 2.4411, "step": 48200 }, { "epoch": 2.54, "learning_rate": 3.865404282170678e-05, "loss": 2.449, "step": 48400 }, { "epoch": 2.55, "learning_rate": 3.859964532834327e-05, "loss": 2.4413, "step": 48600 }, { "epoch": 2.56, "learning_rate": 3.854524783497976e-05, "loss": 2.4304, "step": 48800 }, { "epoch": 2.57, "learning_rate": 3.849085034161626e-05, "loss": 2.4276, "step": 49000 }, { "epoch": 2.58, "learning_rate": 3.843672483571957e-05, "loss": 2.4336, "step": 49200 }, { "epoch": 2.59, "learning_rate": 3.838232734235607e-05, "loss": 2.4269, "step": 49400 }, { "epoch": 2.6, "learning_rate": 3.8327929848992563e-05, "loss": 2.4291, "step": 49600 }, { "epoch": 2.61, "learning_rate": 3.827353235562905e-05, "loss": 2.4313, "step": 49800 }, { "epoch": 2.62, "learning_rate": 3.821913486226555e-05, "loss": 2.4198, "step": 50000 }, { "epoch": 2.64, "learning_rate": 3.8164737368902044e-05, "loss": 2.4152, "step": 50200 }, { "epoch": 2.65, "learning_rate": 3.811033987553853e-05, "loss": 2.4158, "step": 50400 }, { "epoch": 2.66, "learning_rate": 3.805594238217503e-05, "loss": 2.4107, "step": 50600 }, { "epoch": 2.67, "learning_rate": 3.8001544888811525e-05, "loss": 2.4144, "step": 50800 }, { "epoch": 2.68, "learning_rate": 3.794714739544802e-05, "loss": 2.4076, "step": 51000 }, { "epoch": 2.69, "learning_rate": 3.7893021889551333e-05, "loss": 2.4076, "step": 51200 }, { "epoch": 2.7, "learning_rate": 3.783862439618783e-05, "loss": 2.4016, "step": 51400 }, { "epoch": 2.71, "learning_rate": 3.7784498890291135e-05, "loss": 2.4178, "step": 51600 }, { "epoch": 2.72, "learning_rate": 3.773010139692763e-05, "loss": 2.4059, "step": 51800 }, { "epoch": 2.73, "learning_rate": 3.767570390356413e-05, "loss": 2.3955, "step": 52000 }, { "epoch": 2.74, "learning_rate": 3.7621306410200616e-05, "loss": 2.4031, "step": 52200 }, { "epoch": 2.75, "learning_rate": 3.756690891683711e-05, "loss": 2.4019, "step": 52400 }, { "epoch": 2.76, "learning_rate": 3.751251142347361e-05, "loss": 2.3981, "step": 52600 }, { "epoch": 2.77, "learning_rate": 3.74581139301101e-05, "loss": 2.3988, "step": 52800 }, { "epoch": 2.78, "learning_rate": 3.740371643674659e-05, "loss": 2.3848, "step": 53000 }, { "epoch": 2.79, "learning_rate": 3.734931894338309e-05, "loss": 2.3876, "step": 53200 }, { "epoch": 2.8, "learning_rate": 3.729492145001959e-05, "loss": 2.3849, "step": 53400 }, { "epoch": 2.81, "learning_rate": 3.724052395665608e-05, "loss": 2.3869, "step": 53600 }, { "epoch": 2.82, "learning_rate": 3.7186126463292576e-05, "loss": 2.3827, "step": 53800 }, { "epoch": 2.83, "learning_rate": 3.713172896992907e-05, "loss": 2.379, "step": 54000 }, { "epoch": 2.85, "learning_rate": 3.707733147656556e-05, "loss": 2.3768, "step": 54200 }, { "epoch": 2.86, "learning_rate": 3.7022933983202057e-05, "loss": 2.3795, "step": 54400 }, { "epoch": 2.87, "learning_rate": 3.696853648983855e-05, "loss": 2.3738, "step": 54600 }, { "epoch": 2.88, "learning_rate": 3.691413899647505e-05, "loss": 2.378, "step": 54800 }, { "epoch": 2.89, "learning_rate": 3.685974150311154e-05, "loss": 2.3671, "step": 55000 }, { "epoch": 2.9, "learning_rate": 3.680534400974803e-05, "loss": 2.3694, "step": 55200 }, { "epoch": 2.91, "learning_rate": 3.6751218503851346e-05, "loss": 2.3796, "step": 55400 }, { "epoch": 2.92, "learning_rate": 3.6696821010487835e-05, "loss": 2.3653, "step": 55600 }, { "epoch": 2.93, "learning_rate": 3.664242351712433e-05, "loss": 2.3676, "step": 55800 }, { "epoch": 2.94, "learning_rate": 3.6588026023760826e-05, "loss": 2.3658, "step": 56000 }, { "epoch": 2.95, "learning_rate": 3.653362853039732e-05, "loss": 2.3721, "step": 56200 }, { "epoch": 2.96, "learning_rate": 3.647923103703382e-05, "loss": 2.3668, "step": 56400 }, { "epoch": 2.97, "learning_rate": 3.6424833543670314e-05, "loss": 2.3639, "step": 56600 }, { "epoch": 2.98, "learning_rate": 3.63704360503068e-05, "loss": 2.3628, "step": 56800 }, { "epoch": 2.99, "learning_rate": 3.63160385569433e-05, "loss": 2.3688, "step": 57000 }, { "epoch": 3.0, "learning_rate": 3.6261641063579795e-05, "loss": 2.3577, "step": 57200 }, { "epoch": 3.01, "learning_rate": 3.6207243570216284e-05, "loss": 2.353, "step": 57400 }, { "epoch": 3.02, "learning_rate": 3.615284607685278e-05, "loss": 2.3509, "step": 57600 }, { "epoch": 3.03, "learning_rate": 3.6098448583489275e-05, "loss": 2.3409, "step": 57800 }, { "epoch": 3.04, "learning_rate": 3.604405109012577e-05, "loss": 2.3402, "step": 58000 }, { "epoch": 3.06, "learning_rate": 3.598965359676226e-05, "loss": 2.3542, "step": 58200 }, { "epoch": 3.07, "learning_rate": 3.5935256103398756e-05, "loss": 2.346, "step": 58400 }, { "epoch": 3.08, "learning_rate": 3.588085861003525e-05, "loss": 2.349, "step": 58600 }, { "epoch": 3.09, "learning_rate": 3.582646111667175e-05, "loss": 2.3462, "step": 58800 }, { "epoch": 3.1, "learning_rate": 3.577206362330824e-05, "loss": 2.3458, "step": 59000 }, { "epoch": 3.11, "learning_rate": 3.571766612994473e-05, "loss": 2.3351, "step": 59200 }, { "epoch": 3.12, "learning_rate": 3.5663540624048045e-05, "loss": 2.3336, "step": 59400 }, { "epoch": 3.13, "learning_rate": 3.560914313068454e-05, "loss": 2.3458, "step": 59600 }, { "epoch": 3.14, "learning_rate": 3.555474563732104e-05, "loss": 2.3354, "step": 59800 }, { "epoch": 3.15, "learning_rate": 3.550062013142434e-05, "loss": 2.3382, "step": 60000 }, { "epoch": 3.16, "learning_rate": 3.544622263806084e-05, "loss": 2.3381, "step": 60200 }, { "epoch": 3.17, "learning_rate": 3.5391825144697335e-05, "loss": 2.3287, "step": 60400 }, { "epoch": 3.18, "learning_rate": 3.5337427651333824e-05, "loss": 2.3267, "step": 60600 }, { "epoch": 3.19, "learning_rate": 3.528303015797032e-05, "loss": 2.3295, "step": 60800 }, { "epoch": 3.2, "learning_rate": 3.5228632664606815e-05, "loss": 2.3224, "step": 61000 }, { "epoch": 3.21, "learning_rate": 3.517423517124331e-05, "loss": 2.3151, "step": 61200 }, { "epoch": 3.22, "learning_rate": 3.511983767787981e-05, "loss": 2.3209, "step": 61400 }, { "epoch": 3.23, "learning_rate": 3.50654401845163e-05, "loss": 2.3215, "step": 61600 }, { "epoch": 3.24, "learning_rate": 3.50110426911528e-05, "loss": 2.3163, "step": 61800 }, { "epoch": 3.25, "learning_rate": 3.495664519778929e-05, "loss": 2.3137, "step": 62000 }, { "epoch": 3.27, "learning_rate": 3.4902247704425784e-05, "loss": 2.3109, "step": 62200 }, { "epoch": 3.28, "learning_rate": 3.484785021106228e-05, "loss": 2.3158, "step": 62400 }, { "epoch": 3.29, "learning_rate": 3.4793724705165585e-05, "loss": 2.3133, "step": 62600 }, { "epoch": 3.3, "learning_rate": 3.473932721180208e-05, "loss": 2.3127, "step": 62800 }, { "epoch": 3.31, "learning_rate": 3.468492971843858e-05, "loss": 2.3097, "step": 63000 }, { "epoch": 3.32, "learning_rate": 3.4630532225075066e-05, "loss": 2.3132, "step": 63200 }, { "epoch": 3.33, "learning_rate": 3.457613473171156e-05, "loss": 2.3049, "step": 63400 }, { "epoch": 3.34, "learning_rate": 3.452173723834806e-05, "loss": 2.3067, "step": 63600 }, { "epoch": 3.35, "learning_rate": 3.4467339744984553e-05, "loss": 2.3126, "step": 63800 }, { "epoch": 3.36, "learning_rate": 3.441294225162104e-05, "loss": 2.2959, "step": 64000 }, { "epoch": 3.37, "learning_rate": 3.4358544758257545e-05, "loss": 2.3025, "step": 64200 }, { "epoch": 3.38, "learning_rate": 3.430414726489404e-05, "loss": 2.2955, "step": 64400 }, { "epoch": 3.39, "learning_rate": 3.424974977153053e-05, "loss": 2.3055, "step": 64600 }, { "epoch": 3.4, "learning_rate": 3.4195352278167026e-05, "loss": 2.2957, "step": 64800 }, { "epoch": 3.41, "learning_rate": 3.414122677227033e-05, "loss": 2.2927, "step": 65000 }, { "epoch": 3.42, "learning_rate": 3.408682927890683e-05, "loss": 2.2961, "step": 65200 }, { "epoch": 3.43, "learning_rate": 3.4032431785543323e-05, "loss": 2.2937, "step": 65400 }, { "epoch": 3.44, "learning_rate": 3.397803429217982e-05, "loss": 2.2915, "step": 65600 }, { "epoch": 3.45, "learning_rate": 3.392363679881631e-05, "loss": 2.2915, "step": 65800 }, { "epoch": 3.46, "learning_rate": 3.386951129291963e-05, "loss": 2.29, "step": 66000 }, { "epoch": 3.48, "learning_rate": 3.381511379955612e-05, "loss": 2.2855, "step": 66200 }, { "epoch": 3.49, "learning_rate": 3.376071630619261e-05, "loss": 2.2918, "step": 66400 }, { "epoch": 3.5, "learning_rate": 3.370631881282911e-05, "loss": 2.2802, "step": 66600 }, { "epoch": 3.51, "learning_rate": 3.3651921319465604e-05, "loss": 2.2856, "step": 66800 }, { "epoch": 3.52, "learning_rate": 3.359752382610209e-05, "loss": 2.2877, "step": 67000 }, { "epoch": 3.53, "learning_rate": 3.354312633273859e-05, "loss": 2.2856, "step": 67200 }, { "epoch": 3.54, "learning_rate": 3.3488728839375085e-05, "loss": 2.2875, "step": 67400 }, { "epoch": 3.55, "learning_rate": 3.3434331346011574e-05, "loss": 2.2791, "step": 67600 }, { "epoch": 3.56, "learning_rate": 3.337993385264807e-05, "loss": 2.2777, "step": 67800 }, { "epoch": 3.57, "learning_rate": 3.3325536359284566e-05, "loss": 2.271, "step": 68000 }, { "epoch": 3.58, "learning_rate": 3.327113886592106e-05, "loss": 2.2785, "step": 68200 }, { "epoch": 3.59, "learning_rate": 3.321674137255755e-05, "loss": 2.2684, "step": 68400 }, { "epoch": 3.6, "learning_rate": 3.3162343879194047e-05, "loss": 2.2863, "step": 68600 }, { "epoch": 3.61, "learning_rate": 3.310794638583054e-05, "loss": 2.2815, "step": 68800 }, { "epoch": 3.62, "learning_rate": 3.305354889246703e-05, "loss": 2.2761, "step": 69000 }, { "epoch": 3.63, "learning_rate": 3.299915139910353e-05, "loss": 2.2672, "step": 69200 }, { "epoch": 3.64, "learning_rate": 3.294475390574003e-05, "loss": 2.2587, "step": 69400 }, { "epoch": 3.65, "learning_rate": 3.289035641237652e-05, "loss": 2.258, "step": 69600 }, { "epoch": 3.66, "learning_rate": 3.2835958919013015e-05, "loss": 2.2662, "step": 69800 }, { "epoch": 3.67, "learning_rate": 3.278156142564951e-05, "loss": 2.2605, "step": 70000 }, { "epoch": 3.69, "learning_rate": 3.2727163932286006e-05, "loss": 2.2608, "step": 70200 }, { "epoch": 3.7, "learning_rate": 3.267303842638931e-05, "loss": 2.2549, "step": 70400 }, { "epoch": 3.71, "learning_rate": 3.261864093302581e-05, "loss": 2.2667, "step": 70600 }, { "epoch": 3.72, "learning_rate": 3.2564243439662304e-05, "loss": 2.2601, "step": 70800 }, { "epoch": 3.73, "learning_rate": 3.250984594629879e-05, "loss": 2.2547, "step": 71000 }, { "epoch": 3.74, "learning_rate": 3.245544845293529e-05, "loss": 2.2547, "step": 71200 }, { "epoch": 3.75, "learning_rate": 3.2401050959571785e-05, "loss": 2.2552, "step": 71400 }, { "epoch": 3.76, "learning_rate": 3.2346653466208274e-05, "loss": 2.2498, "step": 71600 }, { "epoch": 3.77, "learning_rate": 3.229225597284477e-05, "loss": 2.2536, "step": 71800 }, { "epoch": 3.78, "learning_rate": 3.2237858479481265e-05, "loss": 2.2435, "step": 72000 }, { "epoch": 3.79, "learning_rate": 3.218346098611776e-05, "loss": 2.2544, "step": 72200 }, { "epoch": 3.8, "learning_rate": 3.212906349275426e-05, "loss": 2.2449, "step": 72400 }, { "epoch": 3.81, "learning_rate": 3.207466599939075e-05, "loss": 2.2506, "step": 72600 }, { "epoch": 3.82, "learning_rate": 3.202026850602725e-05, "loss": 2.2483, "step": 72800 }, { "epoch": 3.83, "learning_rate": 3.196587101266374e-05, "loss": 2.2417, "step": 73000 }, { "epoch": 3.84, "learning_rate": 3.191174550676705e-05, "loss": 2.2366, "step": 73200 }, { "epoch": 3.85, "learning_rate": 3.1857348013403546e-05, "loss": 2.2423, "step": 73400 }, { "epoch": 3.86, "learning_rate": 3.1802950520040035e-05, "loss": 2.2401, "step": 73600 }, { "epoch": 3.87, "learning_rate": 3.1748825014143355e-05, "loss": 2.245, "step": 73800 }, { "epoch": 3.88, "learning_rate": 3.1694427520779844e-05, "loss": 2.2359, "step": 74000 }, { "epoch": 3.9, "learning_rate": 3.164003002741634e-05, "loss": 2.2409, "step": 74200 }, { "epoch": 3.91, "learning_rate": 3.1585632534052835e-05, "loss": 2.2402, "step": 74400 }, { "epoch": 3.92, "learning_rate": 3.1531235040689325e-05, "loss": 2.2379, "step": 74600 }, { "epoch": 3.93, "learning_rate": 3.147683754732582e-05, "loss": 2.2365, "step": 74800 }, { "epoch": 3.94, "learning_rate": 3.1422440053962316e-05, "loss": 2.2368, "step": 75000 }, { "epoch": 3.95, "learning_rate": 3.136804256059881e-05, "loss": 2.2338, "step": 75200 }, { "epoch": 3.96, "learning_rate": 3.13136450672353e-05, "loss": 2.2456, "step": 75400 }, { "epoch": 3.97, "learning_rate": 3.12592475738718e-05, "loss": 2.2337, "step": 75600 }, { "epoch": 3.98, "learning_rate": 3.120485008050829e-05, "loss": 2.2337, "step": 75800 }, { "epoch": 3.99, "learning_rate": 3.115045258714478e-05, "loss": 2.2282, "step": 76000 }, { "epoch": 4.0, "learning_rate": 3.109605509378128e-05, "loss": 2.2292, "step": 76200 }, { "epoch": 4.01, "learning_rate": 3.1041657600417774e-05, "loss": 2.2282, "step": 76400 }, { "epoch": 4.02, "learning_rate": 3.098726010705427e-05, "loss": 2.2299, "step": 76600 }, { "epoch": 4.03, "learning_rate": 3.093286261369076e-05, "loss": 2.2131, "step": 76800 }, { "epoch": 4.04, "learning_rate": 3.0878465120327254e-05, "loss": 2.2223, "step": 77000 }, { "epoch": 4.05, "learning_rate": 3.082406762696375e-05, "loss": 2.2275, "step": 77200 }, { "epoch": 4.06, "learning_rate": 3.076967013360024e-05, "loss": 2.2156, "step": 77400 }, { "epoch": 4.07, "learning_rate": 3.071527264023674e-05, "loss": 2.2252, "step": 77600 }, { "epoch": 4.08, "learning_rate": 3.0661147134340054e-05, "loss": 2.2282, "step": 77800 }, { "epoch": 4.09, "learning_rate": 3.0606749640976543e-05, "loss": 2.2214, "step": 78000 }, { "epoch": 4.1, "learning_rate": 3.055235214761304e-05, "loss": 2.2244, "step": 78200 }, { "epoch": 4.12, "learning_rate": 3.0497954654249532e-05, "loss": 2.2149, "step": 78400 }, { "epoch": 4.13, "learning_rate": 3.0443557160886028e-05, "loss": 2.2203, "step": 78600 }, { "epoch": 4.14, "learning_rate": 3.038915966752252e-05, "loss": 2.216, "step": 78800 }, { "epoch": 4.15, "learning_rate": 3.0334762174159016e-05, "loss": 2.2148, "step": 79000 }, { "epoch": 4.16, "learning_rate": 3.028036468079551e-05, "loss": 2.2145, "step": 79200 }, { "epoch": 4.17, "learning_rate": 3.0225967187432004e-05, "loss": 2.2172, "step": 79400 }, { "epoch": 4.18, "learning_rate": 3.0171569694068497e-05, "loss": 2.2165, "step": 79600 }, { "epoch": 4.19, "learning_rate": 3.011717220070499e-05, "loss": 2.2186, "step": 79800 }, { "epoch": 4.2, "learning_rate": 3.0063046694808305e-05, "loss": 2.2044, "step": 80000 }, { "epoch": 4.21, "learning_rate": 3.00086492014448e-05, "loss": 2.2032, "step": 80200 }, { "epoch": 4.22, "learning_rate": 2.9954251708081293e-05, "loss": 2.2066, "step": 80400 }, { "epoch": 4.23, "learning_rate": 2.9899854214717786e-05, "loss": 2.2019, "step": 80600 }, { "epoch": 4.24, "learning_rate": 2.984545672135428e-05, "loss": 2.2085, "step": 80800 }, { "epoch": 4.25, "learning_rate": 2.9791059227990774e-05, "loss": 2.1975, "step": 81000 }, { "epoch": 4.26, "learning_rate": 2.973666173462727e-05, "loss": 2.1989, "step": 81200 }, { "epoch": 4.27, "learning_rate": 2.9682264241263762e-05, "loss": 2.2016, "step": 81400 }, { "epoch": 4.28, "learning_rate": 2.9627866747900258e-05, "loss": 2.1911, "step": 81600 }, { "epoch": 4.29, "learning_rate": 2.957346925453675e-05, "loss": 2.2, "step": 81800 }, { "epoch": 4.3, "learning_rate": 2.9519343748640067e-05, "loss": 2.2038, "step": 82000 }, { "epoch": 4.31, "learning_rate": 2.9465218242743376e-05, "loss": 2.1882, "step": 82200 }, { "epoch": 4.33, "learning_rate": 2.941082074937987e-05, "loss": 2.1936, "step": 82400 }, { "epoch": 4.34, "learning_rate": 2.9356423256016364e-05, "loss": 2.1979, "step": 82600 }, { "epoch": 4.35, "learning_rate": 2.9302025762652857e-05, "loss": 2.2051, "step": 82800 }, { "epoch": 4.36, "learning_rate": 2.9247628269289352e-05, "loss": 2.1915, "step": 83000 }, { "epoch": 4.37, "learning_rate": 2.9193230775925845e-05, "loss": 2.1869, "step": 83200 }, { "epoch": 4.38, "learning_rate": 2.9138833282562337e-05, "loss": 2.1824, "step": 83400 }, { "epoch": 4.39, "learning_rate": 2.9084435789198833e-05, "loss": 2.1974, "step": 83600 }, { "epoch": 4.4, "learning_rate": 2.9030038295835326e-05, "loss": 2.1854, "step": 83800 }, { "epoch": 4.41, "learning_rate": 2.8975640802471825e-05, "loss": 2.1844, "step": 84000 }, { "epoch": 4.42, "learning_rate": 2.892124330910832e-05, "loss": 2.1883, "step": 84200 }, { "epoch": 4.43, "learning_rate": 2.886711780321163e-05, "loss": 2.1925, "step": 84400 }, { "epoch": 4.44, "learning_rate": 2.8812720309848122e-05, "loss": 2.186, "step": 84600 }, { "epoch": 4.45, "learning_rate": 2.8758322816484618e-05, "loss": 2.1865, "step": 84800 }, { "epoch": 4.46, "learning_rate": 2.870392532312111e-05, "loss": 2.1837, "step": 85000 }, { "epoch": 4.47, "learning_rate": 2.8649527829757607e-05, "loss": 2.1836, "step": 85200 }, { "epoch": 4.48, "learning_rate": 2.85951303363941e-05, "loss": 2.1867, "step": 85400 }, { "epoch": 4.49, "learning_rate": 2.8540732843030595e-05, "loss": 2.1811, "step": 85600 }, { "epoch": 4.5, "learning_rate": 2.8486335349667087e-05, "loss": 2.1695, "step": 85800 }, { "epoch": 4.51, "learning_rate": 2.843193785630358e-05, "loss": 2.187, "step": 86000 }, { "epoch": 4.52, "learning_rate": 2.8377540362940076e-05, "loss": 2.1767, "step": 86200 }, { "epoch": 4.54, "learning_rate": 2.8323142869576568e-05, "loss": 2.1827, "step": 86400 }, { "epoch": 4.55, "learning_rate": 2.8268745376213064e-05, "loss": 2.1835, "step": 86600 }, { "epoch": 4.56, "learning_rate": 2.8214347882849563e-05, "loss": 2.164, "step": 86800 }, { "epoch": 4.57, "learning_rate": 2.8159950389486056e-05, "loss": 2.1696, "step": 87000 }, { "epoch": 4.58, "learning_rate": 2.810555289612255e-05, "loss": 2.1812, "step": 87200 }, { "epoch": 4.59, "learning_rate": 2.8051155402759044e-05, "loss": 2.1768, "step": 87400 }, { "epoch": 4.6, "learning_rate": 2.7996757909395536e-05, "loss": 2.1825, "step": 87600 }, { "epoch": 4.61, "learning_rate": 2.794263240349885e-05, "loss": 2.1763, "step": 87800 }, { "epoch": 4.62, "learning_rate": 2.788823491013534e-05, "loss": 2.1698, "step": 88000 }, { "epoch": 4.63, "learning_rate": 2.7833837416771834e-05, "loss": 2.1745, "step": 88200 }, { "epoch": 4.64, "learning_rate": 2.777943992340833e-05, "loss": 2.1675, "step": 88400 }, { "epoch": 4.65, "learning_rate": 2.7725042430044822e-05, "loss": 2.1661, "step": 88600 }, { "epoch": 4.66, "learning_rate": 2.7670644936681318e-05, "loss": 2.1603, "step": 88800 }, { "epoch": 4.67, "learning_rate": 2.761624744331781e-05, "loss": 2.1612, "step": 89000 }, { "epoch": 4.68, "learning_rate": 2.7561849949954306e-05, "loss": 2.1667, "step": 89200 }, { "epoch": 4.69, "learning_rate": 2.7507452456590805e-05, "loss": 2.1625, "step": 89400 }, { "epoch": 4.7, "learning_rate": 2.7453054963227298e-05, "loss": 2.1751, "step": 89600 }, { "epoch": 4.71, "learning_rate": 2.7398657469863794e-05, "loss": 2.163, "step": 89800 }, { "epoch": 4.72, "learning_rate": 2.7344259976500286e-05, "loss": 2.1606, "step": 90000 }, { "epoch": 4.73, "learning_rate": 2.728986248313678e-05, "loss": 2.1623, "step": 90200 }, { "epoch": 4.75, "learning_rate": 2.7235464989773274e-05, "loss": 2.1648, "step": 90400 }, { "epoch": 4.76, "learning_rate": 2.7181067496409767e-05, "loss": 2.1607, "step": 90600 }, { "epoch": 4.77, "learning_rate": 2.7126670003046263e-05, "loss": 2.1561, "step": 90800 }, { "epoch": 4.78, "learning_rate": 2.7072272509682755e-05, "loss": 2.1591, "step": 91000 }, { "epoch": 4.79, "learning_rate": 2.701787501631925e-05, "loss": 2.1596, "step": 91200 }, { "epoch": 4.8, "learning_rate": 2.6963477522955744e-05, "loss": 2.1524, "step": 91400 }, { "epoch": 4.81, "learning_rate": 2.6909080029592236e-05, "loss": 2.1508, "step": 91600 }, { "epoch": 4.82, "learning_rate": 2.6854682536228732e-05, "loss": 2.1607, "step": 91800 }, { "epoch": 4.83, "learning_rate": 2.6800285042865224e-05, "loss": 2.1485, "step": 92000 }, { "epoch": 4.84, "learning_rate": 2.674615953696854e-05, "loss": 2.1467, "step": 92200 }, { "epoch": 4.85, "learning_rate": 2.6691762043605033e-05, "loss": 2.1526, "step": 92400 }, { "epoch": 4.86, "learning_rate": 2.663736455024153e-05, "loss": 2.1489, "step": 92600 }, { "epoch": 4.87, "learning_rate": 2.658296705687802e-05, "loss": 2.1519, "step": 92800 }, { "epoch": 4.88, "learning_rate": 2.6528569563514517e-05, "loss": 2.1482, "step": 93000 }, { "epoch": 4.89, "learning_rate": 2.647417207015101e-05, "loss": 2.1503, "step": 93200 }, { "epoch": 4.9, "learning_rate": 2.6419774576787505e-05, "loss": 2.1474, "step": 93400 }, { "epoch": 4.91, "learning_rate": 2.6365377083423998e-05, "loss": 2.1475, "step": 93600 }, { "epoch": 4.92, "learning_rate": 2.631097959006049e-05, "loss": 2.1487, "step": 93800 }, { "epoch": 4.93, "learning_rate": 2.6256582096696986e-05, "loss": 2.1527, "step": 94000 }, { "epoch": 4.94, "learning_rate": 2.620218460333348e-05, "loss": 2.1438, "step": 94200 }, { "epoch": 4.96, "learning_rate": 2.6147787109969974e-05, "loss": 2.148, "step": 94400 }, { "epoch": 4.97, "learning_rate": 2.6093389616606467e-05, "loss": 2.1478, "step": 94600 }, { "epoch": 4.98, "learning_rate": 2.6038992123242962e-05, "loss": 2.1505, "step": 94800 }, { "epoch": 4.99, "learning_rate": 2.5984866617346275e-05, "loss": 2.1491, "step": 95000 }, { "epoch": 5.0, "learning_rate": 2.593046912398277e-05, "loss": 2.143, "step": 95200 }, { "epoch": 5.01, "learning_rate": 2.5876071630619263e-05, "loss": 2.1388, "step": 95400 }, { "epoch": 5.02, "learning_rate": 2.582167413725576e-05, "loss": 2.1433, "step": 95600 }, { "epoch": 5.03, "learning_rate": 2.576727664389225e-05, "loss": 2.1269, "step": 95800 }, { "epoch": 5.04, "learning_rate": 2.5712879150528747e-05, "loss": 2.1401, "step": 96000 }, { "epoch": 5.05, "learning_rate": 2.565848165716524e-05, "loss": 2.1382, "step": 96200 }, { "epoch": 5.06, "learning_rate": 2.5604084163801732e-05, "loss": 2.1371, "step": 96400 }, { "epoch": 5.07, "learning_rate": 2.5549686670438228e-05, "loss": 2.1347, "step": 96600 }, { "epoch": 5.08, "learning_rate": 2.5495561164541537e-05, "loss": 2.1407, "step": 96800 }, { "epoch": 5.09, "learning_rate": 2.544116367117803e-05, "loss": 2.1365, "step": 97000 }, { "epoch": 5.1, "learning_rate": 2.5386766177814526e-05, "loss": 2.1354, "step": 97200 }, { "epoch": 5.11, "learning_rate": 2.5332368684451025e-05, "loss": 2.1278, "step": 97400 }, { "epoch": 5.12, "learning_rate": 2.5277971191087517e-05, "loss": 2.1357, "step": 97600 }, { "epoch": 5.13, "learning_rate": 2.5223573697724013e-05, "loss": 2.1337, "step": 97800 }, { "epoch": 5.14, "learning_rate": 2.5169176204360506e-05, "loss": 2.1335, "step": 98000 }, { "epoch": 5.15, "learning_rate": 2.5114778710997e-05, "loss": 2.1302, "step": 98200 }, { "epoch": 5.17, "learning_rate": 2.5060381217633494e-05, "loss": 2.1351, "step": 98400 }, { "epoch": 5.18, "learning_rate": 2.5005983724269986e-05, "loss": 2.1346, "step": 98600 }, { "epoch": 5.19, "learning_rate": 2.4951586230906482e-05, "loss": 2.1307, "step": 98800 }, { "epoch": 5.2, "learning_rate": 2.4897188737542975e-05, "loss": 2.1222, "step": 99000 }, { "epoch": 5.21, "learning_rate": 2.484279124417947e-05, "loss": 2.1215, "step": 99200 }, { "epoch": 5.22, "learning_rate": 2.4788393750815963e-05, "loss": 2.1201, "step": 99400 }, { "epoch": 5.23, "learning_rate": 2.473399625745246e-05, "loss": 2.1307, "step": 99600 }, { "epoch": 5.24, "learning_rate": 2.467959876408895e-05, "loss": 2.1311, "step": 99800 }, { "epoch": 5.25, "learning_rate": 2.4625201270725444e-05, "loss": 2.1161, "step": 100000 }, { "epoch": 5.26, "learning_rate": 2.457080377736194e-05, "loss": 2.1259, "step": 100200 }, { "epoch": 5.27, "learning_rate": 2.4516406283998432e-05, "loss": 2.1199, "step": 100400 }, { "epoch": 5.28, "learning_rate": 2.446200879063493e-05, "loss": 2.1216, "step": 100600 }, { "epoch": 5.29, "learning_rate": 2.4407611297271424e-05, "loss": 2.1156, "step": 100800 }, { "epoch": 5.3, "learning_rate": 2.4353213803907916e-05, "loss": 2.1243, "step": 101000 }, { "epoch": 5.31, "learning_rate": 2.4298816310544412e-05, "loss": 2.1209, "step": 101200 }, { "epoch": 5.32, "learning_rate": 2.4244418817180905e-05, "loss": 2.1188, "step": 101400 }, { "epoch": 5.33, "learning_rate": 2.41900213238174e-05, "loss": 2.1206, "step": 101600 }, { "epoch": 5.34, "learning_rate": 2.4135623830453893e-05, "loss": 2.1195, "step": 101800 }, { "epoch": 5.35, "learning_rate": 2.408122633709039e-05, "loss": 2.1218, "step": 102000 }, { "epoch": 5.36, "learning_rate": 2.402682884372688e-05, "loss": 2.1098, "step": 102200 }, { "epoch": 5.38, "learning_rate": 2.3972703337830194e-05, "loss": 2.1126, "step": 102400 }, { "epoch": 5.39, "learning_rate": 2.3918305844466686e-05, "loss": 2.1132, "step": 102600 }, { "epoch": 5.4, "learning_rate": 2.3863908351103182e-05, "loss": 2.1141, "step": 102800 }, { "epoch": 5.41, "learning_rate": 2.3809510857739674e-05, "loss": 2.1059, "step": 103000 }, { "epoch": 5.42, "learning_rate": 2.375511336437617e-05, "loss": 2.1057, "step": 103200 }, { "epoch": 5.43, "learning_rate": 2.3700715871012666e-05, "loss": 2.1116, "step": 103400 }, { "epoch": 5.44, "learning_rate": 2.364631837764916e-05, "loss": 2.1138, "step": 103600 }, { "epoch": 5.45, "learning_rate": 2.3591920884285654e-05, "loss": 2.1129, "step": 103800 }, { "epoch": 5.46, "learning_rate": 2.3537523390922147e-05, "loss": 2.1108, "step": 104000 }, { "epoch": 5.47, "learning_rate": 2.3483125897558643e-05, "loss": 2.1146, "step": 104200 }, { "epoch": 5.48, "learning_rate": 2.3429000391661955e-05, "loss": 2.1102, "step": 104400 }, { "epoch": 5.49, "learning_rate": 2.3374602898298448e-05, "loss": 2.1138, "step": 104600 }, { "epoch": 5.5, "learning_rate": 2.332020540493494e-05, "loss": 2.1043, "step": 104800 }, { "epoch": 5.51, "learning_rate": 2.3265807911571436e-05, "loss": 2.1063, "step": 105000 }, { "epoch": 5.52, "learning_rate": 2.321141041820793e-05, "loss": 2.1108, "step": 105200 }, { "epoch": 5.53, "learning_rate": 2.3157012924844424e-05, "loss": 2.1106, "step": 105400 }, { "epoch": 5.54, "learning_rate": 2.3102615431480917e-05, "loss": 2.1117, "step": 105600 }, { "epoch": 5.55, "learning_rate": 2.3048217938117413e-05, "loss": 2.0953, "step": 105800 }, { "epoch": 5.56, "learning_rate": 2.299382044475391e-05, "loss": 2.1012, "step": 106000 }, { "epoch": 5.57, "learning_rate": 2.29394229513904e-05, "loss": 2.1044, "step": 106200 }, { "epoch": 5.59, "learning_rate": 2.2885297445493714e-05, "loss": 2.1023, "step": 106400 }, { "epoch": 5.6, "learning_rate": 2.283089995213021e-05, "loss": 2.1068, "step": 106600 }, { "epoch": 5.61, "learning_rate": 2.2776502458766702e-05, "loss": 2.1076, "step": 106800 }, { "epoch": 5.62, "learning_rate": 2.2722104965403194e-05, "loss": 2.1024, "step": 107000 }, { "epoch": 5.63, "learning_rate": 2.266770747203969e-05, "loss": 2.1007, "step": 107200 }, { "epoch": 5.64, "learning_rate": 2.2613309978676183e-05, "loss": 2.0993, "step": 107400 }, { "epoch": 5.65, "learning_rate": 2.255891248531268e-05, "loss": 2.0964, "step": 107600 }, { "epoch": 5.66, "learning_rate": 2.250451499194917e-05, "loss": 2.0933, "step": 107800 }, { "epoch": 5.67, "learning_rate": 2.2450117498585667e-05, "loss": 2.0945, "step": 108000 }, { "epoch": 5.68, "learning_rate": 2.239572000522216e-05, "loss": 2.0974, "step": 108200 }, { "epoch": 5.69, "learning_rate": 2.2341322511858655e-05, "loss": 2.0992, "step": 108400 }, { "epoch": 5.7, "learning_rate": 2.228692501849515e-05, "loss": 2.0913, "step": 108600 }, { "epoch": 5.71, "learning_rate": 2.223279951259846e-05, "loss": 2.096, "step": 108800 }, { "epoch": 5.72, "learning_rate": 2.2178402019234952e-05, "loss": 2.094, "step": 109000 }, { "epoch": 5.73, "learning_rate": 2.2124004525871452e-05, "loss": 2.0944, "step": 109200 }, { "epoch": 5.74, "learning_rate": 2.2069607032507944e-05, "loss": 2.0895, "step": 109400 }, { "epoch": 5.75, "learning_rate": 2.2015209539144437e-05, "loss": 2.0886, "step": 109600 }, { "epoch": 5.76, "learning_rate": 2.1960812045780932e-05, "loss": 2.095, "step": 109800 }, { "epoch": 5.77, "learning_rate": 2.1906414552417425e-05, "loss": 2.0854, "step": 110000 }, { "epoch": 5.78, "learning_rate": 2.185201705905392e-05, "loss": 2.0868, "step": 110200 }, { "epoch": 5.8, "learning_rate": 2.1797619565690413e-05, "loss": 2.0803, "step": 110400 }, { "epoch": 5.81, "learning_rate": 2.174322207232691e-05, "loss": 2.0918, "step": 110600 }, { "epoch": 5.82, "learning_rate": 2.16888245789634e-05, "loss": 2.0931, "step": 110800 }, { "epoch": 5.83, "learning_rate": 2.1634427085599894e-05, "loss": 2.0881, "step": 111000 }, { "epoch": 5.84, "learning_rate": 2.1580301579703207e-05, "loss": 2.0856, "step": 111200 }, { "epoch": 5.85, "learning_rate": 2.152617607380652e-05, "loss": 2.0818, "step": 111400 }, { "epoch": 5.86, "learning_rate": 2.1471778580443015e-05, "loss": 2.0788, "step": 111600 }, { "epoch": 5.87, "learning_rate": 2.1417381087079507e-05, "loss": 2.0836, "step": 111800 }, { "epoch": 5.88, "learning_rate": 2.1362983593716003e-05, "loss": 2.0898, "step": 112000 }, { "epoch": 5.89, "learning_rate": 2.1308586100352496e-05, "loss": 2.0803, "step": 112200 }, { "epoch": 5.9, "learning_rate": 2.125418860698899e-05, "loss": 2.081, "step": 112400 }, { "epoch": 5.91, "learning_rate": 2.1199791113625487e-05, "loss": 2.0859, "step": 112600 }, { "epoch": 5.92, "learning_rate": 2.114539362026198e-05, "loss": 2.0826, "step": 112800 }, { "epoch": 5.93, "learning_rate": 2.1090996126898476e-05, "loss": 2.0883, "step": 113000 }, { "epoch": 5.94, "learning_rate": 2.1036598633534968e-05, "loss": 2.0802, "step": 113200 }, { "epoch": 5.95, "learning_rate": 2.098220114017146e-05, "loss": 2.0868, "step": 113400 }, { "epoch": 5.96, "learning_rate": 2.0927803646807956e-05, "loss": 2.0827, "step": 113600 }, { "epoch": 5.97, "learning_rate": 2.087340615344445e-05, "loss": 2.0842, "step": 113800 }, { "epoch": 5.98, "learning_rate": 2.0819008660080945e-05, "loss": 2.0783, "step": 114000 }, { "epoch": 5.99, "learning_rate": 2.0764611166717437e-05, "loss": 2.0809, "step": 114200 }, { "epoch": 6.01, "learning_rate": 2.0710213673353933e-05, "loss": 2.0844, "step": 114400 }, { "epoch": 6.02, "learning_rate": 2.065581617999043e-05, "loss": 2.0746, "step": 114600 }, { "epoch": 6.03, "learning_rate": 2.060141868662692e-05, "loss": 2.0785, "step": 114800 }, { "epoch": 6.04, "learning_rate": 2.0547021193263417e-05, "loss": 2.0767, "step": 115000 }, { "epoch": 6.05, "learning_rate": 2.049262369989991e-05, "loss": 2.0763, "step": 115200 }, { "epoch": 6.06, "learning_rate": 2.0438498194003222e-05, "loss": 2.0837, "step": 115400 }, { "epoch": 6.07, "learning_rate": 2.038437268810653e-05, "loss": 2.0736, "step": 115600 }, { "epoch": 6.08, "learning_rate": 2.0329975194743027e-05, "loss": 2.0787, "step": 115800 }, { "epoch": 6.09, "learning_rate": 2.0275577701379523e-05, "loss": 2.084, "step": 116000 }, { "epoch": 6.1, "learning_rate": 2.0221180208016016e-05, "loss": 2.0804, "step": 116200 }, { "epoch": 6.11, "learning_rate": 2.016678271465251e-05, "loss": 2.0657, "step": 116400 }, { "epoch": 6.12, "learning_rate": 2.0112385221289004e-05, "loss": 2.0731, "step": 116600 }, { "epoch": 6.13, "learning_rate": 2.0057987727925496e-05, "loss": 2.0769, "step": 116800 }, { "epoch": 6.14, "learning_rate": 2.0003590234561992e-05, "loss": 2.0734, "step": 117000 }, { "epoch": 6.15, "learning_rate": 1.9949192741198485e-05, "loss": 2.0751, "step": 117200 }, { "epoch": 6.16, "learning_rate": 1.989479524783498e-05, "loss": 2.071, "step": 117400 }, { "epoch": 6.17, "learning_rate": 1.9840397754471473e-05, "loss": 2.075, "step": 117600 }, { "epoch": 6.18, "learning_rate": 1.978600026110797e-05, "loss": 2.0692, "step": 117800 }, { "epoch": 6.19, "learning_rate": 1.9731602767744465e-05, "loss": 2.0671, "step": 118000 }, { "epoch": 6.2, "learning_rate": 1.9677477261847774e-05, "loss": 2.0654, "step": 118200 }, { "epoch": 6.22, "learning_rate": 1.962307976848427e-05, "loss": 2.0612, "step": 118400 }, { "epoch": 6.23, "learning_rate": 1.9568682275120765e-05, "loss": 2.0703, "step": 118600 }, { "epoch": 6.24, "learning_rate": 1.9514284781757258e-05, "loss": 2.0644, "step": 118800 }, { "epoch": 6.25, "learning_rate": 1.9459887288393754e-05, "loss": 2.0662, "step": 119000 }, { "epoch": 6.26, "learning_rate": 1.9405489795030246e-05, "loss": 2.0652, "step": 119200 }, { "epoch": 6.27, "learning_rate": 1.935109230166674e-05, "loss": 2.0661, "step": 119400 }, { "epoch": 6.28, "learning_rate": 1.9296694808303234e-05, "loss": 2.0674, "step": 119600 }, { "epoch": 6.29, "learning_rate": 1.9242297314939727e-05, "loss": 2.0652, "step": 119800 }, { "epoch": 6.3, "learning_rate": 1.9187899821576223e-05, "loss": 2.0598, "step": 120000 }, { "epoch": 6.31, "learning_rate": 1.9133502328212715e-05, "loss": 2.0655, "step": 120200 }, { "epoch": 6.32, "learning_rate": 1.907910483484921e-05, "loss": 2.0605, "step": 120400 }, { "epoch": 6.33, "learning_rate": 1.9024707341485707e-05, "loss": 2.0619, "step": 120600 }, { "epoch": 6.34, "learning_rate": 1.89703098481222e-05, "loss": 2.0631, "step": 120800 }, { "epoch": 6.35, "learning_rate": 1.8915912354758695e-05, "loss": 2.0678, "step": 121000 }, { "epoch": 6.36, "learning_rate": 1.8861514861395188e-05, "loss": 2.0584, "step": 121200 }, { "epoch": 6.37, "learning_rate": 1.8807117368031683e-05, "loss": 2.0461, "step": 121400 }, { "epoch": 6.38, "learning_rate": 1.8752719874668176e-05, "loss": 2.0631, "step": 121600 }, { "epoch": 6.39, "learning_rate": 1.869832238130467e-05, "loss": 2.0695, "step": 121800 }, { "epoch": 6.4, "learning_rate": 1.8643924887941164e-05, "loss": 2.0574, "step": 122000 }, { "epoch": 6.41, "learning_rate": 1.8589527394577657e-05, "loss": 2.0504, "step": 122200 }, { "epoch": 6.43, "learning_rate": 1.853540188868097e-05, "loss": 2.0567, "step": 122400 }, { "epoch": 6.44, "learning_rate": 1.8481004395317465e-05, "loss": 2.0592, "step": 122600 }, { "epoch": 6.45, "learning_rate": 1.8426606901953958e-05, "loss": 2.0591, "step": 122800 }, { "epoch": 6.46, "learning_rate": 1.837220940859045e-05, "loss": 2.0552, "step": 123000 }, { "epoch": 6.47, "learning_rate": 1.831781191522695e-05, "loss": 2.0593, "step": 123200 }, { "epoch": 6.48, "learning_rate": 1.8263414421863442e-05, "loss": 2.0537, "step": 123400 }, { "epoch": 6.49, "learning_rate": 1.8209016928499938e-05, "loss": 2.0523, "step": 123600 }, { "epoch": 6.5, "learning_rate": 1.815461943513643e-05, "loss": 2.0497, "step": 123800 }, { "epoch": 6.51, "learning_rate": 1.8100221941772922e-05, "loss": 2.0571, "step": 124000 }, { "epoch": 6.52, "learning_rate": 1.8045824448409418e-05, "loss": 2.0573, "step": 124200 }, { "epoch": 6.53, "learning_rate": 1.799142695504591e-05, "loss": 2.0526, "step": 124400 }, { "epoch": 6.54, "learning_rate": 1.7937029461682407e-05, "loss": 2.0604, "step": 124600 }, { "epoch": 6.55, "learning_rate": 1.78826319683189e-05, "loss": 2.0436, "step": 124800 }, { "epoch": 6.56, "learning_rate": 1.7828234474955395e-05, "loss": 2.0445, "step": 125000 }, { "epoch": 6.57, "learning_rate": 1.7774108969058707e-05, "loss": 2.0484, "step": 125200 }, { "epoch": 6.58, "learning_rate": 1.77197114756952e-05, "loss": 2.0561, "step": 125400 }, { "epoch": 6.59, "learning_rate": 1.7665585969798512e-05, "loss": 2.0471, "step": 125600 }, { "epoch": 6.6, "learning_rate": 1.7611188476435005e-05, "loss": 2.064, "step": 125800 }, { "epoch": 6.61, "learning_rate": 1.75567909830715e-05, "loss": 2.0574, "step": 126000 }, { "epoch": 6.62, "learning_rate": 1.7502393489707993e-05, "loss": 2.0487, "step": 126200 }, { "epoch": 6.64, "learning_rate": 1.744799599634449e-05, "loss": 2.0502, "step": 126400 }, { "epoch": 6.65, "learning_rate": 1.7393598502980985e-05, "loss": 2.0401, "step": 126600 }, { "epoch": 6.66, "learning_rate": 1.7339201009617477e-05, "loss": 2.0351, "step": 126800 }, { "epoch": 6.67, "learning_rate": 1.7284803516253973e-05, "loss": 2.0526, "step": 127000 }, { "epoch": 6.68, "learning_rate": 1.7230406022890466e-05, "loss": 2.0423, "step": 127200 }, { "epoch": 6.69, "learning_rate": 1.717600852952696e-05, "loss": 2.0438, "step": 127400 }, { "epoch": 6.7, "learning_rate": 1.7121611036163454e-05, "loss": 2.0423, "step": 127600 }, { "epoch": 6.71, "learning_rate": 1.7067213542799946e-05, "loss": 2.0511, "step": 127800 }, { "epoch": 6.72, "learning_rate": 1.7012816049436442e-05, "loss": 2.0478, "step": 128000 }, { "epoch": 6.73, "learning_rate": 1.6958418556072935e-05, "loss": 2.0422, "step": 128200 }, { "epoch": 6.74, "learning_rate": 1.6904021062709434e-05, "loss": 2.0438, "step": 128400 }, { "epoch": 6.75, "learning_rate": 1.6849623569345926e-05, "loss": 2.0421, "step": 128600 }, { "epoch": 6.76, "learning_rate": 1.679522607598242e-05, "loss": 2.0399, "step": 128800 }, { "epoch": 6.77, "learning_rate": 1.6740828582618915e-05, "loss": 2.0455, "step": 129000 }, { "epoch": 6.78, "learning_rate": 1.6686431089255407e-05, "loss": 2.0355, "step": 129200 }, { "epoch": 6.79, "learning_rate": 1.6632033595891903e-05, "loss": 2.04, "step": 129400 }, { "epoch": 6.8, "learning_rate": 1.6577636102528395e-05, "loss": 2.0416, "step": 129600 }, { "epoch": 6.81, "learning_rate": 1.652323860916489e-05, "loss": 2.0373, "step": 129800 }, { "epoch": 6.82, "learning_rate": 1.6468841115801384e-05, "loss": 2.0423, "step": 130000 }, { "epoch": 6.83, "learning_rate": 1.6414443622437876e-05, "loss": 2.0354, "step": 130200 }, { "epoch": 6.85, "learning_rate": 1.6360046129074372e-05, "loss": 2.0343, "step": 130400 }, { "epoch": 6.86, "learning_rate": 1.6305920623177685e-05, "loss": 2.0327, "step": 130600 }, { "epoch": 6.87, "learning_rate": 1.6251523129814177e-05, "loss": 2.0362, "step": 130800 }, { "epoch": 6.88, "learning_rate": 1.6197125636450673e-05, "loss": 2.036, "step": 131000 }, { "epoch": 6.89, "learning_rate": 1.614272814308717e-05, "loss": 2.0384, "step": 131200 }, { "epoch": 6.9, "learning_rate": 1.608833064972366e-05, "loss": 2.0379, "step": 131400 }, { "epoch": 6.91, "learning_rate": 1.6033933156360157e-05, "loss": 2.043, "step": 131600 }, { "epoch": 6.92, "learning_rate": 1.597953566299665e-05, "loss": 2.0316, "step": 131800 }, { "epoch": 6.93, "learning_rate": 1.5925138169633145e-05, "loss": 2.0292, "step": 132000 }, { "epoch": 6.94, "learning_rate": 1.5870740676269638e-05, "loss": 2.0309, "step": 132200 }, { "epoch": 6.95, "learning_rate": 1.5816343182906134e-05, "loss": 2.0392, "step": 132400 }, { "epoch": 6.96, "learning_rate": 1.5761945689542626e-05, "loss": 2.033, "step": 132600 }, { "epoch": 6.97, "learning_rate": 1.570754819617912e-05, "loss": 2.0329, "step": 132800 }, { "epoch": 6.98, "learning_rate": 1.565342269028243e-05, "loss": 2.0324, "step": 133000 }, { "epoch": 6.99, "learning_rate": 1.5599025196918927e-05, "loss": 2.0286, "step": 133200 }, { "epoch": 7.0, "learning_rate": 1.554462770355542e-05, "loss": 2.0356, "step": 133400 }, { "epoch": 7.01, "learning_rate": 1.5490502197658732e-05, "loss": 2.0315, "step": 133600 }, { "epoch": 7.02, "learning_rate": 1.5436104704295224e-05, "loss": 2.0332, "step": 133800 }, { "epoch": 7.03, "learning_rate": 1.538170721093172e-05, "loss": 2.0236, "step": 134000 }, { "epoch": 7.04, "learning_rate": 1.5327309717568213e-05, "loss": 2.0283, "step": 134200 }, { "epoch": 7.06, "learning_rate": 1.5272912224204712e-05, "loss": 2.0349, "step": 134400 }, { "epoch": 7.07, "learning_rate": 1.5218514730841204e-05, "loss": 2.0285, "step": 134600 }, { "epoch": 7.08, "learning_rate": 1.5164117237477699e-05, "loss": 2.0336, "step": 134800 }, { "epoch": 7.09, "learning_rate": 1.5109719744114193e-05, "loss": 2.0387, "step": 135000 }, { "epoch": 7.1, "learning_rate": 1.5055322250750687e-05, "loss": 2.0346, "step": 135200 }, { "epoch": 7.11, "learning_rate": 1.5000924757387181e-05, "loss": 2.0271, "step": 135400 }, { "epoch": 7.12, "learning_rate": 1.4946527264023673e-05, "loss": 2.0259, "step": 135600 }, { "epoch": 7.13, "learning_rate": 1.4892129770660168e-05, "loss": 2.0291, "step": 135800 }, { "epoch": 7.14, "learning_rate": 1.4837732277296662e-05, "loss": 2.0281, "step": 136000 }, { "epoch": 7.15, "learning_rate": 1.4783606771399974e-05, "loss": 2.0334, "step": 136200 }, { "epoch": 7.16, "learning_rate": 1.4729209278036469e-05, "loss": 2.033, "step": 136400 }, { "epoch": 7.17, "learning_rate": 1.4674811784672963e-05, "loss": 2.0291, "step": 136600 }, { "epoch": 7.18, "learning_rate": 1.4620414291309457e-05, "loss": 2.0238, "step": 136800 }, { "epoch": 7.19, "learning_rate": 1.456601679794595e-05, "loss": 2.021, "step": 137000 }, { "epoch": 7.2, "learning_rate": 1.4511619304582447e-05, "loss": 2.0198, "step": 137200 }, { "epoch": 7.21, "learning_rate": 1.4457221811218941e-05, "loss": 2.0177, "step": 137400 }, { "epoch": 7.22, "learning_rate": 1.4402824317855435e-05, "loss": 2.0238, "step": 137600 }, { "epoch": 7.23, "learning_rate": 1.434842682449193e-05, "loss": 2.0285, "step": 137800 }, { "epoch": 7.24, "learning_rate": 1.4294029331128422e-05, "loss": 2.0242, "step": 138000 }, { "epoch": 7.25, "learning_rate": 1.4239631837764916e-05, "loss": 2.0165, "step": 138200 }, { "epoch": 7.27, "learning_rate": 1.418523434440141e-05, "loss": 2.0177, "step": 138400 }, { "epoch": 7.28, "learning_rate": 1.4130836851037904e-05, "loss": 2.0275, "step": 138600 }, { "epoch": 7.29, "learning_rate": 1.4076439357674398e-05, "loss": 2.0221, "step": 138800 }, { "epoch": 7.3, "learning_rate": 1.4022041864310892e-05, "loss": 2.0234, "step": 139000 }, { "epoch": 7.31, "learning_rate": 1.3967644370947388e-05, "loss": 2.0225, "step": 139200 }, { "epoch": 7.32, "learning_rate": 1.3913246877583882e-05, "loss": 2.0195, "step": 139400 }, { "epoch": 7.33, "learning_rate": 1.3858849384220377e-05, "loss": 2.0142, "step": 139600 }, { "epoch": 7.34, "learning_rate": 1.380445189085687e-05, "loss": 2.0224, "step": 139800 }, { "epoch": 7.35, "learning_rate": 1.3750054397493365e-05, "loss": 2.0261, "step": 140000 }, { "epoch": 7.36, "learning_rate": 1.3695656904129859e-05, "loss": 2.019, "step": 140200 }, { "epoch": 7.37, "learning_rate": 1.3641259410766351e-05, "loss": 2.013, "step": 140400 }, { "epoch": 7.38, "learning_rate": 1.3586861917402846e-05, "loss": 2.0101, "step": 140600 }, { "epoch": 7.39, "learning_rate": 1.353246442403934e-05, "loss": 2.0264, "step": 140800 }, { "epoch": 7.4, "learning_rate": 1.3478066930675834e-05, "loss": 2.0021, "step": 141000 }, { "epoch": 7.41, "learning_rate": 1.3423941424779146e-05, "loss": 2.008, "step": 141200 }, { "epoch": 7.42, "learning_rate": 1.336954393141564e-05, "loss": 2.0192, "step": 141400 }, { "epoch": 7.43, "learning_rate": 1.3315146438052135e-05, "loss": 2.0169, "step": 141600 }, { "epoch": 7.44, "learning_rate": 1.326074894468863e-05, "loss": 2.0168, "step": 141800 }, { "epoch": 7.45, "learning_rate": 1.3206351451325125e-05, "loss": 2.0171, "step": 142000 }, { "epoch": 7.46, "learning_rate": 1.3151953957961619e-05, "loss": 2.0233, "step": 142200 }, { "epoch": 7.48, "learning_rate": 1.3097556464598113e-05, "loss": 2.0167, "step": 142400 }, { "epoch": 7.49, "learning_rate": 1.3043158971234607e-05, "loss": 2.02, "step": 142600 }, { "epoch": 7.5, "learning_rate": 1.29887614778711e-05, "loss": 2.0108, "step": 142800 }, { "epoch": 7.51, "learning_rate": 1.2934363984507594e-05, "loss": 2.0147, "step": 143000 }, { "epoch": 7.52, "learning_rate": 1.2879966491144088e-05, "loss": 2.0162, "step": 143200 }, { "epoch": 7.53, "learning_rate": 1.28258409852474e-05, "loss": 2.0203, "step": 143400 }, { "epoch": 7.54, "learning_rate": 1.2771443491883895e-05, "loss": 2.0186, "step": 143600 }, { "epoch": 7.55, "learning_rate": 1.2717045998520389e-05, "loss": 2.0121, "step": 143800 }, { "epoch": 7.56, "learning_rate": 1.2662648505156883e-05, "loss": 2.0089, "step": 144000 }, { "epoch": 7.57, "learning_rate": 1.2608251011793375e-05, "loss": 2.0045, "step": 144200 }, { "epoch": 7.58, "learning_rate": 1.255385351842987e-05, "loss": 2.0085, "step": 144400 }, { "epoch": 7.59, "learning_rate": 1.2499456025066365e-05, "loss": 2.0065, "step": 144600 }, { "epoch": 7.6, "learning_rate": 1.244505853170286e-05, "loss": 2.019, "step": 144800 }, { "epoch": 7.61, "learning_rate": 1.2390661038339354e-05, "loss": 2.0117, "step": 145000 }, { "epoch": 7.62, "learning_rate": 1.2336263544975848e-05, "loss": 2.0161, "step": 145200 }, { "epoch": 7.63, "learning_rate": 1.2281866051612342e-05, "loss": 2.0129, "step": 145400 }, { "epoch": 7.64, "learning_rate": 1.2227468558248836e-05, "loss": 2.0018, "step": 145600 }, { "epoch": 7.65, "learning_rate": 1.217307106488533e-05, "loss": 2.0042, "step": 145800 }, { "epoch": 7.66, "learning_rate": 1.2118673571521826e-05, "loss": 2.0114, "step": 146000 }, { "epoch": 7.67, "learning_rate": 1.206427607815832e-05, "loss": 2.0042, "step": 146200 }, { "epoch": 7.69, "learning_rate": 1.2009878584794813e-05, "loss": 2.0093, "step": 146400 }, { "epoch": 7.7, "learning_rate": 1.1955481091431307e-05, "loss": 1.9995, "step": 146600 }, { "epoch": 7.71, "learning_rate": 1.1901083598067801e-05, "loss": 2.0157, "step": 146800 }, { "epoch": 7.72, "learning_rate": 1.1846686104704295e-05, "loss": 2.0057, "step": 147000 }, { "epoch": 7.73, "learning_rate": 1.1792288611340791e-05, "loss": 2.0072, "step": 147200 }, { "epoch": 7.74, "learning_rate": 1.1738163105444102e-05, "loss": 1.9961, "step": 147400 }, { "epoch": 7.75, "learning_rate": 1.1683765612080596e-05, "loss": 2.012, "step": 147600 }, { "epoch": 7.76, "learning_rate": 1.1629640106183907e-05, "loss": 2.0102, "step": 147800 }, { "epoch": 7.77, "learning_rate": 1.157551460028722e-05, "loss": 2.0096, "step": 148000 }, { "epoch": 7.78, "learning_rate": 1.1521117106923714e-05, "loss": 1.9972, "step": 148200 }, { "epoch": 7.79, "learning_rate": 1.1466719613560208e-05, "loss": 2.0025, "step": 148400 }, { "epoch": 7.8, "learning_rate": 1.1412322120196702e-05, "loss": 1.9985, "step": 148600 }, { "epoch": 7.81, "learning_rate": 1.1357924626833196e-05, "loss": 2.001, "step": 148800 }, { "epoch": 7.82, "learning_rate": 1.130352713346969e-05, "loss": 2.0019, "step": 149000 }, { "epoch": 7.83, "learning_rate": 1.1249129640106184e-05, "loss": 1.9946, "step": 149200 }, { "epoch": 7.84, "learning_rate": 1.1194732146742679e-05, "loss": 1.9935, "step": 149400 }, { "epoch": 7.85, "learning_rate": 1.1140334653379173e-05, "loss": 2.002, "step": 149600 }, { "epoch": 7.86, "learning_rate": 1.1085937160015667e-05, "loss": 2.0009, "step": 149800 }, { "epoch": 7.87, "learning_rate": 1.1031539666652161e-05, "loss": 1.997, "step": 150000 }, { "epoch": 7.88, "learning_rate": 1.0977142173288655e-05, "loss": 2.0059, "step": 150200 }, { "epoch": 7.9, "learning_rate": 1.092274467992515e-05, "loss": 2.0007, "step": 150400 }, { "epoch": 7.91, "learning_rate": 1.0868347186561643e-05, "loss": 2.0027, "step": 150600 }, { "epoch": 7.92, "learning_rate": 1.0813949693198138e-05, "loss": 1.9926, "step": 150800 }, { "epoch": 7.93, "learning_rate": 1.0759552199834633e-05, "loss": 2.001, "step": 151000 }, { "epoch": 7.94, "learning_rate": 1.0705154706471126e-05, "loss": 2.001, "step": 151200 }, { "epoch": 7.95, "learning_rate": 1.0651029200574437e-05, "loss": 2.001, "step": 151400 }, { "epoch": 7.96, "learning_rate": 1.0596631707210933e-05, "loss": 1.9974, "step": 151600 }, { "epoch": 7.97, "learning_rate": 1.0542234213847427e-05, "loss": 2.0024, "step": 151800 }, { "epoch": 7.98, "learning_rate": 1.0487836720483921e-05, "loss": 2.0063, "step": 152000 }, { "epoch": 7.99, "learning_rate": 1.0433439227120415e-05, "loss": 1.9962, "step": 152200 }, { "epoch": 8.0, "learning_rate": 1.037904173375691e-05, "loss": 2.0064, "step": 152400 }, { "epoch": 8.01, "learning_rate": 1.0324644240393403e-05, "loss": 1.9966, "step": 152600 }, { "epoch": 8.02, "learning_rate": 1.0270246747029898e-05, "loss": 1.9983, "step": 152800 }, { "epoch": 8.03, "learning_rate": 1.0215849253666392e-05, "loss": 1.9881, "step": 153000 }, { "epoch": 8.04, "learning_rate": 1.0161451760302886e-05, "loss": 1.9954, "step": 153200 }, { "epoch": 8.05, "learning_rate": 1.010705426693938e-05, "loss": 2.0016, "step": 153400 }, { "epoch": 8.06, "learning_rate": 1.0052656773575874e-05, "loss": 1.9971, "step": 153600 }, { "epoch": 8.07, "learning_rate": 9.998259280212368e-06, "loss": 1.9996, "step": 153800 }, { "epoch": 8.08, "learning_rate": 9.943861786848862e-06, "loss": 2.0017, "step": 154000 }, { "epoch": 8.09, "learning_rate": 9.889464293485357e-06, "loss": 2.0009, "step": 154200 }, { "epoch": 8.1, "learning_rate": 9.83506680012185e-06, "loss": 1.996, "step": 154400 }, { "epoch": 8.12, "learning_rate": 9.780669306758345e-06, "loss": 1.99, "step": 154600 }, { "epoch": 8.13, "learning_rate": 9.726271813394839e-06, "loss": 1.9913, "step": 154800 }, { "epoch": 8.14, "learning_rate": 9.671874320031333e-06, "loss": 1.9959, "step": 155000 }, { "epoch": 8.15, "learning_rate": 9.617476826667827e-06, "loss": 1.9928, "step": 155200 }, { "epoch": 8.16, "learning_rate": 9.563079333304321e-06, "loss": 1.994, "step": 155400 }, { "epoch": 8.17, "learning_rate": 9.508681839940816e-06, "loss": 1.9984, "step": 155600 }, { "epoch": 8.18, "learning_rate": 9.454284346577311e-06, "loss": 1.9942, "step": 155800 }, { "epoch": 8.19, "learning_rate": 9.399886853213804e-06, "loss": 1.9934, "step": 156000 }, { "epoch": 8.2, "learning_rate": 9.345489359850298e-06, "loss": 1.989, "step": 156200 }, { "epoch": 8.21, "learning_rate": 9.291091866486792e-06, "loss": 1.9921, "step": 156400 }, { "epoch": 8.22, "learning_rate": 9.236694373123286e-06, "loss": 1.9912, "step": 156600 }, { "epoch": 8.23, "learning_rate": 9.182296879759782e-06, "loss": 1.9819, "step": 156800 }, { "epoch": 8.24, "learning_rate": 9.127899386396276e-06, "loss": 1.9904, "step": 157000 }, { "epoch": 8.25, "learning_rate": 9.073501893032769e-06, "loss": 1.9876, "step": 157200 }, { "epoch": 8.26, "learning_rate": 9.019376387136081e-06, "loss": 1.9904, "step": 157400 }, { "epoch": 8.27, "learning_rate": 8.964978893772575e-06, "loss": 1.9938, "step": 157600 }, { "epoch": 8.28, "learning_rate": 8.910853387875886e-06, "loss": 1.9868, "step": 157800 }, { "epoch": 8.29, "learning_rate": 8.856455894512382e-06, "loss": 1.9839, "step": 158000 }, { "epoch": 8.3, "learning_rate": 8.802058401148876e-06, "loss": 1.9929, "step": 158200 }, { "epoch": 8.31, "learning_rate": 8.74766090778537e-06, "loss": 1.9819, "step": 158400 }, { "epoch": 8.33, "learning_rate": 8.693263414421863e-06, "loss": 1.9854, "step": 158600 }, { "epoch": 8.34, "learning_rate": 8.638865921058357e-06, "loss": 1.9909, "step": 158800 }, { "epoch": 8.35, "learning_rate": 8.584468427694853e-06, "loss": 1.9958, "step": 159000 }, { "epoch": 8.36, "learning_rate": 8.530070934331347e-06, "loss": 1.9827, "step": 159200 }, { "epoch": 8.37, "learning_rate": 8.475673440967841e-06, "loss": 1.9871, "step": 159400 }, { "epoch": 8.38, "learning_rate": 8.421275947604335e-06, "loss": 1.9849, "step": 159600 }, { "epoch": 8.39, "learning_rate": 8.366878454240828e-06, "loss": 1.9951, "step": 159800 }, { "epoch": 8.4, "learning_rate": 8.312480960877324e-06, "loss": 1.9809, "step": 160000 } ], "logging_steps": 200, "max_steps": 190500, "num_train_epochs": 10, "save_steps": 40000, "total_flos": 8.08352616504361e+18, "trial_name": null, "trial_params": null }