diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4819 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.398950131233596, + "eval_steps": 200000, + "global_step": 160000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.4997000599880024e-06, + "loss": 8.6813, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 2.999400119976005e-06, + "loss": 8.0951, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.499100179964007e-06, + "loss": 7.7394, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 5.99880023995201e-06, + "loss": 7.4477, + "step": 800 + }, + { + "epoch": 0.05, + "learning_rate": 7.4985002999400115e-06, + "loss": 7.2516, + "step": 1000 + }, + { + "epoch": 0.06, + "learning_rate": 8.998200359928014e-06, + "loss": 7.1331, + "step": 1200 + }, + { + "epoch": 0.07, + "learning_rate": 1.0497900419916016e-05, + "loss": 7.0447, + "step": 1400 + }, + { + "epoch": 0.08, + "learning_rate": 1.199760047990402e-05, + "loss": 6.9683, + "step": 1600 + }, + { + "epoch": 0.09, + "learning_rate": 1.3497300539892021e-05, + "loss": 6.9037, + "step": 1800 + }, + { + "epoch": 0.1, + "learning_rate": 1.4997000599880023e-05, + "loss": 6.8472, + "step": 2000 + }, + { + "epoch": 0.12, + "learning_rate": 1.6496700659868028e-05, + "loss": 6.7907, + "step": 2200 + }, + { + "epoch": 0.13, + "learning_rate": 1.799640071985603e-05, + "loss": 6.7407, + "step": 2400 + }, + { + "epoch": 0.14, + "learning_rate": 1.9496100779844032e-05, + "loss": 6.7036, + "step": 2600 + }, + { + "epoch": 0.15, + "learning_rate": 2.0995800839832032e-05, + "loss": 6.6485, + "step": 2800 + }, + { + "epoch": 0.16, + "learning_rate": 2.249550089982004e-05, + "loss": 6.6153, + "step": 3000 + }, + { + "epoch": 0.17, + "learning_rate": 2.399520095980804e-05, + "loss": 6.5826, + "step": 3200 + }, + { + "epoch": 0.18, + "learning_rate": 2.5494901019796042e-05, + "loss": 6.553, + "step": 3400 + }, + { + "epoch": 0.19, + "learning_rate": 2.6994601079784043e-05, + "loss": 6.5222, + "step": 3600 + }, + { + "epoch": 0.2, + "learning_rate": 2.8494301139772046e-05, + "loss": 6.4979, + "step": 3800 + }, + { + "epoch": 0.21, + "learning_rate": 2.9994001199760046e-05, + "loss": 6.4695, + "step": 4000 + }, + { + "epoch": 0.22, + "learning_rate": 3.1493701259748056e-05, + "loss": 6.4505, + "step": 4200 + }, + { + "epoch": 0.23, + "learning_rate": 3.2993401319736057e-05, + "loss": 6.4254, + "step": 4400 + }, + { + "epoch": 0.24, + "learning_rate": 3.449310137972406e-05, + "loss": 6.412, + "step": 4600 + }, + { + "epoch": 0.25, + "learning_rate": 3.599280143971206e-05, + "loss": 6.3885, + "step": 4800 + }, + { + "epoch": 0.26, + "learning_rate": 3.7492501499700064e-05, + "loss": 6.3815, + "step": 5000 + }, + { + "epoch": 0.27, + "learning_rate": 3.8992201559688064e-05, + "loss": 6.3623, + "step": 5200 + }, + { + "epoch": 0.28, + "learning_rate": 4.0491901619676064e-05, + "loss": 6.3464, + "step": 5400 + }, + { + "epoch": 0.29, + "learning_rate": 4.1991601679664064e-05, + "loss": 6.3281, + "step": 5600 + }, + { + "epoch": 0.3, + "learning_rate": 4.349130173965207e-05, + "loss": 6.3324, + "step": 5800 + }, + { + "epoch": 0.31, + "learning_rate": 4.499100179964008e-05, + "loss": 6.3128, + "step": 6000 + }, + { + "epoch": 0.33, + "learning_rate": 4.649070185962808e-05, + "loss": 6.3033, + "step": 6200 + }, + { + "epoch": 0.34, + "learning_rate": 4.799040191961608e-05, + "loss": 6.3015, + "step": 6400 + }, + { + "epoch": 0.35, + "learning_rate": 4.949010197960408e-05, + "loss": 6.2881, + "step": 6600 + }, + { + "epoch": 0.36, + "learning_rate": 4.996409765438009e-05, + "loss": 6.2728, + "step": 6800 + }, + { + "epoch": 0.37, + "learning_rate": 4.990970016101658e-05, + "loss": 6.2617, + "step": 7000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9855302667653076e-05, + "loss": 6.2561, + "step": 7200 + }, + { + "epoch": 0.39, + "learning_rate": 4.980090517428957e-05, + "loss": 6.2531, + "step": 7400 + }, + { + "epoch": 0.4, + "learning_rate": 4.974650768092606e-05, + "loss": 6.2222, + "step": 7600 + }, + { + "epoch": 0.41, + "learning_rate": 4.9692110187562557e-05, + "loss": 6.2062, + "step": 7800 + }, + { + "epoch": 0.42, + "learning_rate": 4.963771269419905e-05, + "loss": 6.1925, + "step": 8000 + }, + { + "epoch": 0.43, + "learning_rate": 4.958331520083555e-05, + "loss": 6.1704, + "step": 8200 + }, + { + "epoch": 0.44, + "learning_rate": 4.952918969493886e-05, + "loss": 6.1479, + "step": 8400 + }, + { + "epoch": 0.45, + "learning_rate": 4.947479220157536e-05, + "loss": 6.1375, + "step": 8600 + }, + { + "epoch": 0.46, + "learning_rate": 4.9420394708211846e-05, + "loss": 6.1155, + "step": 8800 + }, + { + "epoch": 0.47, + "learning_rate": 4.936599721484834e-05, + "loss": 6.0921, + "step": 9000 + }, + { + "epoch": 0.48, + "learning_rate": 4.931159972148484e-05, + "loss": 6.0671, + "step": 9200 + }, + { + "epoch": 0.49, + "learning_rate": 4.9257202228121326e-05, + "loss": 6.0437, + "step": 9400 + }, + { + "epoch": 0.5, + "learning_rate": 4.920280473475782e-05, + "loss": 6.0032, + "step": 9600 + }, + { + "epoch": 0.51, + "learning_rate": 4.914840724139432e-05, + "loss": 5.9209, + "step": 9800 + }, + { + "epoch": 0.52, + "learning_rate": 4.9094009748030814e-05, + "loss": 5.8316, + "step": 10000 + }, + { + "epoch": 0.54, + "learning_rate": 4.90396122546673e-05, + "loss": 5.7568, + "step": 10200 + }, + { + "epoch": 0.55, + "learning_rate": 4.89852147613038e-05, + "loss": 5.6574, + "step": 10400 + }, + { + "epoch": 0.56, + "learning_rate": 4.893108925540711e-05, + "loss": 5.5117, + "step": 10600 + }, + { + "epoch": 0.57, + "learning_rate": 4.887669176204361e-05, + "loss": 5.3986, + "step": 10800 + }, + { + "epoch": 0.58, + "learning_rate": 4.88222942686801e-05, + "loss": 5.2336, + "step": 11000 + }, + { + "epoch": 0.59, + "learning_rate": 4.87678967753166e-05, + "loss": 5.0519, + "step": 11200 + }, + { + "epoch": 0.6, + "learning_rate": 4.871349928195309e-05, + "loss": 4.9005, + "step": 11400 + }, + { + "epoch": 0.61, + "learning_rate": 4.8659101788589584e-05, + "loss": 4.769, + "step": 11600 + }, + { + "epoch": 0.62, + "learning_rate": 4.860470429522608e-05, + "loss": 4.6484, + "step": 11800 + }, + { + "epoch": 0.63, + "learning_rate": 4.855030680186257e-05, + "loss": 4.5375, + "step": 12000 + }, + { + "epoch": 0.64, + "learning_rate": 4.8495909308499065e-05, + "loss": 4.4369, + "step": 12200 + }, + { + "epoch": 0.65, + "learning_rate": 4.844151181513556e-05, + "loss": 4.3437, + "step": 12400 + }, + { + "epoch": 0.66, + "learning_rate": 4.8387114321772056e-05, + "loss": 4.267, + "step": 12600 + }, + { + "epoch": 0.67, + "learning_rate": 4.8332716828408545e-05, + "loss": 4.161, + "step": 12800 + }, + { + "epoch": 0.68, + "learning_rate": 4.827831933504504e-05, + "loss": 4.0868, + "step": 13000 + }, + { + "epoch": 0.69, + "learning_rate": 4.822392184168154e-05, + "loss": 4.0029, + "step": 13200 + }, + { + "epoch": 0.7, + "learning_rate": 4.8169524348318026e-05, + "loss": 3.9486, + "step": 13400 + }, + { + "epoch": 0.71, + "learning_rate": 4.811512685495453e-05, + "loss": 3.8743, + "step": 13600 + }, + { + "epoch": 0.72, + "learning_rate": 4.8060729361591025e-05, + "loss": 3.8206, + "step": 13800 + }, + { + "epoch": 0.73, + "learning_rate": 4.8006331868227514e-05, + "loss": 3.7676, + "step": 14000 + }, + { + "epoch": 0.75, + "learning_rate": 4.795193437486401e-05, + "loss": 3.7225, + "step": 14200 + }, + { + "epoch": 0.76, + "learning_rate": 4.7897536881500505e-05, + "loss": 3.6837, + "step": 14400 + }, + { + "epoch": 0.77, + "learning_rate": 4.784341137560381e-05, + "loss": 3.6421, + "step": 14600 + }, + { + "epoch": 0.78, + "learning_rate": 4.778901388224031e-05, + "loss": 3.6167, + "step": 14800 + }, + { + "epoch": 0.79, + "learning_rate": 4.77346163888768e-05, + "loss": 3.5802, + "step": 15000 + }, + { + "epoch": 0.8, + "learning_rate": 4.76802188955133e-05, + "loss": 3.5469, + "step": 15200 + }, + { + "epoch": 0.81, + "learning_rate": 4.762582140214979e-05, + "loss": 3.5208, + "step": 15400 + }, + { + "epoch": 0.82, + "learning_rate": 4.7571423908786284e-05, + "loss": 3.494, + "step": 15600 + }, + { + "epoch": 0.83, + "learning_rate": 4.7517298402889596e-05, + "loss": 3.4647, + "step": 15800 + }, + { + "epoch": 0.84, + "learning_rate": 4.746290090952609e-05, + "loss": 3.4417, + "step": 16000 + }, + { + "epoch": 0.85, + "learning_rate": 4.740850341616259e-05, + "loss": 3.4267, + "step": 16200 + }, + { + "epoch": 0.86, + "learning_rate": 4.735410592279908e-05, + "loss": 3.3992, + "step": 16400 + }, + { + "epoch": 0.87, + "learning_rate": 4.729970842943557e-05, + "loss": 3.3831, + "step": 16600 + }, + { + "epoch": 0.88, + "learning_rate": 4.724531093607207e-05, + "loss": 3.3647, + "step": 16800 + }, + { + "epoch": 0.89, + "learning_rate": 4.7190913442708564e-05, + "loss": 3.3377, + "step": 17000 + }, + { + "epoch": 0.9, + "learning_rate": 4.7136515949345054e-05, + "loss": 3.3197, + "step": 17200 + }, + { + "epoch": 0.91, + "learning_rate": 4.708211845598155e-05, + "loss": 3.2985, + "step": 17400 + }, + { + "epoch": 0.92, + "learning_rate": 4.7027720962618045e-05, + "loss": 3.287, + "step": 17600 + }, + { + "epoch": 0.93, + "learning_rate": 4.697332346925454e-05, + "loss": 3.2748, + "step": 17800 + }, + { + "epoch": 0.94, + "learning_rate": 4.691892597589103e-05, + "loss": 3.2557, + "step": 18000 + }, + { + "epoch": 0.96, + "learning_rate": 4.6864528482527526e-05, + "loss": 3.2419, + "step": 18200 + }, + { + "epoch": 0.97, + "learning_rate": 4.681013098916402e-05, + "loss": 3.2286, + "step": 18400 + }, + { + "epoch": 0.98, + "learning_rate": 4.675573349580051e-05, + "loss": 3.2102, + "step": 18600 + }, + { + "epoch": 0.99, + "learning_rate": 4.670133600243701e-05, + "loss": 3.1987, + "step": 18800 + }, + { + "epoch": 1.0, + "learning_rate": 4.664693850907351e-05, + "loss": 3.1854, + "step": 19000 + }, + { + "epoch": 1.01, + "learning_rate": 4.659254101571e-05, + "loss": 3.1682, + "step": 19200 + }, + { + "epoch": 1.02, + "learning_rate": 4.6538143522346494e-05, + "loss": 3.1562, + "step": 19400 + }, + { + "epoch": 1.03, + "learning_rate": 4.648374602898299e-05, + "loss": 3.1366, + "step": 19600 + }, + { + "epoch": 1.04, + "learning_rate": 4.6429620523086296e-05, + "loss": 3.1273, + "step": 19800 + }, + { + "epoch": 1.05, + "learning_rate": 4.637522302972279e-05, + "loss": 3.1139, + "step": 20000 + }, + { + "epoch": 1.06, + "learning_rate": 4.632082553635929e-05, + "loss": 3.1045, + "step": 20200 + }, + { + "epoch": 1.07, + "learning_rate": 4.6266428042995777e-05, + "loss": 3.0962, + "step": 20400 + }, + { + "epoch": 1.08, + "learning_rate": 4.6212302537099096e-05, + "loss": 3.0913, + "step": 20600 + }, + { + "epoch": 1.09, + "learning_rate": 4.615790504373559e-05, + "loss": 3.0805, + "step": 20800 + }, + { + "epoch": 1.1, + "learning_rate": 4.610350755037208e-05, + "loss": 3.0662, + "step": 21000 + }, + { + "epoch": 1.11, + "learning_rate": 4.604911005700858e-05, + "loss": 3.0485, + "step": 21200 + }, + { + "epoch": 1.12, + "learning_rate": 4.599471256364507e-05, + "loss": 3.0438, + "step": 21400 + }, + { + "epoch": 1.13, + "learning_rate": 4.594031507028156e-05, + "loss": 3.0368, + "step": 21600 + }, + { + "epoch": 1.14, + "learning_rate": 4.588591757691806e-05, + "loss": 3.0248, + "step": 21800 + }, + { + "epoch": 1.15, + "learning_rate": 4.583152008355455e-05, + "loss": 3.0124, + "step": 22000 + }, + { + "epoch": 1.17, + "learning_rate": 4.577712259019105e-05, + "loss": 3.0025, + "step": 22200 + }, + { + "epoch": 1.18, + "learning_rate": 4.572272509682754e-05, + "loss": 2.9902, + "step": 22400 + }, + { + "epoch": 1.19, + "learning_rate": 4.5668327603464034e-05, + "loss": 2.9838, + "step": 22600 + }, + { + "epoch": 1.2, + "learning_rate": 4.561393011010053e-05, + "loss": 2.9701, + "step": 22800 + }, + { + "epoch": 1.21, + "learning_rate": 4.555953261673702e-05, + "loss": 2.9594, + "step": 23000 + }, + { + "epoch": 1.22, + "learning_rate": 4.5505135123373515e-05, + "loss": 2.9549, + "step": 23200 + }, + { + "epoch": 1.23, + "learning_rate": 4.545073763001001e-05, + "loss": 2.9462, + "step": 23400 + }, + { + "epoch": 1.24, + "learning_rate": 4.5396340136646506e-05, + "loss": 2.9471, + "step": 23600 + }, + { + "epoch": 1.25, + "learning_rate": 4.5341942643282996e-05, + "loss": 2.9271, + "step": 23800 + }, + { + "epoch": 1.26, + "learning_rate": 4.5287817137386315e-05, + "loss": 2.9241, + "step": 24000 + }, + { + "epoch": 1.27, + "learning_rate": 4.5233419644022804e-05, + "loss": 2.9156, + "step": 24200 + }, + { + "epoch": 1.28, + "learning_rate": 4.51790221506593e-05, + "loss": 2.9079, + "step": 24400 + }, + { + "epoch": 1.29, + "learning_rate": 4.5124624657295796e-05, + "loss": 2.898, + "step": 24600 + }, + { + "epoch": 1.3, + "learning_rate": 4.507022716393229e-05, + "loss": 2.8902, + "step": 24800 + }, + { + "epoch": 1.31, + "learning_rate": 4.501582967056878e-05, + "loss": 2.8921, + "step": 25000 + }, + { + "epoch": 1.32, + "learning_rate": 4.4961432177205276e-05, + "loss": 2.8749, + "step": 25200 + }, + { + "epoch": 1.33, + "learning_rate": 4.490703468384177e-05, + "loss": 2.8724, + "step": 25400 + }, + { + "epoch": 1.34, + "learning_rate": 4.485263719047826e-05, + "loss": 2.8649, + "step": 25600 + }, + { + "epoch": 1.35, + "learning_rate": 4.479823969711476e-05, + "loss": 2.8586, + "step": 25800 + }, + { + "epoch": 1.36, + "learning_rate": 4.474384220375125e-05, + "loss": 2.8421, + "step": 26000 + }, + { + "epoch": 1.38, + "learning_rate": 4.468944471038775e-05, + "loss": 2.843, + "step": 26200 + }, + { + "epoch": 1.39, + "learning_rate": 4.463504721702424e-05, + "loss": 2.8397, + "step": 26400 + }, + { + "epoch": 1.4, + "learning_rate": 4.4580649723660734e-05, + "loss": 2.8311, + "step": 26600 + }, + { + "epoch": 1.41, + "learning_rate": 4.452625223029723e-05, + "loss": 2.8143, + "step": 26800 + }, + { + "epoch": 1.42, + "learning_rate": 4.4471854736933725e-05, + "loss": 2.8106, + "step": 27000 + }, + { + "epoch": 1.43, + "learning_rate": 4.441745724357022e-05, + "loss": 2.8199, + "step": 27200 + }, + { + "epoch": 1.44, + "learning_rate": 4.436305975020672e-05, + "loss": 2.8039, + "step": 27400 + }, + { + "epoch": 1.45, + "learning_rate": 4.4308662256843206e-05, + "loss": 2.7975, + "step": 27600 + }, + { + "epoch": 1.46, + "learning_rate": 4.425453675094652e-05, + "loss": 2.7903, + "step": 27800 + }, + { + "epoch": 1.47, + "learning_rate": 4.4200139257583015e-05, + "loss": 2.7907, + "step": 28000 + }, + { + "epoch": 1.48, + "learning_rate": 4.4145741764219504e-05, + "loss": 2.7836, + "step": 28200 + }, + { + "epoch": 1.49, + "learning_rate": 4.4091344270856e-05, + "loss": 2.7825, + "step": 28400 + }, + { + "epoch": 1.5, + "learning_rate": 4.4036946777492495e-05, + "loss": 2.765, + "step": 28600 + }, + { + "epoch": 1.51, + "learning_rate": 4.3982549284128984e-05, + "loss": 2.773, + "step": 28800 + }, + { + "epoch": 1.52, + "learning_rate": 4.392815179076548e-05, + "loss": 2.7608, + "step": 29000 + }, + { + "epoch": 1.53, + "learning_rate": 4.3873754297401976e-05, + "loss": 2.76, + "step": 29200 + }, + { + "epoch": 1.54, + "learning_rate": 4.381935680403847e-05, + "loss": 2.7613, + "step": 29400 + }, + { + "epoch": 1.55, + "learning_rate": 4.376495931067497e-05, + "loss": 2.7319, + "step": 29600 + }, + { + "epoch": 1.56, + "learning_rate": 4.3710561817311464e-05, + "loss": 2.7377, + "step": 29800 + }, + { + "epoch": 1.57, + "learning_rate": 4.365616432394796e-05, + "loss": 2.736, + "step": 30000 + }, + { + "epoch": 1.59, + "learning_rate": 4.360176683058445e-05, + "loss": 2.7348, + "step": 30200 + }, + { + "epoch": 1.6, + "learning_rate": 4.3547369337220944e-05, + "loss": 2.7285, + "step": 30400 + }, + { + "epoch": 1.61, + "learning_rate": 4.349324383132426e-05, + "loss": 2.7299, + "step": 30600 + }, + { + "epoch": 1.62, + "learning_rate": 4.3438846337960746e-05, + "loss": 2.7208, + "step": 30800 + }, + { + "epoch": 1.63, + "learning_rate": 4.338444884459724e-05, + "loss": 2.7115, + "step": 31000 + }, + { + "epoch": 1.64, + "learning_rate": 4.333005135123374e-05, + "loss": 2.7033, + "step": 31200 + }, + { + "epoch": 1.65, + "learning_rate": 4.327565385787023e-05, + "loss": 2.6996, + "step": 31400 + }, + { + "epoch": 1.66, + "learning_rate": 4.322125636450672e-05, + "loss": 2.6925, + "step": 31600 + }, + { + "epoch": 1.67, + "learning_rate": 4.316685887114322e-05, + "loss": 2.6896, + "step": 31800 + }, + { + "epoch": 1.68, + "learning_rate": 4.3112461377779714e-05, + "loss": 2.6846, + "step": 32000 + }, + { + "epoch": 1.69, + "learning_rate": 4.30580638844162e-05, + "loss": 2.6848, + "step": 32200 + }, + { + "epoch": 1.7, + "learning_rate": 4.3003666391052706e-05, + "loss": 2.6764, + "step": 32400 + }, + { + "epoch": 1.71, + "learning_rate": 4.29492688976892e-05, + "loss": 2.6846, + "step": 32600 + }, + { + "epoch": 1.72, + "learning_rate": 4.289487140432569e-05, + "loss": 2.6742, + "step": 32800 + }, + { + "epoch": 1.73, + "learning_rate": 4.2840745898429003e-05, + "loss": 2.6667, + "step": 33000 + }, + { + "epoch": 1.74, + "learning_rate": 4.27863484050655e-05, + "loss": 2.6632, + "step": 33200 + }, + { + "epoch": 1.75, + "learning_rate": 4.273195091170199e-05, + "loss": 2.6593, + "step": 33400 + }, + { + "epoch": 1.76, + "learning_rate": 4.2677553418338484e-05, + "loss": 2.6568, + "step": 33600 + }, + { + "epoch": 1.77, + "learning_rate": 4.262315592497498e-05, + "loss": 2.6514, + "step": 33800 + }, + { + "epoch": 1.78, + "learning_rate": 4.256875843161147e-05, + "loss": 2.648, + "step": 34000 + }, + { + "epoch": 1.8, + "learning_rate": 4.2514360938247965e-05, + "loss": 2.6362, + "step": 34200 + }, + { + "epoch": 1.81, + "learning_rate": 4.245996344488446e-05, + "loss": 2.6468, + "step": 34400 + }, + { + "epoch": 1.82, + "learning_rate": 4.240556595152096e-05, + "loss": 2.6268, + "step": 34600 + }, + { + "epoch": 1.83, + "learning_rate": 4.2351168458157446e-05, + "loss": 2.622, + "step": 34800 + }, + { + "epoch": 1.84, + "learning_rate": 4.2297042952260765e-05, + "loss": 2.6178, + "step": 35000 + }, + { + "epoch": 1.85, + "learning_rate": 4.2242645458897254e-05, + "loss": 2.6212, + "step": 35200 + }, + { + "epoch": 1.86, + "learning_rate": 4.218824796553375e-05, + "loss": 2.6161, + "step": 35400 + }, + { + "epoch": 1.87, + "learning_rate": 4.2133850472170246e-05, + "loss": 2.6139, + "step": 35600 + }, + { + "epoch": 1.88, + "learning_rate": 4.2079452978806735e-05, + "loss": 2.6165, + "step": 35800 + }, + { + "epoch": 1.89, + "learning_rate": 4.202505548544323e-05, + "loss": 2.6044, + "step": 36000 + }, + { + "epoch": 1.9, + "learning_rate": 4.1970657992079727e-05, + "loss": 2.6075, + "step": 36200 + }, + { + "epoch": 1.91, + "learning_rate": 4.191626049871622e-05, + "loss": 2.604, + "step": 36400 + }, + { + "epoch": 1.92, + "learning_rate": 4.186186300535271e-05, + "loss": 2.6026, + "step": 36600 + }, + { + "epoch": 1.93, + "learning_rate": 4.180746551198921e-05, + "loss": 2.6024, + "step": 36800 + }, + { + "epoch": 1.94, + "learning_rate": 4.175334000609252e-05, + "loss": 2.5888, + "step": 37000 + }, + { + "epoch": 1.95, + "learning_rate": 4.1698942512729016e-05, + "loss": 2.5914, + "step": 37200 + }, + { + "epoch": 1.96, + "learning_rate": 4.164454501936551e-05, + "loss": 2.592, + "step": 37400 + }, + { + "epoch": 1.97, + "learning_rate": 4.159014752600201e-05, + "loss": 2.5822, + "step": 37600 + }, + { + "epoch": 1.98, + "learning_rate": 4.1535750032638496e-05, + "loss": 2.5845, + "step": 37800 + }, + { + "epoch": 1.99, + "learning_rate": 4.148135253927499e-05, + "loss": 2.5731, + "step": 38000 + }, + { + "epoch": 2.01, + "learning_rate": 4.142695504591149e-05, + "loss": 2.5695, + "step": 38200 + }, + { + "epoch": 2.02, + "learning_rate": 4.137255755254798e-05, + "loss": 2.5682, + "step": 38400 + }, + { + "epoch": 2.03, + "learning_rate": 4.131816005918447e-05, + "loss": 2.5654, + "step": 38600 + }, + { + "epoch": 2.04, + "learning_rate": 4.126376256582097e-05, + "loss": 2.5641, + "step": 38800 + }, + { + "epoch": 2.05, + "learning_rate": 4.1209365072457465e-05, + "loss": 2.554, + "step": 39000 + }, + { + "epoch": 2.06, + "learning_rate": 4.115523956656078e-05, + "loss": 2.5569, + "step": 39200 + }, + { + "epoch": 2.07, + "learning_rate": 4.110084207319727e-05, + "loss": 2.5503, + "step": 39400 + }, + { + "epoch": 2.08, + "learning_rate": 4.104644457983376e-05, + "loss": 2.5554, + "step": 39600 + }, + { + "epoch": 2.09, + "learning_rate": 4.099204708647026e-05, + "loss": 2.552, + "step": 39800 + }, + { + "epoch": 2.1, + "learning_rate": 4.0937649593106754e-05, + "loss": 2.5564, + "step": 40000 + }, + { + "epoch": 2.11, + "learning_rate": 4.088325209974325e-05, + "loss": 2.5373, + "step": 40200 + }, + { + "epoch": 2.12, + "learning_rate": 4.082885460637974e-05, + "loss": 2.5377, + "step": 40400 + }, + { + "epoch": 2.13, + "learning_rate": 4.0774457113016235e-05, + "loss": 2.5404, + "step": 40600 + }, + { + "epoch": 2.14, + "learning_rate": 4.072005961965273e-05, + "loss": 2.5369, + "step": 40800 + }, + { + "epoch": 2.15, + "learning_rate": 4.066566212628922e-05, + "loss": 2.5352, + "step": 41000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0611808607859356e-05, + "loss": 2.5284, + "step": 41200 + }, + { + "epoch": 2.17, + "learning_rate": 4.0557411114495845e-05, + "loss": 2.5308, + "step": 41400 + }, + { + "epoch": 2.18, + "learning_rate": 4.050301362113234e-05, + "loss": 2.5202, + "step": 41600 + }, + { + "epoch": 2.19, + "learning_rate": 4.0448616127768836e-05, + "loss": 2.5199, + "step": 41800 + }, + { + "epoch": 2.2, + "learning_rate": 4.0394218634405325e-05, + "loss": 2.5074, + "step": 42000 + }, + { + "epoch": 2.22, + "learning_rate": 4.033982114104182e-05, + "loss": 2.5086, + "step": 42200 + }, + { + "epoch": 2.23, + "learning_rate": 4.028542364767832e-05, + "loss": 2.5125, + "step": 42400 + }, + { + "epoch": 2.24, + "learning_rate": 4.023102615431481e-05, + "loss": 2.5082, + "step": 42600 + }, + { + "epoch": 2.25, + "learning_rate": 4.01766286609513e-05, + "loss": 2.4999, + "step": 42800 + }, + { + "epoch": 2.26, + "learning_rate": 4.01222311675878e-05, + "loss": 2.5073, + "step": 43000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0067833674224294e-05, + "loss": 2.4998, + "step": 43200 + }, + { + "epoch": 2.28, + "learning_rate": 4.001343618086078e-05, + "loss": 2.4994, + "step": 43400 + }, + { + "epoch": 2.29, + "learning_rate": 3.995903868749728e-05, + "loss": 2.4952, + "step": 43600 + }, + { + "epoch": 2.3, + "learning_rate": 3.9904641194133775e-05, + "loss": 2.4914, + "step": 43800 + }, + { + "epoch": 2.31, + "learning_rate": 3.985024370077027e-05, + "loss": 2.4919, + "step": 44000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9795846207406766e-05, + "loss": 2.4884, + "step": 44200 + }, + { + "epoch": 2.33, + "learning_rate": 3.974144871404326e-05, + "loss": 2.4886, + "step": 44400 + }, + { + "epoch": 2.34, + "learning_rate": 3.968705122067976e-05, + "loss": 2.4902, + "step": 44600 + }, + { + "epoch": 2.35, + "learning_rate": 3.963265372731625e-05, + "loss": 2.4784, + "step": 44800 + }, + { + "epoch": 2.36, + "learning_rate": 3.957825623395274e-05, + "loss": 2.479, + "step": 45000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9524130728056055e-05, + "loss": 2.463, + "step": 45200 + }, + { + "epoch": 2.38, + "learning_rate": 3.9469733234692544e-05, + "loss": 2.4778, + "step": 45400 + }, + { + "epoch": 2.39, + "learning_rate": 3.941533574132904e-05, + "loss": 2.4758, + "step": 45600 + }, + { + "epoch": 2.4, + "learning_rate": 3.9360938247965536e-05, + "loss": 2.4627, + "step": 45800 + }, + { + "epoch": 2.41, + "learning_rate": 3.9306540754602025e-05, + "loss": 2.456, + "step": 46000 + }, + { + "epoch": 2.43, + "learning_rate": 3.925214326123852e-05, + "loss": 2.4624, + "step": 46200 + }, + { + "epoch": 2.44, + "learning_rate": 3.919774576787502e-05, + "loss": 2.4643, + "step": 46400 + }, + { + "epoch": 2.45, + "learning_rate": 3.914334827451151e-05, + "loss": 2.466, + "step": 46600 + }, + { + "epoch": 2.46, + "learning_rate": 3.908895078114801e-05, + "loss": 2.4556, + "step": 46800 + }, + { + "epoch": 2.47, + "learning_rate": 3.9034553287784504e-05, + "loss": 2.4566, + "step": 47000 + }, + { + "epoch": 2.48, + "learning_rate": 3.898042778188781e-05, + "loss": 2.4527, + "step": 47200 + }, + { + "epoch": 2.49, + "learning_rate": 3.8926030288524306e-05, + "loss": 2.452, + "step": 47400 + }, + { + "epoch": 2.5, + "learning_rate": 3.88716327951608e-05, + "loss": 2.439, + "step": 47600 + }, + { + "epoch": 2.51, + "learning_rate": 3.88172353017973e-05, + "loss": 2.4507, + "step": 47800 + }, + { + "epoch": 2.52, + "learning_rate": 3.876283780843379e-05, + "loss": 2.4393, + "step": 48000 + }, + { + "epoch": 2.53, + "learning_rate": 3.870844031507028e-05, + "loss": 2.4411, + "step": 48200 + }, + { + "epoch": 2.54, + "learning_rate": 3.865404282170678e-05, + "loss": 2.449, + "step": 48400 + }, + { + "epoch": 2.55, + "learning_rate": 3.859964532834327e-05, + "loss": 2.4413, + "step": 48600 + }, + { + "epoch": 2.56, + "learning_rate": 3.854524783497976e-05, + "loss": 2.4304, + "step": 48800 + }, + { + "epoch": 2.57, + "learning_rate": 3.849085034161626e-05, + "loss": 2.4276, + "step": 49000 + }, + { + "epoch": 2.58, + "learning_rate": 3.843672483571957e-05, + "loss": 2.4336, + "step": 49200 + }, + { + "epoch": 2.59, + "learning_rate": 3.838232734235607e-05, + "loss": 2.4269, + "step": 49400 + }, + { + "epoch": 2.6, + "learning_rate": 3.8327929848992563e-05, + "loss": 2.4291, + "step": 49600 + }, + { + "epoch": 2.61, + "learning_rate": 3.827353235562905e-05, + "loss": 2.4313, + "step": 49800 + }, + { + "epoch": 2.62, + "learning_rate": 3.821913486226555e-05, + "loss": 2.4198, + "step": 50000 + }, + { + "epoch": 2.64, + "learning_rate": 3.8164737368902044e-05, + "loss": 2.4152, + "step": 50200 + }, + { + "epoch": 2.65, + "learning_rate": 3.811033987553853e-05, + "loss": 2.4158, + "step": 50400 + }, + { + "epoch": 2.66, + "learning_rate": 3.805594238217503e-05, + "loss": 2.4107, + "step": 50600 + }, + { + "epoch": 2.67, + "learning_rate": 3.8001544888811525e-05, + "loss": 2.4144, + "step": 50800 + }, + { + "epoch": 2.68, + "learning_rate": 3.794714739544802e-05, + "loss": 2.4076, + "step": 51000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7893021889551333e-05, + "loss": 2.4076, + "step": 51200 + }, + { + "epoch": 2.7, + "learning_rate": 3.783862439618783e-05, + "loss": 2.4016, + "step": 51400 + }, + { + "epoch": 2.71, + "learning_rate": 3.7784498890291135e-05, + "loss": 2.4178, + "step": 51600 + }, + { + "epoch": 2.72, + "learning_rate": 3.773010139692763e-05, + "loss": 2.4059, + "step": 51800 + }, + { + "epoch": 2.73, + "learning_rate": 3.767570390356413e-05, + "loss": 2.3955, + "step": 52000 + }, + { + "epoch": 2.74, + "learning_rate": 3.7621306410200616e-05, + "loss": 2.4031, + "step": 52200 + }, + { + "epoch": 2.75, + "learning_rate": 3.756690891683711e-05, + "loss": 2.4019, + "step": 52400 + }, + { + "epoch": 2.76, + "learning_rate": 3.751251142347361e-05, + "loss": 2.3981, + "step": 52600 + }, + { + "epoch": 2.77, + "learning_rate": 3.74581139301101e-05, + "loss": 2.3988, + "step": 52800 + }, + { + "epoch": 2.78, + "learning_rate": 3.740371643674659e-05, + "loss": 2.3848, + "step": 53000 + }, + { + "epoch": 2.79, + "learning_rate": 3.734931894338309e-05, + "loss": 2.3876, + "step": 53200 + }, + { + "epoch": 2.8, + "learning_rate": 3.729492145001959e-05, + "loss": 2.3849, + "step": 53400 + }, + { + "epoch": 2.81, + "learning_rate": 3.724052395665608e-05, + "loss": 2.3869, + "step": 53600 + }, + { + "epoch": 2.82, + "learning_rate": 3.7186126463292576e-05, + "loss": 2.3827, + "step": 53800 + }, + { + "epoch": 2.83, + "learning_rate": 3.713172896992907e-05, + "loss": 2.379, + "step": 54000 + }, + { + "epoch": 2.85, + "learning_rate": 3.707733147656556e-05, + "loss": 2.3768, + "step": 54200 + }, + { + "epoch": 2.86, + "learning_rate": 3.7022933983202057e-05, + "loss": 2.3795, + "step": 54400 + }, + { + "epoch": 2.87, + "learning_rate": 3.696853648983855e-05, + "loss": 2.3738, + "step": 54600 + }, + { + "epoch": 2.88, + "learning_rate": 3.691413899647505e-05, + "loss": 2.378, + "step": 54800 + }, + { + "epoch": 2.89, + "learning_rate": 3.685974150311154e-05, + "loss": 2.3671, + "step": 55000 + }, + { + "epoch": 2.9, + "learning_rate": 3.680534400974803e-05, + "loss": 2.3694, + "step": 55200 + }, + { + "epoch": 2.91, + "learning_rate": 3.6751218503851346e-05, + "loss": 2.3796, + "step": 55400 + }, + { + "epoch": 2.92, + "learning_rate": 3.6696821010487835e-05, + "loss": 2.3653, + "step": 55600 + }, + { + "epoch": 2.93, + "learning_rate": 3.664242351712433e-05, + "loss": 2.3676, + "step": 55800 + }, + { + "epoch": 2.94, + "learning_rate": 3.6588026023760826e-05, + "loss": 2.3658, + "step": 56000 + }, + { + "epoch": 2.95, + "learning_rate": 3.653362853039732e-05, + "loss": 2.3721, + "step": 56200 + }, + { + "epoch": 2.96, + "learning_rate": 3.647923103703382e-05, + "loss": 2.3668, + "step": 56400 + }, + { + "epoch": 2.97, + "learning_rate": 3.6424833543670314e-05, + "loss": 2.3639, + "step": 56600 + }, + { + "epoch": 2.98, + "learning_rate": 3.63704360503068e-05, + "loss": 2.3628, + "step": 56800 + }, + { + "epoch": 2.99, + "learning_rate": 3.63160385569433e-05, + "loss": 2.3688, + "step": 57000 + }, + { + "epoch": 3.0, + "learning_rate": 3.6261641063579795e-05, + "loss": 2.3577, + "step": 57200 + }, + { + "epoch": 3.01, + "learning_rate": 3.6207243570216284e-05, + "loss": 2.353, + "step": 57400 + }, + { + "epoch": 3.02, + "learning_rate": 3.615284607685278e-05, + "loss": 2.3509, + "step": 57600 + }, + { + "epoch": 3.03, + "learning_rate": 3.6098448583489275e-05, + "loss": 2.3409, + "step": 57800 + }, + { + "epoch": 3.04, + "learning_rate": 3.604405109012577e-05, + "loss": 2.3402, + "step": 58000 + }, + { + "epoch": 3.06, + "learning_rate": 3.598965359676226e-05, + "loss": 2.3542, + "step": 58200 + }, + { + "epoch": 3.07, + "learning_rate": 3.5935256103398756e-05, + "loss": 2.346, + "step": 58400 + }, + { + "epoch": 3.08, + "learning_rate": 3.588085861003525e-05, + "loss": 2.349, + "step": 58600 + }, + { + "epoch": 3.09, + "learning_rate": 3.582646111667175e-05, + "loss": 2.3462, + "step": 58800 + }, + { + "epoch": 3.1, + "learning_rate": 3.577206362330824e-05, + "loss": 2.3458, + "step": 59000 + }, + { + "epoch": 3.11, + "learning_rate": 3.571766612994473e-05, + "loss": 2.3351, + "step": 59200 + }, + { + "epoch": 3.12, + "learning_rate": 3.5663540624048045e-05, + "loss": 2.3336, + "step": 59400 + }, + { + "epoch": 3.13, + "learning_rate": 3.560914313068454e-05, + "loss": 2.3458, + "step": 59600 + }, + { + "epoch": 3.14, + "learning_rate": 3.555474563732104e-05, + "loss": 2.3354, + "step": 59800 + }, + { + "epoch": 3.15, + "learning_rate": 3.550062013142434e-05, + "loss": 2.3382, + "step": 60000 + }, + { + "epoch": 3.16, + "learning_rate": 3.544622263806084e-05, + "loss": 2.3381, + "step": 60200 + }, + { + "epoch": 3.17, + "learning_rate": 3.5391825144697335e-05, + "loss": 2.3287, + "step": 60400 + }, + { + "epoch": 3.18, + "learning_rate": 3.5337427651333824e-05, + "loss": 2.3267, + "step": 60600 + }, + { + "epoch": 3.19, + "learning_rate": 3.528303015797032e-05, + "loss": 2.3295, + "step": 60800 + }, + { + "epoch": 3.2, + "learning_rate": 3.5228632664606815e-05, + "loss": 2.3224, + "step": 61000 + }, + { + "epoch": 3.21, + "learning_rate": 3.517423517124331e-05, + "loss": 2.3151, + "step": 61200 + }, + { + "epoch": 3.22, + "learning_rate": 3.511983767787981e-05, + "loss": 2.3209, + "step": 61400 + }, + { + "epoch": 3.23, + "learning_rate": 3.50654401845163e-05, + "loss": 2.3215, + "step": 61600 + }, + { + "epoch": 3.24, + "learning_rate": 3.50110426911528e-05, + "loss": 2.3163, + "step": 61800 + }, + { + "epoch": 3.25, + "learning_rate": 3.495664519778929e-05, + "loss": 2.3137, + "step": 62000 + }, + { + "epoch": 3.27, + "learning_rate": 3.4902247704425784e-05, + "loss": 2.3109, + "step": 62200 + }, + { + "epoch": 3.28, + "learning_rate": 3.484785021106228e-05, + "loss": 2.3158, + "step": 62400 + }, + { + "epoch": 3.29, + "learning_rate": 3.4793724705165585e-05, + "loss": 2.3133, + "step": 62600 + }, + { + "epoch": 3.3, + "learning_rate": 3.473932721180208e-05, + "loss": 2.3127, + "step": 62800 + }, + { + "epoch": 3.31, + "learning_rate": 3.468492971843858e-05, + "loss": 2.3097, + "step": 63000 + }, + { + "epoch": 3.32, + "learning_rate": 3.4630532225075066e-05, + "loss": 2.3132, + "step": 63200 + }, + { + "epoch": 3.33, + "learning_rate": 3.457613473171156e-05, + "loss": 2.3049, + "step": 63400 + }, + { + "epoch": 3.34, + "learning_rate": 3.452173723834806e-05, + "loss": 2.3067, + "step": 63600 + }, + { + "epoch": 3.35, + "learning_rate": 3.4467339744984553e-05, + "loss": 2.3126, + "step": 63800 + }, + { + "epoch": 3.36, + "learning_rate": 3.441294225162104e-05, + "loss": 2.2959, + "step": 64000 + }, + { + "epoch": 3.37, + "learning_rate": 3.4358544758257545e-05, + "loss": 2.3025, + "step": 64200 + }, + { + "epoch": 3.38, + "learning_rate": 3.430414726489404e-05, + "loss": 2.2955, + "step": 64400 + }, + { + "epoch": 3.39, + "learning_rate": 3.424974977153053e-05, + "loss": 2.3055, + "step": 64600 + }, + { + "epoch": 3.4, + "learning_rate": 3.4195352278167026e-05, + "loss": 2.2957, + "step": 64800 + }, + { + "epoch": 3.41, + "learning_rate": 3.414122677227033e-05, + "loss": 2.2927, + "step": 65000 + }, + { + "epoch": 3.42, + "learning_rate": 3.408682927890683e-05, + "loss": 2.2961, + "step": 65200 + }, + { + "epoch": 3.43, + "learning_rate": 3.4032431785543323e-05, + "loss": 2.2937, + "step": 65400 + }, + { + "epoch": 3.44, + "learning_rate": 3.397803429217982e-05, + "loss": 2.2915, + "step": 65600 + }, + { + "epoch": 3.45, + "learning_rate": 3.392363679881631e-05, + "loss": 2.2915, + "step": 65800 + }, + { + "epoch": 3.46, + "learning_rate": 3.386951129291963e-05, + "loss": 2.29, + "step": 66000 + }, + { + "epoch": 3.48, + "learning_rate": 3.381511379955612e-05, + "loss": 2.2855, + "step": 66200 + }, + { + "epoch": 3.49, + "learning_rate": 3.376071630619261e-05, + "loss": 2.2918, + "step": 66400 + }, + { + "epoch": 3.5, + "learning_rate": 3.370631881282911e-05, + "loss": 2.2802, + "step": 66600 + }, + { + "epoch": 3.51, + "learning_rate": 3.3651921319465604e-05, + "loss": 2.2856, + "step": 66800 + }, + { + "epoch": 3.52, + "learning_rate": 3.359752382610209e-05, + "loss": 2.2877, + "step": 67000 + }, + { + "epoch": 3.53, + "learning_rate": 3.354312633273859e-05, + "loss": 2.2856, + "step": 67200 + }, + { + "epoch": 3.54, + "learning_rate": 3.3488728839375085e-05, + "loss": 2.2875, + "step": 67400 + }, + { + "epoch": 3.55, + "learning_rate": 3.3434331346011574e-05, + "loss": 2.2791, + "step": 67600 + }, + { + "epoch": 3.56, + "learning_rate": 3.337993385264807e-05, + "loss": 2.2777, + "step": 67800 + }, + { + "epoch": 3.57, + "learning_rate": 3.3325536359284566e-05, + "loss": 2.271, + "step": 68000 + }, + { + "epoch": 3.58, + "learning_rate": 3.327113886592106e-05, + "loss": 2.2785, + "step": 68200 + }, + { + "epoch": 3.59, + "learning_rate": 3.321674137255755e-05, + "loss": 2.2684, + "step": 68400 + }, + { + "epoch": 3.6, + "learning_rate": 3.3162343879194047e-05, + "loss": 2.2863, + "step": 68600 + }, + { + "epoch": 3.61, + "learning_rate": 3.310794638583054e-05, + "loss": 2.2815, + "step": 68800 + }, + { + "epoch": 3.62, + "learning_rate": 3.305354889246703e-05, + "loss": 2.2761, + "step": 69000 + }, + { + "epoch": 3.63, + "learning_rate": 3.299915139910353e-05, + "loss": 2.2672, + "step": 69200 + }, + { + "epoch": 3.64, + "learning_rate": 3.294475390574003e-05, + "loss": 2.2587, + "step": 69400 + }, + { + "epoch": 3.65, + "learning_rate": 3.289035641237652e-05, + "loss": 2.258, + "step": 69600 + }, + { + "epoch": 3.66, + "learning_rate": 3.2835958919013015e-05, + "loss": 2.2662, + "step": 69800 + }, + { + "epoch": 3.67, + "learning_rate": 3.278156142564951e-05, + "loss": 2.2605, + "step": 70000 + }, + { + "epoch": 3.69, + "learning_rate": 3.2727163932286006e-05, + "loss": 2.2608, + "step": 70200 + }, + { + "epoch": 3.7, + "learning_rate": 3.267303842638931e-05, + "loss": 2.2549, + "step": 70400 + }, + { + "epoch": 3.71, + "learning_rate": 3.261864093302581e-05, + "loss": 2.2667, + "step": 70600 + }, + { + "epoch": 3.72, + "learning_rate": 3.2564243439662304e-05, + "loss": 2.2601, + "step": 70800 + }, + { + "epoch": 3.73, + "learning_rate": 3.250984594629879e-05, + "loss": 2.2547, + "step": 71000 + }, + { + "epoch": 3.74, + "learning_rate": 3.245544845293529e-05, + "loss": 2.2547, + "step": 71200 + }, + { + "epoch": 3.75, + "learning_rate": 3.2401050959571785e-05, + "loss": 2.2552, + "step": 71400 + }, + { + "epoch": 3.76, + "learning_rate": 3.2346653466208274e-05, + "loss": 2.2498, + "step": 71600 + }, + { + "epoch": 3.77, + "learning_rate": 3.229225597284477e-05, + "loss": 2.2536, + "step": 71800 + }, + { + "epoch": 3.78, + "learning_rate": 3.2237858479481265e-05, + "loss": 2.2435, + "step": 72000 + }, + { + "epoch": 3.79, + "learning_rate": 3.218346098611776e-05, + "loss": 2.2544, + "step": 72200 + }, + { + "epoch": 3.8, + "learning_rate": 3.212906349275426e-05, + "loss": 2.2449, + "step": 72400 + }, + { + "epoch": 3.81, + "learning_rate": 3.207466599939075e-05, + "loss": 2.2506, + "step": 72600 + }, + { + "epoch": 3.82, + "learning_rate": 3.202026850602725e-05, + "loss": 2.2483, + "step": 72800 + }, + { + "epoch": 3.83, + "learning_rate": 3.196587101266374e-05, + "loss": 2.2417, + "step": 73000 + }, + { + "epoch": 3.84, + "learning_rate": 3.191174550676705e-05, + "loss": 2.2366, + "step": 73200 + }, + { + "epoch": 3.85, + "learning_rate": 3.1857348013403546e-05, + "loss": 2.2423, + "step": 73400 + }, + { + "epoch": 3.86, + "learning_rate": 3.1802950520040035e-05, + "loss": 2.2401, + "step": 73600 + }, + { + "epoch": 3.87, + "learning_rate": 3.1748825014143355e-05, + "loss": 2.245, + "step": 73800 + }, + { + "epoch": 3.88, + "learning_rate": 3.1694427520779844e-05, + "loss": 2.2359, + "step": 74000 + }, + { + "epoch": 3.9, + "learning_rate": 3.164003002741634e-05, + "loss": 2.2409, + "step": 74200 + }, + { + "epoch": 3.91, + "learning_rate": 3.1585632534052835e-05, + "loss": 2.2402, + "step": 74400 + }, + { + "epoch": 3.92, + "learning_rate": 3.1531235040689325e-05, + "loss": 2.2379, + "step": 74600 + }, + { + "epoch": 3.93, + "learning_rate": 3.147683754732582e-05, + "loss": 2.2365, + "step": 74800 + }, + { + "epoch": 3.94, + "learning_rate": 3.1422440053962316e-05, + "loss": 2.2368, + "step": 75000 + }, + { + "epoch": 3.95, + "learning_rate": 3.136804256059881e-05, + "loss": 2.2338, + "step": 75200 + }, + { + "epoch": 3.96, + "learning_rate": 3.13136450672353e-05, + "loss": 2.2456, + "step": 75400 + }, + { + "epoch": 3.97, + "learning_rate": 3.12592475738718e-05, + "loss": 2.2337, + "step": 75600 + }, + { + "epoch": 3.98, + "learning_rate": 3.120485008050829e-05, + "loss": 2.2337, + "step": 75800 + }, + { + "epoch": 3.99, + "learning_rate": 3.115045258714478e-05, + "loss": 2.2282, + "step": 76000 + }, + { + "epoch": 4.0, + "learning_rate": 3.109605509378128e-05, + "loss": 2.2292, + "step": 76200 + }, + { + "epoch": 4.01, + "learning_rate": 3.1041657600417774e-05, + "loss": 2.2282, + "step": 76400 + }, + { + "epoch": 4.02, + "learning_rate": 3.098726010705427e-05, + "loss": 2.2299, + "step": 76600 + }, + { + "epoch": 4.03, + "learning_rate": 3.093286261369076e-05, + "loss": 2.2131, + "step": 76800 + }, + { + "epoch": 4.04, + "learning_rate": 3.0878465120327254e-05, + "loss": 2.2223, + "step": 77000 + }, + { + "epoch": 4.05, + "learning_rate": 3.082406762696375e-05, + "loss": 2.2275, + "step": 77200 + }, + { + "epoch": 4.06, + "learning_rate": 3.076967013360024e-05, + "loss": 2.2156, + "step": 77400 + }, + { + "epoch": 4.07, + "learning_rate": 3.071527264023674e-05, + "loss": 2.2252, + "step": 77600 + }, + { + "epoch": 4.08, + "learning_rate": 3.0661147134340054e-05, + "loss": 2.2282, + "step": 77800 + }, + { + "epoch": 4.09, + "learning_rate": 3.0606749640976543e-05, + "loss": 2.2214, + "step": 78000 + }, + { + "epoch": 4.1, + "learning_rate": 3.055235214761304e-05, + "loss": 2.2244, + "step": 78200 + }, + { + "epoch": 4.12, + "learning_rate": 3.0497954654249532e-05, + "loss": 2.2149, + "step": 78400 + }, + { + "epoch": 4.13, + "learning_rate": 3.0443557160886028e-05, + "loss": 2.2203, + "step": 78600 + }, + { + "epoch": 4.14, + "learning_rate": 3.038915966752252e-05, + "loss": 2.216, + "step": 78800 + }, + { + "epoch": 4.15, + "learning_rate": 3.0334762174159016e-05, + "loss": 2.2148, + "step": 79000 + }, + { + "epoch": 4.16, + "learning_rate": 3.028036468079551e-05, + "loss": 2.2145, + "step": 79200 + }, + { + "epoch": 4.17, + "learning_rate": 3.0225967187432004e-05, + "loss": 2.2172, + "step": 79400 + }, + { + "epoch": 4.18, + "learning_rate": 3.0171569694068497e-05, + "loss": 2.2165, + "step": 79600 + }, + { + "epoch": 4.19, + "learning_rate": 3.011717220070499e-05, + "loss": 2.2186, + "step": 79800 + }, + { + "epoch": 4.2, + "learning_rate": 3.0063046694808305e-05, + "loss": 2.2044, + "step": 80000 + }, + { + "epoch": 4.21, + "learning_rate": 3.00086492014448e-05, + "loss": 2.2032, + "step": 80200 + }, + { + "epoch": 4.22, + "learning_rate": 2.9954251708081293e-05, + "loss": 2.2066, + "step": 80400 + }, + { + "epoch": 4.23, + "learning_rate": 2.9899854214717786e-05, + "loss": 2.2019, + "step": 80600 + }, + { + "epoch": 4.24, + "learning_rate": 2.984545672135428e-05, + "loss": 2.2085, + "step": 80800 + }, + { + "epoch": 4.25, + "learning_rate": 2.9791059227990774e-05, + "loss": 2.1975, + "step": 81000 + }, + { + "epoch": 4.26, + "learning_rate": 2.973666173462727e-05, + "loss": 2.1989, + "step": 81200 + }, + { + "epoch": 4.27, + "learning_rate": 2.9682264241263762e-05, + "loss": 2.2016, + "step": 81400 + }, + { + "epoch": 4.28, + "learning_rate": 2.9627866747900258e-05, + "loss": 2.1911, + "step": 81600 + }, + { + "epoch": 4.29, + "learning_rate": 2.957346925453675e-05, + "loss": 2.2, + "step": 81800 + }, + { + "epoch": 4.3, + "learning_rate": 2.9519343748640067e-05, + "loss": 2.2038, + "step": 82000 + }, + { + "epoch": 4.31, + "learning_rate": 2.9465218242743376e-05, + "loss": 2.1882, + "step": 82200 + }, + { + "epoch": 4.33, + "learning_rate": 2.941082074937987e-05, + "loss": 2.1936, + "step": 82400 + }, + { + "epoch": 4.34, + "learning_rate": 2.9356423256016364e-05, + "loss": 2.1979, + "step": 82600 + }, + { + "epoch": 4.35, + "learning_rate": 2.9302025762652857e-05, + "loss": 2.2051, + "step": 82800 + }, + { + "epoch": 4.36, + "learning_rate": 2.9247628269289352e-05, + "loss": 2.1915, + "step": 83000 + }, + { + "epoch": 4.37, + "learning_rate": 2.9193230775925845e-05, + "loss": 2.1869, + "step": 83200 + }, + { + "epoch": 4.38, + "learning_rate": 2.9138833282562337e-05, + "loss": 2.1824, + "step": 83400 + }, + { + "epoch": 4.39, + "learning_rate": 2.9084435789198833e-05, + "loss": 2.1974, + "step": 83600 + }, + { + "epoch": 4.4, + "learning_rate": 2.9030038295835326e-05, + "loss": 2.1854, + "step": 83800 + }, + { + "epoch": 4.41, + "learning_rate": 2.8975640802471825e-05, + "loss": 2.1844, + "step": 84000 + }, + { + "epoch": 4.42, + "learning_rate": 2.892124330910832e-05, + "loss": 2.1883, + "step": 84200 + }, + { + "epoch": 4.43, + "learning_rate": 2.886711780321163e-05, + "loss": 2.1925, + "step": 84400 + }, + { + "epoch": 4.44, + "learning_rate": 2.8812720309848122e-05, + "loss": 2.186, + "step": 84600 + }, + { + "epoch": 4.45, + "learning_rate": 2.8758322816484618e-05, + "loss": 2.1865, + "step": 84800 + }, + { + "epoch": 4.46, + "learning_rate": 2.870392532312111e-05, + "loss": 2.1837, + "step": 85000 + }, + { + "epoch": 4.47, + "learning_rate": 2.8649527829757607e-05, + "loss": 2.1836, + "step": 85200 + }, + { + "epoch": 4.48, + "learning_rate": 2.85951303363941e-05, + "loss": 2.1867, + "step": 85400 + }, + { + "epoch": 4.49, + "learning_rate": 2.8540732843030595e-05, + "loss": 2.1811, + "step": 85600 + }, + { + "epoch": 4.5, + "learning_rate": 2.8486335349667087e-05, + "loss": 2.1695, + "step": 85800 + }, + { + "epoch": 4.51, + "learning_rate": 2.843193785630358e-05, + "loss": 2.187, + "step": 86000 + }, + { + "epoch": 4.52, + "learning_rate": 2.8377540362940076e-05, + "loss": 2.1767, + "step": 86200 + }, + { + "epoch": 4.54, + "learning_rate": 2.8323142869576568e-05, + "loss": 2.1827, + "step": 86400 + }, + { + "epoch": 4.55, + "learning_rate": 2.8268745376213064e-05, + "loss": 2.1835, + "step": 86600 + }, + { + "epoch": 4.56, + "learning_rate": 2.8214347882849563e-05, + "loss": 2.164, + "step": 86800 + }, + { + "epoch": 4.57, + "learning_rate": 2.8159950389486056e-05, + "loss": 2.1696, + "step": 87000 + }, + { + "epoch": 4.58, + "learning_rate": 2.810555289612255e-05, + "loss": 2.1812, + "step": 87200 + }, + { + "epoch": 4.59, + "learning_rate": 2.8051155402759044e-05, + "loss": 2.1768, + "step": 87400 + }, + { + "epoch": 4.6, + "learning_rate": 2.7996757909395536e-05, + "loss": 2.1825, + "step": 87600 + }, + { + "epoch": 4.61, + "learning_rate": 2.794263240349885e-05, + "loss": 2.1763, + "step": 87800 + }, + { + "epoch": 4.62, + "learning_rate": 2.788823491013534e-05, + "loss": 2.1698, + "step": 88000 + }, + { + "epoch": 4.63, + "learning_rate": 2.7833837416771834e-05, + "loss": 2.1745, + "step": 88200 + }, + { + "epoch": 4.64, + "learning_rate": 2.777943992340833e-05, + "loss": 2.1675, + "step": 88400 + }, + { + "epoch": 4.65, + "learning_rate": 2.7725042430044822e-05, + "loss": 2.1661, + "step": 88600 + }, + { + "epoch": 4.66, + "learning_rate": 2.7670644936681318e-05, + "loss": 2.1603, + "step": 88800 + }, + { + "epoch": 4.67, + "learning_rate": 2.761624744331781e-05, + "loss": 2.1612, + "step": 89000 + }, + { + "epoch": 4.68, + "learning_rate": 2.7561849949954306e-05, + "loss": 2.1667, + "step": 89200 + }, + { + "epoch": 4.69, + "learning_rate": 2.7507452456590805e-05, + "loss": 2.1625, + "step": 89400 + }, + { + "epoch": 4.7, + "learning_rate": 2.7453054963227298e-05, + "loss": 2.1751, + "step": 89600 + }, + { + "epoch": 4.71, + "learning_rate": 2.7398657469863794e-05, + "loss": 2.163, + "step": 89800 + }, + { + "epoch": 4.72, + "learning_rate": 2.7344259976500286e-05, + "loss": 2.1606, + "step": 90000 + }, + { + "epoch": 4.73, + "learning_rate": 2.728986248313678e-05, + "loss": 2.1623, + "step": 90200 + }, + { + "epoch": 4.75, + "learning_rate": 2.7235464989773274e-05, + "loss": 2.1648, + "step": 90400 + }, + { + "epoch": 4.76, + "learning_rate": 2.7181067496409767e-05, + "loss": 2.1607, + "step": 90600 + }, + { + "epoch": 4.77, + "learning_rate": 2.7126670003046263e-05, + "loss": 2.1561, + "step": 90800 + }, + { + "epoch": 4.78, + "learning_rate": 2.7072272509682755e-05, + "loss": 2.1591, + "step": 91000 + }, + { + "epoch": 4.79, + "learning_rate": 2.701787501631925e-05, + "loss": 2.1596, + "step": 91200 + }, + { + "epoch": 4.8, + "learning_rate": 2.6963477522955744e-05, + "loss": 2.1524, + "step": 91400 + }, + { + "epoch": 4.81, + "learning_rate": 2.6909080029592236e-05, + "loss": 2.1508, + "step": 91600 + }, + { + "epoch": 4.82, + "learning_rate": 2.6854682536228732e-05, + "loss": 2.1607, + "step": 91800 + }, + { + "epoch": 4.83, + "learning_rate": 2.6800285042865224e-05, + "loss": 2.1485, + "step": 92000 + }, + { + "epoch": 4.84, + "learning_rate": 2.674615953696854e-05, + "loss": 2.1467, + "step": 92200 + }, + { + "epoch": 4.85, + "learning_rate": 2.6691762043605033e-05, + "loss": 2.1526, + "step": 92400 + }, + { + "epoch": 4.86, + "learning_rate": 2.663736455024153e-05, + "loss": 2.1489, + "step": 92600 + }, + { + "epoch": 4.87, + "learning_rate": 2.658296705687802e-05, + "loss": 2.1519, + "step": 92800 + }, + { + "epoch": 4.88, + "learning_rate": 2.6528569563514517e-05, + "loss": 2.1482, + "step": 93000 + }, + { + "epoch": 4.89, + "learning_rate": 2.647417207015101e-05, + "loss": 2.1503, + "step": 93200 + }, + { + "epoch": 4.9, + "learning_rate": 2.6419774576787505e-05, + "loss": 2.1474, + "step": 93400 + }, + { + "epoch": 4.91, + "learning_rate": 2.6365377083423998e-05, + "loss": 2.1475, + "step": 93600 + }, + { + "epoch": 4.92, + "learning_rate": 2.631097959006049e-05, + "loss": 2.1487, + "step": 93800 + }, + { + "epoch": 4.93, + "learning_rate": 2.6256582096696986e-05, + "loss": 2.1527, + "step": 94000 + }, + { + "epoch": 4.94, + "learning_rate": 2.620218460333348e-05, + "loss": 2.1438, + "step": 94200 + }, + { + "epoch": 4.96, + "learning_rate": 2.6147787109969974e-05, + "loss": 2.148, + "step": 94400 + }, + { + "epoch": 4.97, + "learning_rate": 2.6093389616606467e-05, + "loss": 2.1478, + "step": 94600 + }, + { + "epoch": 4.98, + "learning_rate": 2.6038992123242962e-05, + "loss": 2.1505, + "step": 94800 + }, + { + "epoch": 4.99, + "learning_rate": 2.5984866617346275e-05, + "loss": 2.1491, + "step": 95000 + }, + { + "epoch": 5.0, + "learning_rate": 2.593046912398277e-05, + "loss": 2.143, + "step": 95200 + }, + { + "epoch": 5.01, + "learning_rate": 2.5876071630619263e-05, + "loss": 2.1388, + "step": 95400 + }, + { + "epoch": 5.02, + "learning_rate": 2.582167413725576e-05, + "loss": 2.1433, + "step": 95600 + }, + { + "epoch": 5.03, + "learning_rate": 2.576727664389225e-05, + "loss": 2.1269, + "step": 95800 + }, + { + "epoch": 5.04, + "learning_rate": 2.5712879150528747e-05, + "loss": 2.1401, + "step": 96000 + }, + { + "epoch": 5.05, + "learning_rate": 2.565848165716524e-05, + "loss": 2.1382, + "step": 96200 + }, + { + "epoch": 5.06, + "learning_rate": 2.5604084163801732e-05, + "loss": 2.1371, + "step": 96400 + }, + { + "epoch": 5.07, + "learning_rate": 2.5549686670438228e-05, + "loss": 2.1347, + "step": 96600 + }, + { + "epoch": 5.08, + "learning_rate": 2.5495561164541537e-05, + "loss": 2.1407, + "step": 96800 + }, + { + "epoch": 5.09, + "learning_rate": 2.544116367117803e-05, + "loss": 2.1365, + "step": 97000 + }, + { + "epoch": 5.1, + "learning_rate": 2.5386766177814526e-05, + "loss": 2.1354, + "step": 97200 + }, + { + "epoch": 5.11, + "learning_rate": 2.5332368684451025e-05, + "loss": 2.1278, + "step": 97400 + }, + { + "epoch": 5.12, + "learning_rate": 2.5277971191087517e-05, + "loss": 2.1357, + "step": 97600 + }, + { + "epoch": 5.13, + "learning_rate": 2.5223573697724013e-05, + "loss": 2.1337, + "step": 97800 + }, + { + "epoch": 5.14, + "learning_rate": 2.5169176204360506e-05, + "loss": 2.1335, + "step": 98000 + }, + { + "epoch": 5.15, + "learning_rate": 2.5114778710997e-05, + "loss": 2.1302, + "step": 98200 + }, + { + "epoch": 5.17, + "learning_rate": 2.5060381217633494e-05, + "loss": 2.1351, + "step": 98400 + }, + { + "epoch": 5.18, + "learning_rate": 2.5005983724269986e-05, + "loss": 2.1346, + "step": 98600 + }, + { + "epoch": 5.19, + "learning_rate": 2.4951586230906482e-05, + "loss": 2.1307, + "step": 98800 + }, + { + "epoch": 5.2, + "learning_rate": 2.4897188737542975e-05, + "loss": 2.1222, + "step": 99000 + }, + { + "epoch": 5.21, + "learning_rate": 2.484279124417947e-05, + "loss": 2.1215, + "step": 99200 + }, + { + "epoch": 5.22, + "learning_rate": 2.4788393750815963e-05, + "loss": 2.1201, + "step": 99400 + }, + { + "epoch": 5.23, + "learning_rate": 2.473399625745246e-05, + "loss": 2.1307, + "step": 99600 + }, + { + "epoch": 5.24, + "learning_rate": 2.467959876408895e-05, + "loss": 2.1311, + "step": 99800 + }, + { + "epoch": 5.25, + "learning_rate": 2.4625201270725444e-05, + "loss": 2.1161, + "step": 100000 + }, + { + "epoch": 5.26, + "learning_rate": 2.457080377736194e-05, + "loss": 2.1259, + "step": 100200 + }, + { + "epoch": 5.27, + "learning_rate": 2.4516406283998432e-05, + "loss": 2.1199, + "step": 100400 + }, + { + "epoch": 5.28, + "learning_rate": 2.446200879063493e-05, + "loss": 2.1216, + "step": 100600 + }, + { + "epoch": 5.29, + "learning_rate": 2.4407611297271424e-05, + "loss": 2.1156, + "step": 100800 + }, + { + "epoch": 5.3, + "learning_rate": 2.4353213803907916e-05, + "loss": 2.1243, + "step": 101000 + }, + { + "epoch": 5.31, + "learning_rate": 2.4298816310544412e-05, + "loss": 2.1209, + "step": 101200 + }, + { + "epoch": 5.32, + "learning_rate": 2.4244418817180905e-05, + "loss": 2.1188, + "step": 101400 + }, + { + "epoch": 5.33, + "learning_rate": 2.41900213238174e-05, + "loss": 2.1206, + "step": 101600 + }, + { + "epoch": 5.34, + "learning_rate": 2.4135623830453893e-05, + "loss": 2.1195, + "step": 101800 + }, + { + "epoch": 5.35, + "learning_rate": 2.408122633709039e-05, + "loss": 2.1218, + "step": 102000 + }, + { + "epoch": 5.36, + "learning_rate": 2.402682884372688e-05, + "loss": 2.1098, + "step": 102200 + }, + { + "epoch": 5.38, + "learning_rate": 2.3972703337830194e-05, + "loss": 2.1126, + "step": 102400 + }, + { + "epoch": 5.39, + "learning_rate": 2.3918305844466686e-05, + "loss": 2.1132, + "step": 102600 + }, + { + "epoch": 5.4, + "learning_rate": 2.3863908351103182e-05, + "loss": 2.1141, + "step": 102800 + }, + { + "epoch": 5.41, + "learning_rate": 2.3809510857739674e-05, + "loss": 2.1059, + "step": 103000 + }, + { + "epoch": 5.42, + "learning_rate": 2.375511336437617e-05, + "loss": 2.1057, + "step": 103200 + }, + { + "epoch": 5.43, + "learning_rate": 2.3700715871012666e-05, + "loss": 2.1116, + "step": 103400 + }, + { + "epoch": 5.44, + "learning_rate": 2.364631837764916e-05, + "loss": 2.1138, + "step": 103600 + }, + { + "epoch": 5.45, + "learning_rate": 2.3591920884285654e-05, + "loss": 2.1129, + "step": 103800 + }, + { + "epoch": 5.46, + "learning_rate": 2.3537523390922147e-05, + "loss": 2.1108, + "step": 104000 + }, + { + "epoch": 5.47, + "learning_rate": 2.3483125897558643e-05, + "loss": 2.1146, + "step": 104200 + }, + { + "epoch": 5.48, + "learning_rate": 2.3429000391661955e-05, + "loss": 2.1102, + "step": 104400 + }, + { + "epoch": 5.49, + "learning_rate": 2.3374602898298448e-05, + "loss": 2.1138, + "step": 104600 + }, + { + "epoch": 5.5, + "learning_rate": 2.332020540493494e-05, + "loss": 2.1043, + "step": 104800 + }, + { + "epoch": 5.51, + "learning_rate": 2.3265807911571436e-05, + "loss": 2.1063, + "step": 105000 + }, + { + "epoch": 5.52, + "learning_rate": 2.321141041820793e-05, + "loss": 2.1108, + "step": 105200 + }, + { + "epoch": 5.53, + "learning_rate": 2.3157012924844424e-05, + "loss": 2.1106, + "step": 105400 + }, + { + "epoch": 5.54, + "learning_rate": 2.3102615431480917e-05, + "loss": 2.1117, + "step": 105600 + }, + { + "epoch": 5.55, + "learning_rate": 2.3048217938117413e-05, + "loss": 2.0953, + "step": 105800 + }, + { + "epoch": 5.56, + "learning_rate": 2.299382044475391e-05, + "loss": 2.1012, + "step": 106000 + }, + { + "epoch": 5.57, + "learning_rate": 2.29394229513904e-05, + "loss": 2.1044, + "step": 106200 + }, + { + "epoch": 5.59, + "learning_rate": 2.2885297445493714e-05, + "loss": 2.1023, + "step": 106400 + }, + { + "epoch": 5.6, + "learning_rate": 2.283089995213021e-05, + "loss": 2.1068, + "step": 106600 + }, + { + "epoch": 5.61, + "learning_rate": 2.2776502458766702e-05, + "loss": 2.1076, + "step": 106800 + }, + { + "epoch": 5.62, + "learning_rate": 2.2722104965403194e-05, + "loss": 2.1024, + "step": 107000 + }, + { + "epoch": 5.63, + "learning_rate": 2.266770747203969e-05, + "loss": 2.1007, + "step": 107200 + }, + { + "epoch": 5.64, + "learning_rate": 2.2613309978676183e-05, + "loss": 2.0993, + "step": 107400 + }, + { + "epoch": 5.65, + "learning_rate": 2.255891248531268e-05, + "loss": 2.0964, + "step": 107600 + }, + { + "epoch": 5.66, + "learning_rate": 2.250451499194917e-05, + "loss": 2.0933, + "step": 107800 + }, + { + "epoch": 5.67, + "learning_rate": 2.2450117498585667e-05, + "loss": 2.0945, + "step": 108000 + }, + { + "epoch": 5.68, + "learning_rate": 2.239572000522216e-05, + "loss": 2.0974, + "step": 108200 + }, + { + "epoch": 5.69, + "learning_rate": 2.2341322511858655e-05, + "loss": 2.0992, + "step": 108400 + }, + { + "epoch": 5.7, + "learning_rate": 2.228692501849515e-05, + "loss": 2.0913, + "step": 108600 + }, + { + "epoch": 5.71, + "learning_rate": 2.223279951259846e-05, + "loss": 2.096, + "step": 108800 + }, + { + "epoch": 5.72, + "learning_rate": 2.2178402019234952e-05, + "loss": 2.094, + "step": 109000 + }, + { + "epoch": 5.73, + "learning_rate": 2.2124004525871452e-05, + "loss": 2.0944, + "step": 109200 + }, + { + "epoch": 5.74, + "learning_rate": 2.2069607032507944e-05, + "loss": 2.0895, + "step": 109400 + }, + { + "epoch": 5.75, + "learning_rate": 2.2015209539144437e-05, + "loss": 2.0886, + "step": 109600 + }, + { + "epoch": 5.76, + "learning_rate": 2.1960812045780932e-05, + "loss": 2.095, + "step": 109800 + }, + { + "epoch": 5.77, + "learning_rate": 2.1906414552417425e-05, + "loss": 2.0854, + "step": 110000 + }, + { + "epoch": 5.78, + "learning_rate": 2.185201705905392e-05, + "loss": 2.0868, + "step": 110200 + }, + { + "epoch": 5.8, + "learning_rate": 2.1797619565690413e-05, + "loss": 2.0803, + "step": 110400 + }, + { + "epoch": 5.81, + "learning_rate": 2.174322207232691e-05, + "loss": 2.0918, + "step": 110600 + }, + { + "epoch": 5.82, + "learning_rate": 2.16888245789634e-05, + "loss": 2.0931, + "step": 110800 + }, + { + "epoch": 5.83, + "learning_rate": 2.1634427085599894e-05, + "loss": 2.0881, + "step": 111000 + }, + { + "epoch": 5.84, + "learning_rate": 2.1580301579703207e-05, + "loss": 2.0856, + "step": 111200 + }, + { + "epoch": 5.85, + "learning_rate": 2.152617607380652e-05, + "loss": 2.0818, + "step": 111400 + }, + { + "epoch": 5.86, + "learning_rate": 2.1471778580443015e-05, + "loss": 2.0788, + "step": 111600 + }, + { + "epoch": 5.87, + "learning_rate": 2.1417381087079507e-05, + "loss": 2.0836, + "step": 111800 + }, + { + "epoch": 5.88, + "learning_rate": 2.1362983593716003e-05, + "loss": 2.0898, + "step": 112000 + }, + { + "epoch": 5.89, + "learning_rate": 2.1308586100352496e-05, + "loss": 2.0803, + "step": 112200 + }, + { + "epoch": 5.9, + "learning_rate": 2.125418860698899e-05, + "loss": 2.081, + "step": 112400 + }, + { + "epoch": 5.91, + "learning_rate": 2.1199791113625487e-05, + "loss": 2.0859, + "step": 112600 + }, + { + "epoch": 5.92, + "learning_rate": 2.114539362026198e-05, + "loss": 2.0826, + "step": 112800 + }, + { + "epoch": 5.93, + "learning_rate": 2.1090996126898476e-05, + "loss": 2.0883, + "step": 113000 + }, + { + "epoch": 5.94, + "learning_rate": 2.1036598633534968e-05, + "loss": 2.0802, + "step": 113200 + }, + { + "epoch": 5.95, + "learning_rate": 2.098220114017146e-05, + "loss": 2.0868, + "step": 113400 + }, + { + "epoch": 5.96, + "learning_rate": 2.0927803646807956e-05, + "loss": 2.0827, + "step": 113600 + }, + { + "epoch": 5.97, + "learning_rate": 2.087340615344445e-05, + "loss": 2.0842, + "step": 113800 + }, + { + "epoch": 5.98, + "learning_rate": 2.0819008660080945e-05, + "loss": 2.0783, + "step": 114000 + }, + { + "epoch": 5.99, + "learning_rate": 2.0764611166717437e-05, + "loss": 2.0809, + "step": 114200 + }, + { + "epoch": 6.01, + "learning_rate": 2.0710213673353933e-05, + "loss": 2.0844, + "step": 114400 + }, + { + "epoch": 6.02, + "learning_rate": 2.065581617999043e-05, + "loss": 2.0746, + "step": 114600 + }, + { + "epoch": 6.03, + "learning_rate": 2.060141868662692e-05, + "loss": 2.0785, + "step": 114800 + }, + { + "epoch": 6.04, + "learning_rate": 2.0547021193263417e-05, + "loss": 2.0767, + "step": 115000 + }, + { + "epoch": 6.05, + "learning_rate": 2.049262369989991e-05, + "loss": 2.0763, + "step": 115200 + }, + { + "epoch": 6.06, + "learning_rate": 2.0438498194003222e-05, + "loss": 2.0837, + "step": 115400 + }, + { + "epoch": 6.07, + "learning_rate": 2.038437268810653e-05, + "loss": 2.0736, + "step": 115600 + }, + { + "epoch": 6.08, + "learning_rate": 2.0329975194743027e-05, + "loss": 2.0787, + "step": 115800 + }, + { + "epoch": 6.09, + "learning_rate": 2.0275577701379523e-05, + "loss": 2.084, + "step": 116000 + }, + { + "epoch": 6.1, + "learning_rate": 2.0221180208016016e-05, + "loss": 2.0804, + "step": 116200 + }, + { + "epoch": 6.11, + "learning_rate": 2.016678271465251e-05, + "loss": 2.0657, + "step": 116400 + }, + { + "epoch": 6.12, + "learning_rate": 2.0112385221289004e-05, + "loss": 2.0731, + "step": 116600 + }, + { + "epoch": 6.13, + "learning_rate": 2.0057987727925496e-05, + "loss": 2.0769, + "step": 116800 + }, + { + "epoch": 6.14, + "learning_rate": 2.0003590234561992e-05, + "loss": 2.0734, + "step": 117000 + }, + { + "epoch": 6.15, + "learning_rate": 1.9949192741198485e-05, + "loss": 2.0751, + "step": 117200 + }, + { + "epoch": 6.16, + "learning_rate": 1.989479524783498e-05, + "loss": 2.071, + "step": 117400 + }, + { + "epoch": 6.17, + "learning_rate": 1.9840397754471473e-05, + "loss": 2.075, + "step": 117600 + }, + { + "epoch": 6.18, + "learning_rate": 1.978600026110797e-05, + "loss": 2.0692, + "step": 117800 + }, + { + "epoch": 6.19, + "learning_rate": 1.9731602767744465e-05, + "loss": 2.0671, + "step": 118000 + }, + { + "epoch": 6.2, + "learning_rate": 1.9677477261847774e-05, + "loss": 2.0654, + "step": 118200 + }, + { + "epoch": 6.22, + "learning_rate": 1.962307976848427e-05, + "loss": 2.0612, + "step": 118400 + }, + { + "epoch": 6.23, + "learning_rate": 1.9568682275120765e-05, + "loss": 2.0703, + "step": 118600 + }, + { + "epoch": 6.24, + "learning_rate": 1.9514284781757258e-05, + "loss": 2.0644, + "step": 118800 + }, + { + "epoch": 6.25, + "learning_rate": 1.9459887288393754e-05, + "loss": 2.0662, + "step": 119000 + }, + { + "epoch": 6.26, + "learning_rate": 1.9405489795030246e-05, + "loss": 2.0652, + "step": 119200 + }, + { + "epoch": 6.27, + "learning_rate": 1.935109230166674e-05, + "loss": 2.0661, + "step": 119400 + }, + { + "epoch": 6.28, + "learning_rate": 1.9296694808303234e-05, + "loss": 2.0674, + "step": 119600 + }, + { + "epoch": 6.29, + "learning_rate": 1.9242297314939727e-05, + "loss": 2.0652, + "step": 119800 + }, + { + "epoch": 6.3, + "learning_rate": 1.9187899821576223e-05, + "loss": 2.0598, + "step": 120000 + }, + { + "epoch": 6.31, + "learning_rate": 1.9133502328212715e-05, + "loss": 2.0655, + "step": 120200 + }, + { + "epoch": 6.32, + "learning_rate": 1.907910483484921e-05, + "loss": 2.0605, + "step": 120400 + }, + { + "epoch": 6.33, + "learning_rate": 1.9024707341485707e-05, + "loss": 2.0619, + "step": 120600 + }, + { + "epoch": 6.34, + "learning_rate": 1.89703098481222e-05, + "loss": 2.0631, + "step": 120800 + }, + { + "epoch": 6.35, + "learning_rate": 1.8915912354758695e-05, + "loss": 2.0678, + "step": 121000 + }, + { + "epoch": 6.36, + "learning_rate": 1.8861514861395188e-05, + "loss": 2.0584, + "step": 121200 + }, + { + "epoch": 6.37, + "learning_rate": 1.8807117368031683e-05, + "loss": 2.0461, + "step": 121400 + }, + { + "epoch": 6.38, + "learning_rate": 1.8752719874668176e-05, + "loss": 2.0631, + "step": 121600 + }, + { + "epoch": 6.39, + "learning_rate": 1.869832238130467e-05, + "loss": 2.0695, + "step": 121800 + }, + { + "epoch": 6.4, + "learning_rate": 1.8643924887941164e-05, + "loss": 2.0574, + "step": 122000 + }, + { + "epoch": 6.41, + "learning_rate": 1.8589527394577657e-05, + "loss": 2.0504, + "step": 122200 + }, + { + "epoch": 6.43, + "learning_rate": 1.853540188868097e-05, + "loss": 2.0567, + "step": 122400 + }, + { + "epoch": 6.44, + "learning_rate": 1.8481004395317465e-05, + "loss": 2.0592, + "step": 122600 + }, + { + "epoch": 6.45, + "learning_rate": 1.8426606901953958e-05, + "loss": 2.0591, + "step": 122800 + }, + { + "epoch": 6.46, + "learning_rate": 1.837220940859045e-05, + "loss": 2.0552, + "step": 123000 + }, + { + "epoch": 6.47, + "learning_rate": 1.831781191522695e-05, + "loss": 2.0593, + "step": 123200 + }, + { + "epoch": 6.48, + "learning_rate": 1.8263414421863442e-05, + "loss": 2.0537, + "step": 123400 + }, + { + "epoch": 6.49, + "learning_rate": 1.8209016928499938e-05, + "loss": 2.0523, + "step": 123600 + }, + { + "epoch": 6.5, + "learning_rate": 1.815461943513643e-05, + "loss": 2.0497, + "step": 123800 + }, + { + "epoch": 6.51, + "learning_rate": 1.8100221941772922e-05, + "loss": 2.0571, + "step": 124000 + }, + { + "epoch": 6.52, + "learning_rate": 1.8045824448409418e-05, + "loss": 2.0573, + "step": 124200 + }, + { + "epoch": 6.53, + "learning_rate": 1.799142695504591e-05, + "loss": 2.0526, + "step": 124400 + }, + { + "epoch": 6.54, + "learning_rate": 1.7937029461682407e-05, + "loss": 2.0604, + "step": 124600 + }, + { + "epoch": 6.55, + "learning_rate": 1.78826319683189e-05, + "loss": 2.0436, + "step": 124800 + }, + { + "epoch": 6.56, + "learning_rate": 1.7828234474955395e-05, + "loss": 2.0445, + "step": 125000 + }, + { + "epoch": 6.57, + "learning_rate": 1.7774108969058707e-05, + "loss": 2.0484, + "step": 125200 + }, + { + "epoch": 6.58, + "learning_rate": 1.77197114756952e-05, + "loss": 2.0561, + "step": 125400 + }, + { + "epoch": 6.59, + "learning_rate": 1.7665585969798512e-05, + "loss": 2.0471, + "step": 125600 + }, + { + "epoch": 6.6, + "learning_rate": 1.7611188476435005e-05, + "loss": 2.064, + "step": 125800 + }, + { + "epoch": 6.61, + "learning_rate": 1.75567909830715e-05, + "loss": 2.0574, + "step": 126000 + }, + { + "epoch": 6.62, + "learning_rate": 1.7502393489707993e-05, + "loss": 2.0487, + "step": 126200 + }, + { + "epoch": 6.64, + "learning_rate": 1.744799599634449e-05, + "loss": 2.0502, + "step": 126400 + }, + { + "epoch": 6.65, + "learning_rate": 1.7393598502980985e-05, + "loss": 2.0401, + "step": 126600 + }, + { + "epoch": 6.66, + "learning_rate": 1.7339201009617477e-05, + "loss": 2.0351, + "step": 126800 + }, + { + "epoch": 6.67, + "learning_rate": 1.7284803516253973e-05, + "loss": 2.0526, + "step": 127000 + }, + { + "epoch": 6.68, + "learning_rate": 1.7230406022890466e-05, + "loss": 2.0423, + "step": 127200 + }, + { + "epoch": 6.69, + "learning_rate": 1.717600852952696e-05, + "loss": 2.0438, + "step": 127400 + }, + { + "epoch": 6.7, + "learning_rate": 1.7121611036163454e-05, + "loss": 2.0423, + "step": 127600 + }, + { + "epoch": 6.71, + "learning_rate": 1.7067213542799946e-05, + "loss": 2.0511, + "step": 127800 + }, + { + "epoch": 6.72, + "learning_rate": 1.7012816049436442e-05, + "loss": 2.0478, + "step": 128000 + }, + { + "epoch": 6.73, + "learning_rate": 1.6958418556072935e-05, + "loss": 2.0422, + "step": 128200 + }, + { + "epoch": 6.74, + "learning_rate": 1.6904021062709434e-05, + "loss": 2.0438, + "step": 128400 + }, + { + "epoch": 6.75, + "learning_rate": 1.6849623569345926e-05, + "loss": 2.0421, + "step": 128600 + }, + { + "epoch": 6.76, + "learning_rate": 1.679522607598242e-05, + "loss": 2.0399, + "step": 128800 + }, + { + "epoch": 6.77, + "learning_rate": 1.6740828582618915e-05, + "loss": 2.0455, + "step": 129000 + }, + { + "epoch": 6.78, + "learning_rate": 1.6686431089255407e-05, + "loss": 2.0355, + "step": 129200 + }, + { + "epoch": 6.79, + "learning_rate": 1.6632033595891903e-05, + "loss": 2.04, + "step": 129400 + }, + { + "epoch": 6.8, + "learning_rate": 1.6577636102528395e-05, + "loss": 2.0416, + "step": 129600 + }, + { + "epoch": 6.81, + "learning_rate": 1.652323860916489e-05, + "loss": 2.0373, + "step": 129800 + }, + { + "epoch": 6.82, + "learning_rate": 1.6468841115801384e-05, + "loss": 2.0423, + "step": 130000 + }, + { + "epoch": 6.83, + "learning_rate": 1.6414443622437876e-05, + "loss": 2.0354, + "step": 130200 + }, + { + "epoch": 6.85, + "learning_rate": 1.6360046129074372e-05, + "loss": 2.0343, + "step": 130400 + }, + { + "epoch": 6.86, + "learning_rate": 1.6305920623177685e-05, + "loss": 2.0327, + "step": 130600 + }, + { + "epoch": 6.87, + "learning_rate": 1.6251523129814177e-05, + "loss": 2.0362, + "step": 130800 + }, + { + "epoch": 6.88, + "learning_rate": 1.6197125636450673e-05, + "loss": 2.036, + "step": 131000 + }, + { + "epoch": 6.89, + "learning_rate": 1.614272814308717e-05, + "loss": 2.0384, + "step": 131200 + }, + { + "epoch": 6.9, + "learning_rate": 1.608833064972366e-05, + "loss": 2.0379, + "step": 131400 + }, + { + "epoch": 6.91, + "learning_rate": 1.6033933156360157e-05, + "loss": 2.043, + "step": 131600 + }, + { + "epoch": 6.92, + "learning_rate": 1.597953566299665e-05, + "loss": 2.0316, + "step": 131800 + }, + { + "epoch": 6.93, + "learning_rate": 1.5925138169633145e-05, + "loss": 2.0292, + "step": 132000 + }, + { + "epoch": 6.94, + "learning_rate": 1.5870740676269638e-05, + "loss": 2.0309, + "step": 132200 + }, + { + "epoch": 6.95, + "learning_rate": 1.5816343182906134e-05, + "loss": 2.0392, + "step": 132400 + }, + { + "epoch": 6.96, + "learning_rate": 1.5761945689542626e-05, + "loss": 2.033, + "step": 132600 + }, + { + "epoch": 6.97, + "learning_rate": 1.570754819617912e-05, + "loss": 2.0329, + "step": 132800 + }, + { + "epoch": 6.98, + "learning_rate": 1.565342269028243e-05, + "loss": 2.0324, + "step": 133000 + }, + { + "epoch": 6.99, + "learning_rate": 1.5599025196918927e-05, + "loss": 2.0286, + "step": 133200 + }, + { + "epoch": 7.0, + "learning_rate": 1.554462770355542e-05, + "loss": 2.0356, + "step": 133400 + }, + { + "epoch": 7.01, + "learning_rate": 1.5490502197658732e-05, + "loss": 2.0315, + "step": 133600 + }, + { + "epoch": 7.02, + "learning_rate": 1.5436104704295224e-05, + "loss": 2.0332, + "step": 133800 + }, + { + "epoch": 7.03, + "learning_rate": 1.538170721093172e-05, + "loss": 2.0236, + "step": 134000 + }, + { + "epoch": 7.04, + "learning_rate": 1.5327309717568213e-05, + "loss": 2.0283, + "step": 134200 + }, + { + "epoch": 7.06, + "learning_rate": 1.5272912224204712e-05, + "loss": 2.0349, + "step": 134400 + }, + { + "epoch": 7.07, + "learning_rate": 1.5218514730841204e-05, + "loss": 2.0285, + "step": 134600 + }, + { + "epoch": 7.08, + "learning_rate": 1.5164117237477699e-05, + "loss": 2.0336, + "step": 134800 + }, + { + "epoch": 7.09, + "learning_rate": 1.5109719744114193e-05, + "loss": 2.0387, + "step": 135000 + }, + { + "epoch": 7.1, + "learning_rate": 1.5055322250750687e-05, + "loss": 2.0346, + "step": 135200 + }, + { + "epoch": 7.11, + "learning_rate": 1.5000924757387181e-05, + "loss": 2.0271, + "step": 135400 + }, + { + "epoch": 7.12, + "learning_rate": 1.4946527264023673e-05, + "loss": 2.0259, + "step": 135600 + }, + { + "epoch": 7.13, + "learning_rate": 1.4892129770660168e-05, + "loss": 2.0291, + "step": 135800 + }, + { + "epoch": 7.14, + "learning_rate": 1.4837732277296662e-05, + "loss": 2.0281, + "step": 136000 + }, + { + "epoch": 7.15, + "learning_rate": 1.4783606771399974e-05, + "loss": 2.0334, + "step": 136200 + }, + { + "epoch": 7.16, + "learning_rate": 1.4729209278036469e-05, + "loss": 2.033, + "step": 136400 + }, + { + "epoch": 7.17, + "learning_rate": 1.4674811784672963e-05, + "loss": 2.0291, + "step": 136600 + }, + { + "epoch": 7.18, + "learning_rate": 1.4620414291309457e-05, + "loss": 2.0238, + "step": 136800 + }, + { + "epoch": 7.19, + "learning_rate": 1.456601679794595e-05, + "loss": 2.021, + "step": 137000 + }, + { + "epoch": 7.2, + "learning_rate": 1.4511619304582447e-05, + "loss": 2.0198, + "step": 137200 + }, + { + "epoch": 7.21, + "learning_rate": 1.4457221811218941e-05, + "loss": 2.0177, + "step": 137400 + }, + { + "epoch": 7.22, + "learning_rate": 1.4402824317855435e-05, + "loss": 2.0238, + "step": 137600 + }, + { + "epoch": 7.23, + "learning_rate": 1.434842682449193e-05, + "loss": 2.0285, + "step": 137800 + }, + { + "epoch": 7.24, + "learning_rate": 1.4294029331128422e-05, + "loss": 2.0242, + "step": 138000 + }, + { + "epoch": 7.25, + "learning_rate": 1.4239631837764916e-05, + "loss": 2.0165, + "step": 138200 + }, + { + "epoch": 7.27, + "learning_rate": 1.418523434440141e-05, + "loss": 2.0177, + "step": 138400 + }, + { + "epoch": 7.28, + "learning_rate": 1.4130836851037904e-05, + "loss": 2.0275, + "step": 138600 + }, + { + "epoch": 7.29, + "learning_rate": 1.4076439357674398e-05, + "loss": 2.0221, + "step": 138800 + }, + { + "epoch": 7.3, + "learning_rate": 1.4022041864310892e-05, + "loss": 2.0234, + "step": 139000 + }, + { + "epoch": 7.31, + "learning_rate": 1.3967644370947388e-05, + "loss": 2.0225, + "step": 139200 + }, + { + "epoch": 7.32, + "learning_rate": 1.3913246877583882e-05, + "loss": 2.0195, + "step": 139400 + }, + { + "epoch": 7.33, + "learning_rate": 1.3858849384220377e-05, + "loss": 2.0142, + "step": 139600 + }, + { + "epoch": 7.34, + "learning_rate": 1.380445189085687e-05, + "loss": 2.0224, + "step": 139800 + }, + { + "epoch": 7.35, + "learning_rate": 1.3750054397493365e-05, + "loss": 2.0261, + "step": 140000 + }, + { + "epoch": 7.36, + "learning_rate": 1.3695656904129859e-05, + "loss": 2.019, + "step": 140200 + }, + { + "epoch": 7.37, + "learning_rate": 1.3641259410766351e-05, + "loss": 2.013, + "step": 140400 + }, + { + "epoch": 7.38, + "learning_rate": 1.3586861917402846e-05, + "loss": 2.0101, + "step": 140600 + }, + { + "epoch": 7.39, + "learning_rate": 1.353246442403934e-05, + "loss": 2.0264, + "step": 140800 + }, + { + "epoch": 7.4, + "learning_rate": 1.3478066930675834e-05, + "loss": 2.0021, + "step": 141000 + }, + { + "epoch": 7.41, + "learning_rate": 1.3423941424779146e-05, + "loss": 2.008, + "step": 141200 + }, + { + "epoch": 7.42, + "learning_rate": 1.336954393141564e-05, + "loss": 2.0192, + "step": 141400 + }, + { + "epoch": 7.43, + "learning_rate": 1.3315146438052135e-05, + "loss": 2.0169, + "step": 141600 + }, + { + "epoch": 7.44, + "learning_rate": 1.326074894468863e-05, + "loss": 2.0168, + "step": 141800 + }, + { + "epoch": 7.45, + "learning_rate": 1.3206351451325125e-05, + "loss": 2.0171, + "step": 142000 + }, + { + "epoch": 7.46, + "learning_rate": 1.3151953957961619e-05, + "loss": 2.0233, + "step": 142200 + }, + { + "epoch": 7.48, + "learning_rate": 1.3097556464598113e-05, + "loss": 2.0167, + "step": 142400 + }, + { + "epoch": 7.49, + "learning_rate": 1.3043158971234607e-05, + "loss": 2.02, + "step": 142600 + }, + { + "epoch": 7.5, + "learning_rate": 1.29887614778711e-05, + "loss": 2.0108, + "step": 142800 + }, + { + "epoch": 7.51, + "learning_rate": 1.2934363984507594e-05, + "loss": 2.0147, + "step": 143000 + }, + { + "epoch": 7.52, + "learning_rate": 1.2879966491144088e-05, + "loss": 2.0162, + "step": 143200 + }, + { + "epoch": 7.53, + "learning_rate": 1.28258409852474e-05, + "loss": 2.0203, + "step": 143400 + }, + { + "epoch": 7.54, + "learning_rate": 1.2771443491883895e-05, + "loss": 2.0186, + "step": 143600 + }, + { + "epoch": 7.55, + "learning_rate": 1.2717045998520389e-05, + "loss": 2.0121, + "step": 143800 + }, + { + "epoch": 7.56, + "learning_rate": 1.2662648505156883e-05, + "loss": 2.0089, + "step": 144000 + }, + { + "epoch": 7.57, + "learning_rate": 1.2608251011793375e-05, + "loss": 2.0045, + "step": 144200 + }, + { + "epoch": 7.58, + "learning_rate": 1.255385351842987e-05, + "loss": 2.0085, + "step": 144400 + }, + { + "epoch": 7.59, + "learning_rate": 1.2499456025066365e-05, + "loss": 2.0065, + "step": 144600 + }, + { + "epoch": 7.6, + "learning_rate": 1.244505853170286e-05, + "loss": 2.019, + "step": 144800 + }, + { + "epoch": 7.61, + "learning_rate": 1.2390661038339354e-05, + "loss": 2.0117, + "step": 145000 + }, + { + "epoch": 7.62, + "learning_rate": 1.2336263544975848e-05, + "loss": 2.0161, + "step": 145200 + }, + { + "epoch": 7.63, + "learning_rate": 1.2281866051612342e-05, + "loss": 2.0129, + "step": 145400 + }, + { + "epoch": 7.64, + "learning_rate": 1.2227468558248836e-05, + "loss": 2.0018, + "step": 145600 + }, + { + "epoch": 7.65, + "learning_rate": 1.217307106488533e-05, + "loss": 2.0042, + "step": 145800 + }, + { + "epoch": 7.66, + "learning_rate": 1.2118673571521826e-05, + "loss": 2.0114, + "step": 146000 + }, + { + "epoch": 7.67, + "learning_rate": 1.206427607815832e-05, + "loss": 2.0042, + "step": 146200 + }, + { + "epoch": 7.69, + "learning_rate": 1.2009878584794813e-05, + "loss": 2.0093, + "step": 146400 + }, + { + "epoch": 7.7, + "learning_rate": 1.1955481091431307e-05, + "loss": 1.9995, + "step": 146600 + }, + { + "epoch": 7.71, + "learning_rate": 1.1901083598067801e-05, + "loss": 2.0157, + "step": 146800 + }, + { + "epoch": 7.72, + "learning_rate": 1.1846686104704295e-05, + "loss": 2.0057, + "step": 147000 + }, + { + "epoch": 7.73, + "learning_rate": 1.1792288611340791e-05, + "loss": 2.0072, + "step": 147200 + }, + { + "epoch": 7.74, + "learning_rate": 1.1738163105444102e-05, + "loss": 1.9961, + "step": 147400 + }, + { + "epoch": 7.75, + "learning_rate": 1.1683765612080596e-05, + "loss": 2.012, + "step": 147600 + }, + { + "epoch": 7.76, + "learning_rate": 1.1629640106183907e-05, + "loss": 2.0102, + "step": 147800 + }, + { + "epoch": 7.77, + "learning_rate": 1.157551460028722e-05, + "loss": 2.0096, + "step": 148000 + }, + { + "epoch": 7.78, + "learning_rate": 1.1521117106923714e-05, + "loss": 1.9972, + "step": 148200 + }, + { + "epoch": 7.79, + "learning_rate": 1.1466719613560208e-05, + "loss": 2.0025, + "step": 148400 + }, + { + "epoch": 7.8, + "learning_rate": 1.1412322120196702e-05, + "loss": 1.9985, + "step": 148600 + }, + { + "epoch": 7.81, + "learning_rate": 1.1357924626833196e-05, + "loss": 2.001, + "step": 148800 + }, + { + "epoch": 7.82, + "learning_rate": 1.130352713346969e-05, + "loss": 2.0019, + "step": 149000 + }, + { + "epoch": 7.83, + "learning_rate": 1.1249129640106184e-05, + "loss": 1.9946, + "step": 149200 + }, + { + "epoch": 7.84, + "learning_rate": 1.1194732146742679e-05, + "loss": 1.9935, + "step": 149400 + }, + { + "epoch": 7.85, + "learning_rate": 1.1140334653379173e-05, + "loss": 2.002, + "step": 149600 + }, + { + "epoch": 7.86, + "learning_rate": 1.1085937160015667e-05, + "loss": 2.0009, + "step": 149800 + }, + { + "epoch": 7.87, + "learning_rate": 1.1031539666652161e-05, + "loss": 1.997, + "step": 150000 + }, + { + "epoch": 7.88, + "learning_rate": 1.0977142173288655e-05, + "loss": 2.0059, + "step": 150200 + }, + { + "epoch": 7.9, + "learning_rate": 1.092274467992515e-05, + "loss": 2.0007, + "step": 150400 + }, + { + "epoch": 7.91, + "learning_rate": 1.0868347186561643e-05, + "loss": 2.0027, + "step": 150600 + }, + { + "epoch": 7.92, + "learning_rate": 1.0813949693198138e-05, + "loss": 1.9926, + "step": 150800 + }, + { + "epoch": 7.93, + "learning_rate": 1.0759552199834633e-05, + "loss": 2.001, + "step": 151000 + }, + { + "epoch": 7.94, + "learning_rate": 1.0705154706471126e-05, + "loss": 2.001, + "step": 151200 + }, + { + "epoch": 7.95, + "learning_rate": 1.0651029200574437e-05, + "loss": 2.001, + "step": 151400 + }, + { + "epoch": 7.96, + "learning_rate": 1.0596631707210933e-05, + "loss": 1.9974, + "step": 151600 + }, + { + "epoch": 7.97, + "learning_rate": 1.0542234213847427e-05, + "loss": 2.0024, + "step": 151800 + }, + { + "epoch": 7.98, + "learning_rate": 1.0487836720483921e-05, + "loss": 2.0063, + "step": 152000 + }, + { + "epoch": 7.99, + "learning_rate": 1.0433439227120415e-05, + "loss": 1.9962, + "step": 152200 + }, + { + "epoch": 8.0, + "learning_rate": 1.037904173375691e-05, + "loss": 2.0064, + "step": 152400 + }, + { + "epoch": 8.01, + "learning_rate": 1.0324644240393403e-05, + "loss": 1.9966, + "step": 152600 + }, + { + "epoch": 8.02, + "learning_rate": 1.0270246747029898e-05, + "loss": 1.9983, + "step": 152800 + }, + { + "epoch": 8.03, + "learning_rate": 1.0215849253666392e-05, + "loss": 1.9881, + "step": 153000 + }, + { + "epoch": 8.04, + "learning_rate": 1.0161451760302886e-05, + "loss": 1.9954, + "step": 153200 + }, + { + "epoch": 8.05, + "learning_rate": 1.010705426693938e-05, + "loss": 2.0016, + "step": 153400 + }, + { + "epoch": 8.06, + "learning_rate": 1.0052656773575874e-05, + "loss": 1.9971, + "step": 153600 + }, + { + "epoch": 8.07, + "learning_rate": 9.998259280212368e-06, + "loss": 1.9996, + "step": 153800 + }, + { + "epoch": 8.08, + "learning_rate": 9.943861786848862e-06, + "loss": 2.0017, + "step": 154000 + }, + { + "epoch": 8.09, + "learning_rate": 9.889464293485357e-06, + "loss": 2.0009, + "step": 154200 + }, + { + "epoch": 8.1, + "learning_rate": 9.83506680012185e-06, + "loss": 1.996, + "step": 154400 + }, + { + "epoch": 8.12, + "learning_rate": 9.780669306758345e-06, + "loss": 1.99, + "step": 154600 + }, + { + "epoch": 8.13, + "learning_rate": 9.726271813394839e-06, + "loss": 1.9913, + "step": 154800 + }, + { + "epoch": 8.14, + "learning_rate": 9.671874320031333e-06, + "loss": 1.9959, + "step": 155000 + }, + { + "epoch": 8.15, + "learning_rate": 9.617476826667827e-06, + "loss": 1.9928, + "step": 155200 + }, + { + "epoch": 8.16, + "learning_rate": 9.563079333304321e-06, + "loss": 1.994, + "step": 155400 + }, + { + "epoch": 8.17, + "learning_rate": 9.508681839940816e-06, + "loss": 1.9984, + "step": 155600 + }, + { + "epoch": 8.18, + "learning_rate": 9.454284346577311e-06, + "loss": 1.9942, + "step": 155800 + }, + { + "epoch": 8.19, + "learning_rate": 9.399886853213804e-06, + "loss": 1.9934, + "step": 156000 + }, + { + "epoch": 8.2, + "learning_rate": 9.345489359850298e-06, + "loss": 1.989, + "step": 156200 + }, + { + "epoch": 8.21, + "learning_rate": 9.291091866486792e-06, + "loss": 1.9921, + "step": 156400 + }, + { + "epoch": 8.22, + "learning_rate": 9.236694373123286e-06, + "loss": 1.9912, + "step": 156600 + }, + { + "epoch": 8.23, + "learning_rate": 9.182296879759782e-06, + "loss": 1.9819, + "step": 156800 + }, + { + "epoch": 8.24, + "learning_rate": 9.127899386396276e-06, + "loss": 1.9904, + "step": 157000 + }, + { + "epoch": 8.25, + "learning_rate": 9.073501893032769e-06, + "loss": 1.9876, + "step": 157200 + }, + { + "epoch": 8.26, + "learning_rate": 9.019376387136081e-06, + "loss": 1.9904, + "step": 157400 + }, + { + "epoch": 8.27, + "learning_rate": 8.964978893772575e-06, + "loss": 1.9938, + "step": 157600 + }, + { + "epoch": 8.28, + "learning_rate": 8.910853387875886e-06, + "loss": 1.9868, + "step": 157800 + }, + { + "epoch": 8.29, + "learning_rate": 8.856455894512382e-06, + "loss": 1.9839, + "step": 158000 + }, + { + "epoch": 8.3, + "learning_rate": 8.802058401148876e-06, + "loss": 1.9929, + "step": 158200 + }, + { + "epoch": 8.31, + "learning_rate": 8.74766090778537e-06, + "loss": 1.9819, + "step": 158400 + }, + { + "epoch": 8.33, + "learning_rate": 8.693263414421863e-06, + "loss": 1.9854, + "step": 158600 + }, + { + "epoch": 8.34, + "learning_rate": 8.638865921058357e-06, + "loss": 1.9909, + "step": 158800 + }, + { + "epoch": 8.35, + "learning_rate": 8.584468427694853e-06, + "loss": 1.9958, + "step": 159000 + }, + { + "epoch": 8.36, + "learning_rate": 8.530070934331347e-06, + "loss": 1.9827, + "step": 159200 + }, + { + "epoch": 8.37, + "learning_rate": 8.475673440967841e-06, + "loss": 1.9871, + "step": 159400 + }, + { + "epoch": 8.38, + "learning_rate": 8.421275947604335e-06, + "loss": 1.9849, + "step": 159600 + }, + { + "epoch": 8.39, + "learning_rate": 8.366878454240828e-06, + "loss": 1.9951, + "step": 159800 + }, + { + "epoch": 8.4, + "learning_rate": 8.312480960877324e-06, + "loss": 1.9809, + "step": 160000 + } + ], + "logging_steps": 200, + "max_steps": 190500, + "num_train_epochs": 10, + "save_steps": 40000, + "total_flos": 8.08352616504361e+18, + "trial_name": null, + "trial_params": null +}