PyTorch
Latin
French
Spanish
roberta
Roberta_Historical / trainer_state.json
magistermilitum's picture
Upload 15 files
e0ae2f1 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.398950131233596,
"eval_steps": 200000,
"global_step": 160000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.4997000599880024e-06,
"loss": 8.6813,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 2.999400119976005e-06,
"loss": 8.0951,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 4.499100179964007e-06,
"loss": 7.7394,
"step": 600
},
{
"epoch": 0.04,
"learning_rate": 5.99880023995201e-06,
"loss": 7.4477,
"step": 800
},
{
"epoch": 0.05,
"learning_rate": 7.4985002999400115e-06,
"loss": 7.2516,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 8.998200359928014e-06,
"loss": 7.1331,
"step": 1200
},
{
"epoch": 0.07,
"learning_rate": 1.0497900419916016e-05,
"loss": 7.0447,
"step": 1400
},
{
"epoch": 0.08,
"learning_rate": 1.199760047990402e-05,
"loss": 6.9683,
"step": 1600
},
{
"epoch": 0.09,
"learning_rate": 1.3497300539892021e-05,
"loss": 6.9037,
"step": 1800
},
{
"epoch": 0.1,
"learning_rate": 1.4997000599880023e-05,
"loss": 6.8472,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 1.6496700659868028e-05,
"loss": 6.7907,
"step": 2200
},
{
"epoch": 0.13,
"learning_rate": 1.799640071985603e-05,
"loss": 6.7407,
"step": 2400
},
{
"epoch": 0.14,
"learning_rate": 1.9496100779844032e-05,
"loss": 6.7036,
"step": 2600
},
{
"epoch": 0.15,
"learning_rate": 2.0995800839832032e-05,
"loss": 6.6485,
"step": 2800
},
{
"epoch": 0.16,
"learning_rate": 2.249550089982004e-05,
"loss": 6.6153,
"step": 3000
},
{
"epoch": 0.17,
"learning_rate": 2.399520095980804e-05,
"loss": 6.5826,
"step": 3200
},
{
"epoch": 0.18,
"learning_rate": 2.5494901019796042e-05,
"loss": 6.553,
"step": 3400
},
{
"epoch": 0.19,
"learning_rate": 2.6994601079784043e-05,
"loss": 6.5222,
"step": 3600
},
{
"epoch": 0.2,
"learning_rate": 2.8494301139772046e-05,
"loss": 6.4979,
"step": 3800
},
{
"epoch": 0.21,
"learning_rate": 2.9994001199760046e-05,
"loss": 6.4695,
"step": 4000
},
{
"epoch": 0.22,
"learning_rate": 3.1493701259748056e-05,
"loss": 6.4505,
"step": 4200
},
{
"epoch": 0.23,
"learning_rate": 3.2993401319736057e-05,
"loss": 6.4254,
"step": 4400
},
{
"epoch": 0.24,
"learning_rate": 3.449310137972406e-05,
"loss": 6.412,
"step": 4600
},
{
"epoch": 0.25,
"learning_rate": 3.599280143971206e-05,
"loss": 6.3885,
"step": 4800
},
{
"epoch": 0.26,
"learning_rate": 3.7492501499700064e-05,
"loss": 6.3815,
"step": 5000
},
{
"epoch": 0.27,
"learning_rate": 3.8992201559688064e-05,
"loss": 6.3623,
"step": 5200
},
{
"epoch": 0.28,
"learning_rate": 4.0491901619676064e-05,
"loss": 6.3464,
"step": 5400
},
{
"epoch": 0.29,
"learning_rate": 4.1991601679664064e-05,
"loss": 6.3281,
"step": 5600
},
{
"epoch": 0.3,
"learning_rate": 4.349130173965207e-05,
"loss": 6.3324,
"step": 5800
},
{
"epoch": 0.31,
"learning_rate": 4.499100179964008e-05,
"loss": 6.3128,
"step": 6000
},
{
"epoch": 0.33,
"learning_rate": 4.649070185962808e-05,
"loss": 6.3033,
"step": 6200
},
{
"epoch": 0.34,
"learning_rate": 4.799040191961608e-05,
"loss": 6.3015,
"step": 6400
},
{
"epoch": 0.35,
"learning_rate": 4.949010197960408e-05,
"loss": 6.2881,
"step": 6600
},
{
"epoch": 0.36,
"learning_rate": 4.996409765438009e-05,
"loss": 6.2728,
"step": 6800
},
{
"epoch": 0.37,
"learning_rate": 4.990970016101658e-05,
"loss": 6.2617,
"step": 7000
},
{
"epoch": 0.38,
"learning_rate": 4.9855302667653076e-05,
"loss": 6.2561,
"step": 7200
},
{
"epoch": 0.39,
"learning_rate": 4.980090517428957e-05,
"loss": 6.2531,
"step": 7400
},
{
"epoch": 0.4,
"learning_rate": 4.974650768092606e-05,
"loss": 6.2222,
"step": 7600
},
{
"epoch": 0.41,
"learning_rate": 4.9692110187562557e-05,
"loss": 6.2062,
"step": 7800
},
{
"epoch": 0.42,
"learning_rate": 4.963771269419905e-05,
"loss": 6.1925,
"step": 8000
},
{
"epoch": 0.43,
"learning_rate": 4.958331520083555e-05,
"loss": 6.1704,
"step": 8200
},
{
"epoch": 0.44,
"learning_rate": 4.952918969493886e-05,
"loss": 6.1479,
"step": 8400
},
{
"epoch": 0.45,
"learning_rate": 4.947479220157536e-05,
"loss": 6.1375,
"step": 8600
},
{
"epoch": 0.46,
"learning_rate": 4.9420394708211846e-05,
"loss": 6.1155,
"step": 8800
},
{
"epoch": 0.47,
"learning_rate": 4.936599721484834e-05,
"loss": 6.0921,
"step": 9000
},
{
"epoch": 0.48,
"learning_rate": 4.931159972148484e-05,
"loss": 6.0671,
"step": 9200
},
{
"epoch": 0.49,
"learning_rate": 4.9257202228121326e-05,
"loss": 6.0437,
"step": 9400
},
{
"epoch": 0.5,
"learning_rate": 4.920280473475782e-05,
"loss": 6.0032,
"step": 9600
},
{
"epoch": 0.51,
"learning_rate": 4.914840724139432e-05,
"loss": 5.9209,
"step": 9800
},
{
"epoch": 0.52,
"learning_rate": 4.9094009748030814e-05,
"loss": 5.8316,
"step": 10000
},
{
"epoch": 0.54,
"learning_rate": 4.90396122546673e-05,
"loss": 5.7568,
"step": 10200
},
{
"epoch": 0.55,
"learning_rate": 4.89852147613038e-05,
"loss": 5.6574,
"step": 10400
},
{
"epoch": 0.56,
"learning_rate": 4.893108925540711e-05,
"loss": 5.5117,
"step": 10600
},
{
"epoch": 0.57,
"learning_rate": 4.887669176204361e-05,
"loss": 5.3986,
"step": 10800
},
{
"epoch": 0.58,
"learning_rate": 4.88222942686801e-05,
"loss": 5.2336,
"step": 11000
},
{
"epoch": 0.59,
"learning_rate": 4.87678967753166e-05,
"loss": 5.0519,
"step": 11200
},
{
"epoch": 0.6,
"learning_rate": 4.871349928195309e-05,
"loss": 4.9005,
"step": 11400
},
{
"epoch": 0.61,
"learning_rate": 4.8659101788589584e-05,
"loss": 4.769,
"step": 11600
},
{
"epoch": 0.62,
"learning_rate": 4.860470429522608e-05,
"loss": 4.6484,
"step": 11800
},
{
"epoch": 0.63,
"learning_rate": 4.855030680186257e-05,
"loss": 4.5375,
"step": 12000
},
{
"epoch": 0.64,
"learning_rate": 4.8495909308499065e-05,
"loss": 4.4369,
"step": 12200
},
{
"epoch": 0.65,
"learning_rate": 4.844151181513556e-05,
"loss": 4.3437,
"step": 12400
},
{
"epoch": 0.66,
"learning_rate": 4.8387114321772056e-05,
"loss": 4.267,
"step": 12600
},
{
"epoch": 0.67,
"learning_rate": 4.8332716828408545e-05,
"loss": 4.161,
"step": 12800
},
{
"epoch": 0.68,
"learning_rate": 4.827831933504504e-05,
"loss": 4.0868,
"step": 13000
},
{
"epoch": 0.69,
"learning_rate": 4.822392184168154e-05,
"loss": 4.0029,
"step": 13200
},
{
"epoch": 0.7,
"learning_rate": 4.8169524348318026e-05,
"loss": 3.9486,
"step": 13400
},
{
"epoch": 0.71,
"learning_rate": 4.811512685495453e-05,
"loss": 3.8743,
"step": 13600
},
{
"epoch": 0.72,
"learning_rate": 4.8060729361591025e-05,
"loss": 3.8206,
"step": 13800
},
{
"epoch": 0.73,
"learning_rate": 4.8006331868227514e-05,
"loss": 3.7676,
"step": 14000
},
{
"epoch": 0.75,
"learning_rate": 4.795193437486401e-05,
"loss": 3.7225,
"step": 14200
},
{
"epoch": 0.76,
"learning_rate": 4.7897536881500505e-05,
"loss": 3.6837,
"step": 14400
},
{
"epoch": 0.77,
"learning_rate": 4.784341137560381e-05,
"loss": 3.6421,
"step": 14600
},
{
"epoch": 0.78,
"learning_rate": 4.778901388224031e-05,
"loss": 3.6167,
"step": 14800
},
{
"epoch": 0.79,
"learning_rate": 4.77346163888768e-05,
"loss": 3.5802,
"step": 15000
},
{
"epoch": 0.8,
"learning_rate": 4.76802188955133e-05,
"loss": 3.5469,
"step": 15200
},
{
"epoch": 0.81,
"learning_rate": 4.762582140214979e-05,
"loss": 3.5208,
"step": 15400
},
{
"epoch": 0.82,
"learning_rate": 4.7571423908786284e-05,
"loss": 3.494,
"step": 15600
},
{
"epoch": 0.83,
"learning_rate": 4.7517298402889596e-05,
"loss": 3.4647,
"step": 15800
},
{
"epoch": 0.84,
"learning_rate": 4.746290090952609e-05,
"loss": 3.4417,
"step": 16000
},
{
"epoch": 0.85,
"learning_rate": 4.740850341616259e-05,
"loss": 3.4267,
"step": 16200
},
{
"epoch": 0.86,
"learning_rate": 4.735410592279908e-05,
"loss": 3.3992,
"step": 16400
},
{
"epoch": 0.87,
"learning_rate": 4.729970842943557e-05,
"loss": 3.3831,
"step": 16600
},
{
"epoch": 0.88,
"learning_rate": 4.724531093607207e-05,
"loss": 3.3647,
"step": 16800
},
{
"epoch": 0.89,
"learning_rate": 4.7190913442708564e-05,
"loss": 3.3377,
"step": 17000
},
{
"epoch": 0.9,
"learning_rate": 4.7136515949345054e-05,
"loss": 3.3197,
"step": 17200
},
{
"epoch": 0.91,
"learning_rate": 4.708211845598155e-05,
"loss": 3.2985,
"step": 17400
},
{
"epoch": 0.92,
"learning_rate": 4.7027720962618045e-05,
"loss": 3.287,
"step": 17600
},
{
"epoch": 0.93,
"learning_rate": 4.697332346925454e-05,
"loss": 3.2748,
"step": 17800
},
{
"epoch": 0.94,
"learning_rate": 4.691892597589103e-05,
"loss": 3.2557,
"step": 18000
},
{
"epoch": 0.96,
"learning_rate": 4.6864528482527526e-05,
"loss": 3.2419,
"step": 18200
},
{
"epoch": 0.97,
"learning_rate": 4.681013098916402e-05,
"loss": 3.2286,
"step": 18400
},
{
"epoch": 0.98,
"learning_rate": 4.675573349580051e-05,
"loss": 3.2102,
"step": 18600
},
{
"epoch": 0.99,
"learning_rate": 4.670133600243701e-05,
"loss": 3.1987,
"step": 18800
},
{
"epoch": 1.0,
"learning_rate": 4.664693850907351e-05,
"loss": 3.1854,
"step": 19000
},
{
"epoch": 1.01,
"learning_rate": 4.659254101571e-05,
"loss": 3.1682,
"step": 19200
},
{
"epoch": 1.02,
"learning_rate": 4.6538143522346494e-05,
"loss": 3.1562,
"step": 19400
},
{
"epoch": 1.03,
"learning_rate": 4.648374602898299e-05,
"loss": 3.1366,
"step": 19600
},
{
"epoch": 1.04,
"learning_rate": 4.6429620523086296e-05,
"loss": 3.1273,
"step": 19800
},
{
"epoch": 1.05,
"learning_rate": 4.637522302972279e-05,
"loss": 3.1139,
"step": 20000
},
{
"epoch": 1.06,
"learning_rate": 4.632082553635929e-05,
"loss": 3.1045,
"step": 20200
},
{
"epoch": 1.07,
"learning_rate": 4.6266428042995777e-05,
"loss": 3.0962,
"step": 20400
},
{
"epoch": 1.08,
"learning_rate": 4.6212302537099096e-05,
"loss": 3.0913,
"step": 20600
},
{
"epoch": 1.09,
"learning_rate": 4.615790504373559e-05,
"loss": 3.0805,
"step": 20800
},
{
"epoch": 1.1,
"learning_rate": 4.610350755037208e-05,
"loss": 3.0662,
"step": 21000
},
{
"epoch": 1.11,
"learning_rate": 4.604911005700858e-05,
"loss": 3.0485,
"step": 21200
},
{
"epoch": 1.12,
"learning_rate": 4.599471256364507e-05,
"loss": 3.0438,
"step": 21400
},
{
"epoch": 1.13,
"learning_rate": 4.594031507028156e-05,
"loss": 3.0368,
"step": 21600
},
{
"epoch": 1.14,
"learning_rate": 4.588591757691806e-05,
"loss": 3.0248,
"step": 21800
},
{
"epoch": 1.15,
"learning_rate": 4.583152008355455e-05,
"loss": 3.0124,
"step": 22000
},
{
"epoch": 1.17,
"learning_rate": 4.577712259019105e-05,
"loss": 3.0025,
"step": 22200
},
{
"epoch": 1.18,
"learning_rate": 4.572272509682754e-05,
"loss": 2.9902,
"step": 22400
},
{
"epoch": 1.19,
"learning_rate": 4.5668327603464034e-05,
"loss": 2.9838,
"step": 22600
},
{
"epoch": 1.2,
"learning_rate": 4.561393011010053e-05,
"loss": 2.9701,
"step": 22800
},
{
"epoch": 1.21,
"learning_rate": 4.555953261673702e-05,
"loss": 2.9594,
"step": 23000
},
{
"epoch": 1.22,
"learning_rate": 4.5505135123373515e-05,
"loss": 2.9549,
"step": 23200
},
{
"epoch": 1.23,
"learning_rate": 4.545073763001001e-05,
"loss": 2.9462,
"step": 23400
},
{
"epoch": 1.24,
"learning_rate": 4.5396340136646506e-05,
"loss": 2.9471,
"step": 23600
},
{
"epoch": 1.25,
"learning_rate": 4.5341942643282996e-05,
"loss": 2.9271,
"step": 23800
},
{
"epoch": 1.26,
"learning_rate": 4.5287817137386315e-05,
"loss": 2.9241,
"step": 24000
},
{
"epoch": 1.27,
"learning_rate": 4.5233419644022804e-05,
"loss": 2.9156,
"step": 24200
},
{
"epoch": 1.28,
"learning_rate": 4.51790221506593e-05,
"loss": 2.9079,
"step": 24400
},
{
"epoch": 1.29,
"learning_rate": 4.5124624657295796e-05,
"loss": 2.898,
"step": 24600
},
{
"epoch": 1.3,
"learning_rate": 4.507022716393229e-05,
"loss": 2.8902,
"step": 24800
},
{
"epoch": 1.31,
"learning_rate": 4.501582967056878e-05,
"loss": 2.8921,
"step": 25000
},
{
"epoch": 1.32,
"learning_rate": 4.4961432177205276e-05,
"loss": 2.8749,
"step": 25200
},
{
"epoch": 1.33,
"learning_rate": 4.490703468384177e-05,
"loss": 2.8724,
"step": 25400
},
{
"epoch": 1.34,
"learning_rate": 4.485263719047826e-05,
"loss": 2.8649,
"step": 25600
},
{
"epoch": 1.35,
"learning_rate": 4.479823969711476e-05,
"loss": 2.8586,
"step": 25800
},
{
"epoch": 1.36,
"learning_rate": 4.474384220375125e-05,
"loss": 2.8421,
"step": 26000
},
{
"epoch": 1.38,
"learning_rate": 4.468944471038775e-05,
"loss": 2.843,
"step": 26200
},
{
"epoch": 1.39,
"learning_rate": 4.463504721702424e-05,
"loss": 2.8397,
"step": 26400
},
{
"epoch": 1.4,
"learning_rate": 4.4580649723660734e-05,
"loss": 2.8311,
"step": 26600
},
{
"epoch": 1.41,
"learning_rate": 4.452625223029723e-05,
"loss": 2.8143,
"step": 26800
},
{
"epoch": 1.42,
"learning_rate": 4.4471854736933725e-05,
"loss": 2.8106,
"step": 27000
},
{
"epoch": 1.43,
"learning_rate": 4.441745724357022e-05,
"loss": 2.8199,
"step": 27200
},
{
"epoch": 1.44,
"learning_rate": 4.436305975020672e-05,
"loss": 2.8039,
"step": 27400
},
{
"epoch": 1.45,
"learning_rate": 4.4308662256843206e-05,
"loss": 2.7975,
"step": 27600
},
{
"epoch": 1.46,
"learning_rate": 4.425453675094652e-05,
"loss": 2.7903,
"step": 27800
},
{
"epoch": 1.47,
"learning_rate": 4.4200139257583015e-05,
"loss": 2.7907,
"step": 28000
},
{
"epoch": 1.48,
"learning_rate": 4.4145741764219504e-05,
"loss": 2.7836,
"step": 28200
},
{
"epoch": 1.49,
"learning_rate": 4.4091344270856e-05,
"loss": 2.7825,
"step": 28400
},
{
"epoch": 1.5,
"learning_rate": 4.4036946777492495e-05,
"loss": 2.765,
"step": 28600
},
{
"epoch": 1.51,
"learning_rate": 4.3982549284128984e-05,
"loss": 2.773,
"step": 28800
},
{
"epoch": 1.52,
"learning_rate": 4.392815179076548e-05,
"loss": 2.7608,
"step": 29000
},
{
"epoch": 1.53,
"learning_rate": 4.3873754297401976e-05,
"loss": 2.76,
"step": 29200
},
{
"epoch": 1.54,
"learning_rate": 4.381935680403847e-05,
"loss": 2.7613,
"step": 29400
},
{
"epoch": 1.55,
"learning_rate": 4.376495931067497e-05,
"loss": 2.7319,
"step": 29600
},
{
"epoch": 1.56,
"learning_rate": 4.3710561817311464e-05,
"loss": 2.7377,
"step": 29800
},
{
"epoch": 1.57,
"learning_rate": 4.365616432394796e-05,
"loss": 2.736,
"step": 30000
},
{
"epoch": 1.59,
"learning_rate": 4.360176683058445e-05,
"loss": 2.7348,
"step": 30200
},
{
"epoch": 1.6,
"learning_rate": 4.3547369337220944e-05,
"loss": 2.7285,
"step": 30400
},
{
"epoch": 1.61,
"learning_rate": 4.349324383132426e-05,
"loss": 2.7299,
"step": 30600
},
{
"epoch": 1.62,
"learning_rate": 4.3438846337960746e-05,
"loss": 2.7208,
"step": 30800
},
{
"epoch": 1.63,
"learning_rate": 4.338444884459724e-05,
"loss": 2.7115,
"step": 31000
},
{
"epoch": 1.64,
"learning_rate": 4.333005135123374e-05,
"loss": 2.7033,
"step": 31200
},
{
"epoch": 1.65,
"learning_rate": 4.327565385787023e-05,
"loss": 2.6996,
"step": 31400
},
{
"epoch": 1.66,
"learning_rate": 4.322125636450672e-05,
"loss": 2.6925,
"step": 31600
},
{
"epoch": 1.67,
"learning_rate": 4.316685887114322e-05,
"loss": 2.6896,
"step": 31800
},
{
"epoch": 1.68,
"learning_rate": 4.3112461377779714e-05,
"loss": 2.6846,
"step": 32000
},
{
"epoch": 1.69,
"learning_rate": 4.30580638844162e-05,
"loss": 2.6848,
"step": 32200
},
{
"epoch": 1.7,
"learning_rate": 4.3003666391052706e-05,
"loss": 2.6764,
"step": 32400
},
{
"epoch": 1.71,
"learning_rate": 4.29492688976892e-05,
"loss": 2.6846,
"step": 32600
},
{
"epoch": 1.72,
"learning_rate": 4.289487140432569e-05,
"loss": 2.6742,
"step": 32800
},
{
"epoch": 1.73,
"learning_rate": 4.2840745898429003e-05,
"loss": 2.6667,
"step": 33000
},
{
"epoch": 1.74,
"learning_rate": 4.27863484050655e-05,
"loss": 2.6632,
"step": 33200
},
{
"epoch": 1.75,
"learning_rate": 4.273195091170199e-05,
"loss": 2.6593,
"step": 33400
},
{
"epoch": 1.76,
"learning_rate": 4.2677553418338484e-05,
"loss": 2.6568,
"step": 33600
},
{
"epoch": 1.77,
"learning_rate": 4.262315592497498e-05,
"loss": 2.6514,
"step": 33800
},
{
"epoch": 1.78,
"learning_rate": 4.256875843161147e-05,
"loss": 2.648,
"step": 34000
},
{
"epoch": 1.8,
"learning_rate": 4.2514360938247965e-05,
"loss": 2.6362,
"step": 34200
},
{
"epoch": 1.81,
"learning_rate": 4.245996344488446e-05,
"loss": 2.6468,
"step": 34400
},
{
"epoch": 1.82,
"learning_rate": 4.240556595152096e-05,
"loss": 2.6268,
"step": 34600
},
{
"epoch": 1.83,
"learning_rate": 4.2351168458157446e-05,
"loss": 2.622,
"step": 34800
},
{
"epoch": 1.84,
"learning_rate": 4.2297042952260765e-05,
"loss": 2.6178,
"step": 35000
},
{
"epoch": 1.85,
"learning_rate": 4.2242645458897254e-05,
"loss": 2.6212,
"step": 35200
},
{
"epoch": 1.86,
"learning_rate": 4.218824796553375e-05,
"loss": 2.6161,
"step": 35400
},
{
"epoch": 1.87,
"learning_rate": 4.2133850472170246e-05,
"loss": 2.6139,
"step": 35600
},
{
"epoch": 1.88,
"learning_rate": 4.2079452978806735e-05,
"loss": 2.6165,
"step": 35800
},
{
"epoch": 1.89,
"learning_rate": 4.202505548544323e-05,
"loss": 2.6044,
"step": 36000
},
{
"epoch": 1.9,
"learning_rate": 4.1970657992079727e-05,
"loss": 2.6075,
"step": 36200
},
{
"epoch": 1.91,
"learning_rate": 4.191626049871622e-05,
"loss": 2.604,
"step": 36400
},
{
"epoch": 1.92,
"learning_rate": 4.186186300535271e-05,
"loss": 2.6026,
"step": 36600
},
{
"epoch": 1.93,
"learning_rate": 4.180746551198921e-05,
"loss": 2.6024,
"step": 36800
},
{
"epoch": 1.94,
"learning_rate": 4.175334000609252e-05,
"loss": 2.5888,
"step": 37000
},
{
"epoch": 1.95,
"learning_rate": 4.1698942512729016e-05,
"loss": 2.5914,
"step": 37200
},
{
"epoch": 1.96,
"learning_rate": 4.164454501936551e-05,
"loss": 2.592,
"step": 37400
},
{
"epoch": 1.97,
"learning_rate": 4.159014752600201e-05,
"loss": 2.5822,
"step": 37600
},
{
"epoch": 1.98,
"learning_rate": 4.1535750032638496e-05,
"loss": 2.5845,
"step": 37800
},
{
"epoch": 1.99,
"learning_rate": 4.148135253927499e-05,
"loss": 2.5731,
"step": 38000
},
{
"epoch": 2.01,
"learning_rate": 4.142695504591149e-05,
"loss": 2.5695,
"step": 38200
},
{
"epoch": 2.02,
"learning_rate": 4.137255755254798e-05,
"loss": 2.5682,
"step": 38400
},
{
"epoch": 2.03,
"learning_rate": 4.131816005918447e-05,
"loss": 2.5654,
"step": 38600
},
{
"epoch": 2.04,
"learning_rate": 4.126376256582097e-05,
"loss": 2.5641,
"step": 38800
},
{
"epoch": 2.05,
"learning_rate": 4.1209365072457465e-05,
"loss": 2.554,
"step": 39000
},
{
"epoch": 2.06,
"learning_rate": 4.115523956656078e-05,
"loss": 2.5569,
"step": 39200
},
{
"epoch": 2.07,
"learning_rate": 4.110084207319727e-05,
"loss": 2.5503,
"step": 39400
},
{
"epoch": 2.08,
"learning_rate": 4.104644457983376e-05,
"loss": 2.5554,
"step": 39600
},
{
"epoch": 2.09,
"learning_rate": 4.099204708647026e-05,
"loss": 2.552,
"step": 39800
},
{
"epoch": 2.1,
"learning_rate": 4.0937649593106754e-05,
"loss": 2.5564,
"step": 40000
},
{
"epoch": 2.11,
"learning_rate": 4.088325209974325e-05,
"loss": 2.5373,
"step": 40200
},
{
"epoch": 2.12,
"learning_rate": 4.082885460637974e-05,
"loss": 2.5377,
"step": 40400
},
{
"epoch": 2.13,
"learning_rate": 4.0774457113016235e-05,
"loss": 2.5404,
"step": 40600
},
{
"epoch": 2.14,
"learning_rate": 4.072005961965273e-05,
"loss": 2.5369,
"step": 40800
},
{
"epoch": 2.15,
"learning_rate": 4.066566212628922e-05,
"loss": 2.5352,
"step": 41000
},
{
"epoch": 2.16,
"learning_rate": 4.0611808607859356e-05,
"loss": 2.5284,
"step": 41200
},
{
"epoch": 2.17,
"learning_rate": 4.0557411114495845e-05,
"loss": 2.5308,
"step": 41400
},
{
"epoch": 2.18,
"learning_rate": 4.050301362113234e-05,
"loss": 2.5202,
"step": 41600
},
{
"epoch": 2.19,
"learning_rate": 4.0448616127768836e-05,
"loss": 2.5199,
"step": 41800
},
{
"epoch": 2.2,
"learning_rate": 4.0394218634405325e-05,
"loss": 2.5074,
"step": 42000
},
{
"epoch": 2.22,
"learning_rate": 4.033982114104182e-05,
"loss": 2.5086,
"step": 42200
},
{
"epoch": 2.23,
"learning_rate": 4.028542364767832e-05,
"loss": 2.5125,
"step": 42400
},
{
"epoch": 2.24,
"learning_rate": 4.023102615431481e-05,
"loss": 2.5082,
"step": 42600
},
{
"epoch": 2.25,
"learning_rate": 4.01766286609513e-05,
"loss": 2.4999,
"step": 42800
},
{
"epoch": 2.26,
"learning_rate": 4.01222311675878e-05,
"loss": 2.5073,
"step": 43000
},
{
"epoch": 2.27,
"learning_rate": 4.0067833674224294e-05,
"loss": 2.4998,
"step": 43200
},
{
"epoch": 2.28,
"learning_rate": 4.001343618086078e-05,
"loss": 2.4994,
"step": 43400
},
{
"epoch": 2.29,
"learning_rate": 3.995903868749728e-05,
"loss": 2.4952,
"step": 43600
},
{
"epoch": 2.3,
"learning_rate": 3.9904641194133775e-05,
"loss": 2.4914,
"step": 43800
},
{
"epoch": 2.31,
"learning_rate": 3.985024370077027e-05,
"loss": 2.4919,
"step": 44000
},
{
"epoch": 2.32,
"learning_rate": 3.9795846207406766e-05,
"loss": 2.4884,
"step": 44200
},
{
"epoch": 2.33,
"learning_rate": 3.974144871404326e-05,
"loss": 2.4886,
"step": 44400
},
{
"epoch": 2.34,
"learning_rate": 3.968705122067976e-05,
"loss": 2.4902,
"step": 44600
},
{
"epoch": 2.35,
"learning_rate": 3.963265372731625e-05,
"loss": 2.4784,
"step": 44800
},
{
"epoch": 2.36,
"learning_rate": 3.957825623395274e-05,
"loss": 2.479,
"step": 45000
},
{
"epoch": 2.37,
"learning_rate": 3.9524130728056055e-05,
"loss": 2.463,
"step": 45200
},
{
"epoch": 2.38,
"learning_rate": 3.9469733234692544e-05,
"loss": 2.4778,
"step": 45400
},
{
"epoch": 2.39,
"learning_rate": 3.941533574132904e-05,
"loss": 2.4758,
"step": 45600
},
{
"epoch": 2.4,
"learning_rate": 3.9360938247965536e-05,
"loss": 2.4627,
"step": 45800
},
{
"epoch": 2.41,
"learning_rate": 3.9306540754602025e-05,
"loss": 2.456,
"step": 46000
},
{
"epoch": 2.43,
"learning_rate": 3.925214326123852e-05,
"loss": 2.4624,
"step": 46200
},
{
"epoch": 2.44,
"learning_rate": 3.919774576787502e-05,
"loss": 2.4643,
"step": 46400
},
{
"epoch": 2.45,
"learning_rate": 3.914334827451151e-05,
"loss": 2.466,
"step": 46600
},
{
"epoch": 2.46,
"learning_rate": 3.908895078114801e-05,
"loss": 2.4556,
"step": 46800
},
{
"epoch": 2.47,
"learning_rate": 3.9034553287784504e-05,
"loss": 2.4566,
"step": 47000
},
{
"epoch": 2.48,
"learning_rate": 3.898042778188781e-05,
"loss": 2.4527,
"step": 47200
},
{
"epoch": 2.49,
"learning_rate": 3.8926030288524306e-05,
"loss": 2.452,
"step": 47400
},
{
"epoch": 2.5,
"learning_rate": 3.88716327951608e-05,
"loss": 2.439,
"step": 47600
},
{
"epoch": 2.51,
"learning_rate": 3.88172353017973e-05,
"loss": 2.4507,
"step": 47800
},
{
"epoch": 2.52,
"learning_rate": 3.876283780843379e-05,
"loss": 2.4393,
"step": 48000
},
{
"epoch": 2.53,
"learning_rate": 3.870844031507028e-05,
"loss": 2.4411,
"step": 48200
},
{
"epoch": 2.54,
"learning_rate": 3.865404282170678e-05,
"loss": 2.449,
"step": 48400
},
{
"epoch": 2.55,
"learning_rate": 3.859964532834327e-05,
"loss": 2.4413,
"step": 48600
},
{
"epoch": 2.56,
"learning_rate": 3.854524783497976e-05,
"loss": 2.4304,
"step": 48800
},
{
"epoch": 2.57,
"learning_rate": 3.849085034161626e-05,
"loss": 2.4276,
"step": 49000
},
{
"epoch": 2.58,
"learning_rate": 3.843672483571957e-05,
"loss": 2.4336,
"step": 49200
},
{
"epoch": 2.59,
"learning_rate": 3.838232734235607e-05,
"loss": 2.4269,
"step": 49400
},
{
"epoch": 2.6,
"learning_rate": 3.8327929848992563e-05,
"loss": 2.4291,
"step": 49600
},
{
"epoch": 2.61,
"learning_rate": 3.827353235562905e-05,
"loss": 2.4313,
"step": 49800
},
{
"epoch": 2.62,
"learning_rate": 3.821913486226555e-05,
"loss": 2.4198,
"step": 50000
},
{
"epoch": 2.64,
"learning_rate": 3.8164737368902044e-05,
"loss": 2.4152,
"step": 50200
},
{
"epoch": 2.65,
"learning_rate": 3.811033987553853e-05,
"loss": 2.4158,
"step": 50400
},
{
"epoch": 2.66,
"learning_rate": 3.805594238217503e-05,
"loss": 2.4107,
"step": 50600
},
{
"epoch": 2.67,
"learning_rate": 3.8001544888811525e-05,
"loss": 2.4144,
"step": 50800
},
{
"epoch": 2.68,
"learning_rate": 3.794714739544802e-05,
"loss": 2.4076,
"step": 51000
},
{
"epoch": 2.69,
"learning_rate": 3.7893021889551333e-05,
"loss": 2.4076,
"step": 51200
},
{
"epoch": 2.7,
"learning_rate": 3.783862439618783e-05,
"loss": 2.4016,
"step": 51400
},
{
"epoch": 2.71,
"learning_rate": 3.7784498890291135e-05,
"loss": 2.4178,
"step": 51600
},
{
"epoch": 2.72,
"learning_rate": 3.773010139692763e-05,
"loss": 2.4059,
"step": 51800
},
{
"epoch": 2.73,
"learning_rate": 3.767570390356413e-05,
"loss": 2.3955,
"step": 52000
},
{
"epoch": 2.74,
"learning_rate": 3.7621306410200616e-05,
"loss": 2.4031,
"step": 52200
},
{
"epoch": 2.75,
"learning_rate": 3.756690891683711e-05,
"loss": 2.4019,
"step": 52400
},
{
"epoch": 2.76,
"learning_rate": 3.751251142347361e-05,
"loss": 2.3981,
"step": 52600
},
{
"epoch": 2.77,
"learning_rate": 3.74581139301101e-05,
"loss": 2.3988,
"step": 52800
},
{
"epoch": 2.78,
"learning_rate": 3.740371643674659e-05,
"loss": 2.3848,
"step": 53000
},
{
"epoch": 2.79,
"learning_rate": 3.734931894338309e-05,
"loss": 2.3876,
"step": 53200
},
{
"epoch": 2.8,
"learning_rate": 3.729492145001959e-05,
"loss": 2.3849,
"step": 53400
},
{
"epoch": 2.81,
"learning_rate": 3.724052395665608e-05,
"loss": 2.3869,
"step": 53600
},
{
"epoch": 2.82,
"learning_rate": 3.7186126463292576e-05,
"loss": 2.3827,
"step": 53800
},
{
"epoch": 2.83,
"learning_rate": 3.713172896992907e-05,
"loss": 2.379,
"step": 54000
},
{
"epoch": 2.85,
"learning_rate": 3.707733147656556e-05,
"loss": 2.3768,
"step": 54200
},
{
"epoch": 2.86,
"learning_rate": 3.7022933983202057e-05,
"loss": 2.3795,
"step": 54400
},
{
"epoch": 2.87,
"learning_rate": 3.696853648983855e-05,
"loss": 2.3738,
"step": 54600
},
{
"epoch": 2.88,
"learning_rate": 3.691413899647505e-05,
"loss": 2.378,
"step": 54800
},
{
"epoch": 2.89,
"learning_rate": 3.685974150311154e-05,
"loss": 2.3671,
"step": 55000
},
{
"epoch": 2.9,
"learning_rate": 3.680534400974803e-05,
"loss": 2.3694,
"step": 55200
},
{
"epoch": 2.91,
"learning_rate": 3.6751218503851346e-05,
"loss": 2.3796,
"step": 55400
},
{
"epoch": 2.92,
"learning_rate": 3.6696821010487835e-05,
"loss": 2.3653,
"step": 55600
},
{
"epoch": 2.93,
"learning_rate": 3.664242351712433e-05,
"loss": 2.3676,
"step": 55800
},
{
"epoch": 2.94,
"learning_rate": 3.6588026023760826e-05,
"loss": 2.3658,
"step": 56000
},
{
"epoch": 2.95,
"learning_rate": 3.653362853039732e-05,
"loss": 2.3721,
"step": 56200
},
{
"epoch": 2.96,
"learning_rate": 3.647923103703382e-05,
"loss": 2.3668,
"step": 56400
},
{
"epoch": 2.97,
"learning_rate": 3.6424833543670314e-05,
"loss": 2.3639,
"step": 56600
},
{
"epoch": 2.98,
"learning_rate": 3.63704360503068e-05,
"loss": 2.3628,
"step": 56800
},
{
"epoch": 2.99,
"learning_rate": 3.63160385569433e-05,
"loss": 2.3688,
"step": 57000
},
{
"epoch": 3.0,
"learning_rate": 3.6261641063579795e-05,
"loss": 2.3577,
"step": 57200
},
{
"epoch": 3.01,
"learning_rate": 3.6207243570216284e-05,
"loss": 2.353,
"step": 57400
},
{
"epoch": 3.02,
"learning_rate": 3.615284607685278e-05,
"loss": 2.3509,
"step": 57600
},
{
"epoch": 3.03,
"learning_rate": 3.6098448583489275e-05,
"loss": 2.3409,
"step": 57800
},
{
"epoch": 3.04,
"learning_rate": 3.604405109012577e-05,
"loss": 2.3402,
"step": 58000
},
{
"epoch": 3.06,
"learning_rate": 3.598965359676226e-05,
"loss": 2.3542,
"step": 58200
},
{
"epoch": 3.07,
"learning_rate": 3.5935256103398756e-05,
"loss": 2.346,
"step": 58400
},
{
"epoch": 3.08,
"learning_rate": 3.588085861003525e-05,
"loss": 2.349,
"step": 58600
},
{
"epoch": 3.09,
"learning_rate": 3.582646111667175e-05,
"loss": 2.3462,
"step": 58800
},
{
"epoch": 3.1,
"learning_rate": 3.577206362330824e-05,
"loss": 2.3458,
"step": 59000
},
{
"epoch": 3.11,
"learning_rate": 3.571766612994473e-05,
"loss": 2.3351,
"step": 59200
},
{
"epoch": 3.12,
"learning_rate": 3.5663540624048045e-05,
"loss": 2.3336,
"step": 59400
},
{
"epoch": 3.13,
"learning_rate": 3.560914313068454e-05,
"loss": 2.3458,
"step": 59600
},
{
"epoch": 3.14,
"learning_rate": 3.555474563732104e-05,
"loss": 2.3354,
"step": 59800
},
{
"epoch": 3.15,
"learning_rate": 3.550062013142434e-05,
"loss": 2.3382,
"step": 60000
},
{
"epoch": 3.16,
"learning_rate": 3.544622263806084e-05,
"loss": 2.3381,
"step": 60200
},
{
"epoch": 3.17,
"learning_rate": 3.5391825144697335e-05,
"loss": 2.3287,
"step": 60400
},
{
"epoch": 3.18,
"learning_rate": 3.5337427651333824e-05,
"loss": 2.3267,
"step": 60600
},
{
"epoch": 3.19,
"learning_rate": 3.528303015797032e-05,
"loss": 2.3295,
"step": 60800
},
{
"epoch": 3.2,
"learning_rate": 3.5228632664606815e-05,
"loss": 2.3224,
"step": 61000
},
{
"epoch": 3.21,
"learning_rate": 3.517423517124331e-05,
"loss": 2.3151,
"step": 61200
},
{
"epoch": 3.22,
"learning_rate": 3.511983767787981e-05,
"loss": 2.3209,
"step": 61400
},
{
"epoch": 3.23,
"learning_rate": 3.50654401845163e-05,
"loss": 2.3215,
"step": 61600
},
{
"epoch": 3.24,
"learning_rate": 3.50110426911528e-05,
"loss": 2.3163,
"step": 61800
},
{
"epoch": 3.25,
"learning_rate": 3.495664519778929e-05,
"loss": 2.3137,
"step": 62000
},
{
"epoch": 3.27,
"learning_rate": 3.4902247704425784e-05,
"loss": 2.3109,
"step": 62200
},
{
"epoch": 3.28,
"learning_rate": 3.484785021106228e-05,
"loss": 2.3158,
"step": 62400
},
{
"epoch": 3.29,
"learning_rate": 3.4793724705165585e-05,
"loss": 2.3133,
"step": 62600
},
{
"epoch": 3.3,
"learning_rate": 3.473932721180208e-05,
"loss": 2.3127,
"step": 62800
},
{
"epoch": 3.31,
"learning_rate": 3.468492971843858e-05,
"loss": 2.3097,
"step": 63000
},
{
"epoch": 3.32,
"learning_rate": 3.4630532225075066e-05,
"loss": 2.3132,
"step": 63200
},
{
"epoch": 3.33,
"learning_rate": 3.457613473171156e-05,
"loss": 2.3049,
"step": 63400
},
{
"epoch": 3.34,
"learning_rate": 3.452173723834806e-05,
"loss": 2.3067,
"step": 63600
},
{
"epoch": 3.35,
"learning_rate": 3.4467339744984553e-05,
"loss": 2.3126,
"step": 63800
},
{
"epoch": 3.36,
"learning_rate": 3.441294225162104e-05,
"loss": 2.2959,
"step": 64000
},
{
"epoch": 3.37,
"learning_rate": 3.4358544758257545e-05,
"loss": 2.3025,
"step": 64200
},
{
"epoch": 3.38,
"learning_rate": 3.430414726489404e-05,
"loss": 2.2955,
"step": 64400
},
{
"epoch": 3.39,
"learning_rate": 3.424974977153053e-05,
"loss": 2.3055,
"step": 64600
},
{
"epoch": 3.4,
"learning_rate": 3.4195352278167026e-05,
"loss": 2.2957,
"step": 64800
},
{
"epoch": 3.41,
"learning_rate": 3.414122677227033e-05,
"loss": 2.2927,
"step": 65000
},
{
"epoch": 3.42,
"learning_rate": 3.408682927890683e-05,
"loss": 2.2961,
"step": 65200
},
{
"epoch": 3.43,
"learning_rate": 3.4032431785543323e-05,
"loss": 2.2937,
"step": 65400
},
{
"epoch": 3.44,
"learning_rate": 3.397803429217982e-05,
"loss": 2.2915,
"step": 65600
},
{
"epoch": 3.45,
"learning_rate": 3.392363679881631e-05,
"loss": 2.2915,
"step": 65800
},
{
"epoch": 3.46,
"learning_rate": 3.386951129291963e-05,
"loss": 2.29,
"step": 66000
},
{
"epoch": 3.48,
"learning_rate": 3.381511379955612e-05,
"loss": 2.2855,
"step": 66200
},
{
"epoch": 3.49,
"learning_rate": 3.376071630619261e-05,
"loss": 2.2918,
"step": 66400
},
{
"epoch": 3.5,
"learning_rate": 3.370631881282911e-05,
"loss": 2.2802,
"step": 66600
},
{
"epoch": 3.51,
"learning_rate": 3.3651921319465604e-05,
"loss": 2.2856,
"step": 66800
},
{
"epoch": 3.52,
"learning_rate": 3.359752382610209e-05,
"loss": 2.2877,
"step": 67000
},
{
"epoch": 3.53,
"learning_rate": 3.354312633273859e-05,
"loss": 2.2856,
"step": 67200
},
{
"epoch": 3.54,
"learning_rate": 3.3488728839375085e-05,
"loss": 2.2875,
"step": 67400
},
{
"epoch": 3.55,
"learning_rate": 3.3434331346011574e-05,
"loss": 2.2791,
"step": 67600
},
{
"epoch": 3.56,
"learning_rate": 3.337993385264807e-05,
"loss": 2.2777,
"step": 67800
},
{
"epoch": 3.57,
"learning_rate": 3.3325536359284566e-05,
"loss": 2.271,
"step": 68000
},
{
"epoch": 3.58,
"learning_rate": 3.327113886592106e-05,
"loss": 2.2785,
"step": 68200
},
{
"epoch": 3.59,
"learning_rate": 3.321674137255755e-05,
"loss": 2.2684,
"step": 68400
},
{
"epoch": 3.6,
"learning_rate": 3.3162343879194047e-05,
"loss": 2.2863,
"step": 68600
},
{
"epoch": 3.61,
"learning_rate": 3.310794638583054e-05,
"loss": 2.2815,
"step": 68800
},
{
"epoch": 3.62,
"learning_rate": 3.305354889246703e-05,
"loss": 2.2761,
"step": 69000
},
{
"epoch": 3.63,
"learning_rate": 3.299915139910353e-05,
"loss": 2.2672,
"step": 69200
},
{
"epoch": 3.64,
"learning_rate": 3.294475390574003e-05,
"loss": 2.2587,
"step": 69400
},
{
"epoch": 3.65,
"learning_rate": 3.289035641237652e-05,
"loss": 2.258,
"step": 69600
},
{
"epoch": 3.66,
"learning_rate": 3.2835958919013015e-05,
"loss": 2.2662,
"step": 69800
},
{
"epoch": 3.67,
"learning_rate": 3.278156142564951e-05,
"loss": 2.2605,
"step": 70000
},
{
"epoch": 3.69,
"learning_rate": 3.2727163932286006e-05,
"loss": 2.2608,
"step": 70200
},
{
"epoch": 3.7,
"learning_rate": 3.267303842638931e-05,
"loss": 2.2549,
"step": 70400
},
{
"epoch": 3.71,
"learning_rate": 3.261864093302581e-05,
"loss": 2.2667,
"step": 70600
},
{
"epoch": 3.72,
"learning_rate": 3.2564243439662304e-05,
"loss": 2.2601,
"step": 70800
},
{
"epoch": 3.73,
"learning_rate": 3.250984594629879e-05,
"loss": 2.2547,
"step": 71000
},
{
"epoch": 3.74,
"learning_rate": 3.245544845293529e-05,
"loss": 2.2547,
"step": 71200
},
{
"epoch": 3.75,
"learning_rate": 3.2401050959571785e-05,
"loss": 2.2552,
"step": 71400
},
{
"epoch": 3.76,
"learning_rate": 3.2346653466208274e-05,
"loss": 2.2498,
"step": 71600
},
{
"epoch": 3.77,
"learning_rate": 3.229225597284477e-05,
"loss": 2.2536,
"step": 71800
},
{
"epoch": 3.78,
"learning_rate": 3.2237858479481265e-05,
"loss": 2.2435,
"step": 72000
},
{
"epoch": 3.79,
"learning_rate": 3.218346098611776e-05,
"loss": 2.2544,
"step": 72200
},
{
"epoch": 3.8,
"learning_rate": 3.212906349275426e-05,
"loss": 2.2449,
"step": 72400
},
{
"epoch": 3.81,
"learning_rate": 3.207466599939075e-05,
"loss": 2.2506,
"step": 72600
},
{
"epoch": 3.82,
"learning_rate": 3.202026850602725e-05,
"loss": 2.2483,
"step": 72800
},
{
"epoch": 3.83,
"learning_rate": 3.196587101266374e-05,
"loss": 2.2417,
"step": 73000
},
{
"epoch": 3.84,
"learning_rate": 3.191174550676705e-05,
"loss": 2.2366,
"step": 73200
},
{
"epoch": 3.85,
"learning_rate": 3.1857348013403546e-05,
"loss": 2.2423,
"step": 73400
},
{
"epoch": 3.86,
"learning_rate": 3.1802950520040035e-05,
"loss": 2.2401,
"step": 73600
},
{
"epoch": 3.87,
"learning_rate": 3.1748825014143355e-05,
"loss": 2.245,
"step": 73800
},
{
"epoch": 3.88,
"learning_rate": 3.1694427520779844e-05,
"loss": 2.2359,
"step": 74000
},
{
"epoch": 3.9,
"learning_rate": 3.164003002741634e-05,
"loss": 2.2409,
"step": 74200
},
{
"epoch": 3.91,
"learning_rate": 3.1585632534052835e-05,
"loss": 2.2402,
"step": 74400
},
{
"epoch": 3.92,
"learning_rate": 3.1531235040689325e-05,
"loss": 2.2379,
"step": 74600
},
{
"epoch": 3.93,
"learning_rate": 3.147683754732582e-05,
"loss": 2.2365,
"step": 74800
},
{
"epoch": 3.94,
"learning_rate": 3.1422440053962316e-05,
"loss": 2.2368,
"step": 75000
},
{
"epoch": 3.95,
"learning_rate": 3.136804256059881e-05,
"loss": 2.2338,
"step": 75200
},
{
"epoch": 3.96,
"learning_rate": 3.13136450672353e-05,
"loss": 2.2456,
"step": 75400
},
{
"epoch": 3.97,
"learning_rate": 3.12592475738718e-05,
"loss": 2.2337,
"step": 75600
},
{
"epoch": 3.98,
"learning_rate": 3.120485008050829e-05,
"loss": 2.2337,
"step": 75800
},
{
"epoch": 3.99,
"learning_rate": 3.115045258714478e-05,
"loss": 2.2282,
"step": 76000
},
{
"epoch": 4.0,
"learning_rate": 3.109605509378128e-05,
"loss": 2.2292,
"step": 76200
},
{
"epoch": 4.01,
"learning_rate": 3.1041657600417774e-05,
"loss": 2.2282,
"step": 76400
},
{
"epoch": 4.02,
"learning_rate": 3.098726010705427e-05,
"loss": 2.2299,
"step": 76600
},
{
"epoch": 4.03,
"learning_rate": 3.093286261369076e-05,
"loss": 2.2131,
"step": 76800
},
{
"epoch": 4.04,
"learning_rate": 3.0878465120327254e-05,
"loss": 2.2223,
"step": 77000
},
{
"epoch": 4.05,
"learning_rate": 3.082406762696375e-05,
"loss": 2.2275,
"step": 77200
},
{
"epoch": 4.06,
"learning_rate": 3.076967013360024e-05,
"loss": 2.2156,
"step": 77400
},
{
"epoch": 4.07,
"learning_rate": 3.071527264023674e-05,
"loss": 2.2252,
"step": 77600
},
{
"epoch": 4.08,
"learning_rate": 3.0661147134340054e-05,
"loss": 2.2282,
"step": 77800
},
{
"epoch": 4.09,
"learning_rate": 3.0606749640976543e-05,
"loss": 2.2214,
"step": 78000
},
{
"epoch": 4.1,
"learning_rate": 3.055235214761304e-05,
"loss": 2.2244,
"step": 78200
},
{
"epoch": 4.12,
"learning_rate": 3.0497954654249532e-05,
"loss": 2.2149,
"step": 78400
},
{
"epoch": 4.13,
"learning_rate": 3.0443557160886028e-05,
"loss": 2.2203,
"step": 78600
},
{
"epoch": 4.14,
"learning_rate": 3.038915966752252e-05,
"loss": 2.216,
"step": 78800
},
{
"epoch": 4.15,
"learning_rate": 3.0334762174159016e-05,
"loss": 2.2148,
"step": 79000
},
{
"epoch": 4.16,
"learning_rate": 3.028036468079551e-05,
"loss": 2.2145,
"step": 79200
},
{
"epoch": 4.17,
"learning_rate": 3.0225967187432004e-05,
"loss": 2.2172,
"step": 79400
},
{
"epoch": 4.18,
"learning_rate": 3.0171569694068497e-05,
"loss": 2.2165,
"step": 79600
},
{
"epoch": 4.19,
"learning_rate": 3.011717220070499e-05,
"loss": 2.2186,
"step": 79800
},
{
"epoch": 4.2,
"learning_rate": 3.0063046694808305e-05,
"loss": 2.2044,
"step": 80000
},
{
"epoch": 4.21,
"learning_rate": 3.00086492014448e-05,
"loss": 2.2032,
"step": 80200
},
{
"epoch": 4.22,
"learning_rate": 2.9954251708081293e-05,
"loss": 2.2066,
"step": 80400
},
{
"epoch": 4.23,
"learning_rate": 2.9899854214717786e-05,
"loss": 2.2019,
"step": 80600
},
{
"epoch": 4.24,
"learning_rate": 2.984545672135428e-05,
"loss": 2.2085,
"step": 80800
},
{
"epoch": 4.25,
"learning_rate": 2.9791059227990774e-05,
"loss": 2.1975,
"step": 81000
},
{
"epoch": 4.26,
"learning_rate": 2.973666173462727e-05,
"loss": 2.1989,
"step": 81200
},
{
"epoch": 4.27,
"learning_rate": 2.9682264241263762e-05,
"loss": 2.2016,
"step": 81400
},
{
"epoch": 4.28,
"learning_rate": 2.9627866747900258e-05,
"loss": 2.1911,
"step": 81600
},
{
"epoch": 4.29,
"learning_rate": 2.957346925453675e-05,
"loss": 2.2,
"step": 81800
},
{
"epoch": 4.3,
"learning_rate": 2.9519343748640067e-05,
"loss": 2.2038,
"step": 82000
},
{
"epoch": 4.31,
"learning_rate": 2.9465218242743376e-05,
"loss": 2.1882,
"step": 82200
},
{
"epoch": 4.33,
"learning_rate": 2.941082074937987e-05,
"loss": 2.1936,
"step": 82400
},
{
"epoch": 4.34,
"learning_rate": 2.9356423256016364e-05,
"loss": 2.1979,
"step": 82600
},
{
"epoch": 4.35,
"learning_rate": 2.9302025762652857e-05,
"loss": 2.2051,
"step": 82800
},
{
"epoch": 4.36,
"learning_rate": 2.9247628269289352e-05,
"loss": 2.1915,
"step": 83000
},
{
"epoch": 4.37,
"learning_rate": 2.9193230775925845e-05,
"loss": 2.1869,
"step": 83200
},
{
"epoch": 4.38,
"learning_rate": 2.9138833282562337e-05,
"loss": 2.1824,
"step": 83400
},
{
"epoch": 4.39,
"learning_rate": 2.9084435789198833e-05,
"loss": 2.1974,
"step": 83600
},
{
"epoch": 4.4,
"learning_rate": 2.9030038295835326e-05,
"loss": 2.1854,
"step": 83800
},
{
"epoch": 4.41,
"learning_rate": 2.8975640802471825e-05,
"loss": 2.1844,
"step": 84000
},
{
"epoch": 4.42,
"learning_rate": 2.892124330910832e-05,
"loss": 2.1883,
"step": 84200
},
{
"epoch": 4.43,
"learning_rate": 2.886711780321163e-05,
"loss": 2.1925,
"step": 84400
},
{
"epoch": 4.44,
"learning_rate": 2.8812720309848122e-05,
"loss": 2.186,
"step": 84600
},
{
"epoch": 4.45,
"learning_rate": 2.8758322816484618e-05,
"loss": 2.1865,
"step": 84800
},
{
"epoch": 4.46,
"learning_rate": 2.870392532312111e-05,
"loss": 2.1837,
"step": 85000
},
{
"epoch": 4.47,
"learning_rate": 2.8649527829757607e-05,
"loss": 2.1836,
"step": 85200
},
{
"epoch": 4.48,
"learning_rate": 2.85951303363941e-05,
"loss": 2.1867,
"step": 85400
},
{
"epoch": 4.49,
"learning_rate": 2.8540732843030595e-05,
"loss": 2.1811,
"step": 85600
},
{
"epoch": 4.5,
"learning_rate": 2.8486335349667087e-05,
"loss": 2.1695,
"step": 85800
},
{
"epoch": 4.51,
"learning_rate": 2.843193785630358e-05,
"loss": 2.187,
"step": 86000
},
{
"epoch": 4.52,
"learning_rate": 2.8377540362940076e-05,
"loss": 2.1767,
"step": 86200
},
{
"epoch": 4.54,
"learning_rate": 2.8323142869576568e-05,
"loss": 2.1827,
"step": 86400
},
{
"epoch": 4.55,
"learning_rate": 2.8268745376213064e-05,
"loss": 2.1835,
"step": 86600
},
{
"epoch": 4.56,
"learning_rate": 2.8214347882849563e-05,
"loss": 2.164,
"step": 86800
},
{
"epoch": 4.57,
"learning_rate": 2.8159950389486056e-05,
"loss": 2.1696,
"step": 87000
},
{
"epoch": 4.58,
"learning_rate": 2.810555289612255e-05,
"loss": 2.1812,
"step": 87200
},
{
"epoch": 4.59,
"learning_rate": 2.8051155402759044e-05,
"loss": 2.1768,
"step": 87400
},
{
"epoch": 4.6,
"learning_rate": 2.7996757909395536e-05,
"loss": 2.1825,
"step": 87600
},
{
"epoch": 4.61,
"learning_rate": 2.794263240349885e-05,
"loss": 2.1763,
"step": 87800
},
{
"epoch": 4.62,
"learning_rate": 2.788823491013534e-05,
"loss": 2.1698,
"step": 88000
},
{
"epoch": 4.63,
"learning_rate": 2.7833837416771834e-05,
"loss": 2.1745,
"step": 88200
},
{
"epoch": 4.64,
"learning_rate": 2.777943992340833e-05,
"loss": 2.1675,
"step": 88400
},
{
"epoch": 4.65,
"learning_rate": 2.7725042430044822e-05,
"loss": 2.1661,
"step": 88600
},
{
"epoch": 4.66,
"learning_rate": 2.7670644936681318e-05,
"loss": 2.1603,
"step": 88800
},
{
"epoch": 4.67,
"learning_rate": 2.761624744331781e-05,
"loss": 2.1612,
"step": 89000
},
{
"epoch": 4.68,
"learning_rate": 2.7561849949954306e-05,
"loss": 2.1667,
"step": 89200
},
{
"epoch": 4.69,
"learning_rate": 2.7507452456590805e-05,
"loss": 2.1625,
"step": 89400
},
{
"epoch": 4.7,
"learning_rate": 2.7453054963227298e-05,
"loss": 2.1751,
"step": 89600
},
{
"epoch": 4.71,
"learning_rate": 2.7398657469863794e-05,
"loss": 2.163,
"step": 89800
},
{
"epoch": 4.72,
"learning_rate": 2.7344259976500286e-05,
"loss": 2.1606,
"step": 90000
},
{
"epoch": 4.73,
"learning_rate": 2.728986248313678e-05,
"loss": 2.1623,
"step": 90200
},
{
"epoch": 4.75,
"learning_rate": 2.7235464989773274e-05,
"loss": 2.1648,
"step": 90400
},
{
"epoch": 4.76,
"learning_rate": 2.7181067496409767e-05,
"loss": 2.1607,
"step": 90600
},
{
"epoch": 4.77,
"learning_rate": 2.7126670003046263e-05,
"loss": 2.1561,
"step": 90800
},
{
"epoch": 4.78,
"learning_rate": 2.7072272509682755e-05,
"loss": 2.1591,
"step": 91000
},
{
"epoch": 4.79,
"learning_rate": 2.701787501631925e-05,
"loss": 2.1596,
"step": 91200
},
{
"epoch": 4.8,
"learning_rate": 2.6963477522955744e-05,
"loss": 2.1524,
"step": 91400
},
{
"epoch": 4.81,
"learning_rate": 2.6909080029592236e-05,
"loss": 2.1508,
"step": 91600
},
{
"epoch": 4.82,
"learning_rate": 2.6854682536228732e-05,
"loss": 2.1607,
"step": 91800
},
{
"epoch": 4.83,
"learning_rate": 2.6800285042865224e-05,
"loss": 2.1485,
"step": 92000
},
{
"epoch": 4.84,
"learning_rate": 2.674615953696854e-05,
"loss": 2.1467,
"step": 92200
},
{
"epoch": 4.85,
"learning_rate": 2.6691762043605033e-05,
"loss": 2.1526,
"step": 92400
},
{
"epoch": 4.86,
"learning_rate": 2.663736455024153e-05,
"loss": 2.1489,
"step": 92600
},
{
"epoch": 4.87,
"learning_rate": 2.658296705687802e-05,
"loss": 2.1519,
"step": 92800
},
{
"epoch": 4.88,
"learning_rate": 2.6528569563514517e-05,
"loss": 2.1482,
"step": 93000
},
{
"epoch": 4.89,
"learning_rate": 2.647417207015101e-05,
"loss": 2.1503,
"step": 93200
},
{
"epoch": 4.9,
"learning_rate": 2.6419774576787505e-05,
"loss": 2.1474,
"step": 93400
},
{
"epoch": 4.91,
"learning_rate": 2.6365377083423998e-05,
"loss": 2.1475,
"step": 93600
},
{
"epoch": 4.92,
"learning_rate": 2.631097959006049e-05,
"loss": 2.1487,
"step": 93800
},
{
"epoch": 4.93,
"learning_rate": 2.6256582096696986e-05,
"loss": 2.1527,
"step": 94000
},
{
"epoch": 4.94,
"learning_rate": 2.620218460333348e-05,
"loss": 2.1438,
"step": 94200
},
{
"epoch": 4.96,
"learning_rate": 2.6147787109969974e-05,
"loss": 2.148,
"step": 94400
},
{
"epoch": 4.97,
"learning_rate": 2.6093389616606467e-05,
"loss": 2.1478,
"step": 94600
},
{
"epoch": 4.98,
"learning_rate": 2.6038992123242962e-05,
"loss": 2.1505,
"step": 94800
},
{
"epoch": 4.99,
"learning_rate": 2.5984866617346275e-05,
"loss": 2.1491,
"step": 95000
},
{
"epoch": 5.0,
"learning_rate": 2.593046912398277e-05,
"loss": 2.143,
"step": 95200
},
{
"epoch": 5.01,
"learning_rate": 2.5876071630619263e-05,
"loss": 2.1388,
"step": 95400
},
{
"epoch": 5.02,
"learning_rate": 2.582167413725576e-05,
"loss": 2.1433,
"step": 95600
},
{
"epoch": 5.03,
"learning_rate": 2.576727664389225e-05,
"loss": 2.1269,
"step": 95800
},
{
"epoch": 5.04,
"learning_rate": 2.5712879150528747e-05,
"loss": 2.1401,
"step": 96000
},
{
"epoch": 5.05,
"learning_rate": 2.565848165716524e-05,
"loss": 2.1382,
"step": 96200
},
{
"epoch": 5.06,
"learning_rate": 2.5604084163801732e-05,
"loss": 2.1371,
"step": 96400
},
{
"epoch": 5.07,
"learning_rate": 2.5549686670438228e-05,
"loss": 2.1347,
"step": 96600
},
{
"epoch": 5.08,
"learning_rate": 2.5495561164541537e-05,
"loss": 2.1407,
"step": 96800
},
{
"epoch": 5.09,
"learning_rate": 2.544116367117803e-05,
"loss": 2.1365,
"step": 97000
},
{
"epoch": 5.1,
"learning_rate": 2.5386766177814526e-05,
"loss": 2.1354,
"step": 97200
},
{
"epoch": 5.11,
"learning_rate": 2.5332368684451025e-05,
"loss": 2.1278,
"step": 97400
},
{
"epoch": 5.12,
"learning_rate": 2.5277971191087517e-05,
"loss": 2.1357,
"step": 97600
},
{
"epoch": 5.13,
"learning_rate": 2.5223573697724013e-05,
"loss": 2.1337,
"step": 97800
},
{
"epoch": 5.14,
"learning_rate": 2.5169176204360506e-05,
"loss": 2.1335,
"step": 98000
},
{
"epoch": 5.15,
"learning_rate": 2.5114778710997e-05,
"loss": 2.1302,
"step": 98200
},
{
"epoch": 5.17,
"learning_rate": 2.5060381217633494e-05,
"loss": 2.1351,
"step": 98400
},
{
"epoch": 5.18,
"learning_rate": 2.5005983724269986e-05,
"loss": 2.1346,
"step": 98600
},
{
"epoch": 5.19,
"learning_rate": 2.4951586230906482e-05,
"loss": 2.1307,
"step": 98800
},
{
"epoch": 5.2,
"learning_rate": 2.4897188737542975e-05,
"loss": 2.1222,
"step": 99000
},
{
"epoch": 5.21,
"learning_rate": 2.484279124417947e-05,
"loss": 2.1215,
"step": 99200
},
{
"epoch": 5.22,
"learning_rate": 2.4788393750815963e-05,
"loss": 2.1201,
"step": 99400
},
{
"epoch": 5.23,
"learning_rate": 2.473399625745246e-05,
"loss": 2.1307,
"step": 99600
},
{
"epoch": 5.24,
"learning_rate": 2.467959876408895e-05,
"loss": 2.1311,
"step": 99800
},
{
"epoch": 5.25,
"learning_rate": 2.4625201270725444e-05,
"loss": 2.1161,
"step": 100000
},
{
"epoch": 5.26,
"learning_rate": 2.457080377736194e-05,
"loss": 2.1259,
"step": 100200
},
{
"epoch": 5.27,
"learning_rate": 2.4516406283998432e-05,
"loss": 2.1199,
"step": 100400
},
{
"epoch": 5.28,
"learning_rate": 2.446200879063493e-05,
"loss": 2.1216,
"step": 100600
},
{
"epoch": 5.29,
"learning_rate": 2.4407611297271424e-05,
"loss": 2.1156,
"step": 100800
},
{
"epoch": 5.3,
"learning_rate": 2.4353213803907916e-05,
"loss": 2.1243,
"step": 101000
},
{
"epoch": 5.31,
"learning_rate": 2.4298816310544412e-05,
"loss": 2.1209,
"step": 101200
},
{
"epoch": 5.32,
"learning_rate": 2.4244418817180905e-05,
"loss": 2.1188,
"step": 101400
},
{
"epoch": 5.33,
"learning_rate": 2.41900213238174e-05,
"loss": 2.1206,
"step": 101600
},
{
"epoch": 5.34,
"learning_rate": 2.4135623830453893e-05,
"loss": 2.1195,
"step": 101800
},
{
"epoch": 5.35,
"learning_rate": 2.408122633709039e-05,
"loss": 2.1218,
"step": 102000
},
{
"epoch": 5.36,
"learning_rate": 2.402682884372688e-05,
"loss": 2.1098,
"step": 102200
},
{
"epoch": 5.38,
"learning_rate": 2.3972703337830194e-05,
"loss": 2.1126,
"step": 102400
},
{
"epoch": 5.39,
"learning_rate": 2.3918305844466686e-05,
"loss": 2.1132,
"step": 102600
},
{
"epoch": 5.4,
"learning_rate": 2.3863908351103182e-05,
"loss": 2.1141,
"step": 102800
},
{
"epoch": 5.41,
"learning_rate": 2.3809510857739674e-05,
"loss": 2.1059,
"step": 103000
},
{
"epoch": 5.42,
"learning_rate": 2.375511336437617e-05,
"loss": 2.1057,
"step": 103200
},
{
"epoch": 5.43,
"learning_rate": 2.3700715871012666e-05,
"loss": 2.1116,
"step": 103400
},
{
"epoch": 5.44,
"learning_rate": 2.364631837764916e-05,
"loss": 2.1138,
"step": 103600
},
{
"epoch": 5.45,
"learning_rate": 2.3591920884285654e-05,
"loss": 2.1129,
"step": 103800
},
{
"epoch": 5.46,
"learning_rate": 2.3537523390922147e-05,
"loss": 2.1108,
"step": 104000
},
{
"epoch": 5.47,
"learning_rate": 2.3483125897558643e-05,
"loss": 2.1146,
"step": 104200
},
{
"epoch": 5.48,
"learning_rate": 2.3429000391661955e-05,
"loss": 2.1102,
"step": 104400
},
{
"epoch": 5.49,
"learning_rate": 2.3374602898298448e-05,
"loss": 2.1138,
"step": 104600
},
{
"epoch": 5.5,
"learning_rate": 2.332020540493494e-05,
"loss": 2.1043,
"step": 104800
},
{
"epoch": 5.51,
"learning_rate": 2.3265807911571436e-05,
"loss": 2.1063,
"step": 105000
},
{
"epoch": 5.52,
"learning_rate": 2.321141041820793e-05,
"loss": 2.1108,
"step": 105200
},
{
"epoch": 5.53,
"learning_rate": 2.3157012924844424e-05,
"loss": 2.1106,
"step": 105400
},
{
"epoch": 5.54,
"learning_rate": 2.3102615431480917e-05,
"loss": 2.1117,
"step": 105600
},
{
"epoch": 5.55,
"learning_rate": 2.3048217938117413e-05,
"loss": 2.0953,
"step": 105800
},
{
"epoch": 5.56,
"learning_rate": 2.299382044475391e-05,
"loss": 2.1012,
"step": 106000
},
{
"epoch": 5.57,
"learning_rate": 2.29394229513904e-05,
"loss": 2.1044,
"step": 106200
},
{
"epoch": 5.59,
"learning_rate": 2.2885297445493714e-05,
"loss": 2.1023,
"step": 106400
},
{
"epoch": 5.6,
"learning_rate": 2.283089995213021e-05,
"loss": 2.1068,
"step": 106600
},
{
"epoch": 5.61,
"learning_rate": 2.2776502458766702e-05,
"loss": 2.1076,
"step": 106800
},
{
"epoch": 5.62,
"learning_rate": 2.2722104965403194e-05,
"loss": 2.1024,
"step": 107000
},
{
"epoch": 5.63,
"learning_rate": 2.266770747203969e-05,
"loss": 2.1007,
"step": 107200
},
{
"epoch": 5.64,
"learning_rate": 2.2613309978676183e-05,
"loss": 2.0993,
"step": 107400
},
{
"epoch": 5.65,
"learning_rate": 2.255891248531268e-05,
"loss": 2.0964,
"step": 107600
},
{
"epoch": 5.66,
"learning_rate": 2.250451499194917e-05,
"loss": 2.0933,
"step": 107800
},
{
"epoch": 5.67,
"learning_rate": 2.2450117498585667e-05,
"loss": 2.0945,
"step": 108000
},
{
"epoch": 5.68,
"learning_rate": 2.239572000522216e-05,
"loss": 2.0974,
"step": 108200
},
{
"epoch": 5.69,
"learning_rate": 2.2341322511858655e-05,
"loss": 2.0992,
"step": 108400
},
{
"epoch": 5.7,
"learning_rate": 2.228692501849515e-05,
"loss": 2.0913,
"step": 108600
},
{
"epoch": 5.71,
"learning_rate": 2.223279951259846e-05,
"loss": 2.096,
"step": 108800
},
{
"epoch": 5.72,
"learning_rate": 2.2178402019234952e-05,
"loss": 2.094,
"step": 109000
},
{
"epoch": 5.73,
"learning_rate": 2.2124004525871452e-05,
"loss": 2.0944,
"step": 109200
},
{
"epoch": 5.74,
"learning_rate": 2.2069607032507944e-05,
"loss": 2.0895,
"step": 109400
},
{
"epoch": 5.75,
"learning_rate": 2.2015209539144437e-05,
"loss": 2.0886,
"step": 109600
},
{
"epoch": 5.76,
"learning_rate": 2.1960812045780932e-05,
"loss": 2.095,
"step": 109800
},
{
"epoch": 5.77,
"learning_rate": 2.1906414552417425e-05,
"loss": 2.0854,
"step": 110000
},
{
"epoch": 5.78,
"learning_rate": 2.185201705905392e-05,
"loss": 2.0868,
"step": 110200
},
{
"epoch": 5.8,
"learning_rate": 2.1797619565690413e-05,
"loss": 2.0803,
"step": 110400
},
{
"epoch": 5.81,
"learning_rate": 2.174322207232691e-05,
"loss": 2.0918,
"step": 110600
},
{
"epoch": 5.82,
"learning_rate": 2.16888245789634e-05,
"loss": 2.0931,
"step": 110800
},
{
"epoch": 5.83,
"learning_rate": 2.1634427085599894e-05,
"loss": 2.0881,
"step": 111000
},
{
"epoch": 5.84,
"learning_rate": 2.1580301579703207e-05,
"loss": 2.0856,
"step": 111200
},
{
"epoch": 5.85,
"learning_rate": 2.152617607380652e-05,
"loss": 2.0818,
"step": 111400
},
{
"epoch": 5.86,
"learning_rate": 2.1471778580443015e-05,
"loss": 2.0788,
"step": 111600
},
{
"epoch": 5.87,
"learning_rate": 2.1417381087079507e-05,
"loss": 2.0836,
"step": 111800
},
{
"epoch": 5.88,
"learning_rate": 2.1362983593716003e-05,
"loss": 2.0898,
"step": 112000
},
{
"epoch": 5.89,
"learning_rate": 2.1308586100352496e-05,
"loss": 2.0803,
"step": 112200
},
{
"epoch": 5.9,
"learning_rate": 2.125418860698899e-05,
"loss": 2.081,
"step": 112400
},
{
"epoch": 5.91,
"learning_rate": 2.1199791113625487e-05,
"loss": 2.0859,
"step": 112600
},
{
"epoch": 5.92,
"learning_rate": 2.114539362026198e-05,
"loss": 2.0826,
"step": 112800
},
{
"epoch": 5.93,
"learning_rate": 2.1090996126898476e-05,
"loss": 2.0883,
"step": 113000
},
{
"epoch": 5.94,
"learning_rate": 2.1036598633534968e-05,
"loss": 2.0802,
"step": 113200
},
{
"epoch": 5.95,
"learning_rate": 2.098220114017146e-05,
"loss": 2.0868,
"step": 113400
},
{
"epoch": 5.96,
"learning_rate": 2.0927803646807956e-05,
"loss": 2.0827,
"step": 113600
},
{
"epoch": 5.97,
"learning_rate": 2.087340615344445e-05,
"loss": 2.0842,
"step": 113800
},
{
"epoch": 5.98,
"learning_rate": 2.0819008660080945e-05,
"loss": 2.0783,
"step": 114000
},
{
"epoch": 5.99,
"learning_rate": 2.0764611166717437e-05,
"loss": 2.0809,
"step": 114200
},
{
"epoch": 6.01,
"learning_rate": 2.0710213673353933e-05,
"loss": 2.0844,
"step": 114400
},
{
"epoch": 6.02,
"learning_rate": 2.065581617999043e-05,
"loss": 2.0746,
"step": 114600
},
{
"epoch": 6.03,
"learning_rate": 2.060141868662692e-05,
"loss": 2.0785,
"step": 114800
},
{
"epoch": 6.04,
"learning_rate": 2.0547021193263417e-05,
"loss": 2.0767,
"step": 115000
},
{
"epoch": 6.05,
"learning_rate": 2.049262369989991e-05,
"loss": 2.0763,
"step": 115200
},
{
"epoch": 6.06,
"learning_rate": 2.0438498194003222e-05,
"loss": 2.0837,
"step": 115400
},
{
"epoch": 6.07,
"learning_rate": 2.038437268810653e-05,
"loss": 2.0736,
"step": 115600
},
{
"epoch": 6.08,
"learning_rate": 2.0329975194743027e-05,
"loss": 2.0787,
"step": 115800
},
{
"epoch": 6.09,
"learning_rate": 2.0275577701379523e-05,
"loss": 2.084,
"step": 116000
},
{
"epoch": 6.1,
"learning_rate": 2.0221180208016016e-05,
"loss": 2.0804,
"step": 116200
},
{
"epoch": 6.11,
"learning_rate": 2.016678271465251e-05,
"loss": 2.0657,
"step": 116400
},
{
"epoch": 6.12,
"learning_rate": 2.0112385221289004e-05,
"loss": 2.0731,
"step": 116600
},
{
"epoch": 6.13,
"learning_rate": 2.0057987727925496e-05,
"loss": 2.0769,
"step": 116800
},
{
"epoch": 6.14,
"learning_rate": 2.0003590234561992e-05,
"loss": 2.0734,
"step": 117000
},
{
"epoch": 6.15,
"learning_rate": 1.9949192741198485e-05,
"loss": 2.0751,
"step": 117200
},
{
"epoch": 6.16,
"learning_rate": 1.989479524783498e-05,
"loss": 2.071,
"step": 117400
},
{
"epoch": 6.17,
"learning_rate": 1.9840397754471473e-05,
"loss": 2.075,
"step": 117600
},
{
"epoch": 6.18,
"learning_rate": 1.978600026110797e-05,
"loss": 2.0692,
"step": 117800
},
{
"epoch": 6.19,
"learning_rate": 1.9731602767744465e-05,
"loss": 2.0671,
"step": 118000
},
{
"epoch": 6.2,
"learning_rate": 1.9677477261847774e-05,
"loss": 2.0654,
"step": 118200
},
{
"epoch": 6.22,
"learning_rate": 1.962307976848427e-05,
"loss": 2.0612,
"step": 118400
},
{
"epoch": 6.23,
"learning_rate": 1.9568682275120765e-05,
"loss": 2.0703,
"step": 118600
},
{
"epoch": 6.24,
"learning_rate": 1.9514284781757258e-05,
"loss": 2.0644,
"step": 118800
},
{
"epoch": 6.25,
"learning_rate": 1.9459887288393754e-05,
"loss": 2.0662,
"step": 119000
},
{
"epoch": 6.26,
"learning_rate": 1.9405489795030246e-05,
"loss": 2.0652,
"step": 119200
},
{
"epoch": 6.27,
"learning_rate": 1.935109230166674e-05,
"loss": 2.0661,
"step": 119400
},
{
"epoch": 6.28,
"learning_rate": 1.9296694808303234e-05,
"loss": 2.0674,
"step": 119600
},
{
"epoch": 6.29,
"learning_rate": 1.9242297314939727e-05,
"loss": 2.0652,
"step": 119800
},
{
"epoch": 6.3,
"learning_rate": 1.9187899821576223e-05,
"loss": 2.0598,
"step": 120000
},
{
"epoch": 6.31,
"learning_rate": 1.9133502328212715e-05,
"loss": 2.0655,
"step": 120200
},
{
"epoch": 6.32,
"learning_rate": 1.907910483484921e-05,
"loss": 2.0605,
"step": 120400
},
{
"epoch": 6.33,
"learning_rate": 1.9024707341485707e-05,
"loss": 2.0619,
"step": 120600
},
{
"epoch": 6.34,
"learning_rate": 1.89703098481222e-05,
"loss": 2.0631,
"step": 120800
},
{
"epoch": 6.35,
"learning_rate": 1.8915912354758695e-05,
"loss": 2.0678,
"step": 121000
},
{
"epoch": 6.36,
"learning_rate": 1.8861514861395188e-05,
"loss": 2.0584,
"step": 121200
},
{
"epoch": 6.37,
"learning_rate": 1.8807117368031683e-05,
"loss": 2.0461,
"step": 121400
},
{
"epoch": 6.38,
"learning_rate": 1.8752719874668176e-05,
"loss": 2.0631,
"step": 121600
},
{
"epoch": 6.39,
"learning_rate": 1.869832238130467e-05,
"loss": 2.0695,
"step": 121800
},
{
"epoch": 6.4,
"learning_rate": 1.8643924887941164e-05,
"loss": 2.0574,
"step": 122000
},
{
"epoch": 6.41,
"learning_rate": 1.8589527394577657e-05,
"loss": 2.0504,
"step": 122200
},
{
"epoch": 6.43,
"learning_rate": 1.853540188868097e-05,
"loss": 2.0567,
"step": 122400
},
{
"epoch": 6.44,
"learning_rate": 1.8481004395317465e-05,
"loss": 2.0592,
"step": 122600
},
{
"epoch": 6.45,
"learning_rate": 1.8426606901953958e-05,
"loss": 2.0591,
"step": 122800
},
{
"epoch": 6.46,
"learning_rate": 1.837220940859045e-05,
"loss": 2.0552,
"step": 123000
},
{
"epoch": 6.47,
"learning_rate": 1.831781191522695e-05,
"loss": 2.0593,
"step": 123200
},
{
"epoch": 6.48,
"learning_rate": 1.8263414421863442e-05,
"loss": 2.0537,
"step": 123400
},
{
"epoch": 6.49,
"learning_rate": 1.8209016928499938e-05,
"loss": 2.0523,
"step": 123600
},
{
"epoch": 6.5,
"learning_rate": 1.815461943513643e-05,
"loss": 2.0497,
"step": 123800
},
{
"epoch": 6.51,
"learning_rate": 1.8100221941772922e-05,
"loss": 2.0571,
"step": 124000
},
{
"epoch": 6.52,
"learning_rate": 1.8045824448409418e-05,
"loss": 2.0573,
"step": 124200
},
{
"epoch": 6.53,
"learning_rate": 1.799142695504591e-05,
"loss": 2.0526,
"step": 124400
},
{
"epoch": 6.54,
"learning_rate": 1.7937029461682407e-05,
"loss": 2.0604,
"step": 124600
},
{
"epoch": 6.55,
"learning_rate": 1.78826319683189e-05,
"loss": 2.0436,
"step": 124800
},
{
"epoch": 6.56,
"learning_rate": 1.7828234474955395e-05,
"loss": 2.0445,
"step": 125000
},
{
"epoch": 6.57,
"learning_rate": 1.7774108969058707e-05,
"loss": 2.0484,
"step": 125200
},
{
"epoch": 6.58,
"learning_rate": 1.77197114756952e-05,
"loss": 2.0561,
"step": 125400
},
{
"epoch": 6.59,
"learning_rate": 1.7665585969798512e-05,
"loss": 2.0471,
"step": 125600
},
{
"epoch": 6.6,
"learning_rate": 1.7611188476435005e-05,
"loss": 2.064,
"step": 125800
},
{
"epoch": 6.61,
"learning_rate": 1.75567909830715e-05,
"loss": 2.0574,
"step": 126000
},
{
"epoch": 6.62,
"learning_rate": 1.7502393489707993e-05,
"loss": 2.0487,
"step": 126200
},
{
"epoch": 6.64,
"learning_rate": 1.744799599634449e-05,
"loss": 2.0502,
"step": 126400
},
{
"epoch": 6.65,
"learning_rate": 1.7393598502980985e-05,
"loss": 2.0401,
"step": 126600
},
{
"epoch": 6.66,
"learning_rate": 1.7339201009617477e-05,
"loss": 2.0351,
"step": 126800
},
{
"epoch": 6.67,
"learning_rate": 1.7284803516253973e-05,
"loss": 2.0526,
"step": 127000
},
{
"epoch": 6.68,
"learning_rate": 1.7230406022890466e-05,
"loss": 2.0423,
"step": 127200
},
{
"epoch": 6.69,
"learning_rate": 1.717600852952696e-05,
"loss": 2.0438,
"step": 127400
},
{
"epoch": 6.7,
"learning_rate": 1.7121611036163454e-05,
"loss": 2.0423,
"step": 127600
},
{
"epoch": 6.71,
"learning_rate": 1.7067213542799946e-05,
"loss": 2.0511,
"step": 127800
},
{
"epoch": 6.72,
"learning_rate": 1.7012816049436442e-05,
"loss": 2.0478,
"step": 128000
},
{
"epoch": 6.73,
"learning_rate": 1.6958418556072935e-05,
"loss": 2.0422,
"step": 128200
},
{
"epoch": 6.74,
"learning_rate": 1.6904021062709434e-05,
"loss": 2.0438,
"step": 128400
},
{
"epoch": 6.75,
"learning_rate": 1.6849623569345926e-05,
"loss": 2.0421,
"step": 128600
},
{
"epoch": 6.76,
"learning_rate": 1.679522607598242e-05,
"loss": 2.0399,
"step": 128800
},
{
"epoch": 6.77,
"learning_rate": 1.6740828582618915e-05,
"loss": 2.0455,
"step": 129000
},
{
"epoch": 6.78,
"learning_rate": 1.6686431089255407e-05,
"loss": 2.0355,
"step": 129200
},
{
"epoch": 6.79,
"learning_rate": 1.6632033595891903e-05,
"loss": 2.04,
"step": 129400
},
{
"epoch": 6.8,
"learning_rate": 1.6577636102528395e-05,
"loss": 2.0416,
"step": 129600
},
{
"epoch": 6.81,
"learning_rate": 1.652323860916489e-05,
"loss": 2.0373,
"step": 129800
},
{
"epoch": 6.82,
"learning_rate": 1.6468841115801384e-05,
"loss": 2.0423,
"step": 130000
},
{
"epoch": 6.83,
"learning_rate": 1.6414443622437876e-05,
"loss": 2.0354,
"step": 130200
},
{
"epoch": 6.85,
"learning_rate": 1.6360046129074372e-05,
"loss": 2.0343,
"step": 130400
},
{
"epoch": 6.86,
"learning_rate": 1.6305920623177685e-05,
"loss": 2.0327,
"step": 130600
},
{
"epoch": 6.87,
"learning_rate": 1.6251523129814177e-05,
"loss": 2.0362,
"step": 130800
},
{
"epoch": 6.88,
"learning_rate": 1.6197125636450673e-05,
"loss": 2.036,
"step": 131000
},
{
"epoch": 6.89,
"learning_rate": 1.614272814308717e-05,
"loss": 2.0384,
"step": 131200
},
{
"epoch": 6.9,
"learning_rate": 1.608833064972366e-05,
"loss": 2.0379,
"step": 131400
},
{
"epoch": 6.91,
"learning_rate": 1.6033933156360157e-05,
"loss": 2.043,
"step": 131600
},
{
"epoch": 6.92,
"learning_rate": 1.597953566299665e-05,
"loss": 2.0316,
"step": 131800
},
{
"epoch": 6.93,
"learning_rate": 1.5925138169633145e-05,
"loss": 2.0292,
"step": 132000
},
{
"epoch": 6.94,
"learning_rate": 1.5870740676269638e-05,
"loss": 2.0309,
"step": 132200
},
{
"epoch": 6.95,
"learning_rate": 1.5816343182906134e-05,
"loss": 2.0392,
"step": 132400
},
{
"epoch": 6.96,
"learning_rate": 1.5761945689542626e-05,
"loss": 2.033,
"step": 132600
},
{
"epoch": 6.97,
"learning_rate": 1.570754819617912e-05,
"loss": 2.0329,
"step": 132800
},
{
"epoch": 6.98,
"learning_rate": 1.565342269028243e-05,
"loss": 2.0324,
"step": 133000
},
{
"epoch": 6.99,
"learning_rate": 1.5599025196918927e-05,
"loss": 2.0286,
"step": 133200
},
{
"epoch": 7.0,
"learning_rate": 1.554462770355542e-05,
"loss": 2.0356,
"step": 133400
},
{
"epoch": 7.01,
"learning_rate": 1.5490502197658732e-05,
"loss": 2.0315,
"step": 133600
},
{
"epoch": 7.02,
"learning_rate": 1.5436104704295224e-05,
"loss": 2.0332,
"step": 133800
},
{
"epoch": 7.03,
"learning_rate": 1.538170721093172e-05,
"loss": 2.0236,
"step": 134000
},
{
"epoch": 7.04,
"learning_rate": 1.5327309717568213e-05,
"loss": 2.0283,
"step": 134200
},
{
"epoch": 7.06,
"learning_rate": 1.5272912224204712e-05,
"loss": 2.0349,
"step": 134400
},
{
"epoch": 7.07,
"learning_rate": 1.5218514730841204e-05,
"loss": 2.0285,
"step": 134600
},
{
"epoch": 7.08,
"learning_rate": 1.5164117237477699e-05,
"loss": 2.0336,
"step": 134800
},
{
"epoch": 7.09,
"learning_rate": 1.5109719744114193e-05,
"loss": 2.0387,
"step": 135000
},
{
"epoch": 7.1,
"learning_rate": 1.5055322250750687e-05,
"loss": 2.0346,
"step": 135200
},
{
"epoch": 7.11,
"learning_rate": 1.5000924757387181e-05,
"loss": 2.0271,
"step": 135400
},
{
"epoch": 7.12,
"learning_rate": 1.4946527264023673e-05,
"loss": 2.0259,
"step": 135600
},
{
"epoch": 7.13,
"learning_rate": 1.4892129770660168e-05,
"loss": 2.0291,
"step": 135800
},
{
"epoch": 7.14,
"learning_rate": 1.4837732277296662e-05,
"loss": 2.0281,
"step": 136000
},
{
"epoch": 7.15,
"learning_rate": 1.4783606771399974e-05,
"loss": 2.0334,
"step": 136200
},
{
"epoch": 7.16,
"learning_rate": 1.4729209278036469e-05,
"loss": 2.033,
"step": 136400
},
{
"epoch": 7.17,
"learning_rate": 1.4674811784672963e-05,
"loss": 2.0291,
"step": 136600
},
{
"epoch": 7.18,
"learning_rate": 1.4620414291309457e-05,
"loss": 2.0238,
"step": 136800
},
{
"epoch": 7.19,
"learning_rate": 1.456601679794595e-05,
"loss": 2.021,
"step": 137000
},
{
"epoch": 7.2,
"learning_rate": 1.4511619304582447e-05,
"loss": 2.0198,
"step": 137200
},
{
"epoch": 7.21,
"learning_rate": 1.4457221811218941e-05,
"loss": 2.0177,
"step": 137400
},
{
"epoch": 7.22,
"learning_rate": 1.4402824317855435e-05,
"loss": 2.0238,
"step": 137600
},
{
"epoch": 7.23,
"learning_rate": 1.434842682449193e-05,
"loss": 2.0285,
"step": 137800
},
{
"epoch": 7.24,
"learning_rate": 1.4294029331128422e-05,
"loss": 2.0242,
"step": 138000
},
{
"epoch": 7.25,
"learning_rate": 1.4239631837764916e-05,
"loss": 2.0165,
"step": 138200
},
{
"epoch": 7.27,
"learning_rate": 1.418523434440141e-05,
"loss": 2.0177,
"step": 138400
},
{
"epoch": 7.28,
"learning_rate": 1.4130836851037904e-05,
"loss": 2.0275,
"step": 138600
},
{
"epoch": 7.29,
"learning_rate": 1.4076439357674398e-05,
"loss": 2.0221,
"step": 138800
},
{
"epoch": 7.3,
"learning_rate": 1.4022041864310892e-05,
"loss": 2.0234,
"step": 139000
},
{
"epoch": 7.31,
"learning_rate": 1.3967644370947388e-05,
"loss": 2.0225,
"step": 139200
},
{
"epoch": 7.32,
"learning_rate": 1.3913246877583882e-05,
"loss": 2.0195,
"step": 139400
},
{
"epoch": 7.33,
"learning_rate": 1.3858849384220377e-05,
"loss": 2.0142,
"step": 139600
},
{
"epoch": 7.34,
"learning_rate": 1.380445189085687e-05,
"loss": 2.0224,
"step": 139800
},
{
"epoch": 7.35,
"learning_rate": 1.3750054397493365e-05,
"loss": 2.0261,
"step": 140000
},
{
"epoch": 7.36,
"learning_rate": 1.3695656904129859e-05,
"loss": 2.019,
"step": 140200
},
{
"epoch": 7.37,
"learning_rate": 1.3641259410766351e-05,
"loss": 2.013,
"step": 140400
},
{
"epoch": 7.38,
"learning_rate": 1.3586861917402846e-05,
"loss": 2.0101,
"step": 140600
},
{
"epoch": 7.39,
"learning_rate": 1.353246442403934e-05,
"loss": 2.0264,
"step": 140800
},
{
"epoch": 7.4,
"learning_rate": 1.3478066930675834e-05,
"loss": 2.0021,
"step": 141000
},
{
"epoch": 7.41,
"learning_rate": 1.3423941424779146e-05,
"loss": 2.008,
"step": 141200
},
{
"epoch": 7.42,
"learning_rate": 1.336954393141564e-05,
"loss": 2.0192,
"step": 141400
},
{
"epoch": 7.43,
"learning_rate": 1.3315146438052135e-05,
"loss": 2.0169,
"step": 141600
},
{
"epoch": 7.44,
"learning_rate": 1.326074894468863e-05,
"loss": 2.0168,
"step": 141800
},
{
"epoch": 7.45,
"learning_rate": 1.3206351451325125e-05,
"loss": 2.0171,
"step": 142000
},
{
"epoch": 7.46,
"learning_rate": 1.3151953957961619e-05,
"loss": 2.0233,
"step": 142200
},
{
"epoch": 7.48,
"learning_rate": 1.3097556464598113e-05,
"loss": 2.0167,
"step": 142400
},
{
"epoch": 7.49,
"learning_rate": 1.3043158971234607e-05,
"loss": 2.02,
"step": 142600
},
{
"epoch": 7.5,
"learning_rate": 1.29887614778711e-05,
"loss": 2.0108,
"step": 142800
},
{
"epoch": 7.51,
"learning_rate": 1.2934363984507594e-05,
"loss": 2.0147,
"step": 143000
},
{
"epoch": 7.52,
"learning_rate": 1.2879966491144088e-05,
"loss": 2.0162,
"step": 143200
},
{
"epoch": 7.53,
"learning_rate": 1.28258409852474e-05,
"loss": 2.0203,
"step": 143400
},
{
"epoch": 7.54,
"learning_rate": 1.2771443491883895e-05,
"loss": 2.0186,
"step": 143600
},
{
"epoch": 7.55,
"learning_rate": 1.2717045998520389e-05,
"loss": 2.0121,
"step": 143800
},
{
"epoch": 7.56,
"learning_rate": 1.2662648505156883e-05,
"loss": 2.0089,
"step": 144000
},
{
"epoch": 7.57,
"learning_rate": 1.2608251011793375e-05,
"loss": 2.0045,
"step": 144200
},
{
"epoch": 7.58,
"learning_rate": 1.255385351842987e-05,
"loss": 2.0085,
"step": 144400
},
{
"epoch": 7.59,
"learning_rate": 1.2499456025066365e-05,
"loss": 2.0065,
"step": 144600
},
{
"epoch": 7.6,
"learning_rate": 1.244505853170286e-05,
"loss": 2.019,
"step": 144800
},
{
"epoch": 7.61,
"learning_rate": 1.2390661038339354e-05,
"loss": 2.0117,
"step": 145000
},
{
"epoch": 7.62,
"learning_rate": 1.2336263544975848e-05,
"loss": 2.0161,
"step": 145200
},
{
"epoch": 7.63,
"learning_rate": 1.2281866051612342e-05,
"loss": 2.0129,
"step": 145400
},
{
"epoch": 7.64,
"learning_rate": 1.2227468558248836e-05,
"loss": 2.0018,
"step": 145600
},
{
"epoch": 7.65,
"learning_rate": 1.217307106488533e-05,
"loss": 2.0042,
"step": 145800
},
{
"epoch": 7.66,
"learning_rate": 1.2118673571521826e-05,
"loss": 2.0114,
"step": 146000
},
{
"epoch": 7.67,
"learning_rate": 1.206427607815832e-05,
"loss": 2.0042,
"step": 146200
},
{
"epoch": 7.69,
"learning_rate": 1.2009878584794813e-05,
"loss": 2.0093,
"step": 146400
},
{
"epoch": 7.7,
"learning_rate": 1.1955481091431307e-05,
"loss": 1.9995,
"step": 146600
},
{
"epoch": 7.71,
"learning_rate": 1.1901083598067801e-05,
"loss": 2.0157,
"step": 146800
},
{
"epoch": 7.72,
"learning_rate": 1.1846686104704295e-05,
"loss": 2.0057,
"step": 147000
},
{
"epoch": 7.73,
"learning_rate": 1.1792288611340791e-05,
"loss": 2.0072,
"step": 147200
},
{
"epoch": 7.74,
"learning_rate": 1.1738163105444102e-05,
"loss": 1.9961,
"step": 147400
},
{
"epoch": 7.75,
"learning_rate": 1.1683765612080596e-05,
"loss": 2.012,
"step": 147600
},
{
"epoch": 7.76,
"learning_rate": 1.1629640106183907e-05,
"loss": 2.0102,
"step": 147800
},
{
"epoch": 7.77,
"learning_rate": 1.157551460028722e-05,
"loss": 2.0096,
"step": 148000
},
{
"epoch": 7.78,
"learning_rate": 1.1521117106923714e-05,
"loss": 1.9972,
"step": 148200
},
{
"epoch": 7.79,
"learning_rate": 1.1466719613560208e-05,
"loss": 2.0025,
"step": 148400
},
{
"epoch": 7.8,
"learning_rate": 1.1412322120196702e-05,
"loss": 1.9985,
"step": 148600
},
{
"epoch": 7.81,
"learning_rate": 1.1357924626833196e-05,
"loss": 2.001,
"step": 148800
},
{
"epoch": 7.82,
"learning_rate": 1.130352713346969e-05,
"loss": 2.0019,
"step": 149000
},
{
"epoch": 7.83,
"learning_rate": 1.1249129640106184e-05,
"loss": 1.9946,
"step": 149200
},
{
"epoch": 7.84,
"learning_rate": 1.1194732146742679e-05,
"loss": 1.9935,
"step": 149400
},
{
"epoch": 7.85,
"learning_rate": 1.1140334653379173e-05,
"loss": 2.002,
"step": 149600
},
{
"epoch": 7.86,
"learning_rate": 1.1085937160015667e-05,
"loss": 2.0009,
"step": 149800
},
{
"epoch": 7.87,
"learning_rate": 1.1031539666652161e-05,
"loss": 1.997,
"step": 150000
},
{
"epoch": 7.88,
"learning_rate": 1.0977142173288655e-05,
"loss": 2.0059,
"step": 150200
},
{
"epoch": 7.9,
"learning_rate": 1.092274467992515e-05,
"loss": 2.0007,
"step": 150400
},
{
"epoch": 7.91,
"learning_rate": 1.0868347186561643e-05,
"loss": 2.0027,
"step": 150600
},
{
"epoch": 7.92,
"learning_rate": 1.0813949693198138e-05,
"loss": 1.9926,
"step": 150800
},
{
"epoch": 7.93,
"learning_rate": 1.0759552199834633e-05,
"loss": 2.001,
"step": 151000
},
{
"epoch": 7.94,
"learning_rate": 1.0705154706471126e-05,
"loss": 2.001,
"step": 151200
},
{
"epoch": 7.95,
"learning_rate": 1.0651029200574437e-05,
"loss": 2.001,
"step": 151400
},
{
"epoch": 7.96,
"learning_rate": 1.0596631707210933e-05,
"loss": 1.9974,
"step": 151600
},
{
"epoch": 7.97,
"learning_rate": 1.0542234213847427e-05,
"loss": 2.0024,
"step": 151800
},
{
"epoch": 7.98,
"learning_rate": 1.0487836720483921e-05,
"loss": 2.0063,
"step": 152000
},
{
"epoch": 7.99,
"learning_rate": 1.0433439227120415e-05,
"loss": 1.9962,
"step": 152200
},
{
"epoch": 8.0,
"learning_rate": 1.037904173375691e-05,
"loss": 2.0064,
"step": 152400
},
{
"epoch": 8.01,
"learning_rate": 1.0324644240393403e-05,
"loss": 1.9966,
"step": 152600
},
{
"epoch": 8.02,
"learning_rate": 1.0270246747029898e-05,
"loss": 1.9983,
"step": 152800
},
{
"epoch": 8.03,
"learning_rate": 1.0215849253666392e-05,
"loss": 1.9881,
"step": 153000
},
{
"epoch": 8.04,
"learning_rate": 1.0161451760302886e-05,
"loss": 1.9954,
"step": 153200
},
{
"epoch": 8.05,
"learning_rate": 1.010705426693938e-05,
"loss": 2.0016,
"step": 153400
},
{
"epoch": 8.06,
"learning_rate": 1.0052656773575874e-05,
"loss": 1.9971,
"step": 153600
},
{
"epoch": 8.07,
"learning_rate": 9.998259280212368e-06,
"loss": 1.9996,
"step": 153800
},
{
"epoch": 8.08,
"learning_rate": 9.943861786848862e-06,
"loss": 2.0017,
"step": 154000
},
{
"epoch": 8.09,
"learning_rate": 9.889464293485357e-06,
"loss": 2.0009,
"step": 154200
},
{
"epoch": 8.1,
"learning_rate": 9.83506680012185e-06,
"loss": 1.996,
"step": 154400
},
{
"epoch": 8.12,
"learning_rate": 9.780669306758345e-06,
"loss": 1.99,
"step": 154600
},
{
"epoch": 8.13,
"learning_rate": 9.726271813394839e-06,
"loss": 1.9913,
"step": 154800
},
{
"epoch": 8.14,
"learning_rate": 9.671874320031333e-06,
"loss": 1.9959,
"step": 155000
},
{
"epoch": 8.15,
"learning_rate": 9.617476826667827e-06,
"loss": 1.9928,
"step": 155200
},
{
"epoch": 8.16,
"learning_rate": 9.563079333304321e-06,
"loss": 1.994,
"step": 155400
},
{
"epoch": 8.17,
"learning_rate": 9.508681839940816e-06,
"loss": 1.9984,
"step": 155600
},
{
"epoch": 8.18,
"learning_rate": 9.454284346577311e-06,
"loss": 1.9942,
"step": 155800
},
{
"epoch": 8.19,
"learning_rate": 9.399886853213804e-06,
"loss": 1.9934,
"step": 156000
},
{
"epoch": 8.2,
"learning_rate": 9.345489359850298e-06,
"loss": 1.989,
"step": 156200
},
{
"epoch": 8.21,
"learning_rate": 9.291091866486792e-06,
"loss": 1.9921,
"step": 156400
},
{
"epoch": 8.22,
"learning_rate": 9.236694373123286e-06,
"loss": 1.9912,
"step": 156600
},
{
"epoch": 8.23,
"learning_rate": 9.182296879759782e-06,
"loss": 1.9819,
"step": 156800
},
{
"epoch": 8.24,
"learning_rate": 9.127899386396276e-06,
"loss": 1.9904,
"step": 157000
},
{
"epoch": 8.25,
"learning_rate": 9.073501893032769e-06,
"loss": 1.9876,
"step": 157200
},
{
"epoch": 8.26,
"learning_rate": 9.019376387136081e-06,
"loss": 1.9904,
"step": 157400
},
{
"epoch": 8.27,
"learning_rate": 8.964978893772575e-06,
"loss": 1.9938,
"step": 157600
},
{
"epoch": 8.28,
"learning_rate": 8.910853387875886e-06,
"loss": 1.9868,
"step": 157800
},
{
"epoch": 8.29,
"learning_rate": 8.856455894512382e-06,
"loss": 1.9839,
"step": 158000
},
{
"epoch": 8.3,
"learning_rate": 8.802058401148876e-06,
"loss": 1.9929,
"step": 158200
},
{
"epoch": 8.31,
"learning_rate": 8.74766090778537e-06,
"loss": 1.9819,
"step": 158400
},
{
"epoch": 8.33,
"learning_rate": 8.693263414421863e-06,
"loss": 1.9854,
"step": 158600
},
{
"epoch": 8.34,
"learning_rate": 8.638865921058357e-06,
"loss": 1.9909,
"step": 158800
},
{
"epoch": 8.35,
"learning_rate": 8.584468427694853e-06,
"loss": 1.9958,
"step": 159000
},
{
"epoch": 8.36,
"learning_rate": 8.530070934331347e-06,
"loss": 1.9827,
"step": 159200
},
{
"epoch": 8.37,
"learning_rate": 8.475673440967841e-06,
"loss": 1.9871,
"step": 159400
},
{
"epoch": 8.38,
"learning_rate": 8.421275947604335e-06,
"loss": 1.9849,
"step": 159600
},
{
"epoch": 8.39,
"learning_rate": 8.366878454240828e-06,
"loss": 1.9951,
"step": 159800
},
{
"epoch": 8.4,
"learning_rate": 8.312480960877324e-06,
"loss": 1.9809,
"step": 160000
}
],
"logging_steps": 200,
"max_steps": 190500,
"num_train_epochs": 10,
"save_steps": 40000,
"total_flos": 8.08352616504361e+18,
"trial_name": null,
"trial_params": null
}